draw pca plots with ggplot2 (2D) and plotly (3D)

gene_x 0 like s 706 view s

Tags: plot, packages

#TODOs: next week
#- try install ggplot2_3d
#- try install kaleido

TODO: using python to generate the 3D plot! https://pypi.org/project/plotly/

# -- before pca --
#png("pca.png", 1200, 800)
svg("pca.svg")
plotPCA(rld, intgroup=c("replicates"))
#plotPCA(rld, intgroup = c("replicates", "batch"))
#plotPCA(rld, intgroup = c("replicates", "ids"))
#plotPCA(rld, "batch")
dev.off()

#TODO:adding label in the figure, change Donor I in blue and donor II in orange
#https://loading.io/color/feature/Paired-12/
svg("pca2.svg")
#plotPCA(rld, intgroup=c("replicates"))
#plotPCA(rld, intgroup = c("replicates", "batch"))
plotPCA(rld, intgroup = c("donor"))
#plotPCA(rld, "batch")
dev.off()
#TODO: adding label in the figure
svg("pca3.svg")
plotPCA(rld, intgroup=c("replicates2"))
dev.off()



#https://loading.io/color/feature/Paired-12/
#https://support.bioconductor.org/p/66404/
# -- calculate PC3 from rld --
library(genefilter)
ntop <- 500
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
mat <- t( assay(rld)[select, ] )
pc <- prcomp(mat)
pc$x[,1:3]

#To my case:
mat <- t( assay(rld) )
pc <- prcomp(mat)
#pca <- yourFavoritePCA( mat )

pc$x[,1:3]
#                          PC1         PC2        PC3
#untreated DI      -27.5379705   1.4478299  -6.389731
#untreated DII     -28.3320463   0.6794066   2.073768
#mCh d3 DII          2.8988953  -6.4372647  10.252829
#sT d3 DII           5.1869876   2.6116282  13.816117
#mCh d8 DII        -20.8047275   1.0708861   3.394721
#sT d8 DII          -4.5144119  19.6230473   8.357902
#mCh d3 DI          -4.5690693  -8.8938297  -7.391567
#sT d3 DI           -7.6326832   5.3781061   2.214181
#mCh d8 DI           0.8536828  -5.0593045 -13.325567
#sT d8 DI            1.9232111  24.8795741  -4.162946
#GFP d3 DII        -12.5042914  -3.3424106  15.207755
#LTtr d3 DII         5.2309178  -9.6124712   8.328132
#GFP d8 DII        -13.0652347  -8.2058086  15.078469
#LTtr d8 DII        13.0678654  -2.0677676   9.188943
#GFP d3 DI         -13.9999251  -1.4988226  -3.335085
#LTtr d3 DI          2.6090782  -9.5753559 -10.022324
#GFP d8 DI         -12.4430571  -6.0670545 -14.725450
#LTtr d8 DI          8.9794396   3.4918629 -14.410118
#LT d8 DII          18.8388058   0.2459081   2.334700
#LT d8 DI           15.2986278  -0.6055500 -11.034778
#GFP+mCh d9/12 DI  -17.3162152   3.2939931  -6.917358
#sT+LTtr d9/12 DI    6.8517730  17.9282911  -6.209778
#GFP+mCh d9/12 DII   2.0874834  -6.7379107   8.810602
#sT+LTtr d9/12 DII  19.3883422  19.6033774   4.314808
#LT d3 DI            6.5376031  -8.5766236  -6.500155
#LT d3 DII          17.8400725 -11.7362896   1.117396
#sT+LT d3 DI        16.6029944  -7.7951798  -5.593658
#sT+LT d3 DII       18.5238521  -4.0422674   5.528193

# vs.
#data
#                     PC1        PC2         group condition donor          name
#untreated DI  -27.537970  1.4478299  untreated:DI untreated    DI  untreated DI
#untreated DII -28.332046  0.6794066 untreated:DII untreated   DII untreated DII
#mCh d3 DII      2.898895 -6.4372647    mCh d3:DII    mCh d3   DII    mCh d3 DII

# -- construct a data structure (merged_df) as above with data and pc --
library(ggplot2)
data <- plotPCA(rld, intgroup=c("condition", "donor"), returnData=TRUE)
#calculate all PCs including PC3 with the following codes
library(genefilter)
ntop <- 500
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
mat <- t( assay(rld)[select, ] )
pc <- prcomp(mat)
pc$x[,1:3]
df_pc <- data.frame(pc$x[,1:3])

identical(rownames(data), rownames(df_pc)) #-->TRUE
## define the desired order of row names
#desired_order <- rownames(data)
## sort the data frame by the desired order of row names
#df <- df[match(desired_order, rownames(df_pc)), ]

data$PC1 <- NULL
data$PC2 <- NULL
merged_df <- merge(data, df_pc, by = "row.names")
#merged_df <- merged_df[, -1]
row.names(merged_df) <- merged_df$Row.names
merged_df$Row.names <- NULL  # remove the "name" column
merged_df$name <- NULL 
merged_df <- merged_df[, c("PC1","PC2","PC3","group","condition","donor")]

# -- draw 3D with merged_df using plot3D --
#https://stackoverflow.com/questions/45052188/how-to-plot-3d-scatter-diagram-using-ggplot
devtools::install_github("AckerDWM/gg3D")
library("gg3D")
png("pca10.png",800,800)
#svg("pca10.svg",10,10)
#methods(class = "prcomp")
#summary(pc) #--> Proportion of Variance  0.3647 0.1731 0.1515
#percentVar <- round(100 * attr(data, "percentVar"))
percentVar <- c(36,17,15)
#scatterplot3d

#Unfortunately, ggplot does not support 3D plotting. It is designed for creating 2D plots and visualizations in R. However, there are other packages available in R for creating 3D plots, such as plot3D, scatterplot3d, and rgl. These packages can be used to create 3D scatter plots, surface plots, and more complex 3D visualizations. You can install and load these packages in R using the following commands:
install.packages("plot3D")
library(plot3D)
install.packages("scatterplot3d")
library(scatterplot3d)
install.packages("rgl")
library(rgl)
#Once you have loaded these packages, you can create 3D plots using their respective functions. For example, you can create a 3D scatter plot using the plot3D package with the following code:


#https://plotly.com/r/3d-scatter-plots/
library(plotly)
data(mtcars)
png("xxx.png", 1200, 800)
plot_ly(mtcars, x = ~mpg, y = ~wt, z = ~qsec, type = "scatter3d", mode = "markers")
dev.off()


#  zlab(paste0("PC3: ",percentVar[3],"% variance")) + 
#scatterplot3d(merged_df[,c("PC1","PC2","PC3")], pch=16, color="blue", main="3D Scatter Plot")
# labs(x = "PC1", y = "PC2", z = "PC3") +
#axes_3D() + stat_3D() +
colors = "Set1",
#marker = list(symbol = ~shapes[group])
#using the corresponding keywords ("square", "triangle-up", "diamond", etc.).

labs <- list(x = paste0("PC1: ",percentVar[1],"% variance"), y = paste0("PC2: ",percentVar[2],"% variance"), z = paste0("PC3: ",percentVar[3],"% variance"))
#ggplot(merged_df, aes(x=PC1, y=PC2, z=PC3, color=condition, shape=donor)) +

#https://stackoverflow.com/questions/75452609/update-color-in-different-marker-in-plotly-r
dt <- iris
dt$shape_1 <- c("Yes","No")
dt$color_1 <- c("Medium","Large","Small")


library(plotly)
library(kaleido)
fig <- plot_ly(dt,
        x=1:nrow(iris),
        y=~Sepal.Length,
        type="scatter",
        mode='markers',
        color=~Species,
        colors = c("#4477AA","#DDCC77","#CC6677"),
        symbol = ~shape_1,
        symbols = c("triangle-up", "circle"),
        size = 20) %>% add_surface()
# save the chart as an SVG file using Kaleido
kaleido(fig, file = "chart.svg", format = "svg", width = 800, height = 600, 
        scale = 2, output_options = list(bg = "white"))

#        inherit = F,
#        size = ~Sepal.Width, 
#        sizes = c(10, 100) * 10)


plot_ly(dt,
+         x=1:nrow(iris),
+         y=~Sepal.Length,
+         type="scatter",
+         mode='markers',
+         color=~Species,
+         colors = c("#4477AA","#DDCC77","#CC6677"),
+         size = ~Sepal.Width, 
+         symbol = ~shape_1,
+         symbols = c("triangle-up", "circle"),
+         inherit = F,
+         sizes = c(10, 100) * 10))
%>%
+ add_trace(type="scatter",
+           mode = "text",
+           text=~Sepal.Width,
+           textposition = "top right",
+           color = ~color_1,
+           colors = c("black","green","blue"),
+           textfont = list(size = 10)
+ )

factors(merged_df$condition)

"GFP d3"        "GFP d8"        "GFP+mCh d9/12" "LT d3"        
 [5] "LT d8"         "LTtr d3"       "LTtr d8"       "mCh d3"       
 [9] "mCh d8"        "sT d3"         "sT d8"         "sT+LT d3"     
[13] "sT+LTtr d9/12" "untreated"

merged_df$condition <- factor(merged_df$condition, levels=c("untreated","mCh d3","mCh d8","GFP+mCh d9/12","GFP d3","GFP d8","sT d3","sT d8","LT d3","LT d8","LTtr d3","LTtr d8","sT+LT d3","sT+LTtr d9/12"))
merged_df$donor <- as.character(merged_df$donor)
# Define a list of shapes for each group
shapes <- list("circle", "triangle-up")
plot_ly(merged_df, x=~PC1, y=~PC2, z=~PC3, type = "scatter3d", mode = "markers",   color=~condition, colors = c("grey","#a6cee3","#1f78b4","cyan","#b2df8a","#33a02c","#fb9a99","#e31a1c","#fdbf6f","#ff7f00","#cab2d6","#6a3d9a","#ffff99","#a14a1a"), symbol=~donor, symbols = c("triangle-up", "circle"))
  geom_point_3d() +
  scale_x_continuous(name = labs$x) +
  scale_y_continuous(name = labs$y) +
  scale_z_continuous(name = labs$z) +
  geom_point(size=8) + 
  scale_color_manual(values = c("untreated" = "grey",
                                "mCh d3"="#a6cee3",
                                "mCh d8"="#1f78b4",
                                "GFP+mCh d9/12"="cyan",
                                "GFP d3"="#b2df8a",
                                "GFP d8"="#33a02c",
                                "sT d3"="#fb9a99",
                                "sT d8"="#e31a1c",
                                "LT d3"="#fdbf6f",
                                "LT d8"="#ff7f00",
                                "LTtr d3"="#cab2d6",
                                "LTtr d8"="#6a3d9a",
                                "sT+LT d3"="#ffff99",                        
                                "sT+LTtr d9/12"="#a14a1a")) +
                                theme(axis.text = element_text(face="bold",size = 21), axis.title = element_text(face="bold",size = 21)) + theme(legend.text = element_text(size = 20)) + theme(legend.title = element_text(size = 22)) + guides(color = guide_legend(override.aes = list(size = 10)), shape = guide_legend(override.aes = list(size = 10)), alpha = guide_legend(override.aes = list(size = 10)))

#axis.title = element_text(face="bold",size = 20)
#p + theme(axis.text.x = element_text(face="bold",size=14), axis.text.y = element_text(face="bold",size=14))
#+ coord_fixed()
#+ theme(
#    # Set the width to 6 inches
#    fig.width = 6,
#    # Set the height to 4 inches
#    fig.height = 4
#  )

svg("pca4.svg")
plotPCA(rld, intgroup=c("days"))
dev.off()

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum