Draw 3D PCA and calculate background genes

gene_x 0 like s 1073 view s

Tags: plot, python

Download python3 script for generating the plot

Download raw data 1 for generating the plot

Download raw data 2 for generating the plot

PCA_3D_2.png

construct a data structure (merged_df) as above with data and pc

  1. library(ggplot2)
  2. data <- plotPCA(rld, intgroup=c("condition", "donor"), returnData=TRUE)
  3. write.csv(data, file="plotPCA_data.csv")
  4. #calculate all PCs including PC3 with the following codes
  5. library(genefilter)
  6. ntop <- 500
  7. rv <- rowVars(assay(rld))
  8. select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  9. mat <- t( assay(rld)[select, ] )
  10. pc <- prcomp(mat)
  11. pc$x[,1:3]
  12. #df_pc <- data.frame(pc$x[,1:3])
  13. df_pc <- data.frame(pc$x)
  14. identical(rownames(data), rownames(df_pc)) #-->TRUE
  15. ## define the desired order of row names
  16. #desired_order <- rownames(data)
  17. ## sort the data frame by the desired order of row names
  18. #df <- df[match(desired_order, rownames(df_pc)), ]
  19. data$PC1 <- NULL
  20. data$PC2 <- NULL
  21. merged_df <- merge(data, df_pc, by = "row.names")
  22. #merged_df <- merged_df[, -1]
  23. row.names(merged_df) <- merged_df$Row.names
  24. merged_df$Row.names <- NULL # remove the "name" column
  25. merged_df$name <- NULL
  26. merged_df <- merged_df[, c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9","PC10","PC11","PC12","PC13","PC14","PC15","PC16","PC17","PC18","PC19","PC20","PC21","PC22","PC23","PC24","PC25","PC26","PC27","PC28","group","condition","donor")]
  27. #results in 26PCs: merged_df <- merged_df[, c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9","PC10","PC11","PC12","PC13","PC14","PC15","PC16","PC17","PC18","PC19","PC20","PC21","PC22","PC23","PC24","PC25","PC26","group","condition","donor")]
  28. write.csv(merged_df, file="merged_df_28PCs.csv")
  29. #> summary(pc) #--> variances of PC1, PC2 and PC3 are 0.6795 0.08596 0.06599
  30. #Importance of components:
  31. # PC1 PC2 PC3 PC4 PC5 PC6 PC7
  32. #Standard deviation 2.6011 0.92510 0.81059 0.67065 0.51952 0.46429 0.41632
  33. #Proportion of Variance 0.6795 0.08596 0.06599 0.04517 0.02711 0.02165 0.01741
  34. #Cumulative Proportion 0.6795 0.76548 0.83148 0.87665 0.90376 0.92541 0.94282
  35. # PC8 PC9 PC10 PC11 PC12 PC13 PC14
  36. #Standard deviation 0.38738 0.32048 0.27993 0.24977 0.20217 0.17316 0.16293
  37. #Proportion of Variance 0.01507 0.01032 0.00787 0.00627 0.00411 0.00301 0.00267
  38. #Cumulative Proportion 0.95789 0.96821 0.97608 0.98234 0.98645 0.98946 0.99213
  39. # PC15 PC16 PC17 PC18 PC19 PC20 PC21
  40. #Standard deviation 0.15058 0.13083 0.12449 0.08789 0.06933 0.06064 0.04999
  41. #Proportion of Variance 0.00228 0.00172 0.00156 0.00078 0.00048 0.00037 0.00025
  42. #Cumulative Proportion 0.99441 0.99612 0.99768 0.99846 0.99894 0.99931 0.99956
  43. # PC22 PC23 PC24 PC25 PC26
  44. #Standard deviation 0.04143 0.03876 0.03202 0.01054 0.005059
  45. #Proportion of Variance 0.00017 0.00015 0.00010 0.00001 0.000000
  46. #Cumulative Proportion 0.99973 0.99988 0.99999 1.00000 1.000000

draw 3D with merged_df using plot3D

  1. import plotly.graph_objects as go
  2. import pandas as pd
  3. from sklearn.decomposition import PCA
  4. import numpy as np
  5. # Read in data as a pandas dataframe
  6. #df = pd.DataFrame({
  7. # 'PC1': [-13.999925, -12.504291, -12.443057, -13.065235, -17.316215],
  8. # 'PC2': [-1.498823, -3.342411, -6.067055, -8.205809, 3.293993],
  9. # 'PC3': [-3.335085, 15.207755, -14.725450, 15.078469, -6.917358],
  10. # 'condition': ['GFP d3', 'GFP d3', 'GFP d8', 'GFP d8', 'GFP+mCh d9/12'],
  11. # 'donor': ['DI', 'DII', 'DI', 'DII', 'DI']
  12. #})
  13. df = pd.read_csv('merged_df_28PCs.csv', index_col=0, header=0)
  14. df['condition'] = df['condition'].replace("GFP+mCh d9/12", "ctrl LTtr+sT d9/12")
  15. df['condition'] = df['condition'].replace("sT+LTtr d9/12", "LTtr+sT d9/12")
  16. df['condition'] = df['condition'].replace("GFP d3", "ctrl LT/LTtr d3")
  17. df['condition'] = df['condition'].replace("GFP d8", "ctrl LT/LTtr d8")
  18. df['condition'] = df['condition'].replace("mCh d3", "ctrl sT d3")
  19. df['condition'] = df['condition'].replace("mCh d8", "ctrl sT d8")
  20. # Fit PCA model to reduce data dimensions to 3
  21. pca = PCA(n_components=3)
  22. pca.fit(df.iloc[:, :-3])
  23. X_reduced = pca.transform(df.iloc[:, :-3])
  24. # Add reduced data back to dataframe
  25. df['PC1'] = X_reduced[:, 0]
  26. df['PC2'] = X_reduced[:, 1]
  27. df['PC3'] = X_reduced[:, 2]
  28. # Create PCA plot with 3D scatter
  29. fig = go.Figure()
  30. #['circle', 'circle-open', 'square', 'square-open', 'diamond', 'diamond-open', 'cross', 'x']
  31. # if donor == 'DI' else marker=dict(size=2, opacity=0.8, color=condition_color, symbol=donor_symbol)
  32. #decrease diamond size to 6 while keep the circle as size 10 in the following code:
  33. #'rgb(128, 150, 128)'
  34. #I need three families of colors, always from light to deep, the first one should close to grey.
  35. #the first serie for 'ctrl LTtr+sT d9/12', 'LTtr+sT d9/12'
  36. #the second serie for 'ctrl LT/LTtr d3', 'ctrl LT/LTtr d8', 'LT d3', 'LT d8', 'LTtr d3', 'LTtr d8'
  37. #the third serie for 'ctrl sT d3', 'ctrl sT d8', 'sT d3', 'sT d8', 'sT+LT d3'
  38. condition_color_map_untreated = {'untreated':'black'}
  39. donor_symbol_map_untreated = {'DI': 'circle-open', 'DII': 'diamond-open'}
  40. #condition_color_map = {'ctrl LTtr+sT d9/12': 'green', 'GFP d3': 'blue', 'GFP d8': 'red', 'GFP+mCh d9/12': 'green', 'LT d3': 'orange'}
  41. condition_color_map = {
  42. 'ctrl LTtr+sT d9/12': 'black',
  43. 'LTtr+sT d9/12': '#a14a1a',
  44. 'ctrl LT/LTtr d3': '#fdbf6f',
  45. 'ctrl LT/LTtr d8': '#ff7f00',
  46. 'LT d3': '#b2df8a',
  47. 'LT d8': '#33a02c',
  48. 'LTtr d3': '#a6cee3',
  49. 'LTtr d8': '#1f78b4',
  50. 'ctrl sT d3': 'rgb(200, 200, 200)',
  51. 'ctrl sT d8': 'rgb(100, 100, 100)',
  52. 'sT d3': '#fb9a99',
  53. 'sT d8': '#e31a1c',
  54. 'sT+LT d3': 'magenta'
  55. }
  56. donor_symbol_map = {'DI': 'circle', 'DII': 'diamond'}
  57. for donor, donor_symbol in donor_symbol_map_untreated.items():
  58. for condition, condition_color in condition_color_map_untreated.items():
  59. mask = (df['condition'] == condition) & (df['donor'] == donor)
  60. fig.add_trace(go.Scatter3d(x=df.loc[mask, 'PC1'], y=df.loc[mask, 'PC2'], z=df.loc[mask, 'PC3'],
  61. mode='markers',
  62. name=f'{condition}' if donor == 'DI' else None,
  63. legendgroup=f'{condition}',
  64. showlegend=True if donor == 'DI' else False,
  65. marker=dict(size=6 if donor_symbol in ['diamond-open'] else 10, opacity=0.8, color=condition_color, symbol=donor_symbol)))
  66. for donor, donor_symbol in donor_symbol_map.items():
  67. for condition, condition_color in condition_color_map.items():
  68. mask = (df['condition'] == condition) & (df['donor'] == donor)
  69. fig.add_trace(go.Scatter3d(x=df.loc[mask, 'PC1'], y=df.loc[mask, 'PC2'], z=df.loc[mask, 'PC3'],
  70. mode='markers',
  71. name=f'{condition}' if donor == 'DI' else None,
  72. legendgroup=f'{condition}',
  73. showlegend=True if donor == 'DI' else False,
  74. marker=dict(size=6 if donor_symbol in ['diamond'] else 10, opacity=0.8, color=condition_color, symbol=donor_symbol)))
  75. for donor, donor_symbol in donor_symbol_map.items():
  76. fig.add_trace(go.Scatter3d(x=[None], y=[None], z=[None],
  77. mode='markers',
  78. name=donor,
  79. legendgroup=f'{donor}',
  80. showlegend=True,
  81. marker=dict(size=10, opacity=1, color='black', symbol=donor_symbol),
  82. hoverinfo='none'))
  83. # Annotations for the legend blocks
  84. fig.update_layout(
  85. annotations=[
  86. dict(x=1.1, y=1.0, xref='paper', yref='paper', showarrow=False,
  87. text='Condition', font=dict(size=15)),
  88. dict(x=1.1, y=0.6, xref='paper', yref='paper', showarrow=False,
  89. text='Donor', font=dict(size=15))
  90. ],
  91. scene=dict(
  92. aspectmode='cube',
  93. xaxis=dict(gridcolor='black', backgroundcolor='white', zerolinecolor='black', title='PC1: 36% v.'),
  94. yaxis=dict(gridcolor='black', backgroundcolor='white', zerolinecolor='black', title='PC2: 17% v.'),
  95. zaxis=dict(gridcolor='black', backgroundcolor='white', zerolinecolor='black', title='PC3: 15% variance'),
  96. bgcolor='white'
  97. ),
  98. margin=dict(l=5, r=5, b=5, t=0) # Adjust the margins to prevent clipping of axis titles
  99. )
  100. #fig.show()
  101. fig.write_image("fig1.svg")

calculate background genes

  1. # [1] ensembl_gene_id gene_name MKL.1_RNA
  2. # [4] MKL.1_RNA_118 MKL.1_RNA_147 MKL.1_EV.RNA
  3. # [7] MKL.1_EV.RNA_2 MKL.1_EV.RNA_118 MKL.1_EV.RNA_87
  4. #[10] MKL.1_EV.RNA_27 X042_MKL.1_wt_EV X0505_MKL.1_wt_EV
  5. #[13] X042_MKL.1_sT_DMSO X0505_MKL.1_sT_DMSO_EV X042_MKL.1_scr_DMSO_EV
  6. #[16] X0505_MKL.1_scr_DMSO_EV X042_MKL.1_sT_Dox X0505_MKL.1_sT_Dox_EV
  7. #[19] X042_MKL.1_scr_Dox_EV X0505_MKL.1_scr_Dox_EV
  8. "ensembl_gene_id
  9. gene_name
  10. python process.py ../fpkm_values_MKL-1.csv 3 > MKL.1_RNA
  11. python process.py ../fpkm_values_MKL-1.csv 4 > MKL.1_RNA_118
  12. python process.py ../fpkm_values_MKL-1.csv 5 > MKL.1_RNA_147
  13. python process.py ../fpkm_values_MKL-1.csv 6 > MKL.1_EV.RNA
  14. python process.py ../fpkm_values_MKL-1.csv 7 > MKL.1_EV.RNA_2
  15. python process.py ../fpkm_values_MKL-1.csv 8 > MKL.1_EV.RNA_118
  16. python process.py ../fpkm_values_MKL-1.csv 9 > MKL.1_EV.RNA_87
  17. python process.py ../fpkm_values_MKL-1.csv 10 > MKL.1_EV.RNA_27
  18. python process.py ../fpkm_values_MKL-1.csv 11 > X042_MKL.1_wt_EV
  19. python process.py ../fpkm_values_MKL-1.csv 12 > X0505_MKL.1_wt_EV
  20. python process.py ../fpkm_values_MKL-1.csv 13 > X042_MKL.1_sT_DMSO
  21. python process.py ../fpkm_values_MKL-1.csv 14 > X0505_MKL.1_sT_DMSO_EV
  22. python process.py ../fpkm_values_MKL-1.csv 15 > X042_MKL.1_scr_DMSO_EV
  23. python process.py ../fpkm_values_MKL-1.csv 16 > X0505_MKL.1_scr_DMSO_EV
  24. python process.py ../fpkm_values_MKL-1.csv 17 > X042_MKL.1_sT_Dox
  25. python process.py ../fpkm_values_MKL-1.csv 18 > X0505_MKL.1_sT_Dox_EV
  26. python process.py ../fpkm_values_MKL-1.csv 19 > X042_MKL.1_scr_Dox_EV
  27. python process.py ../fpkm_values_MKL-1.csv 20 > X0505_MKL.1_scr_Dox_EV
  28. python process.py ../fpkm_values_MKL-1.csv 21 > median_length
  29. cut -f1 -d',' MKL.1_RNA>_MKL-1_RNA
  30. cut -f1 -d',' MKL.1_RNA_118>_MKL-1_RNA_118
  31. cut -f1 -d',' MKL.1_RNA_147>_MKL-1_RNA_147
  32. cut -f1 -d',' MKL.1_EV.RNA>_MKL-1_EV_RNA
  33. cut -f1 -d',' MKL.1_EV.RNA_2>_MKL-1_EV_RNA_2
  34. cut -f1 -d',' MKL.1_EV.RNA_118>_MKL-1_EV_RNA_118
  35. cut -f1 -d',' MKL.1_EV.RNA_87>_MKL-1_EV_RNA_87
  36. cut -f1 -d',' MKL.1_EV.RNA_27>_MKL-1_EV_RNA_27
  37. cut -f1 -d',' X042_MKL.1_wt_EV>_X042_MKL-1_wt_EV
  38. cut -f1 -d',' X0505_MKL.1_wt_EV>_X0505_MKL-1_wt_EV
  39. cut -f1 -d',' X042_MKL.1_sT_DMSO>_X042_MKL-1_sT_DMSO
  40. cut -f1 -d',' X0505_MKL.1_sT_DMSO_EV>_X0505_MKL-1_sT_DMSO_EV
  41. cut -f1 -d',' X042_MKL.1_scr_DMSO_EV>_X042_MKL-1_scr_DMSO_EV
  42. cut -f1 -d',' X0505_MKL.1_scr_DMSO_EV>_X0505_MKL-1_scr_DMSO_EV
  43. cut -f1 -d',' X042_MKL.1_sT_Dox>_X042_MKL-1_sT_Dox
  44. cut -f1 -d',' X0505_MKL.1_sT_Dox_EV>_X0505_MKL-1_sT_Dox_EV
  45. cut -f1 -d',' X042_MKL.1_scr_Dox_EV>_X042_MKL-1_scr_Dox_EV
  46. cut -f1 -d',' X0505_MKL.1_scr_Dox_EV>_X0505_MKL-1_scr_Dox_EV
  47. ~/Tools/csv2xls-0.4/csv_to_xls.py _MKL-1_RNA _MKL-1_RNA_118 _MKL-1_RNA_147 _MKL-1_EV_RNA _MKL-1_EV_RNA_2 _MKL-1_EV_RNA_118 _MKL-1_EV_RNA_87 _MKL-1_EV_RNA_27 _X042_MKL-1_wt_EV _X0505_MKL-1_wt_EV _X042_MKL-1_sT_DMSO _X0505_MKL-1_sT_DMSO_EV _X042_MKL-1_scr_DMSO_EV _X0505_MKL-1_scr_DMSO_EV _X042_MKL-1_sT_Dox _X0505_MKL-1_sT_Dox_EV _X042_MKL-1_scr_Dox_EV _X0505_MKL-1_scr_Dox_EV -d',' -o background_genes_MKL-1.xls;

process.py

  1. import sys
  2. filename = sys.argv[1]
  3. n = int(sys.argv[2])
  4. with open(filename, 'r') as file:
  5. for line in file:
  6. words = line.split(",")
  7. if len(words) >= n and words[n-1] != '0':
  8. print(line.strip())
  9. # -- in the example of K331A data --
  10. #FORMAT normalized_counts.txt to normalized_counts.csv
  11. #DEBUG1: add gene_symbol in the first line
  12. #DEBUG2: '\n' --> ',\n'
  13. #RESULT: it looks like #gene_symbol,ctrl_d3_DonorI,ctrl_d3_DonorII,ctrl_d8_DonorI,ctrl_d8_DonorII,LT_d3_DonorI,LT_d3_DonorII,LT_d8_DonorI,LT_d8_DonorII,LTtr_d3_DonorI,LTtr_d3_DonorII,LTtr_d8_DonorI,LTtr_d8_DonorII,K331A_DonorI,K331A_DonorII,
  14. #DDX11L1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  15. python process.py normalized_counts.csv 2 > ctrl_d3_DonorI
  16. python process.py normalized_counts.csv 3 > ctrl_d3_DonorII
  17. python process.py normalized_counts.csv 4 > ctrl_d8_DonorI
  18. python process.py normalized_counts.csv 5 > ctrl_d8_DonorII
  19. python process.py normalized_counts.csv 6 > LT_d3_DonorI
  20. python process.py normalized_counts.csv 7 > LT_d3_DonorII
  21. python process.py normalized_counts.csv 8 > LT_d8_DonorI
  22. python process.py normalized_counts.csv 9 > LT_d8_DonorII
  23. python process.py normalized_counts.csv 10 > LTtr_d3_DonorI
  24. python process.py normalized_counts.csv 11 > LTtr_d3_DonorII
  25. python process.py normalized_counts.csv 12 > LTtr_d8_DonorI
  26. python process.py normalized_counts.csv 13 > LTtr_d8_DonorII
  27. python process.py normalized_counts.csv 14 > K331A_DonorI
  28. python process.py normalized_counts.csv 15 > K331A_DonorII
  29. cut -f1 -d',' ctrl_d3_DonorI > _ctrl_d3_DonorI
  30. cut -f1 -d',' ctrl_d3_DonorII > _ctrl_d3_DonorII
  31. cut -f1 -d',' ctrl_d8_DonorI > _ctrl_d8_DonorI
  32. cut -f1 -d',' ctrl_d8_DonorII > _ctrl_d8_DonorII
  33. cut -f1 -d',' LT_d3_DonorI > _LT_d3_DonorI
  34. cut -f1 -d',' LT_d3_DonorII > _LT_d3_DonorII
  35. cut -f1 -d',' LT_d8_DonorI > _LT_d8_DonorI
  36. cut -f1 -d',' LT_d8_DonorII > _LT_d8_DonorII
  37. cut -f1 -d',' LTtr_d3_DonorI > _LTtr_d3_DonorI
  38. cut -f1 -d',' LTtr_d3_DonorII > _LTtr_d3_DonorII
  39. cut -f1 -d',' LTtr_d8_DonorI > _LTtr_d8_DonorI
  40. cut -f1 -d',' LTtr_d8_DonorII > _LTtr_d8_DonorII
  41. cut -f1 -d',' K331A_DonorI > _K331A_DonorI
  42. cut -f1 -d',' K331A_DonorII > _K331A_DonorII
  43. ~/Tools/csv2xls-0.4/csv_to_xls.py _ctrl_d3_DonorI _ctrl_d3_DonorII _ctrl_d8_DonorI _ctrl_d8_DonorII _LT_d3_DonorI _LT_d3_DonorII _LT_d8_DonorI _LT_d8_DonorII _LTtr_d3_DonorI _LTtr_d3_DonorII _LTtr_d8_DonorI _LTtr_d8_DonorII _K331A_DonorI _K331A_DonorII -d',' -o background_genes.xls;

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum