GSVA-plot for carotis nanoString data

gene_x 0 like s 872 view s

Tags: plot, R, RNA-seq

Carotis_NanoString

Carotis_NanoString_grid_1

Carotis_NanoString_grid_2

  1. preparing exprs for gsva(exprs, selected_geneSets, method="gsva")

    1. #Input: Images/*png and (hard-coded) /mnt/Samsung_T5/Data_Susanne_spatialRNA/M666_UKE_Hamburg_box/ dccs/*.dcc, pkcs3/all.zip, and annotation/M666_All_Data_SP.xlsx
    2. library("rmarkdown")
    3. library("tidyverse")
    4. library(rmarkdown)
    5. library("GeomxTools")
    6. library("GeoMxWorkflows")
    7. library("NanoStringNCTools")
    8. setwd("/home/jhuang/DATA/Data_Susanne_Carotis_spatialRNA_PUBLISHING/run_2023_2_GSVA")
    9. render("run.Rmd", c("html_document"))
    10. #identical(exprs(target_m666Data), assay(target_m666Data, "exprs")): contains the gene expression values that have been both normalized and log-transformed, which are often used for downstream analysis and visualization in many genomic studies.
    11. #assay(target_m666Data, "log_q"): These are likely log2-transformed values of the quantile-normalized data. Quantile normalization ensures that the distributions of gene expression values across samples are the same, and taking the log2 of these normalized values is a common step in the analysis of high-throughput gene expression data. It's a way to make the data more suitable for downstream statistical analysis and visualization.
    12. #assay(target_m666Data, "q_norm"): This typically represents the quantile-normalized gene expression values. Quantile normalization adjusts the expression values in each sample so that they have the same distribution. This normalization method helps remove systematic biases and makes the data more comparable across samples.
    13. # For the following calculation, the GSVA input requires a gene expression matrix 'exprs' where rows are genes and columns are samples. This matrix must be in non-log space.
    14. exprs <- exprs(target_m666Data)
    15. #> dim(exprs)
    16. #[1] 18677 45
    17. #exprs <- exprs(filtered_or_neg_target_m666Data)
  2. preparing selected_geneSets in gsva(exprs, selected_geneSets, method="gsva")

    1. #Input: Signatures.xls
    2. library(readxl)
    3. library(gridExtra)
    4. library(ggplot2)
    5. library(GSVA)
    6. # Paths to the Excel files
    7. file_paths <- list("Signatures.xls", "Signatures_additional.xls")
    8. # Get sheet names for each file
    9. sheet_names_list <- lapply(file_paths, excel_sheets)
    10. # Initialize an empty list to hold gene sets
    11. geneSets <- list()
    12. # Loop over each file path and its corresponding sheet names
    13. for (i in 1:length(file_paths)) {
    14. file_path <- file_paths[[i]]
    15. sheet_names <- sheet_names_list[[i]]
    16. # Loop over each sheet, extract the ENSEMBL IDs, and add to the list
    17. for (sheet in sheet_names) {
    18. # Read the sheet
    19. data <- read_excel(file_path, sheet = sheet)
    20. # Process the GeneSet names (replacing spaces with underscores, for example)
    21. gene_set_name <- gsub(" ", "_", unique(data$GeneSet)[1])
    22. # Add ENSEMBL IDs to the list
    23. geneSets[[gene_set_name]] <- unique(as.character(data$geneSymbol))
    24. }
    25. }
    26. # Print the result to check
    27. summary(geneSets)
    28. #desired_geneSets <- c("Monocytes", "Plasma_cells", "T_regs", "Cyt._act._T_cells", "Neutrophils", "Inflammatory_neutrophils", "Suppressive_neutrophils", "LDG", "CD40_activated")
    29. desired_geneSets <- c("IFN", "TNF", "IL-6R_complex", "IL-1_cytokines", "Pro-inflam._IL-1", "Monocyte_secreted", "Apoptosis", "NFkB_complex", "NLRP3_inflammasome")
    30. selected_geneSets <- geneSets[desired_geneSets]
    31. # Print the selected gene sets
    32. print(selected_geneSets)
  3. prepare violin plots for 1_vs_3

    1. # 1. Compute GSVA scores:
    2. gsva_scores <- gsva(exprs, selected_geneSets, method="gsva")
    3. # 2. Convert to data.frame for ggplot:
    4. gsva_df <- as.data.frame(t(gsva_scores))
    5. # 3. Add conditions to gsva_df:
    6. identical(rownames(pData(target_m666Data)), rownames(gsva_df))
    7. gsva_df$Condition <- pData(target_m666Data)$Grp
    8. #identical(rownames(gsva_df_filtered), rownames(pData(target_m666Data)) )
    9. gsva_df$SampleID <- pData(target_m666Data)$SampleID
    10. # 4. Filter the gsva_df to retain only the desired conditions:
    11. #group 1 vs. group 3 in the nanostring data
    12. gsva_df_filtered <- gsva_df[gsva_df$Condition %in% c("1", "3"), ]
    13. # 5. Define a function to plot violin plots:
    14. # Define custom colors
    15. custom_colors <- c("Group1" = "lightblue", "Group1a" = "red", "Group3" = "grey")
    16. #To implement the custom colors, and make the adjustments to abbreviate "Inflammatory" and "Suppressive", as well as increase the font size for the groups on the x-axis, we can modify the plot_violin function as follows:
    17. gsva_df_filtered$Condition <- gsub("1", "Group1", gsva_df_filtered$Condition)
    18. gsva_df_filtered$Condition <- gsub("3", "Group3", gsva_df_filtered$Condition)
    19. gsva_df_filtered$Condition <- factor(gsva_df_filtered$Condition, levels = c("Group1", "Group3"))
    20. plot_violin <- function(data, gene_name) {
    21. # Calculate the t-test p-value for the two conditions
    22. condition1_data <- data[data$Condition == "Group1", gene_name]
    23. condition2_data <- data[data$Condition == "Group3", gene_name]
    24. p_value <- t.test(condition1_data, condition2_data)$p.value
    25. # Convert p-value to annotation
    26. p_annotation <- ifelse(p_value < 0.01, "**", ifelse(p_value < 0.05, "*", ""))
    27. rounded_p_value <- paste0("p = ", round(p_value, 2))
    28. plot_title <- gsub("_", " ", gene_name)
    29. p <- ggplot(data, aes(x=Condition, y=!!sym(gene_name), fill=Condition)) +
    30. geom_violin(linewidth=1.2) +
    31. scale_fill_manual(values = custom_colors) +
    32. labs(title=plot_title, y="GSVA Score") +
    33. ylim(-1, 1) +
    34. theme_light() +
    35. theme(
    36. axis.title.x = element_text(size=12),
    37. axis.title.y = element_text(size=12),
    38. axis.text.x = element_text(size=10),
    39. axis.text.y = element_text(size=10),
    40. plot.title = element_text(size=12, hjust=0.5),
    41. legend.position = "none" # Hide legend since the colors are self-explanatory
    42. )
    43. # Add p-value annotation to the plot
    44. p <- p + annotate("text", x=1.5, y=0.9, label=paste0(p_annotation, " ", rounded_p_value), size=5, hjust=0.5)
    45. return(p)
    46. }
    47. # 6. Generate the list of plots in a predefined order:
    48. #desired_order <- c("Monocytes", "Plasma_cells", "T_regs", "Cyt._act._T_cells", "Neutrophils", "Inflammatory_neutrophils", "Suppressive_neutrophils", "LDG", "CD40_activated")
    49. desired_order <- c("IFN", "TNF", "IL-6R_complex", "IL-1_cytokines", "Pro-inflam._IL-1", "Monocyte_secreted", "Apoptosis", "NFkB_complex", "NLRP3_inflammasome")
    50. genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
    51. genes <- genes[match(desired_order, genes)]
    52. genes <- genes[!is.na(genes)]
    53. second_row_plots <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))
  4. prepare violin plots for 1a_vs_3

    1. # 2. Convert to data.frame for ggplot:
    2. gsva_df <- as.data.frame(t(gsva_scores))
    3. # 3. Add conditions to gsva_df:
    4. identical(rownames(pData(target_m666Data)), rownames(gsva_df))
    5. gsva_df$Condition <- pData(target_m666Data)$Group
    6. #identical(rownames(gsva_df_filtered), rownames(pData(target_m666Data)) )
    7. gsva_df$SampleID <- pData(target_m666Data)$SampleID
    8. # 4. Filter the gsva_df to retain only the desired conditions:
    9. #group 1 vs. group 3 in the nanostring data
    10. gsva_df_filtered <- gsva_df[gsva_df$Condition %in% c("1a", "3"), ]
    11. # 5. Define a function to plot violin plots:
    12. # Update the condition levels in gsva_df_filtered to ensure the desired order on x-axis:
    13. gsva_df_filtered$Condition <- gsub("1a", "Group1a", gsva_df_filtered$Condition)
    14. gsva_df_filtered$Condition <- gsub("3", "Group3", gsva_df_filtered$Condition)
    15. gsva_df_filtered$Condition <- factor(gsva_df_filtered$Condition, levels = c("Group1a", "Group3"))
    16. plot_violin <- function(data, gene_name) {
    17. # Calculate the t-test p-value for the two conditions
    18. condition1_data <- data[data$Condition == "Group1a", gene_name]
    19. condition2_data <- data[data$Condition == "Group3", gene_name]
    20. p_value <- t.test(condition1_data, condition2_data)$p.value
    21. # Convert p-value to annotation
    22. p_annotation <- ifelse(p_value < 0.01, "**", ifelse(p_value < 0.05, "*", ""))
    23. rounded_p_value <- paste0("p = ", round(p_value, 2))
    24. plot_title <- gsub("_", " ", gene_name)
    25. p <- ggplot(data, aes(x=Condition, y=!!sym(gene_name), fill=Condition)) +
    26. geom_violin(linewidth=1.2) +
    27. scale_fill_manual(values = custom_colors) +
    28. labs(title=plot_title, y="GSVA Score") +
    29. ylim(-1, 1) +
    30. theme_light() +
    31. theme(
    32. axis.title.x = element_text(size=12),
    33. axis.title.y = element_text(size=12),
    34. axis.text.x = element_text(size=10),
    35. axis.text.y = element_text(size=10),
    36. plot.title = element_text(size=12, hjust=0.5),
    37. legend.position = "none" # Hide legend since the colors are self-explanatory
    38. )
    39. # Add p-value annotation to the plot
    40. p <- p + annotate("text", x=1.5, y=0.9, label=paste0(p_annotation, " ", rounded_p_value), size=5, hjust=0.5)
    41. return(p)
    42. }
    43. # 6. Generate the list of plots in a predefined order:
    44. genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
    45. genes <- genes[match(desired_order, genes)]
    46. genes <- genes[!is.na(genes)]
    47. first_row_plots <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))
  5. (option 1 for plot 1) merge first_row_plots and second_row_plots to a final plot using cowplot

    1. library(cowplot)
    2. library(ggplot2)
    3. # Start by extracting the 1-8 plots from each list
    4. first_row_plots <- first_row_plots[1:9]
    5. second_row_plots <- second_row_plots[1:9]
    6. # Function to modify the individual plots based on position in the grid
    7. modify_plot <- function(p, row, col) {
    8. #if (col > 1) {
    9. p <- p + theme(axis.title.y = element_blank()) # remove y-axis title if not the first plot
    10. #}
    11. #if (row == 2) {
    12. # p <- p + theme(plot.title = element_blank()) # remove plot title for second row, commented because it has alreay been done below.
    13. #}
    14. p <- p + theme(
    15. axis.title.x = element_blank(), # remove x-axis title for all plots
    16. axis.title.y = element_blank(), # remove x-axis title for all plots
    17. axis.text = element_text(size = 14), # Increase axis text size
    18. axis.title = element_text(size = 16), # Increase axis title size
    19. plot.title = element_text(size = 16) # Increase plot title size
    20. )
    21. return(p)
    22. }
    23. # Abbreviate titles for specific plots
    24. first_row_plots[[6]]$labels$title <- "Inflam. neutrophils"
    25. first_row_plots[[7]]$labels$title <- "Suppr. neutrophils"
    26. second_row_plots[[6]]$labels$title <- "Inflam. neutrophils"
    27. second_row_plots[[7]]$labels$title <- "Suppr. neutrophils"
    28. # Apply the modifications to the plots
    29. for (i in 1:9) {
    30. first_row_plots[[i]] <- modify_plot(first_row_plots[[i]], 1, i)
    31. second_row_plots[[i]] <- modify_plot(second_row_plots[[i]], 2, i)
    32. }
    33. # Increase the font size of x-axis labels for each plot
    34. for (i in 1:9) {
    35. first_row_plots[[i]] <- first_row_plots[[i]] + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 12))
    36. second_row_plots[[i]] <- second_row_plots[[i]] + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 12), plot.title= element_blank())
    37. }
    38. # Now, combine the modified plots into a single list
    39. all_plots <- c(first_row_plots, second_row_plots)
    40. # Generate the 2x9 grid
    41. final_plot <- plot_grid(plotlist = all_plots, ncol = 9)
    42. # Save the plot to a PNG file
    43. png("Carotis_NanoString2.png", width=1400, height=380)
    44. print(final_plot)
    45. dev.off()
  6. (option 2 for plot 2) generating two 3x3 grid plots

    1. # Start by extracting the 1-9 plots from each list
    2. first_row_plots <- first_row_plots[1:9]
    3. second_row_plots <- second_row_plots[1:9]
    4. # Function to modify the individual plots based on position in the grid
    5. modify_plot <- function(p, row, col) {
    6. p <- p + theme(axis.title.y = element_blank()) # remove y-axis title if not the first plot
    7. p <- p + theme(
    8. axis.title.x = element_blank(), # remove x-axis title for all plots
    9. axis.title.y = element_blank(), # remove x-axis title for all plots
    10. axis.text = element_text(size = 14), # Increase axis text size
    11. axis.title = element_text(size = 16), # Increase axis title size
    12. plot.title = element_text(size = 16) # Increase plot title size
    13. )
    14. return(p)
    15. }
    16. # Apply the modifications to the plots
    17. for (i in 1:9) {
    18. first_row_plots[[i]] <- modify_plot(first_row_plots[[i]], 1, i)
    19. second_row_plots[[i]] <- modify_plot(second_row_plots[[i]], 2, i)
    20. }
    21. # Increase the font size of x-axis labels for each plot
    22. for (i in 1:9) {
    23. first_row_plots[[i]] <- first_row_plots[[i]] + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 12))
    24. second_row_plots[[i]] <- second_row_plots[[i]] + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 12))
    25. }
    26. # Pad first_row_plots to have 9 plots
    27. remaining_plots <- 9 - length(first_row_plots)
    28. if (remaining_plots > 0) {
    29. first_row_plots <- c(first_row_plots, rep(list(NULL), remaining_plots))
    30. }
    31. # Pad second_row_plots to have 9 plots
    32. remaining_plots2 <- 9 - length(second_row_plots)
    33. if (remaining_plots2 > 0) {
    34. second_row_plots <- c(second_row_plots, rep(list(NULL), remaining_plots2))
    35. }
    36. # Convert the first_row_plots to a matrix and draw
    37. plots_matrix_1 <- matrix(first_row_plots, ncol=3, byrow=TRUE)
    38. png("Carotis_NanoString_grid_1.png", width=600, height=600)
    39. do.call("grid.arrange", c(plots_matrix_1, list(ncol=3)))
    40. dev.off()
    41. # Convert the second_row_plots to a matrix and draw
    42. plots_matrix_2 <- matrix(second_row_plots, ncol=3, byrow=TRUE)
    43. png("Carotis_NanoString_grid_2.png", width=600, height=600)
    44. do.call("grid.arrange", c(plots_matrix_2, list(ncol=3)))
    45. dev.off()

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum