Compressing Y-Axis Ranges in Volcano Plots (火山图)

gene_x · 0 likes · 365 views

Tags: R, RNA-seq

A volcano plot displays, for every feature tested in an analysis, its statistical significance (usually -log10 of the adjusted p-value) against its fold change. Volcano plots are especially useful in high-throughput studies, such as genomics, to visually represent the relationship between statistical significance and the magnitude of change. However, in some datasets, a few extreme values stretch the scale of the plot, making it difficult to discern data points in the regions of particular interest. By introducing compressed ranges, we can "squeeze" a specific range of values into a shorter stretch of the y-axis, allowing for clearer visualization of the data points outside that range without losing the overall context of the entire dataset. This method retains the informative nature of the volcano plot while emphasizing specific data regions of interest.

火山图是表示从特定数据分析获得的显著性与倍数变化的图形表示。它们在高通量研究中,如基因组学,特别有用,用于直观地表示统计显著性和变化幅度之间的关系。然而,在某些数据集中,极端值可能会拉长图的比例,使得在特定感兴趣的区域难以辨别数据点。通过引入压缩范围,我们可以将特定范围的值“挤压”到较短的y轴比例中,从而更清晰地显示该范围内的数据点,同时不失去整个数据集的整体背景。这种方法保留了火山图的信息性,同时强调了特定的感兴趣的数据区域。

K3R_24h_vs_control

library(ggplot2)
library(ggrepel)

# Directory holding the differential-expression tables; every input and
# output path used below is resolved relative to it.
results_dir <- "~/DATA/Data_Anastasia_RNASeq_PUBLISHING/results/featureCounts/degenes_publishing_compressed_ranges"
setwd(results_dir)

#---------------- K3R_24h ----------------
# Volcano plot of K3R_24h.csv. -log10(padj) values in (35, 60] are squeezed
# into (35, 36] on the y-axis; values above 60 are shifted down accordingly.
geness_res <- read.csv(file = "K3R_24h.csv", sep = "\t", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes are always drawn in green.
predefined_genes <- c("GNPDA1", "POR", "GEM", "SPOCD1", "DUSP10", "DUSP12", "SLC16A6", "HLA-B", "LSS", "SMPD1", "NDRG1", "USP2")
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(35, 60)
compressed_range <- c(35.0, 36.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data, so they cannot drift apart.
y_labels <- c(seq(0, 30, by = 10), 35, 60)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("K3R_24h.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()

#---------------- wt_24h ----------------
# Volcano plot of wt_24h.csv. -log10(padj) values in (100, 175] are squeezed
# into (100, 104] on the y-axis; values above 175 are shifted down accordingly.
geness_res <- read.csv(file = "wt_24h.csv", sep = "\t", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes are always drawn in green (list used for wt_24h.png).
predefined_genes <- c("TRIM63", "ATP6V0D2", "SULT1C2", "UPP1", "SLC16A6", "RRAGD", "HEXA-AS1", "IL6R", "GPNMB", "PRKAG2", "STS", "RASGRP3", "KCNAB2", "PLEKHM1", "SLC39A1", "USP2", "ADGRG1", "SPRING1", "FNIP2", "CTNS", "CTSD", "NDRG1", "BHLHE40", "VPS18", "PLIN2", "ASAH1", "PPP1R3B", "SNX8", "GEM", "AMDHD2", "PER3", "PFKFB2", "LSS", "WBP2", "BRI3", "GRN", "TPRA1", "ATP6V1D", "ATP6V1B2", "ATP6V1C1", "TMC6", "SLC26A11", "GNPDA1", "EPG5", "GNA13", "VAT1", "DUSP3", "ATP6V1H", "KIAA0930", "ATP6V0D1", "HMOX1", "RRAGC", "SLC25A13", "ATP6V0A1", "WDR81", "DUSP10", "CSTB", "FLCN", "PPARGC1A", "TOM1", "MTM1", "NEU1", "ZFYVE26", "ZCCHC8", "ATP6V1F", "CD63", "CTSB", "PEA15", "NRBF2", "CLCN7", "PGAP6", "PSAP", "GLA", "SGSH", "S100A6", "PIP4K2C", "SLC38A7", "GLMP", "GSTO1", "THAP8", "MCOLN1", "POR", "SNX16", "M6PR", "USP32", "VPS8", "LONP1", "GPX1", "MVP", "UGDH", "CC2D1B", "CTSA", "GBA", "RBM19", "LACTB2", "UTP18", "AP5Z1", "NAGLU", "EIF4B", "SOCS5", "HLA-B")
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(100, 175)
compressed_range <- c(100.0, 104.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (175 -> 104, 200 -> 129).
y_labels <- c(seq(0, 100, by = 25), 175, 200)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("wt_24h.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()




#---------------- wt_3+21h ----------------
# Volcano plot of wt_3+21h.csv. -log10(padj) values in (50, 100] are squeezed
# into (50, 52] on the y-axis; values above 100 are shifted down accordingly.
geness_res <- read.csv(file = "wt_3+21h.csv", sep = "\t", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(50, 100)
compressed_range <- c(50.0, 52.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (100 -> 52, 125 -> 77).
y_labels <- c(seq(0, 50, by = 25), 100, 125)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("wt_3+21h.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()



#---------------- K3R_3+21h ----------------
# Volcano plot of K3R_3+21h.csv. -log10(padj) values in (15, 25] are squeezed
# into (15, 16] on the y-axis; values above 25 are shifted down accordingly.
geness_res <- read.csv(file = "K3R_3+21h.csv", sep = "\t", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(15, 25)
compressed_range <- c(15.0, 16.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (25 -> 16, 30 -> 21).
y_labels <- c(seq(0, 15, by = 5), 25, 30)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("K3R_3+21h.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()




#---------------- wt_3+21h_vs_control ----------------
# Volcano plot of WT_3hdox21hchase_vs_control-all.txt. -log10(padj) values in
# (150, 250] are squeezed into (150, 154] on the y-axis; values above 250 are
# shifted down accordingly.
geness_res <- read.csv(file = "WT_3hdox21hchase_vs_control-all.txt", sep = ",", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(150, 250)
compressed_range <- c(150.0, 154.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (250 -> 154).
y_labels <- c(seq(0, 150, by = 50), 250)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("wt_3+21h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()


#---------------- K3R_3+21h_vs_control ----------------
# Volcano plot of K3R_3hdox21hchase_vs_control-all.txt. -log10(padj) values in
# (40, 65] are squeezed into (40, 43] on the y-axis; values above 65 are
# shifted down accordingly.
geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep = ",", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(40, 65)
compressed_range <- c(40.0, 43.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (65 -> 43).
y_labels <- c(seq(0, 40, by = 20), 65)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("K3R_3+21h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()


#---------------- wt_24h_vs_control ----------------
# Volcano plot of WT_24hdox_vs_control-all.txt. -log10(padj) values in
# (130, 180] are squeezed into (130, 134] on the y-axis; values above 180 are
# shifted down accordingly.
geness_res <- read.csv(file = "WT_24hdox_vs_control-all.txt", sep = ",", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(130, 180)
compressed_range <- c(130.0, 134.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (180 -> 134, 200 -> 154).
y_labels <- c(seq(0, 100, by = 50), 130, 180, 200)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("wt_24h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()



#---------------- K3R_24h_vs_control ----------------
# Volcano plot of K3R_24hdox_vs_control-all.txt. -log10(padj) values in
# (60, 180] are squeezed into (60, 64] on the y-axis; values above 180 are
# shifted down accordingly.
geness_res <- read.csv(file = "K3R_24hdox_vs_control-all.txt", sep = ",", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges
original_range <- c(60, 180)
compressed_range <- c(60.0, 64.0)

# Map -log10(padj) values to their position on the compressed y-axis:
# values inside the original range are rescaled linearly into the compressed
# range, values above it are shifted down by the width that was removed, and
# values at or below the start of the range are left untouched (NA stays NA).
compress_y <- function(y, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  ifelse(
    y > original_range[1] & y <= original_range[2],
    (y - original_range[1]) / original_width * compressed_width + compressed_range[1],
    ifelse(y > original_range[2], y - original_width + compressed_width, y)
  )
}

geness_res$adjusted_pvalue <- compress_y(-log10(geness_res$padj), original_range, compressed_range)

# Axis labels are true -log10(padj) values; the tick positions are derived
# from them with the same mapping as the data (180 -> 64).
# seq(0, 60, by = 25) yields 0, 25, 50, so 60 is listed separately.
y_labels <- c(seq(0, 60, by = 25), 60, 180)
y_breaks <- compress_y(y_labels, original_range, compressed_range)

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of the compressed band.
  geom_hline(yintercept = compressed_range, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("K3R_24h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()


#---------------- K3R_3+21h_vs_control (multiple compressed, NOT_FINISHED) ----------------
# Data preparation for a volcano plot with more than one compressed y-range.
geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep = ",", row.names = 1)

# Point colour: gray = not significant (padj > 0.05 or |log2FC| < 0.585),
# red = up-regulated, blue = down-regulated.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes would be drawn in green; none are highlighted in this plot.
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: positive for up-, negative for down-regulated genes.
geness_res$invert_Padj <- -log10(geness_res$padj) * sign(geness_res$log2FoldChange)

# Label candidates: up to 100 genes from each end of the signed ranking.
# head() instead of [1:100] avoids NA entries when the table has < 100 rows.
top_g <- unique(c(
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = TRUE)], 100),
  head(geness_res$external_gene_name[order(geness_res$invert_Padj, decreasing = FALSE)], 100)
))

# Define the original and compressed ranges for multiple regions.
# Element i of compressed_ranges is the target interval for element i of
# original_ranges; both are expressed on the -log10(padj) scale.
original_ranges <- list(c(40, 49), c(51, 57))
compressed_ranges <- list(c(40.0, 41.0), c(51.0, 52.0))

# Map value(s) on the -log10(padj) scale onto an axis with one compressed band.
#
# padj_value:       numeric vector (or scalar) of -log10(padj) values.
# original_range:   c(lower, upper) of the band to compress.
# compressed_range: c(lower, upper) the band is squeezed into.
#
# Values in (lower, upper] of original_range are rescaled linearly into
# compressed_range; values above the band are shifted down by the width that
# was removed; values at or below the band start are returned unchanged.
# The function is vectorised and NA-safe: NA inputs yield NA instead of the
# "missing value where TRUE/FALSE needed" error a scalar `if` would raise.
adjust_value <- function(padj_value, original_range, compressed_range) {
  original_width <- original_range[2] - original_range[1]
  compressed_width <- compressed_range[2] - compressed_range[1]
  adjusted <- ifelse(
    padj_value > original_range[1] & padj_value <= original_range[2],
    (padj_value - original_range[1]) / original_width * compressed_width +
      compressed_range[1],
    ifelse(
      padj_value > original_range[2],
      padj_value - original_width + compressed_width,
      padj_value
    )
  )
  # ifelse() returns logical NA when every test is NA; keep the result numeric.
  as.numeric(adjusted)
}

# Apply every configured compression to a vector of -log10(padj) values.
# Ranges are processed from the highest to the lowest: compressing a band
# shifts everything above it, so working top-down guarantees each band is
# matched against values that have not been shifted yet. (Processing them
# bottom-up would test already-shifted values against the original bounds
# and compress the wrong data.) This relies on every compressed range
# starting at the same value as its original range.
range_order <- order(
  vapply(original_ranges, function(rng) rng[1], numeric(1)),
  decreasing = TRUE
)
compress_all <- function(y) {
  for (i in range_order) {
    y <- vapply(y, adjust_value, numeric(1),
                original_range = original_ranges[[i]],
                compressed_range = compressed_ranges[[i]])
  }
  y
}

geness_res$adjusted_pvalue <- compress_all(-log10(geness_res$padj))

# Axis labels are true -log10(padj) values: regular ticks below the first
# band plus every band boundary. Tick positions and the positions of the
# axis-break marks are obtained by sending those values through the same
# mapping as the data, so they always agree with the plotted points.
y_labels <- sort(unique(c(seq(0, 40, by = 10), unlist(original_ranges))))
y_breaks <- compress_all(y_labels)
break_marks <- compress_all(unlist(original_ranges))

# Create the plot
volcano_plot <- ggplot(
  geness_res,
  aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)
) +
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  scale_color_identity() +
  geom_text_repel(
    data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
    size = 8,
    fontface = "bold",
    point.padding = 0.15,
    color = "black",
    min.segment.length = .1,
    box.padding = .2,
    lwd = 2
  ) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Dashed grey lines and "/" marks flag both ends of every compressed band.
  geom_hline(yintercept = break_marks, linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = break_marks, label = "/", hjust = 0, size = 10) +
  scale_y_continuous(breaks = y_breaks, labels = y_labels)

# Written to its own file so this unfinished variant does not overwrite the
# single-range K3R_3+21h_vs_control.png produced earlier in the script.
# print() explicitly: a ggplot object is only drawn when printed, and
# top-level auto-printing does not happen when this script is source()d.
png("K3R_3+21h_vs_control_multi_compressed.png", width = 1200, height = 1200)
print(volcano_plot)
dev.off()

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum