Compressing Y-Axis Ranges in Volcano Plots (火山图)

Tags: R, RNA-seq

Volcano plots are graphical representations of the significance versus fold-change for values obtained from a particular data analysis. They are especially useful in high-throughput studies, such as genomics, to visually represent the relationship between statistical significance and the magnitude of change. However, in some datasets, extreme values might stretch the scale of the plot, making it difficult to discern data points in regions of particular interest. By introducing compressed ranges, we can "squeeze" a specific range of values into a shorter y-axis scale, allowing for clearer visualization of data points within that range, without losing the overall context of the entire dataset. This method retains the informative nature of the volcano plot while emphasizing specific data regions of interest.





#---------------- K3R_24h ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "K3R_24h.csv", sep = "\t", row.names = 1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (overrides the color assigned above)
predefined_genes <- c("GNPDA1", "POR", "GEM", "SPOCD1", "DUSP10", "DUSP12", "SLC16A6", "HLA-B", "LSS", "SMPD1", "NDRG1", "USP2")
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(35, 60)
compressed_range <- c(35.0, 36.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 30, by = 10)
y_breaks_compressed <- c(35, 36)
y_breaks_above <- NULL
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions, and the band's upper edge (36) is labelled 60.
y_labels_below <- y_breaks_below
y_labels_compressed <- c(35, 60)
y_labels_above <- NULL
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("K3R_24h.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- wt_24h ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "wt_24h.csv", sep = "\t", row.names = 1)
#geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep=",", row.names=1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (overrides the color assigned above)
predefined_genes <- c("TRIM63", "ATP6V0D2", "SULT1C2", "UPP1", "SLC16A6", "RRAGD", "HEXA-AS1", "IL6R", "GPNMB", "PRKAG2", "STS", "RASGRP3", "KCNAB2", "PLEKHM1", "SLC39A1", "USP2", "ADGRG1", "SPRING1", "FNIP2", "CTNS", "CTSD", "NDRG1", "BHLHE40", "VPS18", "PLIN2", "ASAH1", "PPP1R3B", "SNX8", "GEM", "AMDHD2", "PER3", "PFKFB2", "LSS", "WBP2", "BRI3", "GRN", "TPRA1", "ATP6V1D", "ATP6V1B2", "ATP6V1C1", "TMC6", "SLC26A11", "GNPDA1", "EPG5", "GNA13", "VAT1", "DUSP3", "ATP6V1H", "KIAA0930", "ATP6V0D1", "HMOX1", "RRAGC", "SLC25A13", "ATP6V0A1", "WDR81", "DUSP10", "CSTB", "FLCN", "PPARGC1A", "TOM1", "MTM1", "NEU1", "ZFYVE26", "ZCCHC8", "ATP6V1F", "CD63", "CTSB", "PEA15", "NRBF2", "CLCN7", "PGAP6", "PSAP", "GLA", "SGSH", "S100A6", "PIP4K2C", "SLC38A7", "GLMP", "GSTO1", "THAP8", "MCOLN1", "POR", "SNX16", "M6PR", "USP32", "VPS8", "LONP1", "GPX1", "MVP", "UGDH", "CC2D1B", "CTSA", "GBA", "RBM19", "LACTB2", "UTP18", "AP5Z1", "NAGLU", "EIF4B", "SOCS5", "HLA-B")  #for wt_24h.png
#predefined_genes <- c()  #for wt_3+21h.png, K3R_3+21h.png, and *_vs_control.png
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(100, 175)
compressed_range <- c(100.0, 104.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 100, by = 25)
y_breaks_compressed <- 104  # 100 (band start) is already in y_breaks_below
#y_breaks_above <- if (max(geness_res$adjusted_pvalue, na.rm = TRUE)+5 < 63) {
#                   seq(60, max(geness_res$adjusted_pvalue, na.rm = TRUE), by=-5)
#                 } else {
#                   seq(63, max(geness_res$adjusted_pvalue, na.rm = TRUE)+5, by=5)
#                 }
y_breaks_above <- 129
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions; 104 is labelled 175 and 129 is labelled 200.
y_labels_below <- y_breaks_below
y_labels_compressed <- 175  # 100 (band start) is already in y_labels_below
#y_labels_above <- if (max(geness_res$adjusted_pvalue, na.rm = TRUE)+5 < 63) {
#                   seq(60, max(geness_res$adjusted_pvalue, na.rm = TRUE), by=-5)
#                 } else {
#                   seq(63, max(geness_res$adjusted_pvalue, na.rm = TRUE)+5, by=5)
#                 }
y_labels_above <- 200
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("wt_24h.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- wt_3+21h ... ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "wt_3+21h.csv", sep = "\t", row.names = 1)
#geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep=",", row.names=1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()  #for wt_3+21h.png, K3R_3+21h.png, and *_vs_control.png
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(50, 100)
compressed_range <- c(50.0, 52.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 50, by = 25)
y_breaks_compressed <- 52  # 50 (band start) is already in y_breaks_below
y_breaks_above <- 77
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions; 52 is labelled 100 and 77 is labelled 125.
y_labels_below <- y_breaks_below
y_labels_compressed <- 100  # 50 (band start) is already in y_labels_below
y_labels_above <- 125
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("wt_3+21h.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- K3R_3+21h ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "K3R_3+21h.csv", sep = "\t", row.names = 1)
#geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep=",", row.names=1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(15, 25)
compressed_range <- c(15.0, 16.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 15, by = 5)
y_breaks_compressed <- 16
y_breaks_above <- 21
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions; 16 is labelled 25 and 21 is labelled 30.
y_labels_below <- y_breaks_below
y_labels_compressed <- 25
y_labels_above <- 30
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("K3R_3+21h.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- wt_3+21h_vs_control ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "WT_3hdox21hchase_vs_control-all.txt", sep = ",", row.names = 1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(150, 250)
compressed_range <- c(150.0, 154.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 150, by = 50)
y_breaks_compressed <- 154
y_breaks_above <- NULL
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions, and the band's upper edge (154) is labelled 250.
y_labels_below <- y_breaks_below
y_labels_compressed <- 250
y_labels_above <- NULL
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("wt_3+21h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- K3R_3+21h_vs_control ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep = ",", row.names = 1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(40, 65)
compressed_range <- c(40.0, 43.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 40, by = 20)
y_breaks_compressed <- 43
y_breaks_above <- NULL
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions, and the band's upper edge (43) is labelled 65.
y_labels_below <- y_breaks_below
y_labels_compressed <- 65
y_labels_above <- NULL
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("K3R_3+21h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- wt_24h_vs_control ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "WT_24hdox_vs_control-all.txt", sep = ",", row.names = 1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(130, 180)
compressed_range <- c(130.0, 134.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates.
y_breaks_below <- seq(from = 0, to = 100, by = 50)
y_breaks_compressed <- c(130, 134)
y_breaks_above <- 154
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions; 134 is labelled 180 and 154 is labelled 200.
y_labels_below <- y_breaks_below
y_labels_compressed <- c(130, 180)
y_labels_above <- 200
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("wt_24h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- K3R_24h_vs_control ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "K3R_24hdox_vs_control-all.txt", sep = ",", row.names = 1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges
# -log10(padj) values inside original_range are squeezed into compressed_range.
original_range <- c(60, 180)
compressed_range <- c(60.0, 64.0)

# Adjust the p-values based on the ranges. Three cases:
#   inside the band -> linearly rescaled into compressed_range
#   above the band  -> shifted down by the amount of axis that was removed
#   otherwise       -> left unchanged (this final branch and the closing
#                      parentheses were missing, so the call did not parse)
geness_res$adjusted_pvalue <- with(geness_res, {
  neg_log_padj <- -log10(padj)
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]
  ifelse(
    neg_log_padj > original_range[1] & neg_log_padj <= original_range[2],
    ((neg_log_padj - original_range[1]) / original_span) * compressed_span + compressed_range[1],
    ifelse(
      neg_log_padj > original_range[2],
      neg_log_padj - original_span + compressed_span,
      neg_log_padj
    )
  )
})

# Calculate breaks for the y-axis
# Tick positions are given in compressed plot coordinates. With a step of 25
# the sequence stops at 50, so the band start (60) is listed explicitly.
y_breaks_below <- seq(from = 0, to = 60, by = 25)
y_breaks_compressed <- c(60, 64)
y_breaks_above <- NULL
y_breaks <- c(y_breaks_below, y_breaks_compressed, y_breaks_above)

# Tick labels show the original -log10(padj) values: below the band they equal
# the tick positions, and the band's upper edge (64) is labelled 180.
y_labels_below <- y_breaks_below
y_labels_compressed <- c(60, 180)
y_labels_above <- NULL
y_labels <- c(y_labels_below, y_labels_compressed, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[1], ymax = compressed_range[1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_range[2], ymax = compressed_range[2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_range[1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_range[2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

png("K3R_24h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

#---------------- K3R_3+21h_vs_control (multiple compressed, NOT_FINISHED) ----------------
# Load the differential-expression table; the first column becomes row names.
geness_res <- read.csv(file = "K3R_3hdox21hchase_vs_control-all.txt", sep = ",", row.names = 1)

# Color setting: gray when padj > 0.05 or |log2FoldChange| < 0.585,
# otherwise red for a positive fold change and blue for the rest.
geness_res$Color <- ifelse(
  geness_res$padj > 0.05 | abs(geness_res$log2FoldChange) < 0.585,
  "gray",
  ifelse(geness_res$log2FoldChange > 0, "red", "blue")
)

# Predefined genes colored in green (empty here, so nothing is recolored)
predefined_genes <- c()
geness_res$Color[geness_res$external_gene_name %in% predefined_genes] <- "green"

# Signed significance: -log10(padj) carrying the sign of the fold change.
geness_res$invert_Padj <- (-log10(geness_res$padj)) * sign(geness_res$log2FoldChange)

# Label candidates: the first 100 genes from each end of the signed ranking.
# head() is used instead of [1:100] so that a table with fewer than 100 rows
# does not pad top_g with NA entries.
top_g <- unique(c(
  head(geness_res[order(geness_res$invert_Padj, decreasing = TRUE), "external_gene_name"], 100),
  head(geness_res[order(geness_res$invert_Padj, decreasing = FALSE), "external_gene_name"], 100)
))

# Define the original and compressed ranges for multiple regions
# The two lists are parallel: original_ranges[[i]] is squeezed into
# compressed_ranges[[i]].
original_ranges <- list(c(40, 49), c(51, 57))
compressed_ranges <- list(c(40.0, 41.0), c(51.0, 52.0))

# Map -log10(padj) values onto a y-axis with one compressed band.
#
# padj_value:       numeric value(s), already on the -log10 scale.
# original_range:   c(lower, upper) of the band in original units.
# compressed_range: c(lower, upper) the band is squeezed into.
#
# Returns a numeric vector the same length as padj_value:
#   inside (lower, upper] -> linearly rescaled into compressed_range
#   above upper           -> shifted down by the amount of axis removed
#   otherwise (incl. NA)  -> returned unchanged
#
# The original definition was cut off before its final `else` branch; it is
# completed here. The body is vectorised so NA inputs no longer make a scalar
# `if` fail, while one-value-at-a-time calls (e.g. via mapply) still work.
adjust_value <- function(padj_value, original_range, compressed_range) {
  original_span <- original_range[2] - original_range[1]
  compressed_span <- compressed_range[2] - compressed_range[1]

  known <- !is.na(padj_value)
  in_band <- known & padj_value > original_range[1] & padj_value <= original_range[2]
  above_band <- known & padj_value > original_range[2]

  adjusted <- padj_value
  adjusted[in_band] <- ((padj_value[in_band] - original_range[1]) / original_span) *
    compressed_span + compressed_range[1]
  adjusted[above_band] <- padj_value[above_band] - original_span + compressed_span
  adjusted
}

# Start from plain -log10(padj), then apply each compressed band in turn.
geness_res$adjusted_pvalue <- -log10(geness_res$padj)
# Rows without a usable value are skipped so the per-value comparison inside
# adjust_value is never handed an NA.
has_value <- !is.na(geness_res$adjusted_pvalue)
# NOTE(review): each pass compares the already-adjusted values against
# original_ranges[[i]]. Once an earlier band has been compressed, values above
# it have shifted, so later bands given in original units may no longer line
# up. Confirm the intended coordinates (this section is marked NOT_FINISHED).
for (i in seq_along(original_ranges)) {
  geness_res$adjusted_pvalue[has_value] <- vapply(
    geness_res$adjusted_pvalue[has_value],
    adjust_value,
    numeric(1),
    original_range = original_ranges[[i]],
    compressed_range = compressed_ranges[[i]]
  )
}

# Calculate breaks and labels for the y-axis
# NOTE(review): these tick positions (bands at 15-16 and 35-36) do not match
# the ranges defined for this section (40-49 and 51-57), and 15 appears twice
# in both vectors. They look like placeholders in this NOT_FINISHED section;
# confirm before use.
y_breaks_below <- seq(from = 0, to = 15, by = 5)
y_breaks_compressed1 <- c(15, 16)
y_breaks_compressed2 <- c(35, 36)
y_breaks_above <- 50
y_breaks <- c(y_breaks_below, y_breaks_compressed1, y_breaks_compressed2, y_breaks_above)

# Labels paired (after sorting) with the tick positions above.
y_labels_below <- y_breaks_below
y_labels_compressed1 <- c(15, 25)
y_labels_compressed2 <- c(35, 45)
y_labels_above <- 50
y_labels <- c(y_labels_below, y_labels_compressed1, y_labels_compressed2, y_labels_above)

# Create the plot
# Requires ggplot2 and ggrepel (geom_text_repel) to be attached. The plot
# object is built first, then printed explicitly into the PNG device so it is
# rendered even when the script is run through source(), and the device is
# closed afterwards so the file is finalized and the device released.
volcano_plot <- ggplot(geness_res, aes(x = log2FoldChange, y = adjusted_pvalue, color = Color, label = external_gene_name)) +
  # Dashed guides at the fold-change (+/-0.585) and padj (0.05) cut-offs
  geom_vline(xintercept = c(0.585, -0.585), lty = "dashed", size = 1.5) +
  geom_hline(yintercept = -log10(0.05), lty = "dashed", size = 1.5) +
  geom_point(size = 3) +
  labs(x = "log2(Fold change)", y = "-log10(P-adj)", color = "Significance") +
  # The Color column already holds literal color names
  scale_color_identity() +
  # Label only top_g genes that also pass both cut-offs
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & padj < 0.05 & (abs(log2FoldChange) >= 0.585)),
                  size = 8,
                  fontface = "bold",
                  point.padding = 0.15,
                  color = "black",
                  min.segment.length = .1,
                  box.padding = .2,
                  lwd = 2) +
  theme_bw(base_size = 28) +
  theme(legend.position = "bottom") +
  # Zero-height rectangles (ymin == ymax) and "/" glyphs mark both edges of
  # the first compressed band ...
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_ranges[[1]][1], ymax = compressed_ranges[[1]][1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_ranges[[1]][2], ymax = compressed_ranges[[1]][2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_ranges[[1]][1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_ranges[[1]][2], label = "/", hjust = 0, size = 10) +
  # ... and of the second compressed band
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_ranges[[2]][1], ymax = compressed_ranges[[2]][1], linetype = "dashed", color = "grey") +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = compressed_ranges[[2]][2], ymax = compressed_ranges[[2]][2], linetype = "dashed", color = "grey") +
  annotate("text", x = -Inf, y = compressed_ranges[[2]][1], label = "/", hjust = 0, size = 10) +
  annotate("text", x = -Inf, y = compressed_ranges[[2]][2], label = "/", hjust = 0, size = 10) +
  # Ticks sit at compressed positions but carry the original values; sorting
  # both vectors pairs them by rank
  scale_y_continuous(breaks = sort(y_breaks), labels = sort(y_labels))

# NOTE(review): this is the same output file name as the single-band
# K3R_3+21h_vs_control section, so running both overwrites that plot. Confirm.
png("K3R_3+21h_vs_control.png", width = 1200, height = 1200)
print(volcano_plot)
invisible(dev.off())

