huang 0 like s 493 view s
Tags: R
due to very low alignment (less than 5%):
# - 0505_MKL-1_wt_EV,EV.RNA
#### -------
setwd("/home/jhuang/DATA/Data_Ute/Data_RNA-Seq_MKL-1_WaGa/results/featureCounts")
library("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
#library("org.Mm.eg.db")
library(DESeq2)
library(gplots)
[1] gene_name
[2] X042_MKL.1_wt_EV
[3] MKL.1_RNA_147
[4] X042_MKL.1_sT_DMSO #EIGENTLICH _EV
[5] MKL.1_RNA
[6] X0505_MKL.1_scr_DMSO_EV
[7] X0505_MKL.1_sT_DMSO_EV
[8] MKL.1_EV.RNA_87
[9] MKL.1_EV.RNA_27
[10] X042_MKL.1_scr_Dox_EV
[11] X042_MKL.1_scr_DMSO_EV
[12] MKL.1_EV.RNA_118
[13] X0505_MKL.1_scr_Dox_EV
[14] MKL.1_EV.RNA
[15] X042_MKL.1_sT_Dox #EIGENTLICH _EV
[16] X0505_MKL.1_sT_Dox_EV
[17] MKL.1_RNA_118
[18] MKL.1_EV.RNA_2
[1] gene_name
[2] X042_MKL.1_wt_EV
[3] MKL.1_RNA_147
[4] X042_MKL.1_sT_DMSO #EIGENTLICH _EV
[5] MKL.1_RNA
[6] X0505_MKL.1_scr_DMSO_EV
[7] X0505_MKL.1_sT_DMSO_EV
[8] MKL.1_EV.RNA_87
[9] MKL.1_EV.RNA_27
[10] X042_MKL.1_scr_Dox_EV
[11] X042_MKL.1_scr_DMSO_EV
[12] MKL.1_EV.RNA_118
[13] X0505_MKL.1_scr_Dox_EV
[14] MKL.1_EV.RNA
[15] X042_MKL.1_sT_Dox #EIGENTLICH _EV
[16] X0505_MKL.1_sT_Dox_EV
[17] MKL.1_RNA_118
[18] MKL.1_EV.RNA_2
[19] Geneid.1
[20] gene_name.1
[21] X1605_WaGa_sT_DMSO_EV
[22] WaGa_EV.RNA_118
[23] WaGa_EV.RNA
[24] X2706_WaGa_scr_Dox_EV
[25] X2706_WaGa_sT_DMSO_EV
[26] X1107_WaGa_wt_EV
[27] X1107_WaGa_sT_Dox_EV
[28] WaGa_RNA
[29] X1605_WaGa_scr_DMSO_EV
[30] X2706_WaGa_scr_DMSO_EV
[31] WaGa_EV.RNA_226
[32] X2706_WaGa_sT_Dox_EV
[33] X1605_WaGa_scr_Dox_EV
[34] X1605_WaGa_wt_EV
[35] X1605_WaGa_sT_Dox_EV
[36] X1107_WaGa_scr_DMSO_EV
[37] WaGa_EV.RNA_2
[38] WaGa_EV.RNA_147
[39] WaGa_RNA_118
[40] X1107_WaGa_scr_Dox_EV
[41] X1107_WaGa_sT_DMSO_EV
[42] WaGa_RNA_147
[43] X2706_WaGa_wt_EV
d.raw<- read.delim2("merged_gene_counts.txt",sep="\t", header=TRUE, row.names=1)
colnames(d.raw)<- c("gene_name","X042_MKL.1_wt_EV","MKL.1_RNA_147","X042_MKL.1_sT_DMSO","MKL.1_RNA","X0505_MKL.1_scr_DMSO_EV","X0505_MKL.1_sT_DMSO_EV","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","X042_MKL.1_scr_Dox_EV","X042_MKL.1_scr_DMSO_EV","MKL.1_EV.RNA_118","X0505_MKL.1_scr_Dox_EV","MKL.1_EV.RNA","X042_MKL.1_sT_Dox","X0505_MKL.1_sT_Dox_EV","MKL.1_RNA_118","MKL.1_EV.RNA_2", "Geneid.1","gene_name.1","X1605_WaGa_sT_DMSO_EV","WaGa_EV.RNA_118","WaGa_EV.RNA","X2706_WaGa_scr_Dox_EV","X2706_WaGa_sT_DMSO_EV","X1107_WaGa_wt_EV","X1107_WaGa_sT_Dox_EV","WaGa_RNA","X1605_WaGa_scr_DMSO_EV","X2706_WaGa_scr_DMSO_EV","WaGa_EV.RNA_226","X2706_WaGa_sT_Dox_EV","X1605_WaGa_scr_Dox_EV","X1605_WaGa_wt_EV","X1605_WaGa_sT_Dox_EV","X1107_WaGa_scr_DMSO_EV","WaGa_EV.RNA_2","WaGa_EV.RNA_147","WaGa_RNA_118","X1107_WaGa_scr_Dox_EV","X1107_WaGa_sT_DMSO_EV","WaGa_RNA_147","X2706_WaGa_wt_EV")
col_order <- c("gene_name", "MKL.1_RNA","MKL.1_RNA_118","MKL.1_RNA_147","MKL.1_EV.RNA","MKL.1_EV.RNA_2","MKL.1_EV.RNA_118","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","X042_MKL.1_wt_EV","X042_MKL.1_sT_DMSO","X0505_MKL.1_sT_DMSO_EV","X042_MKL.1_scr_DMSO_EV","X0505_MKL.1_scr_DMSO_EV","X042_MKL.1_sT_Dox","X0505_MKL.1_sT_Dox_EV","X042_MKL.1_scr_Dox_EV","X0505_MKL.1_scr_Dox_EV", "Geneid.1","gene_name.1", "WaGa_RNA","WaGa_RNA_118","WaGa_RNA_147", "WaGa_EV.RNA","WaGa_EV.RNA_2","WaGa_EV.RNA_118","WaGa_EV.RNA_147","WaGa_EV.RNA_226","X1107_WaGa_wt_EV","X1605_WaGa_wt_EV","X2706_WaGa_wt_EV", "X1107_WaGa_sT_DMSO_EV","X1605_WaGa_sT_DMSO_EV","X2706_WaGa_sT_DMSO_EV", "X1107_WaGa_scr_DMSO_EV","X1605_WaGa_scr_DMSO_EV","X2706_WaGa_scr_DMSO_EV", "X1107_WaGa_sT_Dox_EV","X1605_WaGa_sT_Dox_EV","X2706_WaGa_sT_Dox_EV", "X1107_WaGa_scr_Dox_EV","X1605_WaGa_scr_Dox_EV","X2706_WaGa_scr_Dox_EV")
reordered.raw <- d.raw[,col_order]
#rename
colnames(reordered.raw) <- c("gene_name", "MKL-1 RNA","MKL-1 RNA 118","MKL-1 RNA 147","MKL-1 EV","MKL-1 EV 2","MKL-1 EV 118","MKL-1 EV 87","MKL-1 EV 27","MKL-1 EV 042","MKL-1 EV sT DMSO 042","MKL-1 EV sT DMSO 0505","MKL-1 EV scr DMSO 042","MKL-1 EV scr DMSO 0505","MKL-1 EV sT Dox 042","MKL-1 EV sT Dox 0505","MKL-1 EV scr Dox 042","MKL-1 EV scr Dox 0505", "Geneid.1","gene_name.1", "WaGa RNA","WaGa RNA 118","WaGa RNA 147", "WaGa EV","WaGa EV 2","WaGa EV 118","WaGa EV 147","WaGa EV 226","WaGa EV 1107","WaGa EV 1605","WaGa EV 2706", "WaGa EV sT DMSO 1107","WaGa EV sT DMSO 1605","WaGa EV sT DMSO 2706", "WaGa EV scr DMSO 1107","WaGa EV scr DMSO 1605","WaGa EV scr DMSO 2706", "WaGa EV sT Dox 1107","WaGa EV sT Dox 1605","WaGa EV sT Dox 2706", "WaGa EV scr Dox 1107","WaGa EV scr Dox 1605","WaGa EV scr Dox 2706")
reordered.raw$gene_name <- NULL
reordered.raw$Geneid.1 <- NULL
reordered.raw$gene_name.1 <- NULL
write.csv(reordered.raw, file="counts.txt")
#IMPORTANT that we should filter the data with the counts in the STEP!
d <- reordered.raw[rowSums(reordered.raw>3)>2,]
condition_for_pca = as.factor(c("RNA","RNA","RNA","EV","EV","EV","EV","EV","EV","sT.DMSO","sT.DMSO","scr.DMSO","scr.DMSO","sT.Dox","sT.Dox","scr.Dox","scr.Dox", "RNA","RNA","RNA","EV","EV","EV","EV","EV","EV","EV","EV","sT.DMSO","sT.DMSO","sT.DMSO","scr.DMSO","scr.DMSO","scr.DMSO","sT.Dox","sT.Dox","sT.Dox","scr.Dox","scr.Dox","scr.Dox"))
condition = as.factor(c("MKL1.RNA","MKL1.RNA","MKL1.RNA","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.sT.DMSO","MKL1.sT.DMSO","MKL1.scr.DMSO","MKL1.scr.DMSO","MKL1.sT.Dox","MKL1.sT.Dox","MKL1.scr.Dox","MKL1.scr.Dox", "WaGa.RNA","WaGa.RNA","WaGa.RNA","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.scr.Dox","WaGa.scr.Dox","WaGa.scr.Dox"))
#not sure if they rep1 in the first time is the same to the second time.
donor = as.factor(c("1","118","147","1","2","118","87","27","042","042","0505","042","0505","042","0505","042","0505", "1","118","147", "1","2","118","147","226","1107","1605","2706", "1107","1605","2706","1107","1605","2706","1107","1605","2706","1107","1605","2706"))
batch = as.factor(c("2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08", "2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11"))
cell.line = as.factor(c("MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1", "WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa"))
ids = as.factor(c("MKL1.RNA","MKL1.RNA.118","MKL1.RNA.147", "MKL1.EV","MKL1.EV.2","MKL1.EV.118","MKL1.EV.87","MKL1.EV.27","MKL1.EV.042", "MKL1.EV.sT.DMSO.042","MKL1.EV.sT.DMSO.0505", "MKL1.EV.scr.DMSO.042","MKL1.EV.scr.DMSO.0505", "MKL1.EV.sT.Dox.042","MKL1.EV.sT.Dox.0505", "MKL1.EV.scr.Dox.042","MKL1.EV.scr.Dox.0505", "WaGa.RNA","WaGa.RNA.118","WaGa.RNA.147", "WaGa.EV","WaGa.EV.2","WaGa.EV.118","WaGa.EV.147","WaGa.EV.226","WaGa.EV.1107","WaGa.EV.1605","WaGa.EV.2706", "WaGa.EV.sT.DMSO.1107","WaGa.EV.sT.DMSO.1605","WaGa.EV.sT.DMSO.2706", "WaGa.EV.scr.DMSO.1107","WaGa.EV.scr.DMSO.1605","WaGa.EV.scr.DMSO.2706", "WaGa.EV.sT.Dox.1107","WaGa.EV.sT.Dox.1605","WaGa.EV.sT.Dox.2706", "WaGa.EV.scr.Dox.1107","WaGa.EV.scr.Dox.1605","WaGa.EV.scr.Dox.2706"))
#DEL ids = as.factor(c("MKL.1_RNA","MKL.1_RNA_118","MKL.1_RNA_147","MKL.1_EV.RNA","MKL.1_EV.RNA_2","MKL.1_EV.RNA_118","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","042_MKL.1_wt_EV","042_MKL.1_sT_DMSO","0505_MKL.1_sT_DMSO_EV","042_MKL.1_scr_DMSO_EV","0505_MKL.1_scr_DMSO_EV","042_MKL.1_sT_Dox","0505_MKL.1_sT_Dox_EV","042_MKL.1_scr_Dox_EV","0505_MKL.1_scr_Dox_EV"))
#IMPORTANT: using d instead of reordered.raw.
#cData = data.frame(row.names=colnames(d), condition=condition, batch=batch, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition)
#cData = data.frame(row.names=colnames(d), condition=condition, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~condition)
cData = data.frame(row.names=colnames(d), condition=condition, donor=donor, batch=batch, cell.line=cell.line, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition_for_pca)
dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition)
#rld <- vst(dds)
rld <- rlogTransformation(dds)
# -- before pca --
png("pca_before_batch_correction.png", 1200, 800)
#plotPCA(rld, intgroup=c("condition"))
plotPCA(rld, intgroup = c("condition", "batch"))
#plotPCA(rld, intgroup = c("condition", "ids"))
#plotPCA(rld, "batch")
dev.off()
# -- before heatmap --
## generate the pairwise comparison between samples
library(gplots)
library("RColorBrewer")
png("heatmap_before_donor_correction.png", 1200, 800)
distsRL <- dist(t(assay(rld)))
mat <- as.matrix(distsRL)
#paste( rld$dex, rld$cell, sep="-" )
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,batch, sep=":"))
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,ids, sep=":"))
hc <- hclust(distsRL)
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
dev.off()
# -- remove batch effect --
mat <- assay(rld)
mm <- model.matrix(~condition, colData(rld))
mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
assay(rld) <- mat
# -- after pca --
png("pca_after_batch_correction.png", 1200, 800)
svg("pca_after_batch_correction.svg")
plotPCA(rld, intgroup=c("condition"))
dev.off()
library(ggplot2)
data <- plotPCA(rld, intgroup=c("condition", "cell.line"), returnData=TRUE)
colnames(data) <- c("PC1","PC2","group2","Group","Cell.line","name")
percentVar <- round(100 * attr(data, "percentVar"))
#, shape=donor
#png("pca6.png", 1000, 1000)
svg("pca6.svg", 10, 8)
ggplot(data, aes(PC1, PC2, color=Group, shape=Cell.line)) +
geom_point(size=3) +
scale_color_manual(values = c("RNA" = "grey",
"EV"="cyan",
"scr.DMSO"="#b2df8a",
"sT.DMSO"="#33a02c",
"scr.Dox"="#fb9a99",
"sT.Dox"="#e31a1c")) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()
dev.off()
ggplot(data, aes(PC1, PC2, color=condition, shape=donor)) +
geom_point(size=8) +
scale_color_manual(values = c("untreated" = "grey",
"mCh d3"="#a6cee3",
"mCh d8"="#1f78b4",
"GFP+mCh d9/12"="cyan",
"GFP d3"="#b2df8a",
"GFP d8"="#33a02c",
"sT d3"="#fb9a99",
"sT d8"="#e31a1c",
"LT d3"="#fdbf6f",
"LT d8"="#ff7f00",
"LTtr d3"="#cab2d6",
"LTtr d8"="#6a3d9a",
"sT+LT d3"="#ffff99",
"sT+LTtr d9/12"="#a14a1a")) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) + theme(axis.text = element_text(face="bold",size = 21), axis.title = element_text(face="bold",size = 21)) + theme(legend.text = element_text(size = 20)) + theme(legend.title = element_text(size = 22)) + guides(color = guide_legend(override.aes = list(size = 10)), shape = guide_legend(override.aes = list(size = 10)), alpha = guide_legend(override.aes = list(size = 10)))
#axis.title = element_text(face="bold",size = 20)
#p + theme(axis.text.x = element_text(face="bold",size=14), axis.text.y = element_text(face="bold",size=14))
#+ coord_fixed()
#+ theme(
# # Set the width to 6 inches
# fig.width = 6,
# # Set the height to 4 inches
# fig.height = 4
# )
# -- after heatmap --
## generate the pairwise comparison between samples
png("heatmap_after_donor_correction.png", 1200, 800)
distsRL <- dist(t(assay(rld)))
mat <- as.matrix(distsRL)
rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,donor, sep=":"))
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,ids, sep=":"))
hc <- hclust(distsRL)
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
dev.off()
#convert bam to bigwig using deepTools by feeding inverse of DESeq’s size Factor
sizeFactors(dds)
#NULL
dds <- estimateSizeFactors(dds)
> sizeFactors(dds)
MKL-1 RNA MKL-1 RNA 118 MKL-1 RNA 147
2.5698732 1.6270903 1.9233161
MKL-1 EV MKL-1 EV 2 MKL-1 EV 118
1.5298145 1.2829153 0.6786873
MKL-1 EV 87 MKL-1 EV 27 MKL-1 EV 042
0.6741877 0.4176214 1.3361018
MKL-1 EV sT DMSO 042 MKL-1 EV sT DMSO 0505 MKL-1 EV scr DMSO 042
0.9539241 1.7357673 1.2957555
MKL-1 EV scr DMSO 0505 MKL-1 EV sT Dox 042 MKL-1 EV sT Dox 0505
1.0908451 1.3658071 1.1222026
MKL-1 EV scr Dox 042 MKL-1 EV scr Dox 0505 WaGa RNA
1.4708224 1.1242917 2.1093923
WaGa RNA 118 WaGa RNA 147 WaGa EV
1.6923137 1.6712181 0.6021000
WaGa EV 2 WaGa EV 118 WaGa EV 147
1.2486671 0.4519720 0.5693254
WaGa EV 226 WaGa EV 1107 WaGa EV 1605
0.5449690 1.4884895 1.2630959
WaGa EV 2706 WaGa EV sT DMSO 1107 WaGa EV sT DMSO 1605
0.7678406 0.7981256 0.9055005
WaGa EV sT DMSO 2706 WaGa EV scr DMSO 1107 WaGa EV scr DMSO 1605
1.0602975 0.7928814 0.8116066
WaGa EV scr DMSO 2706 WaGa EV sT Dox 1107 WaGa EV sT Dox 1605
1.0660429 0.8551613 0.9547309
WaGa EV sT Dox 2706 WaGa EV scr Dox 1107 WaGa EV scr Dox 1605
0.9081678 0.7440897 0.8735994
WaGa EV scr Dox 2706
1.0146042
raw_counts <- counts(dds)
normalized_counts <- counts(dds, normalized=TRUE)
write.table(raw_counts, file="raw_counts.txt", sep="\t", quote=F, col.names=NA)
write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)
#bamCoverage --bam ../markDuplicates/${sample}Aligned.sortedByCoord.out.bam -o ${sample}_norm.bw --binSize 10 --scaleFactor --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_RNAAligned.sortedByCoord.out.markDups.bam -o WaGa_RNA.bw --binSize 10 --scaleFactor 0.4958732 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_RNA_118Aligned.sortedByCoord.out.markDups.bam -o WaGa_RNA_118.bw --binSize 10 --scaleFactor 0.6013898 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_RNA_147Aligned.sortedByCoord.out.markDups.bam -o WaGa_RNA_147.bw --binSize 10 --scaleFactor 0.6154516 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_RNAAligned.sortedByCoord.out.markDups.bam -o MKL1_RNA.bw --binSize 10 --scaleFactor 0.4078775 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_RNA_118Aligned.sortedByCoord.out.markDups.bam -o MKL1_RNA_118.bw --binSize 10 --scaleFactor 0.6525297 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_RNA_147Aligned.sortedByCoord.out.markDups.bam -o MKL1_RNA_147.bw --binSize 10 --scaleFactor 0.5331748 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNAAligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA.bw --binSize 10 --scaleFactor 1.733964 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_2Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_2.bw --binSize 10 --scaleFactor 0.7761222 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_118Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_118.bw --binSize 10 --scaleFactor 2.222971 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_147Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_147.bw --binSize 10 --scaleFactor 1.679485 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_226Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_226.bw --binSize 10 --scaleFactor 1.901282 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNAAligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA.bw --binSize 10 --scaleFactor 0.6469583 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_2Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_2.bw --binSize 10 --scaleFactor 0.6877478 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_27Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_27.bw --binSize 10 --scaleFactor 2.462424 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_87Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_87.bw --binSize 10 --scaleFactor 1.411154 --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_118Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_118.bw --binSize 10 --scaleFactor 1.544239 --effectiveGenomeSize 2864785220
setwd("/home/jhuang/DATA/Data_Ute/Data_RNA-Seq_MKL-1_WaGa/results/featureCounts/degenes")
#---- * to untreated ----
dds$condition <- relevel(dds$condition, "WaGa.RNA")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.EV_vs_WaGa.RNA")
dds$condition <- relevel(dds$condition, "WaGa.EV")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.scr.DMSO_vs_WaGa.EV", "WaGa.sT.DMSO_vs_WaGa.EV", "WaGa.scr.Dox_vs_WaGa.EV", "WaGa.sT.Dox_vs_WaGa.EV")
dds$condition <- relevel(dds$condition, "WaGa.scr.Dox")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.sT.Dox_vs_WaGa.scr.Dox")
dds$condition <- relevel(dds$condition, "WaGa.scr.DMSO")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.sT.DMSO_vs_WaGa.scr.DMSO")
dds$condition <- relevel(dds$condition, "MKL1.RNA")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.EV_vs_MKL1.RNA", "WaGa.RNA_vs_MKL1.RNA")
dds$condition <- relevel(dds$condition, "MKL1.EV")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.scr.DMSO_vs_MKL1.EV", "MKL1.sT.DMSO_vs_MKL1.EV", "MKL1.scr.Dox_vs_MKL1.EV", "MKL1.sT.Dox_vs_MKL1.EV", "WaGa.EV_vs_MKL1.EV")
dds$condition <- relevel(dds$condition, "MKL1.scr.Dox")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.sT.Dox_vs_MKL1.scr.Dox")
dds$condition <- relevel(dds$condition, "MKL1.scr.DMSO")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.sT.DMSO_vs_MKL1.scr.DMSO")
##https://bioconductor.statistik.tu-dortmund.de/packages/3.7/data/annotation/
#BiocManager::install("EnsDb.Mmusculus.v79")
#library(EnsDb.Mmusculus.v79)
#edb <- EnsDb.Mmusculus.v79
#https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#selecting-an-ensembl-biomart-database-and-dataset
#https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#selecting-an-ensembl-biomart-database-and-dataset
library(biomaRt)
listEnsembl()
listMarts()
#ensembl <- useEnsembl(biomart = "genes", mirror="asia") # default is Mouse strains 104
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "mmusculus_gene_ensembl", mirror = "www")
#ensembl = useMart("ensembl_mart_44", dataset="hsapiens_gene_ensembl",archive=TRUE, mysql=TRUE)
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "mmusculus_gene_ensembl", version="104")
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="86")
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="GRCh37")
#--> total 69, 27 GRCh38.p7 and 39 GRCm38.p4
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="104")
datasets <- listDatasets(ensembl)
#--> total 202 80 GRCh38.p13 107 GRCm39
#80 hsapiens_gene_ensembl Human genes (GRCh38.p13) GRCh38.p13
#107 mmusculus_gene_ensembl Mouse genes (GRCm39) GRCm39
> listEnsemblArchives()
name date url version
1 Ensembl GRCh37 Feb 2014 http://grch37.ensembl.org GRCh37 *
2 Ensembl 104 May 2021 http://may2021.archive.ensembl.org 104 *
3 Ensembl 103 Feb 2021 http://feb2021.archive.ensembl.org 103
4 Ensembl 102 Nov 2020 http://nov2020.archive.ensembl.org 102
attributes = listAttributes(ensembl)
attributes[1:25,]
#library("dplyr")
for (i in clist) {
#i<-clist[1]
contrast = paste("condition", i, sep="_")
res = results(dds, name=contrast)
res <- res[!is.na(res$log2FoldChange),]
#geness <- AnnotationDbi::select(edb86, keys = rownames(res), keytype = "GENEID", columns = c("ENTREZID","EXONID","GENEBIOTYPE","GENEID","GENENAME","PROTEINDOMAINSOURCE","PROTEINID","SEQNAME","SEQSTRAND","SYMBOL","TXBIOTYPE","TXID","TXNAME","UNIPROTID"))
#geness <- AnnotationDbi::select(edb86, keys = rownames(res), keytype = "GENEID", columns = c("GENEID", "ENTREZID", "SYMBOL", "GENENAME","GENEBIOTYPE","TXBIOTYPE","SEQSTRAND","UNIPROTID"))
# In the ENSEMBL-database, GENEID is ENSEMBL-ID.
#geness <- AnnotationDbi::select(edb86, keys = rownames(res), keytype = "GENEID", columns = c("GENEID", "SYMBOL", "GENEBIOTYPE")) # "ENTREZID", "TXID","TXBIOTYPE","TXSEQSTART","TXSEQEND"
#geness <- geness[!duplicated(geness$GENEID), ]
#using getBM replacing AnnotationDbi::select
#filters = 'ensembl_gene_id' means the records should always have a valid ensembl_gene_ids.
geness <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'),
filters = 'ensembl_gene_id',
values = rownames(res),
mart = ensembl)
geness_uniq <- distinct(geness, ensembl_gene_id, .keep_all= TRUE)
#merge by column by common colunmn name, in the case "GENEID"
res$ENSEMBL = rownames(res)
identical(rownames(res), rownames(geness_uniq))
res_df <- as.data.frame(res)
geness_res <- merge(geness_uniq, res_df, by.x="ensembl_gene_id", by.y="ENSEMBL")
dim(geness_res)
rownames(geness_res) <- geness_res$ensembl_gene_id
geness_res$ensembl_gene_id <- NULL
write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
up <- subset(geness_res, padj<=0.05 & log2FoldChange>=2)
down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-2)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
# 49751 scr_DMSO_vs_scr_Dox-all.txt
# 1 scr_DMSO_vs_scr_Dox-down.txt
# 7 scr_DMSO_vs_scr_Dox-up.txt
# 49751 sT_DMSO_vs_sT_Dox-all.txt
# 1 sT_DMSO_vs_sT_Dox-down.txt
# 1 sT_DMSO_vs_sT_Dox-up.txt
# 99512 total
# 846 EV.RNA_vs_RNA-down.id
# 18874 EV.RNA_vs_RNA-up.id
#1803 wt_EV_vs_RNA-down.txt
#20538 wt_EV_vs_RNA-up.txt
# --------------------------------------------------------
# ---- Volcano Plots with automatically finding top_g ----
#A canonical visualization for interpreting differential gene expression results is the volcano plot.
library(ggrepel)
#"EV.RNA_vs_RNA" "scr.DMSO_vs_EV.RNA" "sT.DMSO_vs_EV.RNA" "scr.Dox_vs_EV.RNA" "sT.Dox_vs_EV.RNA" "sT.Dox_vs_scr.Dox" "sT.DMSO_vs_scr.DMSO"
geness_res <- read.csv(file = paste("WaGa.EV_vs_WaGa.RNA", "all.txt", sep="-"), row.names=1)
geness_res$Color <- "NS or log2FC < 2.0"
geness_res$Color[geness_res$pvalue < 0.05] <- "P < 0.05"
geness_res$Color[geness_res$padj < 0.05] <- "P-adj < 0.05"
geness_res$Color[geness_res$padj < 0.001] <- "P-adj < 0.001"
geness_res$Color[abs(geness_res$log2FoldChange) < 2.0] <- "NS or log2FC < 2.0"
geness_res$Color <- factor(geness_res$Color,
levels = c("NS or log2FC < 2.0", "P < 0.05",
"P-adj < 0.05", "P-adj < 0.001"))
geness_res$invert_P <- (-log10(geness_res$pvalue)) * sign(geness_res$log2FoldChange)
top_g <- c()
top_g <- c(top_g, geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = TRUE)[1:200]], geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = FALSE)[1:200]])
top_g <- unique(top_g)
geness_res <- geness_res[, -1*ncol(geness_res)] #remove invert_P from matrix
png("WaGa.EV_vs_WaGa.RNA.png",width=1400, height=1000)
ggplot(geness_res,
aes(x = log2FoldChange, y = -log10(pvalue),
color = Color, label = external_gene_name)) +
geom_vline(xintercept = c(2.0, -2.0), lty = "dashed") +
geom_hline(yintercept = -log10(0.05), lty = "dashed") +
geom_point() +
labs(x = "log2(FC)",
y = "Significance, -log10(P)",
color = "Significance") +
scale_color_manual(values = c(`P-adj < 0.001` = "dodgerblue",
`P-adj < 0.05` = "lightblue",
`P < 0.05` = "orange2",
`NS or log2FC < 2.0` = "gray"),
guide = guide_legend(override.aes = list(size = 4))) +
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0)),
size = 4, point.padding = 0.15, color = "black",
min.segment.length = .1, box.padding = .2, lwd = 2) +
theme_bw(base_size = 16) +
theme(legend.position = "bottom")
dev.off()
#sed -i -e 's/Color/Category/g' *_Category.csv
#x <- data.frame(k1 = c(NA,3,4,5,2), k2 = c(1,NA,4,5,2), data = 6:10)
#merge(x, y, by = "k2")
for cmp in "EV.RNA_vs_RNA" "scr.DMSO_vs_EV.RNA" "sT.DMSO_vs_EV.RNA" "scr.Dox_vs_EV.RNA" "sT.Dox_vs_EV.RNA" "sT.Dox_vs_scr.Dox" "sT.DMSO_vs_scr.DMSO"; do
echo "~/Tools/csv2xls-0.4/csv_to_xls.py ${cmp}-all.txt ${cmp}-up.txt ${cmp}-down.txt -d$',' -o ${cmp}.xls"
done
~/Tools/csv2xls-0.4/csv_to_xls.py EV.RNA_vs_RNA-all.txt EV.RNA_vs_RNA-up.txt EV.RNA_vs_RNA-down.txt -d$',' -o EV.RNA_vs_RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py scr.DMSO_vs_EV.RNA-all.txt scr.DMSO_vs_EV.RNA-up.txt scr.DMSO_vs_EV.RNA-down.txt -d$',' -o scr.DMSO_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.DMSO_vs_EV.RNA-all.txt sT.DMSO_vs_EV.RNA-up.txt sT.DMSO_vs_EV.RNA-down.txt -d$',' -o sT.DMSO_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py scr.Dox_vs_EV.RNA-all.txt scr.Dox_vs_EV.RNA-up.txt scr.Dox_vs_EV.RNA-down.txt -d$',' -o scr.Dox_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.Dox_vs_EV.RNA-all.txt sT.Dox_vs_EV.RNA-up.txt sT.Dox_vs_EV.RNA-down.txt -d$',' -o sT.Dox_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.Dox_vs_scr.Dox-all.txt sT.Dox_vs_scr.Dox-up.txt sT.Dox_vs_scr.Dox-down.txt -d$',' -o sT.Dox_vs_scr.Dox.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.DMSO_vs_scr.DMSO-all.txt sT.DMSO_vs_scr.DMSO-up.txt sT.DMSO_vs_scr.DMSO-down.txt -d$',' -o sT.DMSO_vs_scr.DMSO.xls
~/Tools/csv2xls-0.4/csv_to_xls.py WaGa.EV_vs_WaGa.RNA-all.txt WaGa.EV_vs_WaGa.RNA-up.txt WaGa.EV_vs_WaGa.RNA-down.txt -d$',' -o WaGa.EV_vs_WaGa.RNA.xls
# --------------------------------------------------------
# -- clustering the genes and draw heatmap --
./MKL1.EV_vs_MKL1.RNA-up.txt
./MKL1.sT.DMSO_vs_MKL1.EV-up.txt
./WaGa.scr.DMSO_vs_WaGa.EV-up.txt
./WaGa.sT.Dox_vs_WaGa.EV-up.txt
./WaGa.sT.DMSO_vs_WaGa.scr.DMSO-up.txt
./MKL1.scr.Dox_vs_MKL1.EV-up.txt
./MKL1.sT.Dox_vs_MKL1.EV-up.txt
./WaGa.scr.Dox_vs_WaGa.EV-up.txt
./MKL1.sT.DMSO_vs_MKL1.scr.DMSO-up.txt
./WaGa.sT.DMSO_vs_WaGa.EV-up.txt
./WaGa.EV_vs_MKL1.EV-up.txt
./WaGa.RNA_vs_MKL1.RNA-up.txt
./MKL1.sT.Dox_vs_MKL1.scr.Dox-up.txt
./WaGa.EV_vs_WaGa.RNA-up.txt
./WaGa.sT.Dox_vs_WaGa.scr.Dox-up.txt
./MKL1.scr.DMSO_vs_MKL1.EV-up.txt
./WaGa.sT.Dox_vs_WaGa.scr.Dox-down.txt
./WaGa.sT.Dox_vs_WaGa.EV-down.txt
./MKL1.sT.Dox_vs_MKL1.scr.Dox-down.txt
./WaGa.sT.DMSO_vs_WaGa.scr.DMSO-down.txt
./MKL1.sT.DMSO_vs_MKL1.scr.DMSO-down.txt
./WaGa.EV_vs_MKL1.EV-down.txt
./MKL1.scr.DMSO_vs_MKL1.EV-down.txt
./WaGa.scr.Dox_vs_WaGa.EV-down.txt
./MKL1.EV_vs_MKL1.RNA-down.txt
./MKL1.sT.Dox_vs_MKL1.EV-down.txt
./MKL1.sT.DMSO_vs_MKL1.EV-down.txt
./WaGa.scr.DMSO_vs_WaGa.EV-down.txt
./WaGa.RNA_vs_MKL1.RNA-down.txt
./WaGa.sT.DMSO_vs_WaGa.EV-down.txt
./WaGa.EV_vs_WaGa.RNA-down.txt
./MKL1.scr.Dox_vs_MKL1.EV-down.txt
install.packages("gplots")
library("gplots")
#Option3: as paper described, A heatmap showing expression values of all DEGs which are significant between any pair conditions.
all_genes <- c(rownames(mock_sT_d8_vs_mock_sT_d3_sig),rownames(sT_d3_vs_mock_sT_d3_sig),rownames(sT_d8_vs_mock_sT_d8_sig),rownames(sT_d8_vs_sT_d3_sig)) #873
all_genes <- unique(all_genes) #663
#all_genes2 <- c(rownames(WAC_vs_mock_sig),rownames(WAP_vs_mock_sig),rownames(WAC_vs_WAP_sig)) #3917
#all_genes2 <- unique(all_genes2) #2608
#intersected_genes <- intersect(all_genes, all_genes2) # 2608
#RNASeq.NoCellLine <- read.csv(file ="gene_expression_keeping_condition.txt", row.names=1)
RNASeq.NoCellLine_ <- RNASeq.NoCellLine[all_genes,]
write.csv(as.data.frame(RNASeq.NoCellLine_), file ="gene_expression_keeping_condition.txt")
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, mock_sT_d3 = rowMeans(RNASeq.NoCellLine_[, 1:2]))
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, mock_sT_d8 = rowMeans(RNASeq.NoCellLine_[, 3:4]))
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, sT_d3 = rowMeans(RNASeq.NoCellLine_[, 5:6]))
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, sT_d8 = rowMeans(RNASeq.NoCellLine_[, 7:8]))
RNASeq.NoCellLine_ <- RNASeq.NoCellLine_[,c(-1:-8)] #663x4
#RNASeq.NoCellLine__ <- read.csv(file ="gene_expression_keeping_condition.txt", row.names=1)
write.csv(as.data.frame(RNASeq.NoCellLine_), file ="gene_expression_merging_condition.txt")
cut -d',' -f1-1 ./sT.DMSO_vs_scr.DMSO-up.txt > sT.DMSO_vs_scr.DMSO-up.id
cut -d',' -f1-1 ./scr.DMSO_vs_EV.RNA-up.txt > scr.DMSO_vs_EV.RNA-up.id
cut -d',' -f1-1 ./sT.DMSO_vs_EV.RNA-up.txt > sT.DMSO_vs_EV.RNA-up.id
cut -d',' -f1-1 ./sT.Dox_vs_scr.Dox-up.txt > sT.Dox_vs_scr.Dox-up.id
cut -d',' -f1-1 ./sT.Dox_vs_EV.RNA-up.txt > sT.Dox_vs_EV.RNA-up.id
cut -d',' -f1-1 ./scr.Dox_vs_EV.RNA-up.txt > scr.Dox_vs_EV.RNA-up.id
cut -d',' -f1-1 ./EV.RNA_vs_RNA-up.txt > EV.RNA_vs_RNA-up.id
cut -d',' -f1-1 ./sT.DMSO_vs_scr.DMSO-down.txt > sT.DMSO_vs_scr.DMSO-down.id
cut -d',' -f1-1 ./scr.DMSO_vs_EV.RNA-down.txt > scr.DMSO_vs_EV.RNA-down.id
cut -d',' -f1-1 ./sT.DMSO_vs_EV.RNA-down.txt > sT.DMSO_vs_EV.RNA-down.id
cut -d',' -f1-1 ./sT.Dox_vs_scr.Dox-down.txt > sT.Dox_vs_scr.Dox-down.id
cut -d',' -f1-1 ./sT.Dox_vs_EV.RNA-down.txt > sT.Dox_vs_EV.RNA-down.id
cut -d',' -f1-1 ./scr.Dox_vs_EV.RNA-down.txt > scr.Dox_vs_EV.RNA-down.id
cut -d',' -f1-1 ./EV.RNA_vs_RNA-down.txt > EV.RNA_vs_RNA-down.id
#MKL-1 571 MKL1_EV_RNA_vs_MKL1_RNA-down.id
#MKL-1 16443 MKL1_EV_RNA_vs_MKL1_RNA-up.id
92 WaGa_EV_RNA_vs_MKL1_EV_RNA-down.id
114 WaGa_EV_RNA_vs_MKL1_EV_RNA-up.id
324 WaGa_EV_RNA_vs_WaGa_RNA-down.id
19911 WaGa_EV_RNA_vs_WaGa_RNA-up.id
#-- MKL-1 vs WaGa --
# 690 WaGa_RNA_vs_MKL1_RNA-down.id
761 WaGa_RNA_vs_MKL1_RNA-up.id
38906 total
38906 ids
#-- MKL-1 --
#add Gene_Id in the first line.
GOI <- read.csv("ids")$Gene_Id
RNASeq.NoCellLine <- assay(rld)
#clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). pearson or spearman
datamat = RNASeq.NoCellLine[GOI, ]
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.05)
mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED", "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
mycol = mycol[as.vector(mycl)]
sampleCols <- rep('GREY',ncol(datamat))
names(sampleCols) <- c("RNA","RNA 118","RNA 147","EV RNA","EV RNA 2","EV RNA 118","EV RNA 87","EV RNA 27","EV RNA 042","sT DMSO 042","sT DMSO 0505","scr DMSO 042","scr DMSO 0505","sT Dox 042","sT Dox 0505","scr Dox 042","scr Dox 0505")
#sampleCols[substr(colnames(RNASeq.NoCellLine_),1,4)=='mock'] <- 'GREY'
sampleCols["RNA"] <- 'DARKBLUE'
sampleCols["RNA 118"] <- 'DARKBLUE'
sampleCols["RNA 147"] <- 'DARKBLUE'
sampleCols["EV RNA"] <- 'DARKRED'
sampleCols["EV RNA 2"] <- 'DARKRED'
sampleCols["EV RNA 118"] <- 'DARKRED'
sampleCols["EV RNA 87"] <- 'DARKRED'
sampleCols["EV RNA 27"] <- 'DARKRED'
sampleCols["EV RNA 042"] <- 'DARKRED'
sampleCols["sT DMSO 042"] <- 'DARKORANGE'
sampleCols["sT DMSO 0505"] <- 'DARKORANGE'
sampleCols["scr DMSO 042"] <- 'DARKORANGE'
sampleCols["scr DMSO 0505"] <- 'DARKORANGE'
sampleCols["sT Dox 042"] <- 'DARKGREEN'
sampleCols["sT Dox 0505"] <- 'DARKGREEN'
sampleCols["scr Dox 042"] <- 'DARKGREEN'
sampleCols["scr Dox 0505"] <- 'DARKGREEN'
png("DEGs_heatmap.png", width=1000, height=1200)
heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
scale='row',trace='none',col=bluered(75),
RowSideColors = mycol, ColSideColors = sampleCols, labRow="", margins=c(22,10), cexRow=8, cexCol=2, srtCol=45, lwid=c(1,7), lhei = c(1, 8))
legend("top", title = "",legend=c("WaGa_RNA","MKL1_RNA","WaGa_EV_RNA","MKL1_EV_RNA"), fill=c("DARKBLUE","DARKRED","DARKORANGE","DARKGREEN"), cex=0.8, box.lty=0)
dev.off()
#-- MKL-1 vs WaGa --
#add Gene_Id in the first line.
GOI <- read.csv("ids")$Gene_Id
RNASeq.NoCellLine <- assay(rld)
#clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC). pearson or spearman
datamat = RNASeq.NoCellLine[GOI, ]
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.05)
mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED", "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
mycol = mycol[as.vector(mycl)]
sampleCols <- rep('GREY',ncol(datamat))
names(sampleCols) <- c("WaGa_RNA","WaGa_RNA_118","WaGa_RNA_147", "MKL1_RNA","MKL1_RNA_118","MKL1_RNA_147", "WaGa_EV_RNA","WaGa_EV_RNA_2","WaGa_EV_RNA_118","WaGa_EV_RNA_147","WaGa_EV_RNA_226", "MKL1_EV_RNA","MKL1_EV_RNA_2","MKL1_EV_RNA_27","MKL1_EV_RNA_87","MKL1_EV_RNA_118")
#sampleCols[substr(colnames(RNASeq.NoCellLine_),1,4)=='mock'] <- 'GREY'
sampleCols["WaGa_RNA"] <- 'DARKBLUE'
sampleCols["WaGa_RNA_118"] <- 'DARKBLUE'
sampleCols["WaGa_RNA_147"] <- 'DARKBLUE'
sampleCols["MKL1_RNA"] <- 'DARKRED'
sampleCols["MKL1_RNA_118"] <- 'DARKRED'
sampleCols["MKL1_RNA_147"] <- 'DARKRED'
sampleCols["WaGa_EV_RNA"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_2"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_118"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_147"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_226"] <- 'DARKORANGE'
sampleCols["MKL1_EV_RNA"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_2"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_27"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_87"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_118"] <- 'DARKGREEN'
png("DEGs_heatmap.png", width=1000, height=1200)
heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
scale='row',trace='none',col=bluered(75),
RowSideColors = mycol, ColSideColors = sampleCols, labRow="", margins=c(22,10), cexRow=8, cexCol=2, srtCol=45, lwid=c(1,7), lhei = c(1, 8))
legend("top", title = "",legend=c("WaGa_RNA","MKL1_RNA","WaGa_EV_RNA","MKL1_EV_RNA"), fill=c("DARKBLUE","DARKRED","DARKORANGE","DARKGREEN"), cex=0.8, box.lty=0)
dev.off()
#-- cluster members --
#c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED", "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
write.csv(names(subset(mycl, mycl == '1')),file='cluster1_YELLOW.txt')
write.csv(names(subset(mycl, mycl == '2')),file='cluster2_DARKBLUE.txt')
write.csv(names(subset(mycl, mycl == '3')),file='cluster3_DARKORANGE.txt')
write.csv(names(subset(mycl, mycl == '4')),file='cluster4_DARKMAGENTA.txt')
write.csv(names(subset(mycl, mycl == '5')),file='cluster5_DARKCYAN.txt')
#~/Tools/csv2xls-0.4/csv_to_xls.py cluster*.txt -d',' -o genelist_clusters.xls
# ------------------
# ---- Pathways ----
mkdir pathways
#--continue from BREAK POINT--
##
#source("https://bioconductor.org/biocLite.R")
#biocLite("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
setwd("~/DATA/Data_Anastasia_RNASeq/results/featureCounts/pathways")
for sample in WaGa_RNA_vs_MKL1_RNA MKL1_EV_RNA_vs_MKL1_RNA WaGa_EV_RNA_vs_MKL1_EV_RNA WaGa_EV_RNA_vs_WaGa_RNA; do \
echo "${sample}_up <- read.csv('../degenes/${sample}-up.txt', row.names=1)"
echo "${sample}_up_KEGG <- enrichKEGG(${sample}_up\$entrezgene_id)"
echo "write.table(as.data.frame(${sample}_up_KEGG), file = '${sample}_up_KEGG.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_down <- read.csv('../degenes/${sample}-down.txt', row.names=1)"
echo "${sample}_down_KEGG <- enrichKEGG(${sample}_down\$entrezgene_id)"
echo "write.table(as.data.frame(${sample}_down_KEGG), file = '${sample}_down_KEGG.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_sig <- rbind(${sample}_up, ${sample}_down)"
echo "${sample}_sig_KEGG <- enrichKEGG(${sample}_sig\$entrezgene_id)"
echo "write.table(as.data.frame(${sample}_sig_KEGG), file = '${sample}_sig_KEGG.txt', sep = '\t', row.names = FALSE)"
done
WaGa_RNA_vs_MKL1_RNA_up <- read.csv('../degenes/WaGa_RNA_vs_MKL1_RNA-up.txt', row.names=1)
WaGa_RNA_vs_MKL1_RNA_up_KEGG <- enrichKEGG(WaGa_RNA_vs_MKL1_RNA_up$entrezgene_id)
write.table(as.data.frame(WaGa_RNA_vs_MKL1_RNA_up_KEGG), file = 'WaGa_RNA_vs_MKL1_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_RNA_vs_MKL1_RNA_down <- read.csv('../degenes/WaGa_RNA_vs_MKL1_RNA-down.txt', row.names=1)
WaGa_RNA_vs_MKL1_RNA_down_KEGG <- enrichKEGG(WaGa_RNA_vs_MKL1_RNA_down$entrezgene_id)
write.table(as.data.frame(WaGa_RNA_vs_MKL1_RNA_down_KEGG), file = 'WaGa_RNA_vs_MKL1_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_RNA_vs_MKL1_RNA_sig <- rbind(WaGa_RNA_vs_MKL1_RNA_up, WaGa_RNA_vs_MKL1_RNA_down)
WaGa_RNA_vs_MKL1_RNA_sig_KEGG <- enrichKEGG(WaGa_RNA_vs_MKL1_RNA_sig$entrezgene_id)
write.table(as.data.frame(WaGa_RNA_vs_MKL1_RNA_sig_KEGG), file = 'WaGa_RNA_vs_MKL1_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
MKL1_EV_RNA_vs_MKL1_RNA_up <- read.csv('../degenes/MKL1_EV_RNA_vs_MKL1_RNA-up.txt', row.names=1)
MKL1_EV_RNA_vs_MKL1_RNA_up_KEGG <- enrichKEGG(MKL1_EV_RNA_vs_MKL1_RNA_up$entrezgene_id)
write.table(as.data.frame(MKL1_EV_RNA_vs_MKL1_RNA_up_KEGG), file = 'MKL1_EV_RNA_vs_MKL1_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
MKL1_EV_RNA_vs_MKL1_RNA_down <- read.csv('../degenes/MKL1_EV_RNA_vs_MKL1_RNA-down.txt', row.names=1)
MKL1_EV_RNA_vs_MKL1_RNA_down_KEGG <- enrichKEGG(MKL1_EV_RNA_vs_MKL1_RNA_down$entrezgene_id)
write.table(as.data.frame(MKL1_EV_RNA_vs_MKL1_RNA_down_KEGG), file = 'MKL1_EV_RNA_vs_MKL1_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
MKL1_EV_RNA_vs_MKL1_RNA_sig <- rbind(MKL1_EV_RNA_vs_MKL1_RNA_up, MKL1_EV_RNA_vs_MKL1_RNA_down)
MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG <- enrichKEGG(MKL1_EV_RNA_vs_MKL1_RNA_sig$entrezgene_id)
write.table(as.data.frame(MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG), file = 'MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_MKL1_EV_RNA_up <- read.csv('../degenes/WaGa_EV_RNA_vs_MKL1_EV_RNA-up.txt', row.names=1)
WaGa_EV_RNA_vs_MKL1_EV_RNA_up_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_MKL1_EV_RNA_up$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_MKL1_EV_RNA_up_KEGG), file = 'WaGa_EV_RNA_vs_MKL1_EV_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_MKL1_EV_RNA_down <- read.csv('../degenes/WaGa_EV_RNA_vs_MKL1_EV_RNA-down.txt', row.names=1)
WaGa_EV_RNA_vs_MKL1_EV_RNA_down_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_MKL1_EV_RNA_down$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_MKL1_EV_RNA_down_KEGG), file = 'WaGa_EV_RNA_vs_MKL1_EV_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_MKL1_EV_RNA_sig <- rbind(WaGa_EV_RNA_vs_MKL1_EV_RNA_up, WaGa_EV_RNA_vs_MKL1_EV_RNA_down)
WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_MKL1_EV_RNA_sig$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG), file = 'WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_WaGa_RNA_up <- read.csv('../degenes/WaGa_EV_RNA_vs_WaGa_RNA-up.txt', row.names=1)
WaGa_EV_RNA_vs_WaGa_RNA_up_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_WaGa_RNA_up$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_WaGa_RNA_up_KEGG), file = 'WaGa_EV_RNA_vs_WaGa_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_WaGa_RNA_down <- read.csv('../degenes/WaGa_EV_RNA_vs_WaGa_RNA-down.txt', row.names=1)
WaGa_EV_RNA_vs_WaGa_RNA_down_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_WaGa_RNA_down$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_WaGa_RNA_down_KEGG), file = 'WaGa_EV_RNA_vs_WaGa_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_WaGa_RNA_sig <- rbind(WaGa_EV_RNA_vs_WaGa_RNA_up, WaGa_EV_RNA_vs_WaGa_RNA_down)
WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_WaGa_RNA_sig$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG), file = 'WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
png("pathways_KEGG.png",width=1260, height=1000)
merged_list <- merge_result(list('WaGa_RNA vs MKL1_RNA'=WaGa_RNA_vs_MKL1_RNA_sig_KEGG, 'MKL1_EV_RNA vs MKL1_RNA'=MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG, 'WaGa_EV_RNA vs MKL1_EV_RNA'=WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG, 'WaGa_EV_RNA vs WaGa_RNA'=WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG))
dotplot(merged_list, showCategory=1000) #, font.size=6, srt = 35
dev.off()
# under CONSOLE
cd pathways_KEGG
~/Tools/csv2xls-0.4/csv_to_xls.py WaGa_RNA_vs_MKL1_RNA_sig_KEGG.txt MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG.txt WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG.txt WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG.txt -d$'\t' -o pathways_KEGG.xls
# -------------
# ---- GOs ----
setwd("~/DATA/Data_Anastasia_RNASeq/results/featureCounts/GOs")
for sample in IKK1_vs_Cre IKK2_vs_Cre Nemo_vs_Cre NIK_vs_Cre; do \
echo "${sample}_up <- read.csv('../degenes/${sample}-up.txt', row.names=1)"
echo "${sample}_up_GO_BP <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='BP')"
echo "${sample}_up_GO_MF <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='MF')"
echo "${sample}_up_GO_CC <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='CC')"
#echo "${sample}_up_GO_ALL <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='ALL')"
echo "write.table(as.data.frame(${sample}_up_GO_BP), file = '${sample}_up_GO_BP.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_up_GO_MF), file = '${sample}_up_GO_MF.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_up_GO_CC), file = '${sample}_up_GO_CC.txt', sep = '\t', row.names = FALSE)"
#echo "write.table(as.data.frame(${sample}_up_GO_ALL), file = '${sample}_up_GO_ALL.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_down <- read.csv('../degenes/${sample}-down.txt', row.names=1)"
echo "${sample}_down_GO_BP <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='BP')"
echo "${sample}_down_GO_MF <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='MF')"
echo "${sample}_down_GO_CC <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='CC')"
#echo "${sample}_down_GO_ALL <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='ALL')"
echo "write.table(as.data.frame(${sample}_down_GO_BP), file = '${sample}_down_GO_BP.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_down_GO_MF), file = '${sample}_down_GO_MF.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_down_GO_CC), file = '${sample}_down_GO_CC.txt', sep = '\t', row.names = FALSE)"
#echo "write.table(as.data.frame(${sample}_down_GO_ALL), file = '${sample}_down_GO_ALL.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_sig <- rbind(${sample}_up, ${sample}_down)"
echo "${sample}_sig_GO_BP <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='BP')"
echo "${sample}_sig_GO_MF <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='MF')"
echo "${sample}_sig_GO_CC <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='CC')"
#echo "${sample}_sig_GO_ALL <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='ALL')"
echo "write.table(as.data.frame(${sample}_sig_GO_BP), file = '${sample}_sig_GO_BP.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_sig_GO_MF), file = '${sample}_sig_GO_MF.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_sig_GO_CC), file = '${sample}_sig_GO_CC.txt', sep = '\t', row.names = FALSE)"
#echo "write.table(as.data.frame(${sample}_sig_GO_ALL), file = '${sample}_sig_GO_ALL.txt', sep = '\t', row.names = FALSE)"
done
png("GOs_BP.png",width=3000, height=24000)
merged_list <- merge_result(list('IKK1_vs_Cre_up'=IKK1_vs_Cre_up_GO_BP, 'IKK1_vs_Cre_down'=IKK1_vs_Cre_down_GO_BP, 'IKK1_vs_Cre_sig'=IKK1_vs_Cre_sig_GO_BP, 'IKK2_vs_Cre_up'=IKK2_vs_Cre_up_GO_BP, 'IKK2_vs_Cre_down'=IKK2_vs_Cre_down_GO_BP, 'IKK2_vs_Cre_sig'=IKK2_vs_Cre_sig_GO_BP, 'Nemo_vs_Cre_up'= Nemo_vs_Cre_up_GO_BP, 'Nemo_vs_Cre_down'=Nemo_vs_Cre_down_GO_BP, 'Nemo_vs_Cre_sig'=Nemo_vs_Cre_sig_GO_BP, 'NIK_vs_Cre_up'=NIK_vs_Cre_up_GO_BP, 'NIK_vs_Cre_down'=NIK_vs_Cre_down_GO_BP, 'NIK_vs_Cre_sig'=NIK_vs_Cre_sig_GO_BP))
dotplot(merged_list, showCategory=10000)
dev.off()
# under CONSOLE
#
cd GOs
~/Tools/csv2xls-0.4/csv_to_xls.py IKK1_vs_Cre_up_GO_BP.txt IKK1_vs_Cre_down_GO_BP.txt IKK1_vs_Cre_sig_GO_BP.txt IKK2_vs_Cre_up_GO_BP.txt IKK2_vs_Cre_down_GO_BP.txt IKK2_vs_Cre_sig_GO_BP.txt Nemo_vs_Cre_up_GO_BP.txt Nemo_vs_Cre_down_GO_BP.txt Nemo_vs_Cre_sig_GO_BP.txt NIK_vs_Cre_up_GO_BP.txt NIK_vs_Cre_down_GO_BP.txt NIK_vs_Cre_sig_GO_BP.txt -d$'\t' -o GOs_BP.xls
~/Tools/csv2xls-0.4/csv_to_xls.py IKK1_vs_Cre_up_GO_MF.txt IKK1_vs_Cre_down_GO_MF.txt IKK1_vs_Cre_sig_GO_MF.txt IKK2_vs_Cre_up_GO_MF.txt IKK2_vs_Cre_down_GO_MF.txt IKK2_vs_Cre_sig_GO_MF.txt Nemo_vs_Cre_up_GO_MF.txt Nemo_vs_Cre_down_GO_MF.txt Nemo_vs_Cre_sig_GO_MF.txt NIK_vs_Cre_up_GO_MF.txt NIK_vs_Cre_down_GO_MF.txt NIK_vs_Cre_sig_GO_MF.txt -d$'\t' -o GOs_MF.xls
~/Tools/csv2xls-0.4/csv_to_xls.py IKK1_vs_Cre_up_GO_CC.txt IKK1_vs_Cre_down_GO_CC.txt IKK1_vs_Cre_sig_GO_CC.txt IKK2_vs_Cre_up_GO_CC.txt IKK2_vs_Cre_down_GO_CC.txt IKK2_vs_Cre_sig_GO_CC.txt Nemo_vs_Cre_up_GO_CC.txt Nemo_vs_Cre_down_GO_CC.txt Nemo_vs_Cre_sig_GO_CC.txt NIK_vs_Cre_up_GO_CC.txt NIK_vs_Cre_down_GO_CC.txt NIK_vs_Cre_sig_GO_CC.txt -d$'\t' -o GOs_CC.xls
点赞本文的读者
还没有人对此文章表态
没有评论
Sorry, 没有相似文章
© 2023 XGenes.com Impressum