Draw plots for snRNA generated by COMPSRA

gene_x 0 like s 256 view s

Tags: pipeline

snRNA_pca.png

snRNA_heatmap.png

  1. Generate the following files according to STEPS 1-4 from http://xgenes.com/article/article-content/239/small-rna-sequencing-processing-in-the-example-of-smallrna-7/, http://xgenes.com/article/article-content/232/small-rna-sequencing-processing-in-the-example-of-smallrna-7/, and http://xgenes.com/article/article-content/156/small-rna-processing/. For COMPSRA_MERGE_0_miRNA.txt, we also need STEP 5 to add the read numbers of MCPyV-M1.

    COMPSRA_MERGE_0_miRNA.txt
    COMPSRA_MERGE_0_piRNA.txt
    COMPSRA_MERGE_0_snRNA.txt *
    COMPSRA_MERGE_0_tRNA.txt
    COMPSRA_MERGE_0_snoRNA.txt
    COMPSRA_MERGE_0_circRNA.txt
    
  2. Input files for snRNA are two files: COMPSRA_MERGE_0_snRNA.txt and ids

    • COMPSRA_MERGE_0_snRNA.txt

      #The former is more precise because reads from the virus are mapped to the virus genome
      diff ./our_out_on_hg38+JN707599/COMPSRA_MERGE_0_snRNA.txt ./our_out_on_hg38/COMPSRA_MERGE_0_snRNA.txt
      cp ../our_out_on_hg38+JN707599/COMPSRA_MERGE_0_snRNA.txt .

    • prepare the file ids

      #see "Option4" in STEP3 of the R code below (manually defining the ids)

  3. Draw plots with R using DESeq2

    #BiocManager::install("AnnotationDbi")
    #BiocManager::install("clusterProfiler")
    #BiocManager::install(c("ReactomePA","org.Hs.eg.db"))
    #BiocManager::install("limma")
    library("AnnotationDbi")
    library("clusterProfiler")
    library("ReactomePA")
    library("org.Hs.eg.db")
    library(DESeq2)
    library(gplots)
    library(limma)
    # Check the current library paths
    .libPaths()
    #setwd("/home/jhuang/DATA/Data_Ute/Data_Ute_smallRNA_7/our_out_on_hg38+JN707599_2024_corrected/")
    
    # Read the merged COMPSRA snRNA table; the first column supplies the row names.
    d.raw <- read.delim2(
      "COMPSRA_MERGE_0_snRNA.txt",
      sep = "\t", header = TRUE, row.names = 1
    )
    # Drop the column named "X" (presumably an empty column produced by a
    # trailing tab in the file -- TODO confirm), then force every column to numeric.
    d.raw$X <- NULL
    d.raw[] <- lapply(d.raw, function(column) as.numeric(column))
    # Sample annotation, listed in the same order as the 12 columns of d.raw.
    EV_or_parental <- factor(rep(c("EV", "parental"), times = c(10, 2)))
    donor <- factor(rep(c("0505", "1905"), times = 6))
    replicates <- factor(
      rep(c("sT_DMSO", "sT_Dox", "scr_DMSO", "scr_Dox", "wt", "control"), each = 2)
    )
    ids <- factor(c(
      "0505_WaGa_sT_DMSO", "1905_WaGa_sT_DMSO",
      "0505_WaGa_sT_Dox", "1905_WaGa_sT_Dox",
      "0505_WaGa_scr_DMSO", "1905_WaGa_scr_DMSO",
      "0505_WaGa_scr_Dox", "1905_WaGa_scr_Dox",
      "0505_WaGa_wt", "1905_WaGa_wt",
      "control_MKL1", "control_WaGa"
    ))
    cData <- data.frame(
      row.names = colnames(d.raw),
      replicates = replicates,
      ids = ids,
      donor = donor,
      EV_or_parental = EV_or_parental
    )
    dds <- DESeqDataSetFromMatrix(
      countData = d.raw,
      colData = cData,
      design = ~replicates + donor
    )
    
    # Regularized-log transform of the counts, used for the PCA plots.
    rld <- rlogTransformation(dds)
    # -- before pca --
    # plotPCA() returns a ggplot object. It is only drawn when printed, and
    # auto-printing does not happen under source() or inside functions/loops,
    # which would leave the PNG files empty. Print explicitly.
    png("pca.png", 1200, 800)
    print(plotPCA(rld, intgroup = c("replicates")))
    #plotPCA(rld, intgroup = c("replicates", "batch"))
    #plotPCA(rld, intgroup = c("replicates", "ids"))
    #plotPCA(rld, "batch")
    dev.off()
    # Same PCA, coloured by donor.
    png("pca2.png", 1200, 800)
    print(plotPCA(rld, intgroup = c("donor")))
    dev.off()
    
    #### STEP2: DEGs ####
    # Note: bam files can be converted to bigwig with deepTools by feeding it
    # the inverse of DESeq's size factor.
    sizeFactors(dds)  # prints NULL: size factors are not estimated yet
    dds <- estimateSizeFactors(dds)
    sizeFactors(dds)
    # Export the size-factor-normalized count table (tab-separated, row names kept).
    normalized_counts <- counts(dds, normalized = TRUE)
    write.table(
      normalized_counts,
      file = "normalized_counts.txt",
      sep = "\t",
      quote = FALSE,
      col.names = NA
    )
    
    #---- * to untreated ----
    # Fit EV vs. parental (reference level: "parental"), adjusting for donor.
    dds <- DESeqDataSetFromMatrix(
      countData = d.raw,
      colData = cData,
      design = ~EV_or_parental + donor
    )
    dds$EV_or_parental <- relevel(dds$EV_or_parental, "parental")
    dds <- DESeq(dds, betaPrior = FALSE)
    resultsNames(dds)
    clist <- c("EV_vs_parental")
    for (i in clist) {
      coef_name <- paste0("EV_or_parental_", i)
      res <- results(dds, name = coef_name)
      # Keep only features with an estimated fold change.
      res <- res[!is.na(res$log2FoldChange), ]
      # DESeq2 sets some adjusted p-values to NA; treat them as non-significant (1).
      #https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
      res$padj <- ifelse(is.na(res$padj), 1, res$padj)
      res_df <- as.data.frame(res)
      # Full table, most significant first.
      all_sorted <- res_df[order(res_df$pvalue), ]
      write.csv(as.data.frame(all_sorted), file = paste0(i, "-all.txt"))
      # Up/down: padj <= 0.1 and |log2FC| >= 2, strongest change first.
      up <- subset(res_df, padj <= 0.1 & log2FoldChange >= 2)
      down <- subset(res_df, padj <= 0.1 & log2FoldChange <= -2)
      up_sorted <- up[order(up$log2FoldChange, decreasing = TRUE), ]
      down_sorted <- down[order(abs(down$log2FoldChange), decreasing = TRUE), ]
      write.csv(as.data.frame(up_sorted), file = paste0(i, "-up.txt"))
      write.csv(as.data.frame(down_sorted), file = paste0(i, "-down.txt"))
    }
    #~/Tools/csv2xls-0.4/csv_to_xls.py EV_vs_parental-all.txt EV_vs_parental-up.txt EV_vs_parental-down.txt -d$',' -o EV_vs_parental.xls;
    
    #---- pairwise comparisons between the treatment groups ----
    # Write the "<comparison>-all.txt", "-up.txt" and "-down.txt" CSV tables for
    # one coefficient of a fitted DESeqDataSet.
    #   dds_fit     : object returned by DESeq()
    #   comparison  : e.g. "sT_Dox_vs_sT_DMSO" (numerator_vs_reference)
    #   factor_name : design factor the comparison belongs to
    #   padj_cutoff, lfc_cutoff : thresholds for the up/down tables
    write_deg_tables <- function(dds_fit, comparison, factor_name = "replicates",
                                 padj_cutoff = 0.1, lfc_cutoff = 2) {
      coef_name <- paste(factor_name, comparison, sep = "_")
      # Fail early with a clear message if the reference level does not match.
      if (!coef_name %in% resultsNames(dds_fit)) {
        stop("coefficient '", coef_name, "' not found in resultsNames()", call. = FALSE)
      }
      res <- results(dds_fit, name = coef_name)
      res <- res[!is.na(res$log2FoldChange), ]
      # DESeq2 sets some adjusted p-values to NA; treat them as non-significant (1).
      #https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
      res$padj <- ifelse(is.na(res$padj), 1, res$padj)
      res_df <- as.data.frame(res)
      write.csv(res_df[order(res_df$pvalue), ], file = paste(comparison, "all.txt", sep = "-"))
      up <- subset(res_df, padj <= padj_cutoff & log2FoldChange >= lfc_cutoff)
      down <- subset(res_df, padj <= padj_cutoff & log2FoldChange <= -lfc_cutoff)
      write.csv(up[order(up$log2FoldChange, decreasing = TRUE), ], file = paste(comparison, "up.txt", sep = "-"))
      write.csv(down[order(abs(down$log2FoldChange), decreasing = TRUE), ], file = paste(comparison, "down.txt", sep = "-"))
      invisible(res_df)
    }
    # Reference level -> comparisons available after refitting with that reference.
    # Previously clist was overwritten after each refit and the export loop ran
    # only once at the end, so the sT_Dox_vs_sT_DMSO and sT_Dox_vs_scr_Dox tables
    # were never written. Each comparison is now exported right after its own fit.
    comparisons_by_reference <- list(
      sT_DMSO = c("sT_Dox_vs_sT_DMSO"),
      scr_Dox = c("sT_Dox_vs_scr_Dox"),
      scr_DMSO = c("scr_Dox_vs_scr_DMSO", "sT_DMSO_vs_scr_DMSO")
    )
    dds <- DESeqDataSetFromMatrix(countData = d.raw, colData = cData, design = ~replicates + donor)
    for (ref_level in names(comparisons_by_reference)) {
      dds$replicates <- relevel(dds$replicates, ref_level)
      dds <- DESeq(dds, betaPrior = FALSE)
      print(resultsNames(dds))
      for (i in comparisons_by_reference[[ref_level]]) {
        write_deg_tables(dds, i)
      }
    }
    
    # The following are shell commands (run them in a terminal, not in R); they are
    # commented out so that this listing can be pasted into R as a whole.
    # Each call bundles the all/up/down tables of one comparison into one .xls file.
    #~/Tools/csv2xls-0.4/csv_to_xls.py sT_Dox_vs_sT_DMSO-all.txt sT_Dox_vs_sT_DMSO-up.txt sT_Dox_vs_sT_DMSO-down.txt -d$',' -o sT_Dox_vs_sT_DMSO.xls;
    #~/Tools/csv2xls-0.4/csv_to_xls.py sT_Dox_vs_scr_Dox-all.txt sT_Dox_vs_scr_Dox-up.txt sT_Dox_vs_scr_Dox-down.txt -d$',' -o sT_Dox_vs_scr_Dox.xls;
    #~/Tools/csv2xls-0.4/csv_to_xls.py scr_Dox_vs_scr_DMSO-all.txt scr_Dox_vs_scr_DMSO-up.txt scr_Dox_vs_scr_DMSO-down.txt -d$',' -o scr_Dox_vs_scr_DMSO.xls;
    #~/Tools/csv2xls-0.4/csv_to_xls.py sT_DMSO_vs_scr_DMSO-all.txt sT_DMSO_vs_scr_DMSO-up.txt sT_DMSO_vs_scr_DMSO-down.txt -d$',' -o sT_DMSO_vs_scr_DMSO.xls;
    
    ##### STEP3: prepare all_genes #####
    rld <- rlogTransformation(dds)
    # Remove the donor effect from the rlog values, with the treatment groups
    # ("replicates") given as the design passed to removeBatchEffect().
    mm <- model.matrix(~replicates, colData(rld))
    mat <- limma::removeBatchEffect(assay(rld), batch = rld$donor, design = mm)
    assay(rld) <- mat
    RNASeq.NoCellLine <- assay(rld)
    # Label the columns with the sample ids (underscores replaced by spaces) ...
    colnames(RNASeq.NoCellLine) <- chartr("_", " ", as.character(ids))
    # ... and reorder them: controls, wild type, sT (DMSO, Dox), scr (DMSO, Dox).
    col.order <- c(
      "control MKL1", "control WaGa",
      "0505 WaGa wt", "1905 WaGa wt",
      "0505 WaGa sT DMSO", "1905 WaGa sT DMSO",
      "0505 WaGa sT Dox", "1905 WaGa sT Dox",
      "0505 WaGa scr DMSO", "1905 WaGa scr DMSO",
      "0505 WaGa scr Dox", "1905 WaGa scr Dox"
    )
    RNASeq.NoCellLine <- RNASeq.NoCellLine[, col.order]
    
    #Option4: manually defining the genes of interest (file "ids")
    # Shell: the loop prints the cut commands that extract the first CSV column
    # of every up/down table into an .id file; run the printed commands, then merge.
    #for i in EV_vs_parental sT_Dox_vs_sT_DMSO sT_Dox_vs_scr_Dox scr_Dox_vs_scr_DMSO sT_DMSO_vs_scr_DMSO; do echo "cut -d',' -f1-1 ${i}-up.txt > ${i}-up.id"; echo "cut -d',' -f1-1 ${i}-down.txt > ${i}-down.id"; done
    #cat *.id | sort -u > ids
    ##then add "Gene_Id" as the first line of ids and delete the "" entries
    GOI <- read.csv("ids")$Gene_Id
    datamat <- RNASeq.NoCellLine[GOI, ]
    ##### STEP4: clustering the genes and draw heatmap #####
    # Drop the sample "control MKL1" (the first column).
    datamat <- datamat[, colnames(datamat) != "control MKL1"]
    # Rename the remaining 11 samples (naming style of the RNA-seq analysis?).
    colnames(datamat) <- c(
      "WaGa control",
      "WaGa wildtype 0505", "WaGa wildtype 1905",
      "WaGa sT DMSO 0505", "WaGa sT DMSO 1905",
      "WaGa sT Dox 0505", "WaGa sT Dox 1905",
      "WaGa scr DMSO 0505", "WaGa scr DMSO 1905",
      "WaGa scr Dox 0505", "WaGa scr Dox 1905"
    )
    write.csv(datamat, file = "gene_expression_keeping_replicates.txt")
    
    # hclust() linkage methods: "ward.D", "ward.D2", "single", "complete",
    # "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC)
    # Genes are clustered on 1 - Pearson correlation, samples on 1 - Spearman correlation.
    gene_dist <- as.dist(1 - cor(t(datamat), method = "pearson"))
    sample_dist <- as.dist(1 - cor(datamat, method = "spearman"))
    hr <- hclust(gene_dist, method = "complete")
    hc <- hclust(sample_dist, method = "complete")
    # Cut the gene tree at 2/3 of its height and give every cluster a colour.
    mycl <- cutree(hr, h = max(hr$height) / 1.5)
    cluster_palette <- c(
      "YELLOW", "BLUE", "ORANGE", "CYAN", "GREEN", "MAGENTA", "GREY",
      "LIGHTCYAN", "RED", "PINK", "DARKORANGE", "MAROON", "LIGHTGREEN",
      "DARKBLUE", "DARKRED", "LIGHTBLUE", "DARKCYAN", "DARKGREEN", "DARKMAGENTA"
    )
    # One colour per row of datamat, looked up by cluster number.
    mycol <- cluster_palette[as.vector(mycl)]
    png("snRNA_heatmap_keeping_replicates.png", width = 800, height = 1000)
    #svg("DEGs_heatmap_keeping_replicates.svg", width=6, height=8)
    heatmap.2(
      as.matrix(datamat),
      Rowv = as.dendrogram(hr),
      Colv = NA,                # keep the column order as given
      dendrogram = "row",
      scale = "row",
      trace = "none",
      col = bluered(75),
      RowSideColors = mycol,
      labRow = "",
      #cexRow=1.2,              # Increase row label font size
      cexCol = 1.7,             # Increase column label font size
      srtCol = 20,
      lhei = c(1, 8),
      margins = c(7, 1)
    )
    dev.off()
    
    #### cluster members #####
    # One file per cluster, named after the cluster's colour in the heatmap side bar.
    cluster_files <- c("1" = "YELLOW.txt", "2" = "BLUE.txt", "3" = "ORANGE.txt")
    for (cluster_id in names(cluster_files)) {
      write.csv(names(mycl)[mycl == cluster_id], file = cluster_files[[cluster_id]])
    }
    #~/Tools/csv2xls-0.4/csv_to_xls.py gene_expression_keeping_replicates.txt YELLOW.txt ORANGE.txt BLUE.txt -d',' -o snRNA_heatmap_keeping_replicates.xls
    
    # Shell commands (run them in a terminal, not in R); commented out so that this
    # listing can be pasted into R as a whole. They give the result files their
    # final snRNA_* names.
    #mv snRNA_heatmap_keeping_replicates.png snRNA_heatmap.png
    #mv snRNA_heatmap_keeping_replicates.xls snRNA_heatmap.xls
    #mv pca.png snRNA_pca.png
    #mv EV_vs_parental.xls snRNA_EV_vs_parental.xls
    #mv sT_DMSO_vs_scr_DMSO.xls snRNA_sT_DMSO_vs_scr_DMSO.xls
    #mv sT_Dox_vs_scr_Dox.xls snRNA_sT_Dox_vs_scr_Dox.xls
    #mv sT_Dox_vs_sT_DMSO.xls snRNA_sT_Dox_vs_sT_DMSO.xls
    #mv scr_Dox_vs_scr_DMSO.xls snRNA_scr_Dox_vs_scr_DMSO.xls
    # --> SENDING snRNA_*.png, snRNA_EV_vs_parental.xls, and snRNA_heatmap.xls
    
    # ---- NOT WORKING WELL ----
    # merging replicates: keep the control column and replace every pair of
    # donor columns (0505/1905) by its row-wise mean.
    datamat <- cbind(
      datamat[, 1, drop = FALSE],
      "WaGa wildtype" = rowMeans(datamat[, 2:3]),
      "WaGa sT DMSO" = rowMeans(datamat[, 4:5]),
      "WaGa sT Dox" = rowMeans(datamat[, 6:7]),
      "WaGa scr DMSO" = rowMeans(datamat[, 8:9]),
      "WaGa scr Dox" = rowMeans(datamat[, 10:11])
    )
    write.csv(datamat, file = "gene_expression_merging_replicates.txt")
    
    # Re-cut the gene tree lower (height / 2.9) to obtain more clusters for the
    # merged-replicates heatmap.
    mycl <- cutree(hr, h = max(hr$height) / 2.9)
    # Earlier in this script 'mycol' was overwritten with one colour per row, so
    # indexing it by cluster number would pick arbitrary row colours and the side
    # bar would disagree with the YELLOW/BLUE/... cluster files written below.
    # Rebuild the palette here and look the colours up in it instead.
    cluster_palette <- c("YELLOW", "BLUE", "ORANGE", "CYAN", "GREEN", "MAGENTA", "GREY", "LIGHTCYAN", "RED", "PINK", "DARKORANGE", "MAROON", "LIGHTGREEN", "DARKBLUE", "DARKRED", "LIGHTBLUE", "DARKCYAN", "DARKGREEN", "DARKMAGENTA")
    # Every cluster needs its own colour; otherwise RowSideColors would contain NA.
    if (max(mycl) > length(cluster_palette)) {
      stop("more clusters (", max(mycl), ") than palette colours (", length(cluster_palette), ")", call. = FALSE)
    }
    # One colour per row of datamat, looked up by cluster number.
    actualColors <- cluster_palette[mycl]
    png("snRNA_heatmap_merging_replicates.png", width=800, height=1000)
    heatmap.2(as.matrix(datamat),
              Rowv=as.dendrogram(hr),
              Colv=NA,
              dendrogram='row',
              labRow="",
              scale='row',
              trace='none',
              col=bluered(75),
              RowSideColors=actualColors,
              srtCol=20,
              lhei=c(1,8),
              cexCol=1.7,    # Increase column label font size
              margins=c(7,1)
            )
    dev.off()
    
    #### cluster members #####
    # One file per cluster, named after the palette colour of that cluster number.
    cluster_files <- c(
      "1" = "YELLOW.txt", "2" = "BLUE.txt", "3" = "ORANGE.txt", "4" = "CYAN.txt",
      "5" = "GREEN.txt", "6" = "MAGENTA.txt", "7" = "GREY.txt", "8" = "LIGHTCYAN.txt"
    )
    for (cluster_id in names(cluster_files)) {
      write.csv(names(mycl)[mycl == cluster_id], file = cluster_files[[cluster_id]])
    }
    #write.csv(names(subset(mycl, mycl == '9')),file='RED.txt')
    #~/Tools/csv2xls-0.4/csv_to_xls.py gene_expression_merging_replicates.txt YELLOW.txt BLUE.txt ORANGE.txt CYAN.txt MAGENTA.txt GREEN.txt LIGHTCYAN.txt GREY.txt -d',' -o snRNA_heatmap_merging_replicates.xls
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum