Ute RNA-Seq Data Methods

gene_x 0 like s 22 view s

Tags: pipeline

PCA_WaGa.png

PCA_MKL1.png

WaGa_wt.EV_vs_parental.png

MKL-1_wt.EV_vs_parental.png

DEGs_Heatmap_WaGa.png

DEGs_Heatmap_MKL-1.png

rseqc_read_distribution_plot_WaGa.png

rseqc_read_distribution_plot_MKL-1.png

PCA plot untreated/wt vs parental cells; 1x für WaGa cell line und 1x für MKL-1 cells

Heatmap untreated/wt vs parental; 1x für WaGa cell line und 1x für MKL-1 cells

Volcano plot untreated/wt vs parental; 1x für WaGa cell line und 1x für MKL-1 cells

Distribution of different RNA Species untreated/wt and parental; 1x für WaGa cell line und 1x für MKL-1 cells

RNA fragmentation patterns: is EV-RNA full length or fragmented?

RNA binding protein motifs: do we find specific motifs in EV-RNA?

miRNA target analysis

  1. Input files (use R 4.3.3)

        ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/merged_gene_counts_40samples.txt
        ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/merged_gene_counts_virus_rounded.txt
    
  2. Common processing for the data MKL+1 + WaGa

        #[1] "/home/jhuang/mambaforge/envs/r_env/lib/R/library"
    
        setwd("/home/jhuang/DATA/Data_Ute/Data_RNA-Seq_MKL-1_WaGa/results_2025_1/")
    
        library("AnnotationDbi")
        library("clusterProfiler")
        library("ReactomePA")
        #library("org.Mm.eg.db")
        library(DESeq2)
        library(gplots)
    
        d.raw_human<- read.delim2("../merged_gene_counts_40samples.txt",sep="\t", header=TRUE, row.names=1)
        colnames(d.raw_human)<- c("gene_name","X042_MKL.1_wt_EV","MKL.1_RNA_147","X042_MKL.1_sT_DMSO","MKL.1_RNA","X0505_MKL.1_scr_DMSO_EV","X0505_MKL.1_sT_DMSO_EV","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","X042_MKL.1_scr_Dox_EV","X042_MKL.1_scr_DMSO_EV","MKL.1_EV.RNA_118","X0505_MKL.1_scr_Dox_EV","MKL.1_EV.RNA","X042_MKL.1_sT_Dox","X0505_MKL.1_sT_Dox_EV","MKL.1_RNA_118","MKL.1_EV.RNA_2",      "Geneid.1","gene_name.1","X1605_WaGa_sT_DMSO_EV","WaGa_EV.RNA_118","WaGa_EV.RNA","X2706_WaGa_scr_Dox_EV","X2706_WaGa_sT_DMSO_EV","X1107_WaGa_wt_EV","X1107_WaGa_sT_Dox_EV","WaGa_RNA","X1605_WaGa_scr_DMSO_EV","X2706_WaGa_scr_DMSO_EV","WaGa_EV.RNA_226","X2706_WaGa_sT_Dox_EV","X1605_WaGa_scr_Dox_EV","X1605_WaGa_wt_EV","X1605_WaGa_sT_Dox_EV","X1107_WaGa_scr_DMSO_EV","WaGa_EV.RNA_2","WaGa_EV.RNA_147","WaGa_RNA_118","X1107_WaGa_scr_Dox_EV","X1107_WaGa_sT_DMSO_EV","WaGa_RNA_147","X2706_WaGa_wt_EV")
    
        col_order <- c("gene_name",  "MKL.1_RNA","MKL.1_RNA_118","MKL.1_RNA_147","MKL.1_EV.RNA","MKL.1_EV.RNA_2","MKL.1_EV.RNA_118","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","X042_MKL.1_wt_EV","X042_MKL.1_sT_DMSO","X0505_MKL.1_sT_DMSO_EV","X042_MKL.1_scr_DMSO_EV","X0505_MKL.1_scr_DMSO_EV","X042_MKL.1_sT_Dox","X0505_MKL.1_sT_Dox_EV","X042_MKL.1_scr_Dox_EV","X0505_MKL.1_scr_Dox_EV",     "Geneid.1","gene_name.1",    "WaGa_RNA","WaGa_RNA_118","WaGa_RNA_147",    "WaGa_EV.RNA","WaGa_EV.RNA_2","WaGa_EV.RNA_118","WaGa_EV.RNA_147","WaGa_EV.RNA_226","X1107_WaGa_wt_EV","X1605_WaGa_wt_EV","X2706_WaGa_wt_EV",    "X1107_WaGa_sT_DMSO_EV","X1605_WaGa_sT_DMSO_EV","X2706_WaGa_sT_DMSO_EV",    "X1107_WaGa_scr_DMSO_EV","X1605_WaGa_scr_DMSO_EV","X2706_WaGa_scr_DMSO_EV",     "X1107_WaGa_sT_Dox_EV","X1605_WaGa_sT_Dox_EV","X2706_WaGa_sT_Dox_EV",     "X1107_WaGa_scr_Dox_EV","X1605_WaGa_scr_Dox_EV","X2706_WaGa_scr_Dox_EV")
        reordered.raw_human <- d.raw_human[,col_order]
    
        d.raw_virus <- read.delim2("../merged_gene_counts_virus_rounded.txt",sep="\t", header=TRUE, row.names=1)
        reordered.raw_virus <- d.raw_virus[,col_order]
    
        identical(colnames(reordered.raw_human), colnames(reordered.raw_virus))
    
        reordered.raw <- rbind(reordered.raw_human, reordered.raw_virus)
    
        #rename
        colnames(reordered.raw) <- c("gene_name", "MKL-1 parental cell RNA","MKL-1 parental cell RNA 118","MKL-1 parental cell RNA 147",    "MKL-1 wt EV RNA","MKL-1 wt EV RNA 2","MKL-1 wt EV RNA 118","MKL-1 wt EV RNA 87","MKL-1 wt EV RNA 27","MKL-1 wt EV RNA 042",    "MKL-1 sT DMSO EV RNA 042","MKL-1 sT DMSO EV RNA 0505",  "MKL-1 scr DMSO EV RNA 042","MKL-1 scr DMSO EV RNA 0505",  "MKL-1 sT Dox EV RNA 042","MKL-1 sT Dox EV RNA 0505",  "MKL-1 scr Dox EV RNA 042","MKL-1 scr Dox EV RNA 0505",   "Geneid.1","gene_name.1",    "WaGa parental cell RNA","WaGa parental cell RNA 118","WaGa parental cell RNA 147",    "WaGa wt EV RNA","WaGa wt EV RNA 2","WaGa wt EV RNA 118","WaGa wt EV RNA 147","WaGa wt EV RNA 226","WaGa wt EV RNA 1107","WaGa wt EV RNA 1605","WaGa wt EV RNA 2706",    "WaGa sT DMSO EV RNA 1107","WaGa sT DMSO EV RNA 1605","WaGa sT DMSO EV RNA 2706",     "WaGa scr DMSO EV RNA 1107","WaGa scr DMSO EV RNA 1605","WaGa scr DMSO EV RNA 2706",     "WaGa sT Dox EV RNA 1107","WaGa sT Dox EV RNA 1605","WaGa sT Dox EV RNA 2706",     "WaGa scr Dox EV RNA 1107","WaGa scr Dox EV RNA 1605","WaGa scr Dox EV RNA 2706")
    
        reordered.raw$gene_name <- NULL
        reordered.raw$Geneid.1 <- NULL
        reordered.raw$gene_name.1 <- NULL
        write.csv(reordered.raw, file="counts.txt")
        #IMPORTANT that we should filter the data with the counts in the STEP!
        d <- reordered.raw[rowSums(reordered.raw>3)>2,]
    
        condition_for_pca = as.factor(c("RNA","RNA","RNA","EV","EV","EV","EV","EV","EV","sT.DMSO","sT.DMSO","scr.DMSO","scr.DMSO","sT.Dox","sT.Dox","scr.Dox","scr.Dox",    "RNA","RNA","RNA","EV","EV","EV","EV","EV","EV","EV","EV","sT.DMSO","sT.DMSO","sT.DMSO","scr.DMSO","scr.DMSO","scr.DMSO","sT.Dox","sT.Dox","sT.Dox","scr.Dox","scr.Dox","scr.Dox"))
    
        condition = as.factor(c("MKL1.RNA","MKL1.RNA","MKL1.RNA","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.sT.DMSO","MKL1.sT.DMSO","MKL1.scr.DMSO","MKL1.scr.DMSO","MKL1.sT.Dox","MKL1.sT.Dox","MKL1.scr.Dox","MKL1.scr.Dox",    "WaGa.RNA","WaGa.RNA","WaGa.RNA","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.scr.Dox","WaGa.scr.Dox","WaGa.scr.Dox"))
    
        donor = as.factor(c("1","118","147",  "1","2","118","87","27","042",  "042","0505","042","0505","042","0505","042","0505",    "1","118","147",  "1","2","118","147","226","1107","1605","2706",   "1107","1605","2706","1107","1605","2706","1107","1605","2706","1107","1605","2706"))
    
        batch = as.factor(c("2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08",    "2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11"))
    
        cell.line = as.factor(c("MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1",    "WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa"))
    
        ids = as.factor(c("MKL1.RNA","MKL1.RNA.118","MKL1.RNA.147",  "MKL1.EV","MKL1.EV.2","MKL1.EV.118","MKL1.EV.87","MKL1.EV.27","MKL1.EV.042",  "MKL1.EV.sT.DMSO.042","MKL1.EV.sT.DMSO.0505",  "MKL1.EV.scr.DMSO.042","MKL1.EV.scr.DMSO.0505",  "MKL1.EV.sT.Dox.042","MKL1.EV.sT.Dox.0505",  "MKL1.EV.scr.Dox.042","MKL1.EV.scr.Dox.0505",      "WaGa.RNA","WaGa.RNA.118","WaGa.RNA.147",  "WaGa.EV","WaGa.EV.2","WaGa.EV.118","WaGa.EV.147","WaGa.EV.226","WaGa.EV.1107","WaGa.EV.1605","WaGa.EV.2706",  "WaGa.EV.sT.DMSO.1107","WaGa.EV.sT.DMSO.1605","WaGa.EV.sT.DMSO.2706",  "WaGa.EV.scr.DMSO.1107","WaGa.EV.scr.DMSO.1605","WaGa.EV.scr.DMSO.2706",  "WaGa.EV.sT.Dox.1107","WaGa.EV.sT.Dox.1605","WaGa.EV.sT.Dox.2706",  "WaGa.EV.scr.Dox.1107","WaGa.EV.scr.Dox.1605","WaGa.EV.scr.Dox.2706"))
    
        cData = data.frame(row.names=colnames(d), condition=condition,  donor=donor, batch=batch, cell.line=cell.line, ids=ids)
        dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition)
    
        #rld <- rlogTransformation(dds)
        rld <- vst(dds)
    
  3. Preparing the data for PCA_MKL1 and PCA_WaGa drawing

        #d_WaGa <- d[, !grepl("parental|MKL-1", names(d))]
        d_WaGa <- d[, !grepl("MKL-1", names(d))]
        condition = as.factor(c("WaGa.RNA","WaGa.RNA","WaGa.RNA","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.scr.Dox","WaGa.scr.Dox","WaGa.scr.Dox"))
        donor = as.factor(c("1","118","147","1","2","118","147","226","1107","1605","2706",   "1107","1605","2706","1107","1605","2706","1107","1605","2706","1107","1605","2706"))
        batch = as.factor(c("2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11"))
        cell.line = as.factor(c("WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa"))
        ids = as.factor(c("WaGa.RNA","WaGa.RNA.118","WaGa.RNA.147","WaGa.EV","WaGa.EV.2","WaGa.EV.118","WaGa.EV.147","WaGa.EV.226","WaGa.EV.1107","WaGa.EV.1605","WaGa.EV.2706",  "WaGa.EV.sT.DMSO.1107","WaGa.EV.sT.DMSO.1605","WaGa.EV.sT.DMSO.2706",  "WaGa.EV.scr.DMSO.1107","WaGa.EV.scr.DMSO.1605","WaGa.EV.scr.DMSO.2706",  "WaGa.EV.sT.Dox.1107","WaGa.EV.sT.Dox.1605","WaGa.EV.sT.Dox.2706",  "WaGa.EV.scr.Dox.1107","WaGa.EV.scr.Dox.1605","WaGa.EV.scr.Dox.2706"))
        cData = data.frame(row.names=colnames(d_WaGa), condition=condition,  donor=donor, batch=batch, cell.line=cell.line, ids=ids)
        dds_WaGa<-DESeqDataSetFromMatrix(countData=d_WaGa, colData=cData, design=~batch+condition)
        rld_WaGa <- vst(dds_WaGa)
    
        #d_MKL1 <- d[, !grepl("parental|WaGa", names(d))]
        d_MKL1 <- d[, !grepl("WaGa", names(d))]
        condition = as.factor(c("MKL1.RNA","MKL1.RNA","MKL1.RNA","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.sT.DMSO","MKL1.sT.DMSO","MKL1.scr.DMSO","MKL1.scr.DMSO","MKL1.sT.Dox","MKL1.sT.Dox","MKL1.scr.Dox","MKL1.scr.Dox"))
        donor = as.factor(c("1","118","147","1","2","118","87","27","042",  "042","0505","042","0505","042","0505","042","0505"))
        batch = as.factor(c("2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08"))
        cell.line = as.factor(c("MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1"))
        ids = as.factor(c("MKL1.RNA","MKL1.RNA.118","MKL1.RNA.147", "MKL1.EV","MKL1.EV.2","MKL1.EV.118","MKL1.EV.87","MKL1.EV.27","MKL1.EV.042",  "MKL1.EV.sT.DMSO.042","MKL1.EV.sT.DMSO.0505",  "MKL1.EV.scr.DMSO.042","MKL1.EV.scr.DMSO.0505",  "MKL1.EV.sT.Dox.042","MKL1.EV.sT.Dox.0505",  "MKL1.EV.scr.Dox.042","MKL1.EV.scr.Dox.0505"))
        cData = data.frame(row.names=colnames(d_MKL1), condition=condition,  donor=donor, batch=batch, cell.line=cell.line, ids=ids)
        dds_MKL1<-DESeqDataSetFromMatrix(countData=d_MKL1, colData=cData, design=~batch+condition)
        rld_MKL1 <- vst(dds_MKL1)
    
        # -- before pca --
        png("pca.png", 1200, 800)
        plotPCA(rld, intgroup=c("condition"))
        #plotPCA(rld, intgroup = c("condition", "batch"))
        #plotPCA(rld, intgroup = c("condition", "ids"))
        #plotPCA(rld, "batch")
        dev.off()
        #72% (PC1), 11% (PC2)   8% (PC3)
    
        png("pca_WaGa.png", 1200, 800)
        plotPCA(rld_WaGa, intgroup = c("condition"))
        dev.off()
        #80%, 9%
    
        png("pca_MKL1.png", 1200, 800)
        plotPCA(rld_MKL1, intgroup = c("condition"))
        dev.off()
        #77%, 15%
    
  4. PCA plot untreated/wt vs parental cells; 1x für WaGa cell line und 1x für MKL-1 cells

        #install.packages("BiocManager")
        #BiocManager::install("genefilter")
        #install.packages("writexl")
        #install.packages("plotly")
        #install.packages("dplyr")
        #install.packages("ggrepel")
        # Load required libraries
        library(ggrepel)
        library(dplyr)
        library(ggplot2)
        library(genefilter)
        library(writexl)
    
        # ---- drawing PCA WaGa ----
    
        # Perform PCA on WaGa dataset
        pca_data <- plotPCA(rld_WaGa, intgroup = c("condition", "batch", "cell.line"), returnData = TRUE)
        write.csv(pca_data, file = "PCA_data_WaGa.csv")
    
        # Compute top variable genes and perform PCA
        top_variable_genes <- 500
        gene_variance <- rowVars(assay(rld_WaGa))
        selected_genes <- order(gene_variance, decreasing = TRUE)[seq_len(min(top_variable_genes, length(gene_variance)))]
    
        # Extract data and run PCA
        expression_matrix <- t(assay(rld_WaGa)[selected_genes, ])
        pca_results <- prcomp(expression_matrix, center = TRUE, scale. = TRUE)
        summary(pca_results)  # Display variance explained by PCs
    
        # Extract variance percentages
        variance_explained <- summary(pca_results)$importance[2, ]
        pc1_variance <- sprintf("%.1f", variance_explained[1] * 100)
        pc2_variance <- sprintf("%.1f", variance_explained[2] * 100)
    
        # Create a PCA dataframe
        pca_df <- as.data.frame(pca_results$x)
        rownames(pca_df) <- rownames(pca_data)
    
        # Merge PCA results with original metadata
        pca_data <- pca_data %>%
        select(-PC1, -PC2)  # Remove old PCA values
    
        merged_pca_data <- merge(pca_data, pca_df, by = "row.names")
        rownames(merged_pca_data) <- merged_pca_data$Row.names
        merged_pca_data$Row.names <- NULL
    
        # Save merged PCA data
        write.csv(merged_pca_data, file = "Merged_PCA_WaGa.csv")
        write_xlsx(merged_pca_data, path = "Merged_PCA_WaGa.xlsx")
    
        # Define condition labels
        condition_labels <- c(
              "WaGa.EV" = "wt EV RNA",
              "WaGa.scr.DMSO" = "scr DMSO EV RNA",
              "WaGa.scr.Dox" = "scr Dox EV RNA",
              "WaGa.sT.DMSO" = "sT DMSO EV RNA",
              "WaGa.sT.Dox" = "sT Dox EV RNA",
              "WaGa.RNA" = "parental cell RNA"
        )
    
        # Apply condition labels and filter relevant conditions
        filtered_pca_data <- merged_pca_data %>%
        mutate(condition = recode(condition, !!!condition_labels)) %>%
        filter(condition %in% c("wt EV RNA", "parental cell RNA"))
    
        # Prepare PCA data for plotting
        plot_pca_df <- as.data.frame(filtered_pca_data[, c("PC1", "PC2")])
        plot_pca_df$condition <- filtered_pca_data$condition
        plot_pca_df$sample_name <- rownames(filtered_pca_data)
    
        # Save PCA plot as PNG
        png("PCA_WaGa.png", width = 1000, height = 600, res = 150)
    
        ggplot(plot_pca_df, aes(x = PC1, y = PC2, color = condition)) +
        geom_point(size = 4, alpha = 0.8) +  # Plot points
        labs(
        title = "",
        x = paste0("Principal Component 1 (", pc1_variance, "%)"),
        y = paste0("Principal Component 2 (", pc2_variance, "%)")
        ) +
        theme_minimal() +
        scale_color_manual(values = c("blue", "red"))  # Customize colors
    
        dev.off()  # Close the PNG device and save the file
    
        # ---- drawing PCA MKL-1 ----
    
        # Compute PCA on MKL1 dataset
        pca_data <- plotPCA(rld_MKL1, intgroup = c("condition", "batch", "cell.line"), returnData = TRUE)
        write.csv(pca_data, file = "PCA_data_MKL1.csv")
    
        # Compute top variable genes and perform PCA
        top_variable_genes <- 500
        gene_variance <- rowVars(assay(rld_MKL1))
        selected_genes <- order(gene_variance, decreasing = TRUE)[seq_len(min(top_variable_genes, length(gene_variance)))]
    
        # Extract and transpose data for PCA
        expression_matrix <- t(assay(rld_MKL1)[selected_genes, ])
        pca_results <- prcomp(expression_matrix, center = TRUE, scale. = TRUE)
    
        # Extract variance explained percentages
        variance_explained <- summary(pca_results)$importance[2, ]
        pc1_variance <- sprintf("%.1f", variance_explained[1] * 100)
        pc2_variance <- sprintf("%.1f", variance_explained[2] * 100)
    
        # Create a PCA dataframe
        pca_df <- as.data.frame(pca_results$x)
        rownames(pca_df) <- rownames(pca_data)
    
        # Remove previous PC1 & PC2 columns and merge new PCA results
        pca_data <- pca_data %>%
        select(-PC1, -PC2)
    
        merged_pca_data <- merge(pca_data, pca_df, by = "row.names")
        rownames(merged_pca_data) <- merged_pca_data$Row.names
        merged_pca_data$Row.names <- NULL
    
        # Save merged PCA data
        write.csv(merged_pca_data, file = "Merged_PCA_MKL1.csv")
        write_xlsx(merged_pca_data, path = "Merged_PCA_MKL1.xlsx")
    
        # Define condition labels for clarity
        condition_labels <- c(
        "MKL1.EV" = "wt EV RNA",
        "MKL1.scr.DMSO" = "scr DMSO EV RNA",
        "MKL1.scr.Dox" = "scr Dox EV RNA",
        "MKL1.sT.DMSO" = "sT DMSO EV RNA",
        "MKL1.sT.Dox" = "sT Dox EV RNA",
        "MKL1.RNA" = "parental cell RNA"
        )
    
        # Apply condition labels and filter for relevant conditions
        filtered_pca_data <- merged_pca_data %>%
        mutate(condition = recode(condition, !!!condition_labels)) %>%
        filter(condition %in% c("wt EV RNA", "parental cell RNA"))
    
        # Prepare PCA data for plotting
        plot_pca_df <- as.data.frame(filtered_pca_data[, c("PC1", "PC2")])
        plot_pca_df$condition <- filtered_pca_data$condition
        plot_pca_df$sample_name <- rownames(filtered_pca_data)
    
        # Save PCA plot as PNG
        png("PCA_MKL1.png", width = 1000, height = 600, res = 150)
    
        ggplot(plot_pca_df, aes(x = PC1, y = PC2, color = condition)) +
        geom_point(size = 4, alpha = 0.8) +  # Plot points
        labs(
        title = "",
        x = paste0("Principal Component 1 (", pc1_variance, "%)"),
        y = paste0("Principal Component 2 (", pc2_variance, "%)")
        ) +
        theme_minimal() +
        scale_color_manual(values = c("blue", "red"))  # Customize colors
    
        dev.off()  # Close the PNG device and save the file
    
  5. Convert bam to bigwig using deepTools by feeding inverse of DESeq’s size Factor

        sizeFactors(dds)
        #NULL
        dds <- estimateSizeFactors(dds)
        > sizeFactors(dds)
        #WaGa parental cell RNA*
        #                  1.1242096                   2.1097851
        #WaGa parental cell RNA 118*  WaGa parental cell RNA 147*
        #                  1.6925780                   1.6712182
    
        raw_counts <- counts(dds)
        normalized_counts <- counts(dds, normalized=TRUE)
        write.table(raw_counts, file="raw_counts.txt", sep="\t", quote=F, col.names=NA)
        write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)
    
        #1/2,1097851=0,473981924 (!=0.4958732)
        bamCoverage --bam ../markDuplicates/WaGa_RNAAligned.sortedByCoord.out.markDups.bam -o WaGa_RNA.bw --binSize 10 --scaleFactor 0.473981924 --effectiveGenomeSize 2864785220
        #1/1,6925780=0,590814722 (!=0.6013898)
        bamCoverage --bam ../markDuplicates/WaGa_RNA_118Aligned.sortedByCoord.out.markDups.bam -o WaGa_RNA_118.bw --binSize 10 --scaleFactor 0.590814722 --effectiveGenomeSize 2864785220
        #1/1,6712182=0,598365911 (!=0.6154516)
        bamCoverage --bam ../markDuplicates/WaGa_RNA_147Aligned.sortedByCoord.out.markDups.bam -o WaGa_RNA_147.bw --binSize 10 --scaleFactor 0.598365911 --effectiveGenomeSize 2864785220
    
  6. DEG analysis

        ##https://bioconductor.statistik.tu-dortmund.de/packages/3.7/data/annotation/
        #BiocManager::install("EnsDb.Mmusculus.v79")
        #library(EnsDb.Mmusculus.v79)
        #edb <- EnsDb.Mmusculus.v79
        #https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#selecting-an-ensembl-biomart-database-and-dataset
        #https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#selecting-an-ensembl-biomart-database-and-dataset
        library(biomaRt)
        #listMarts() == listEnsembl() == Ensembl Genes 113
        listEnsembl()
        listMarts()
        listEnsemblArchives()
    
        #mkdir degenes
        setwd("degenes")
    
        dds$condition <- relevel(dds$condition, "WaGa.RNA")
        dds = DESeq(dds, betaPrior=FALSE)
        resultsNames(dds)
        clist <- c("WaGa.EV_vs_WaGa.RNA")
    
        dds$condition <- relevel(dds$condition, "MKL1.RNA")
        dds = DESeq(dds, betaPrior=FALSE)
        resultsNames(dds)
        clist <- c("MKL1.EV_vs_MKL1.RNA")
    
        #ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="112")
        #ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl", host="https://may2023.archive.ensembl.org")
        ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl", host="https://www.ensembl.org")
    
        listDatasets(ensembl)
        attributes = listAttributes(ensembl)
        attributes[1:25,]
    
        for (i in clist) {
              #i<-clist[1]
              contrast = paste("condition", i, sep="_")
              res = results(dds, name=contrast)
              res <- res[!is.na(res$log2FoldChange),]
              geness <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'), filters = 'ensembl_gene_id', values = rownames(res), mart = ensembl)
              geness_uniq <- distinct(geness, ensembl_gene_id, .keep_all= TRUE)
    
              #merge by column by common colunmn name, in the case "GENEID"
              res$ENSEMBL = rownames(res)
              identical(rownames(res), rownames(geness_uniq))
              res_df <- as.data.frame(res)
              geness_res <- merge(geness_uniq, res_df, by.x="ensembl_gene_id", by.y="ENSEMBL")
              dim(geness_res)
              rownames(geness_res) <- geness_res$ensembl_gene_id
              geness_res$ensembl_gene_id <- NULL
              write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
              up <- subset(geness_res, padj<=0.05 & log2FoldChange>=2)
              down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-2)
              write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
              write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
        }
    
  7. Volcano plot untreated/wt vs parental; 1x für WaGa cell line und 1x für MKL-1 cells

        #A canonical visualization for interpreting differential gene expression results is the volcano plot.
        library(ggrepel)
    
        # -- WaGa.EV_vs_WaGa.RNA --
        geness_res <- read.csv(file = paste("WaGa.EV_vs_WaGa.RNA", "all.txt", sep="-"), row.names=1)
        geness_res$Color <- "NS or log2FC < 2.0"
        geness_res$Color[geness_res$pvalue < 0.05] <- "P < 0.05"
        geness_res$Color[geness_res$padj < 0.05] <- "P-adj < 0.05"
        geness_res$Color[geness_res$padj < 0.001] <- "P-adj < 0.001"
        geness_res$Color[abs(geness_res$log2FoldChange) < 2.0] <- "NS or log2FC < 2.0"
        geness_res$Color <- factor(geness_res$Color,
                                levels = c("NS or log2FC < 2.0", "P < 0.05",
                                      "P-adj < 0.05", "P-adj < 0.001"))
    
        geness_res$invert_P <- (-log10(geness_res$pvalue)) * sign(geness_res$log2FoldChange)
        top_g <- c()
        top_g <- c(top_g, geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = TRUE)[1:200]], geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = FALSE)[1:200]])
        top_g <- unique(top_g)
        geness_res <- geness_res[, -1*ncol(geness_res)]  #remove invert_P from matrix
    
        png("WaGa_wt.EV_vs_parental.png",width=1400, height=1000)
        ggplot(geness_res,
              aes(x = log2FoldChange, y = -log10(pvalue),
              color = Color, label = external_gene_name)) +
        geom_vline(xintercept = c(2.0, -2.0), lty = "dashed") +
        geom_hline(yintercept = -log10(0.05), lty = "dashed") +
        geom_point() +
        labs(x = "log2(FC)",
              y = "Significance, -log10(P)",
              color = "Significance") +
        scale_color_manual(values = c(`P-adj < 0.001` = "dodgerblue",
                                      `P-adj < 0.05` = "lightblue",
                                      `P < 0.05` = "orange2",
                                      `NS or log2FC < 2.0` = "gray"),
                          guide = guide_legend(override.aes = list(size = 4))) +
        scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
        geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0)),
                          size = 4, point.padding = 0.15, color = "black",
                          min.segment.length = .1, box.padding = .2, lwd = 2) +
        theme_bw(base_size = 16) +
        theme(legend.position = "bottom")
        dev.off()
    
        # -- MKL1.EV_vs_MKL1.RNA --
        geness_res <- read.csv(file = paste("MKL1.EV_vs_MKL1.RNA", "all.txt", sep="-"), row.names=1)
        geness_res$Color <- "NS or log2FC < 2.0"
        geness_res$Color[geness_res$pvalue < 0.05] <- "P < 0.05"
        geness_res$Color[geness_res$padj < 0.05] <- "P-adj < 0.05"
        geness_res$Color[geness_res$padj < 0.001] <- "P-adj < 0.001"
        geness_res$Color[abs(geness_res$log2FoldChange) < 2.0] <- "NS or log2FC < 2.0"
        geness_res$Color <- factor(geness_res$Color,
                                levels = c("NS or log2FC < 2.0", "P < 0.05",
                                      "P-adj < 0.05", "P-adj < 0.001"))
    
        geness_res$invert_P <- (-log10(geness_res$pvalue)) * sign(geness_res$log2FoldChange)
        top_g <- c()
        top_g <- c(top_g, geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = TRUE)[1:200]], geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = FALSE)[1:200]])
        top_g <- unique(top_g)
        geness_res <- geness_res[, -1*ncol(geness_res)]  #remove invert_P from matrix
    
        png("MKL-1_wt.EV_vs_parental.png",width=1400, height=1000)
        ggplot(geness_res,
              aes(x = log2FoldChange, y = -log10(pvalue),
              color = Color, label = external_gene_name)) +
        geom_vline(xintercept = c(2.0, -2.0), lty = "dashed") +
        geom_hline(yintercept = -log10(0.05), lty = "dashed") +
        geom_point() +
        labs(x = "log2(FC)",
              y = "Significance, -log10(P)",
              color = "Significance") +
        scale_color_manual(values = c(`P-adj < 0.001` = "dodgerblue",
                                      `P-adj < 0.05` = "lightblue",
                                      `P < 0.05` = "orange2",
                                      `NS or log2FC < 2.0` = "gray"),
                          guide = guide_legend(override.aes = list(size = 4))) +
        scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
        geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0)),
                          size = 4, point.padding = 0.15, color = "black",
                          min.segment.length = .1, box.padding = .2, lwd = 2) +
        theme_bw(base_size = 16) +
        theme(legend.position = "bottom")
        dev.off()
    
        # -- under console --
        mv WaGa.EV_vs_WaGa.RNA-all.txt WaGa_wt.EV_vs_parental-all.txt
        mv WaGa.EV_vs_WaGa.RNA-up.txt WaGa_wt.EV_vs_parental-up.txt
        mv WaGa.EV_vs_WaGa.RNA-down.txt WaGa_wt.EV_vs_parental-down.txt
        mv MKL1.EV_vs_MKL1.RNA-all.txt MKL-1_wt.EV_vs_parental-all.txt
        mv MKL1.EV_vs_MKL1.RNA-up.txt MKL-1_wt.EV_vs_parental-up.txt
        mv MKL1.EV_vs_MKL1.RNA-down.txt MKL-1_wt.EV_vs_parental-down.txt
        for cmp in "WaGa_wt.EV_vs_parental" "MKL-1_wt.EV_vs_parental"; do
              echo "~/Tools/csv2xls-0.4/csv_to_xls.py ${cmp}-all.txt ${cmp}-up.txt ${cmp}-down.txt -d$',' -o ${cmp}.xls"
        done
        ~/Tools/csv2xls-0.4/csv_to_xls.py WaGa_wt.EV_vs_parental-all.txt WaGa_wt.EV_vs_parental-up.txt WaGa_wt.EV_vs_parental-down.txt -d$',' -o WaGa_wt.EV_vs_parental.xls
        ~/Tools/csv2xls-0.4/csv_to_xls.py MKL-1_wt.EV_vs_parental-all.txt MKL-1_wt.EV_vs_parental-up.txt MKL-1_wt.EV_vs_parental-down.txt -d$',' -o MKL-1_wt.EV_vs_parental.xls
    
  8. Heatmap untreated/wt vs parental; 1x für WaGa cell line und 1x für MKL-1 cells

        install.packages("gplots")
        library("gplots")
    
        # -- WaGa cell line --
        cut -d',' -f1-1 ./WaGa_wt.EV_vs_parental-up.txt > WaGa_wt.EV_vs_parental-up.id
        cut -d',' -f1-1 ./WaGa_wt.EV_vs_parental-down.txt > WaGa_wt.EV_vs_parental-down.id
        cat WaGa_wt.EV_vs_parental-up.id WaGa_wt.EV_vs_parental-down.id | sort -u > ids  #24207
    
        #add Gene_Id in the first line.
        GOI <- read.csv("ids")$Gene_Id
        RNASeq.NoCellLine <- assay(rld)
    
        #clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).  pearson or spearman
        datamat = RNASeq.NoCellLine[GOI, c("WaGa parental cell RNA","WaGa parental cell RNA 118","WaGa parental cell RNA 147", "WaGa wt EV RNA","WaGa wt EV RNA 2","WaGa wt EV RNA 118","WaGa wt EV RNA 147","WaGa wt EV RNA 226",  "WaGa wt EV RNA 1107","WaGa wt EV RNA 1605","WaGa wt EV RNA 2706")]
        hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="average")
        hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="average")
        mycl = cutree(hr, h=max(hr$height)/1.05)
        mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
    
        mycol = mycol[as.vector(mycl)]
        sampleCols <- rep('GREY',ncol(datamat))
        names(sampleCols) <- c("WaGa parental cell RNA","WaGa parental cell RNA 118","WaGa parental cell RNA 147", "WaGa wt EV RNA","WaGa wt EV RNA 2","WaGa wt EV RNA 118","WaGa wt EV RNA 147","WaGa wt EV RNA 226",  "WaGa wt EV RNA 1107","WaGa wt EV RNA 1605","WaGa wt EV RNA 2706")
    
        #DARKBLUE DARKGREEN DARKRED DARKORANGE
        sampleCols["WaGa parental cell RNA"] <- 'GREY'
        sampleCols["WaGa parental cell RNA 118"] <- 'GREY'
        sampleCols["WaGa parental cell RNA 147"] <- 'GREY'
        sampleCols["WaGa wt EV RNA"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 2"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 118"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 147"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 226"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 1107"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 1605"] <- 'GREEN'
        sampleCols["WaGa wt EV RNA 2706"] <- 'GREEN'
    
        png("DEGs_Heatmap_WaGa.png", width=1000, height=1200)
        heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
                    scale='row',trace='none',col=bluered(75),
                    RowSideColors = mycol, ColSideColors = sampleCols, labRow="", margins=c(22,10), cexRow=8, cexCol=2, srtCol=30, lwid=c(1,7), lhei = c(1, 8), legend("top", title = "",legend=c("WaGa parental cell RNA","WaGa wt EV RNA"), fill=c("GREY","GREEN"), cex=1.0, box.lty=0))
        dev.off()
    
        # save cluster members
        subset_1<-names(subset(mycl, mycl == '1'))
        subset_1_ <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'),
              filters = 'ensembl_gene_id',
              values = subset_1,
              mart = ensembl)
        subset_1_uniq <- distinct(subset_1_, ensembl_gene_id, .keep_all= TRUE)
        subset_1_expr  <- datamat[subset_1,]
        subset_1_expr <- as.data.frame(subset_1_expr)
        subset_1_expr$ENSEMBL = rownames(subset_1_expr)
        cluster1_YELLOW <- merge(subset_1_uniq, subset_1_expr, by.x="ensembl_gene_id", by.y="ENSEMBL")
        #write.csv(cluster1_YELLOW,file='cluster1_YELLOW.txt')
    
        subset_2<-names(subset(mycl, mycl == '2'))
        subset_2_ <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'),
              filters = 'ensembl_gene_id',
              values = subset_2,
              mart = ensembl)
        subset_2_uniq <- distinct(subset_2_, ensembl_gene_id, .keep_all= TRUE)
        subset_2_expr  <- datamat[subset_2,]
        subset_2_expr <- as.data.frame(subset_2_expr)
        subset_2_expr$ENSEMBL = rownames(subset_2_expr)
        cluster2_DARKBLUE <- merge(subset_2_uniq, subset_2_expr, by.x="ensembl_gene_id", by.y="ENSEMBL")
        #write.csv(cluster2_DARKBLUE,file='cluster2_DARKBLUE.txt')
    
        write_xlsx(list(
        "Cluster 1 YELLOW" = cluster1_YELLOW,
        "Cluster 2 DARKBLUE" = cluster2_DARKBLUE
        ), "DEGs_heatmap_data_WaGa.xlsx")
    
        # -- MKL-1 cell line --
        cut -d',' -f1-1 ./MKL-1_wt.EV_vs_parental-up.txt > MKL-1_wt.EV_vs_parental-up.id
        cut -d',' -f1-1 ./MKL-1_wt.EV_vs_parental-down.txt > MKL-1_wt.EV_vs_parental-down.id
        cat MKL-1_wt.EV_vs_parental-up.id MKL-1_wt.EV_vs_parental-down.id | sort -u > ids  #20720
    
        #add Gene_Id in the first line.
        GOI <- read.csv("ids")$Gene_Id
        RNASeq.NoCellLine <- assay(rld)
    
        #clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).  pearson or spearman
        datamat = RNASeq.NoCellLine[GOI, c("MKL-1 parental cell RNA","MKL-1 parental cell RNA 118","MKL-1 parental cell RNA 147", "MKL-1 wt EV RNA","MKL-1 wt EV RNA 2","MKL-1 wt EV RNA 27","MKL-1 wt EV RNA 87","MKL-1 wt EV RNA 118",  "MKL-1 wt EV RNA 042")]
        #Check for Zero Variance Rows; If any row has variance 0, remove it.
        #datamat <- datamat[apply(datamat, 1, var) != 0, ]
        hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="average")
        hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="average")
        mycl = cutree(hr, h=max(hr$height)/1.05)
        mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
    
        mycol = mycol[as.vector(mycl)]
        sampleCols <- rep('GREY',ncol(datamat))
        names(sampleCols) <- c("MKL-1 parental cell RNA","MKL-1 parental cell RNA 118","MKL-1 parental cell RNA 147", "MKL-1 wt EV RNA","MKL-1 wt EV RNA 2","MKL-1 wt EV RNA 27","MKL-1 wt EV RNA 87","MKL-1 wt EV RNA 118",  "MKL-1 wt EV RNA 042")
    
        sampleCols["MKL-1 parental cell RNA"] <- 'GREY'
        sampleCols["MKL-1 parental cell RNA 118"] <- 'GREY'
        sampleCols["MKL-1 parental cell RNA 147"] <- 'GREY'
        sampleCols["MKL-1 wt EV RNA"] <- 'GREEN'
        sampleCols["MKL-1 wt EV RNA 2"] <- 'GREEN'
        sampleCols["MKL-1 wt EV RNA 27"] <- 'GREEN'
        sampleCols["MKL-1 wt EV RNA 87"] <- 'GREEN'
        sampleCols["MKL-1 wt EV RNA 118"] <- 'GREEN'
        sampleCols["MKL-1 wt EV RNA 042"] <- 'GREEN'
    
        png("DEGs_Heatmap_MKL-1.png", width=1000, height=1200)
        heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
                    scale='row',trace='none',col=bluered(75),
                    RowSideColors = mycol, ColSideColors = sampleCols, labRow="", margins=c(22,10), cexRow=8, cexCol=2, srtCol=30, lwid=c(1,7), lhei = c(1, 8), legend("top", title = "",legend=c("MKL-1 parental cell RNA","MKL-1 wt EV RNA"), fill=c("GREY","GREEN"), cex=1.0, box.lty=0))
        dev.off()
    
        # save cluster members
        subset_1<-names(subset(mycl, mycl == '1'))
        subset_1_ <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'),
              filters = 'ensembl_gene_id',
              values = subset_1,
              mart = ensembl)
        subset_1_uniq <- distinct(subset_1_, ensembl_gene_id, .keep_all= TRUE)
        subset_1_expr  <- datamat[subset_1,]
        subset_1_expr <- as.data.frame(subset_1_expr)
        subset_1_expr$ENSEMBL = rownames(subset_1_expr)
        cluster1_YELLOW <- merge(subset_1_uniq, subset_1_expr, by.x="ensembl_gene_id", by.y="ENSEMBL")
        #write.csv(cluster1_YELLOW,file='cluster1_YELLOW.txt')
    
        subset_2<-names(subset(mycl, mycl == '2'))
        subset_2_ <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'),
              filters = 'ensembl_gene_id',
              values = subset_2,
              mart = ensembl)
        subset_2_uniq <- distinct(subset_2_, ensembl_gene_id, .keep_all= TRUE)
        subset_2_expr  <- datamat[subset_2,]
        subset_2_expr <- as.data.frame(subset_2_expr)
        subset_2_expr$ENSEMBL = rownames(subset_2_expr)
        cluster2_DARKBLUE <- merge(subset_2_uniq, subset_2_expr, by.x="ensembl_gene_id", by.y="ENSEMBL")
        #write.csv(cluster2_DARKBLUE,file='cluster2_DARKBLUE.txt')
    
        write_xlsx(list(
        "Cluster 1 YELLOW" = cluster1_YELLOW,
        "Cluster 2 DARKBLUE" = cluster2_DARKBLUE
        ), "DEGs_heatmap_data_MKL-1.xlsx")
    
  9. Distribution of different RNA Species untreated/wt and parental; 1x für WaGa cell line und 1x für MKL-1 cells

        cd ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/read_distributions
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_RNAAligned.sortedByCoord.out.read_distribution.txt WaGa_parental_cell_RNA_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_RNA_118Aligned.sortedByCoord.out.read_distribution.txt WaGa_parental_cell_RNA_118_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_RNA_147Aligned.sortedByCoord.out.read_distribution.txt WaGa_parental_cell_RNA_147_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_EV-RNAAligned.sortedByCoord.out.read_distribution.txt  WaGa_wt_EV_RNA_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_EV-RNA_2Aligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_2_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_EV-RNA_118Aligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_118_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_EV-RNA_147Aligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_147_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/WaGa_EV-RNA_226Aligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_226_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/1107_WaGa_wt_EVAligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_1107_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/1605_WaGa_wt_EVAligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_1605_read_distribution.txt
        cp ../../../Data_RNA-Seq_WaGa/results/rseqc/read_distribution/2706_WaGa_wt_EVAligned.sortedByCoord.out.read_distribution.txt WaGa_wt_EV_RNA_2706_read_distribution.txt
    
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_RNAAligned.sortedByCoord.out.read_distribution.txt MKL-1_parental_cell_RNA_read_distribution.txt
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_RNA_118Aligned.sortedByCoord.out.read_distribution.txt MKL-1_parental_cell_RNA_118_read_distribution.txt
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_RNA_147Aligned.sortedByCoord.out.read_distribution.txt MKL-1_parental_cell_RNA_147_read_distribution.txt
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_EV-RNAAligned.sortedByCoord.out.read_distribution.txt MKL-1_wt_EV_RNA_read_distribution.txt
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_EV-RNA_2Aligned.sortedByCoord.out.read_distribution.txt MKL-1_wt_EV_RNA_2_read_distribution.txt
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_EV-RNA_27Aligned.sortedByCoord.out.read_distribution.txt MKL-1_wt_EV_RNA_27_read_distribution.txt
        cp ../../../Data_RNA-Seq_MKL-1/results/rseqc/read_distribution/MKL-1_EV-RNA_87Aligned.sortedByCoord.out.read_distribution.txt MKL-1_wt_EV_RNA_87_read_distribution.txt
    
  10. RNA fragmentation patterns: is EV-RNA full length or fragmented?

        Since our data consists of single-end RNA-Seq reads, determining the exact RNA fragment length is challenging, as only the read length is available. Unlike paired-end sequencing, where insert size can be calculated, single-end sequencing does not provide direct information about the full fragment size.
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum