Small RNA sequencing data processing, using smallRNA_7 as an example

gene_x 0 like s 309 view s

Tags: pipeline

  1. adapter sequence

    Lexogen small RNA-Seq kit
    
    some common adapter sequences from different kits for reference:
    
        - TruSeq Small RNA (Illumina): TGGAATTCTCGGGTGCCAAGG
        - Small RNA Kits V1 (Illumina): TCGTATGCCGTCTTCTGCTTGT
        - Small RNA Kits V1.5 (Illumina): ATCTCGTATGCCGTCTTCTGCTTG
        - NEXTflex Small RNA Sequencing Kit v3 for Illumina Platforms (Bioo Scientific): TGGAATTCTCGGGTGCCAAGG
        - LEXOGEN Small RNA-Seq Library Prep Kit (Illumina): TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC
    
    [Header],,,,,
    IEMFileVersion,4,,,,
    InvestigatorName,ag96,,,,
    ExperimentName,ag96,,,,
    Date,16.10.2023,,,,
    Workflow,GenerateFASTQ,,,,
    Application,NextSeqFASTQOnly,,,,
    Assay,TruSeq HT,,,,
    Description,pcr,,,,
    Chemistry,Amplicon,,,,
    ,,,,,
    [Reads],,,,,
    82,,,,,
    ,,,,,
    ,,,,,
    [Settings],,,,,
    Adapter,TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC,,,,
    ,,,,,
    ,,,,,
    [Data],,,,,
    Sample_ID,Sample_Name,I7_Index_ID,index,Sample_Project,Description
    nf930,01_0505_WaGa_wt_EV_RNA,SRi7001,CAGCGT,2023_064_nf_ute,smallRNA-Seq
    nf931,02_0505_WaGa_sT_DMSO_EV_RNA,SRi7002,GATCAC,2023_064_nf_ute,smallRNA-Seq
    nf932,03_0505_WaGa_sT_Dox_EV_RNA,SRi7003,ACCAGT,2023_064_nf_ute,smallRNA-Seq
    nf933,04_0505_WaGa_scr_DMSO_EV_RNA,SRi7004,TGCACG,2023_064_nf_ute,smallRNA-Seq
    nf934,05_0505_WaGa_scr_Dox_EV_RNA,SRi7005,ACATTA,2023_064_nf_ute,smallRNA-Seq
    nf935,06_1905_WaGa_wt_EV_RNA,SRi7006,GTGTAG,2023_064_nf_ute,smallRNA-Seq
    nf936,07_1905_WaGa_sT_DMSO_EV_RNA,SRi7007,CTAGTC,2023_064_nf_ute,smallRNA-Seq
    nf937,08_1905_WaGa_sT_Dox_EV_RNA,SRi7008,TGTGCA,2023_064_nf_ute,smallRNA-Seq
    nf938,09_1905_WaGa_scr_DMSO_EV_RNA,SRi7009,TCAGGA,2023_064_nf_ute,smallRNA-Seq
    nf939,10_1905_WaGa_scr_Dox_EV_RNA,SRi7010,CGGTTA,2023_064_nf_ute,smallRNA-Seq
    nf940,11_control_MKL1,SRi7011,TTAACT,2023_064_nf_ute,smallRNA-Seq
    nf941,12_control_WaGa,SRi7012,ATGAAC,2023_064_nf_ute,smallRNA-Seq
    
  2. input data

    # Symlink each sample's R1 FASTQ from the sequencing run folder into the
    # current directory under a short name (<sample>.fastq.gz).
    run_dir=./231016_NB501882_0435_AHG7HMBGXV
    ln -s "${run_dir}/nf930/01_0505_WaGa_wt_EV_RNA_S1_R1_001.fastq.gz" 0505_WaGa_wt.fastq.gz
    ln -s "${run_dir}/nf931/02_0505_WaGa_sT_DMSO_EV_RNA_S2_R1_001.fastq.gz" 0505_WaGa_sT_DMSO.fastq.gz
    ln -s "${run_dir}/nf932/03_0505_WaGa_sT_Dox_EV_RNA_S3_R1_001.fastq.gz" 0505_WaGa_sT_Dox.fastq.gz
    ln -s "${run_dir}/nf933/04_0505_WaGa_scr_DMSO_EV_RNA_S4_R1_001.fastq.gz" 0505_WaGa_scr_DMSO.fastq.gz
    ln -s "${run_dir}/nf934/05_0505_WaGa_scr_Dox_EV_RNA_S5_R1_001.fastq.gz" 0505_WaGa_scr_Dox.fastq.gz
    ln -s "${run_dir}/nf935/06_1905_WaGa_wt_EV_RNA_S6_R1_001.fastq.gz" 1905_WaGa_wt.fastq.gz
    ln -s "${run_dir}/nf936/07_1905_WaGa_sT_DMSO_EV_RNA_S7_R1_001.fastq.gz" 1905_WaGa_sT_DMSO.fastq.gz
    ln -s "${run_dir}/nf937/08_1905_WaGa_sT_Dox_EV_RNA_S8_R1_001.fastq.gz" 1905_WaGa_sT_Dox.fastq.gz
    ln -s "${run_dir}/nf938/09_1905_WaGa_scr_DMSO_EV_RNA_S9_R1_001.fastq.gz" 1905_WaGa_scr_DMSO.fastq.gz
    ln -s "${run_dir}/nf939/10_1905_WaGa_scr_Dox_EV_RNA_S10_R1_001.fastq.gz" 1905_WaGa_scr_Dox.fastq.gz
    ln -s "${run_dir}/nf940/11_control_MKL1_S11_R1_001.fastq.gz" control_MKL1.fastq.gz
    ln -s "${run_dir}/nf941/12_control_WaGa_S12_R1_001.fastq.gz" control_WaGa.fastq.gz
    
  3. run cutadapt

    # Run cutadapt on every sample with the Lexogen adapter given via -a and a
    # quality cutoff of 20; reads are written to <sample>2.fastq.gz and
    # cutadapt's stdout is appended to the file LOG.
    for sample in \
        0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox \
        1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox \
        control_MKL1 control_WaGa; do
      cutadapt \
        -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC \
        -q 20 \
        -o "${sample}2.fastq.gz" \
        --minimum-length 5 \
        --trim-n \
        "${sample}.fastq.gz" >> LOG
    done
    #jhuang@hamburg:~/DATA/Data_Ute/Data_Ute_smallRNA_7$ fastp -i 0505_WaGa_wt.fastq.gz -o 0505_WaGa_wt3.fastq.gz -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC
    
  4. run COMPSRA

    # Reuse the COMPSRA reference bundle from an earlier project via a symlink.
    ln -s ../Data_Ute_smallRNA_3/bundle_v1 .

    # DEBUG_1: Make sure the file COMPSRA.jar is present in Data_Ute_smallRNA_7.
    # DEBUG_2: "-qc -ra TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -rb 4" does not work!
    #          Use "cutadapt -a <adapter> -q 20" (step 3) instead of those parameters.
    #
    # Run COMPSRA (STAR alignment + annotation of classes 1-6) on every
    # cutadapt-trimmed file (<sample>2.fastq.gz).
    for sample in 0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox 1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox  control_MKL1 control_WaGa; do
      # -p: ./our_out does not exist yet at this point, so a plain mkdir would
      # fail; -p creates the parent and also makes re-runs harmless.
      mkdir -p "our_out/${sample}2/"
      java -jar COMPSRA.jar -ref hg38 -rh 20 -rt 20 -rr 20 -rlh 8,17 -aln -mt star -ann -ac 1,2,3,4,5,6 -in "${sample}2.fastq.gz" -out ./our_out/
    done
    
    #4.2.3 -rb/-rm_bias n
    #To remove n random bases in both 5’ (5-prime) and 3’ (3-prime) ends after removing the adapter sequence.
    #4.2.4 -rh/-rm_low_quality_head score
    #To remove the low quality bases with the score less than score from 5’ (5-prime) end.
    #4.2.5 -rt/-rm_low_quality_tail score
    #To remove the low quality bases with the score less than score from 3’ (3-prime) end.
    #4.2.6 -rr/-rm_low_quality_read score
    #To remove the low quality reads with the average score less than score.
    #4.6.3 -fdclass/-fun_diff_class A1,A2,...,An
    #To set the small RNAs that will be performed the differential expression analysis. The format is the same as the parameter -ac/-ann_class A1,A2,...,An.
    #4.6.4 -fdcase/-fun_diff_case ID1,ID2,...,IDn
    #To set the IDs of case samples.
    #4.6.5 -fdctrl/-fun_diff_control ID1,ID2,...,IDn
    #To set the IDs of control samples.
    #4.4.2 -ac/-ann_class A1,A2,...,An
    #To set the small RNA categories that will be annotated. The index of small RNA is listed:
    #    1 miRNA
    #    2 piRNA
    #    3 tRNA
    #    4 snoRNA
    #    5 snRNA
    #    6 circRNA
    
    # Function module, project COMPSRA_MERGE (presumably merges the per-sample
    # results listed in ./sample.list — confirm against the COMPSRA manual).
    # -fdclass 1,2,3,4,5 selects miRNA, piRNA, tRNA, snoRNA and snRNA.
    # NOTE(review): ./sample.list is not created anywhere in these notes;
    # prepare it before running.
    java -jar COMPSRA.jar -ref hg38 -fun \
      -fm -fms 1-5 \
      -fdclass 1,2,3,4,5 -fdann \
      -pro COMPSRA_MERGE \
      -inf ./sample.list -out ./our_out/

    # Function module, project COMPSRA_DEG: differential expression with
    # -fdcase as case sample IDs and -fdctrl as control sample IDs.
    # NOTE(review): case 1-2 / ctrl 3-6 cover only 6 IDs while 12 samples were
    # processed — confirm the intended grouping.
    java -jar COMPSRA.jar -ref hg38 -fun \
      -fd -fdclass 1,2,3,4,5 \
      -fdcase 1-2 -fdctrl 3-6 \
      -fdnorm cpm -fdtest mwu -fdann \
      -pro COMPSRA_DEG \
      -inf ./sample.list -out ./our_out/
    
  5. The results without using cutadapt for comparison

    # Comparison run: COMPSRA on the ORIGINAL reads (no cutadapt trimming).
    # -p: our_out may already exist (e.g. on a re-run); a plain mkdir would fail.
    mkdir -p our_out
    for sample in 0505_WaGa_wt 0505_WaGa_sT_DMSO 0505_WaGa_sT_Dox 0505_WaGa_scr_DMSO 0505_WaGa_scr_Dox 1905_WaGa_wt 1905_WaGa_sT_DMSO 1905_WaGa_sT_Dox 1905_WaGa_scr_DMSO 1905_WaGa_scr_Dox  control_MKL1 control_WaGa; do
      mkdir -p "our_out/${sample}/"
      # Input is the untrimmed <sample>.fastq.gz, matching the our_out/<sample>/
      # directory created above; <sample>2.fastq.gz is the cutadapt output and
      # would just repeat the trimmed run.
      java -jar COMPSRA.jar -ref hg38 -rh 20 -rt 20 -rr 20 -rlh 8,17 -aln -mt star -ann -ac 1,2,3,4,5,6 -in "${sample}.fastq.gz" -out ./our_out/
    done
    
    {miRNA}:
    [miRBase]
    Total Annotation Items: 4697
    Annotated Items (covered by least one read): 587
    Unannotated Items: 4110
    Reads Support the Annotation: 791636
    
    {piRNA}:
    [piRNABank]
    Total Annotation Items: 665175
    Annotated Items (covered by least one read): 480
    Unannotated Items: 664695
    Reads Support the Annotation: 6363051
    [piRBase]
    Total Annotation Items: 804849
    Annotated Items (covered by least one read): 1220
    Unannotated Items: 803629
    Reads Support the Annotation: 41374788
    
    {tRNA}:
    [GtRNAdb]
    Total Annotation Items: 601
    Annotated Items (covered by least one read): 440
    Unannotated Items: 161
    Reads Support the Annotation: 18690795
    
    {snoRNA}:
    [GEN_snoRNA]
    Total Annotation Items: 1006
    Annotated Items (covered by least one read): 250
    Unannotated Items: 756
    Reads Support the Annotation: 416228
    
    {snRNA}:
    [GEN_snRNA]
    Total Annotation Items: 2053
    Annotated Items (covered by least one read): 267
    Unannotated Items: 1786
    Reads Support the Annotation: 793559
    
    {circRNA}:
    [circRNA]
    Total Annotation Items: 140195
    Annotated Items (covered by least one read): 51488
    Unannotated Items: 88707
    Reads Support the Annotation: 14238651
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum