gene_x 0 like s 149 view s
Tags: pipeline
Input files
# Collect the raw reads under raw_data/ behind short, uniform link names.
# Note that the link names must end with fastq.gz
mkdir raw_data
cd raw_data
# Each table row: <read-file prefix below ../VZV> <sample name used for the links>.
# Both mates (R1, R2) are linked for every row.
while read -r reads_prefix sample_name; do
  for mate in R1 R2; do
    ln -s "../VZV/${reads_prefix}_${mate}_001.fastq.gz" "${sample_name}_${mate}.fastq.gz"
  done
done <<'EOF'
241121_VH00358_117_AAGFF7FM5_Dongdong/wb180/01_VZV_20S_S1 VZV_20S
241121_VH00358_117_AAGFF7FM5_Dongdong/wb181/02_VZV_20c_S2 VZV_20c
241121_VH00358_117_AAGFF7FM5_Dongdong/wb182/03_VZV_60S_S3 VZV_60S
241121_VH00358_117_AAGFF7FM5_Dongdong/wb183/04_VZV_60c_S4 VZV_60c
241121_VH00358_117_AAGFF7FM5_Dongdong/wb184/05_VZV_1451S_S5 VZV_1451S
241121_VH00358_117_AAGFF7FM5_Dongdong/wb185/06_Pcc1_1451_S6 Pcc1_1451
2024_081_wb_dongdong/wb190/PCC1_VZV_20_1_S36 PCC1_VZV_20_1
2024_081_wb_dongdong/wb191/PCC1_VZV_20_2_S37 PCC1_VZV_20_2
2024_081_wb_dongdong/wb192/PCC1_VZV_20_5_S38 PCC1_VZV_20_5
2024_081_wb_dongdong/wb193/PCC1_VZV_60_1_S39 PCC1_VZV_60_1
2024_081_wb_dongdong/wb194/PCC1_VZV_60_4_S40 PCC1_VZV_60_4
2024_081_wb_dongdong/wb195/PCC1_VZV_60_6_S41 PCC1_VZV_60_6
EOF
Variant calling using snippy
# Set up the bacto workflow in the project directory: link its shared
# resource directories and copy the files that get edited per project.
for res_dir in db envs local; do
  ln -s ~/Tools/bacto/"${res_dir}"/ .
done
cp ~/Tools/bacto/Snakefile .
cp ~/Tools/bacto/bacto-0.1.json .
cp ~/Tools/bacto/cluster.json .
# Download the two reference GenBank records from GenBank first; the commands
# below expect them in ~/Downloads as sequence(1).gb and sequence(2).gb.
# The original note marks NC_001348 as "= X04370.1". That remark has to stay in
# a comment: written after the command, "=" and "X04370.1" become extra mv
# arguments, mv then expects X04370.1 to be a target directory and fails.
mv ~/Downloads/sequence\(1\).gb db/NC_001348.gb
mv ~/Downloads/sequence\(2\).gb db/AB097932.gb
#X04370
#setting the following in bacto-0.1.json
"fastqc": false,
"taxonomic_classifier": false,
"assembly": true,
"typing_ariba": false,
"typing_mlst": true,
"pangenome": true,
"variants_calling": true,
"phylogeny_fasttree": true,
"phylogeny_raxml": true,
"recombination": false, (due to gubbins-error set false)
"genus": "Varicella-zoster virus",
"kingdom": "Viruses",
"species": "Varicella-zoster virus"(in both prokka and mykrobe)
"reference": "db/NC_001348.gb"
conda activate bengal3_ac3
(bengal3_ac3) /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
Summarize all SNPs and Indels from the snippy result directory.
# Gather the SNPs and indels found in the snippy result directory into one table.
#Output: snippy/summary_snps_indels.csv
# IMPORTANT_ADAPT the array isolates = ["AYE-S", "AYE-Q", "AYE-WT on Tig4", "AYE-craA on Tig4", "AYE-craA-1 on Cm200", "AYE-craA-2 on Cm200"]
# (presumably this array lives in summarize_snippy_res.py and must list the current samples -- confirm)
python3 ~/Scripts/summarize_snippy_res.py snippy
cd snippy
# Keep only the rows that do not contain the placeholder pattern below.
grep -v -e "None,,,,,,None,None" < summary_snps_indels.csv > summary_snps_indels_.csv
Calling variants using spandx (almost the same results as those from viral-ngs!)
# Prepare a custom snpEff database for NC_001348 and run spandx against it.
mamba activate /home/jhuang/miniconda3/envs/spandx
# Place the GenBank annotation as genes.gbk in a data directory named after the genome.
mkdir ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/NC_001348
cp NC_001348.gb ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/NC_001348/genes.gbk
# Manual step: edit snpEff.config (presumably to add the NC_001348 genome entry -- confirm).
vim ~/miniconda3/envs/spandx/share/snpeff-5.1-2/snpEff.config
/home/jhuang/miniconda3/envs/spandx/bin/snpEff build NC_001348 #-d
# Convert the GenBank record to FASTA; this writes NC_001348.gb_converted.fna, which is renamed below.
~/Scripts/genbank2fasta.py NC_001348.gb
mv NC_001348.gb_converted.fna NC_001348.fasta #then manually rename the header "NC_001348.1 xxxxx" to "NC_001348" in the FASTA file
ln -s /home/jhuang/Tools/spandx/ spandx
# "(spandx)" below is the shell prompt showing the active environment, not part of the command.
# The quoted glob is passed to nextflow unexpanded; --database names the snpEff genome built above.
(spandx) nextflow run spandx/main.nf --fastq "trimmed/*_P_{1,2}.fastq" --ref NC_001348.fasta --annotation --database NC_001348 -resume
# Rerun SNP_matrix.sh due to the error ERROR_CHROMOSOME_NOT_FOUND in the variants annotation
cd Outputs/Master_vcf
(spandx) cp -r ../../snippy/VZV_20S/reference .
(spandx) cp ../../spandx/bin/SNP_matrix.sh ./
#Note that ${variant_genome_path}=NC_001348 in the following command, but it was not used after command replacement.
#Adapt "snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -v ${variant_genome_path} out.vcf > out.annotated.vcf" to
"/home/jhuang/miniconda3/envs/bengal3_ac3/bin/snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -c reference/snpeff.config -dataDir . ref out.vcf > out.annotated.vcf" in SNP_matrix.sh
(spandx) bash SNP_matrix.sh NC_001348 .
Calling inter-host variants by merging the results from snippy+spandx (Manually!)
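# Inter-host variants: the same virus carries different genetic variants in two different people; these variants may be related to the host's immune response, the disease presentation, or the way the virus is transmitted.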
cp All_SNPs_indels_annotated.txt All_SNPs_indels_annotated_backup.txt
vim All_SNPs_indels_annotated.txt
Calling intra-host variants using viral-ngs (http://xgenes.com/article/article-content/347/variant-calling-for-herpes-simplex-virus-1-from-patient-sample-using-capture-probe-sequencing/)
# Intra-host variants: a single person is infected with a virus, but several different viral variants may coexist in different cells or organs within that person.
# Create the viral-ngs working directory (viralngs/) and populate it with the
# pipeline entry points and the per-project configuration files.
mamba activate /home/jhuang/miniconda3/envs/viral-ngs4
mkdir viralngs
# Enter the new directory before populating it. The original notes skipped this,
# but the later steps write to viralngs/data/00_raw and run snakemake inside
# viralngs/, and the project root already holds the bacto Snakefile.
cd viralngs
ln -s ~/Tools/viral-ngs/Snakefile Snakefile
ln -s ~/Tools/viral-ngs/bin bin
# Copy (not link) the files that are edited per project.
for cfg_file in \
  refsel.acids \
  lastal.acids \
  config.yaml \
  samples-runs.txt \
  samples-depletion.txt \
  samples-metagenomics.txt \
  samples-assembly.txt \
  samples-assembly-failures.txt; do
  cp ~/Tools/viral-ngs/"${cfg_file}" "${cfg_file}"
done
# Input directory for the read-group-tagged BAM files (viralngs/data/00_raw).
mkdir -p data/00_raw
# Back to the project root, where trimmed/ and the reference FASTA are.
cd ..
# Map the trimmed read pairs of every sample to the reference with bwa mem and
# write read-group-tagged, coordinate-sorted BAMs into viralngs/data/00_raw.
mkdir -p bams
ref_fa="NC_001348.fasta"
samples=(VZV_20S VZV_20c VZV_60S VZV_60c PCC1_VZV_20_1 PCC1_VZV_20_2 PCC1_VZV_20_5 PCC1_VZV_60_1 PCC1_VZV_60_4 PCC1_VZV_60_6)

# The index depends only on the reference, so build it once instead of once per sample.
bwa index "${ref_fa}"

for sample in "${samples[@]}"; do
  bwa mem -M -t 16 "${ref_fa}" \
    "trimmed/${sample}_trimmed_P_1.fastq" "trimmed/${sample}_trimmed_P_2.fastq" \
    | samtools view -bS - > "bams/${sample}_genome_alignment.bam"
done

# The sample name is reused for the read-group ID, sample and platform-unit fields.
for sample in "${samples[@]}"; do
  picard AddOrReplaceReadGroups \
    I="bams/${sample}_genome_alignment.bam" \
    O="viralngs/data/00_raw/${sample}.bam" \
    SORT_ORDER=coordinate CREATE_INDEX=true \
    RGPL=illumina RGID="${sample}" RGSM="${sample}" RGLB=standard RGPU="${sample}" \
    VALIDATION_STRINGENCY=LENIENT
done
cd viralngs
(viral-ngs4) snakemake --printshellcmds --cores 80
# -- DEBUG: If the env disappeared, reinstall the env viral-ngs4 --
# -- Running time hints --
#Note that novoalign is not installed. The used Novoalign path: /home/jhuang/Tools/novocraft_v3/novoalign; the used gatk: /usr/local/bin/gatk using /home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar.
#Samtools path: #Why, the samtools in the env is v1.6?
#Novoalign path: /home/jhuang/Tools/novocraft_v3/novoalign
#GATK path: /usr/local/bin/gatk # jar_file in the file: jar_file = '/home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar'
# -- in config.yaml --
#GATK_PATH: "/home/jhuang/Tools/GenomeAnalysisTK-3.6"
#NOVOALIGN_PATH: "/home/jhuang/Tools/novocraft_v3"
# Recreate the viral-ngs4 environment from scratch (Python 3.6) and install the pipeline's tools.
mamba create -n viral-ngs4 python=3.6
mamba activate viral-ngs4
mamba install blast=2.6.0 bmtagger biopython pysam pyyaml picard mvicuna pybedtools fastqc matplotlib spades last=876 -c conda-forge -c bioconda
# mafft is installed unpinned below: the pinned mafft=7.221 was reported as not installable
# because it conflicts with the installable versions previously reported.
mamba install cd-hit cd-hit-auxtools diamond gap2seq=2.1 mafft mummer4 muscle=3.8 parallel pigz prinseq samtools=1.6 tbl2asn trimmomatic trinity unzip vphaser2 bedtools -c r -c defaults -c conda-forge -c bioconda
mamba install bwa
mamba install vphaser2=2.0
# Solve the conflict between bowtie, bowtie2 and snpeff
mamba remove bowtie
mamba install bowtie2
mamba remove snpeff
mamba install snpeff=4.1l
# Optional check of which snpEff is picked up:
#which snpEff
mamba install gatk=3.6
#DEBUG if FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/bin/gatk': '/usr/local/bin/gatk'
#IMPORTANT_UPDATE jar_file in the file /home/jhuang/mambaforge/envs/viral-ngs4/bin/gatk3 with "/home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar"
#IMPORTANT_REPLACE "sudo cp /home/jhuang/mambaforge/envs/viral-ngs4/bin/gatk3 /usr/local/bin/gatk"
#IMPORTANT_SET /home/jhuang/Tools/GenomeAnalysisTK-3.6 as GATK_PATH in config.yaml
#IMPORTANT_CHECK if it works
# java -jar /home/jhuang/Tools/GenomeAnalysisTK-3.6/GenomeAnalysisTK.jar -T RealignerTargetCreator --help
# /usr/local/bin/gatk -T RealignerTargetCreator --help
#IMPORTANT_NOTE: the env viral-ngs4 cannot be activated from within the base env due to the python3 conflict!
Merge intra- and inter-host variants, comparing the variants to the alignments of the assemblies to confirm their correctness.
# Pass 1: align the reference with the final assemblies (viralngs/data/02_assembly) of
# VZV_20S and VZV_60S and list the positions where the sequences differ.
cat NC_001348.fasta viralngs/data/02_assembly/VZV_20S.fasta viralngs/data/02_assembly/VZV_60S.fasta > aligned_1.fasta
mafft --clustalout aligned_1.fasta > aligned_1.aln
# Alternative conversion, not used:
#~/Scripts/convert_fasta_to_clustal.py aligned_1.fasta_orig aligned_1.aln
~/Scripts/convert_clustal_to_clustal.py aligned_1.aln aligned_1_.aln
#manually delete the positions with all or '-' in aligned_1_.aln
# NOTE(review): the wording of the note above is ambiguous -- presumably it means the all-gap ('-') columns; confirm.
# The first call prints the differences to the terminal; the second saves them to aligned_1.res.
~/Scripts/check_sequence_differences.py aligned_1_.aln
~/Scripts/check_sequence_differences.py aligned_1_.aln > aligned_1.res
# Keep only the lines that do not contain " = n".
grep -v " = n" aligned_1.res > aligned_1_.res
# Pass 2: same comparison with the assembly4-refined intermediates from viralngs/tmp/02_assembly.
# This overwrites the aligned_1.* files written by pass 1.
cat NC_001348.fasta viralngs/tmp/02_assembly/VZV_20S.assembly4-refined.fasta viralngs/tmp/02_assembly/VZV_60S.assembly4-refined.fasta > aligned_1.fasta
mafft --clustalout aligned_1.fasta > aligned_1.aln
~/Scripts/convert_clustal_to_clustal.py aligned_1.aln aligned_1_.aln
~/Scripts/check_sequence_differences.py aligned_1_.aln > aligned_1.res
grep -v " = n" aligned_1.res > aligned_1_.res
#Differences found at the following positions (150):
Position 8956: OP297860.1 = A, HSV1_S1-1 = A, HSV-Klinik_S2-1 = G
Position 8991: OP297860.1 = A, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C
Position 8992: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = C
Position 8995: OP297860.1 = T, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C
Position 9190: OP297860.1 = T, HSV1_S1-1 = A, HSV-Klinik_S2-1 = T
* Position 13659: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G
* Position 47969: OP297860.1 = C, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C
* Position 53691: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G
* Position 55501: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = C
* Position 63248: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G
Position 63799: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = T
* Position 64328: OP297860.1 = C, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C
Position 65179: OP297860.1 = T, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C
* Position 65225: OP297860.1 = G, HSV1_S1-1 = G, HSV-Klinik_S2-1 = A
* Position 95302: OP297860.1 = C, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C
# Unpack the annotated iSNV table, filter it, and show its first seven columns.
gzip -d isnvs.annot.txt.gz
# 0.05 is presumably the minimum iSNV frequency to keep -- confirm in filter_isnv.py.
~/Scripts/filter_isnv.py isnvs.annot.txt 0.05
# TAB is cut's default field delimiter, so no -d option is needed.
cut -f 1-7 filtered_isnvs.annot.txt
chr pos sample patient time alleles iSNV_freq
OP297860 13203 HSV1_S1 HSV1_S1 T,C,A 1.0
OP297860 13203 HSV-Klinik_S2 HSV-Klinik_S2 T,C,A 1.0
OP297860 13522 HSV1_S1 HSV1_S1 G,T 1.0
OP297860 13522 HSV-Klinik_S2 HSV-Klinik_S2 G,T 0.008905554253573941
OP297860 13659 HSV1_S1 HSV1_S1 G,T 1.0
OP297860 13659 HSV-Klinik_S2 HSV-Klinik_S2 G,T 0.008383233532934131
# Convert the edited alignment back to FASTA and index it.
~/Scripts/convert_clustal_to_fasta.py aligned_1_.aln aligned_1.fasta
samtools faidx aligned_1.fasta
# Write each aligned sequence to its own <id>.fasta file.
for seq_id in OP297860.1 HSV1_S1-1 HSV-Klinik_S2-1; do
  samtools faidx aligned_1.fasta "${seq_id}" > "${seq_id}.fasta"
done
# Re-wrap the reference at 70 columns and compare it with the refsel database sequence.
seqkit seq OP297860.1.fasta -w 70 > OP297860.1_w70.fasta
diff OP297860.1_w70.fasta ../../refsel_db/refsel.fasta
Consensus sequences of each and of all isolates
# Collect the consensus sequence of every isolate into all.fa, then align them
# together with the deduplicated reference set and extract the variable sites.
cp data/02_assembly/*.fasta ./
# Exactly one sample list may be active. The original notes had both "for" headers
# active with a single "done", which is a syntax error. The first list is kept
# here only as a commented-out alternative.
#for sample in 838_S1 840_S2 820_S3 828_S4 815_S5 834_S6 808_S7 811_S8 837_S9 768_S10 773_S11 767_S12 810_S13 814_S14 10121-16_S15 7510-15_S16 828-17_S17 8806-15_S18 9881-16_S19 8981-14_S20; do
for sample in p953-84660-tsek p938-16972-nra p942-88507-nra p943-98523-nra p944-103323-nra p947-105565-nra p948-112830-nra; do
  mv "${sample}.fasta" "${sample}.fa"
  # Append only the new sequence. Listing all.fa as an input while also appending
  # to it makes cat read the file it is writing.
  cat "${sample}.fa" >> all.fa
done
# all.fa is appended to, so remove it first if this block has to be re-run.
cat RSV_dedup.fa all.fa > RSV_all.fa
mafft --adjustdirection RSV_all.fa > RSV_all.aln
snp-sites RSV_all.aln -o RSV_all_.aln
Download all Human alphaherpesvirus 3 (Varicella-zoster virus) genomes
Human alphaherpesvirus 3
acronym: HHV-3 VZV
equivalent: Human herpes virus 3
Human alphaherpesvirus 3 (Varicella-zoster virus)
* Human herpesvirus 3 strain Dumas
* Human herpesvirus 3 strain Oka vaccine
* Human herpesvirus 3 VZV-32
#Taxonomy ID: 10335
esearch -db nucleotide -query "txid10335[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10335_ncbi.fasta
python ~/Scripts/filter_fasta.py genome_10335_ncbi.fasta complete_genome_10335_ncbi.fasta #2041-->165
# ---- Download related genomes from ENA ----
https://www.ebi.ac.uk/ena/browser/view/10335
#Click "Sequence" and download "Counts" (2003) and, if there is enough time, "Taxon descendants count" (2005). Download date: 11.03.2025.
python ~/Scripts/filter_fasta.py ena_10335_sequence.fasta complete_genome_10335_ena_taxon_descendants_count.fasta #2005-->153
#python ~/Scripts/filter_fasta.py ena_10335_sequence_Counts.fasta complete_genome_10335_ena_Counts.fasta #xxx, 5.8G
Run vrap
#replace --virus to the specific taxonomy (e.g. Acinetobacter baumannii) --> change virus_user_db --> specific_bacteria_user_db
# Run vrap on every sample with the same host and virus databases and the same settings.
ln -s ~/Tools/vrap/ .
mamba activate /home/jhuang/miniconda3/envs/vrap
#!!!!! TODO: ignore the first parts! only take the virus genome in the vector-part!
virus_db=/home/jhuang/DATA/Data_Huang_Human_herpesvirus_3/complete_genome_10335_ncbi.fasta
# VZV_20S is not in the list because its call was commented out in the original notes;
# add it to the list to process it with the same options.
# The original VZV_60S call carried only -1/-2/-o. It now gets the same database and
# thread options as all other samples, so the results are comparable.
for sample in VZV_20c VZV_60c VZV_60S VZV_1451S Pcc1_1451 \
  PCC1_VZV_20_1 PCC1_VZV_20_2 PCC1_VZV_20_5 \
  PCC1_VZV_60_1 PCC1_VZV_60_4 PCC1_VZV_60_6; do
  vrap/vrap.py \
    -1 "trimmed/${sample}_trimmed_P_1.fastq" \
    -2 "trimmed/${sample}_trimmed_P_2.fastq" \
    -o "vrap_${sample}" \
    --bt2idx=/home/jhuang/REFs/genome \
    --host=/home/jhuang/REFs/genome.fa \
    --virus="${virus_db}" \
    --nt=/mnt/nvme1n1p1/blast/nt \
    --nr=/mnt/nvme1n1p1/blast/nr \
    -t 100 -l 200 -g
done
http://xgenes.com/article/article-content/365/virus-genome-analysis-pipeline-hybrid-capture-damian-blastn-and-vrap-mapping-for-measles-ma-zhen-sample/ Draw the mapping figures on the reference, consensus reference!
Use the bowtie step of vrap to map the reads onto ref_genome/reference.fasta (the reference is the most closely related genome in the list generated by vrap)
(vrap) vrap/vrap.py -1 trimmed/VZV_20S_trimmed_P_1.fastq -2 trimmed/VZV_20S_trimmed_P_2.fastq -o VZV_20S_on_X04370 --host /home/jhuang/DATA/Data_Huang_Human_herpesvirus_3/X04370.fasta -t 100 -l 200 -g
# Turn vrap's bowtie output ("mapped", a SAM file without extension) into a
# sorted, indexed BAM and print its header and mapping statistics.
cd bowtie
aln=mapped
mv "${aln}" "${aln}.sam"
samtools view -S -b "${aln}.sam" > "${aln}.bam"
samtools sort "${aln}.bam" -o "${aln}_sorted.bam"
samtools index "${aln}_sorted.bam"
# Header first, then the flag statistics.
samtools view -H "${aln}_sorted.bam"
samtools flagstat "${aln}_sorted.bam"
Show the bw on IGV
点赞本文的读者
还没有人对此文章表态
没有评论
Comprehensive smallRNA-7 profiling using exceRpt pipeline with full reference databases (v2)
Mapping of reads to selected viruses in DAMIAN results (version 2)
Mapping of reads to selected viruses in DAMIAN results
Run the viral-ngs Snakemake pipelines inside a Docker environment
© 2023 XGenes.com Impressum