gene_x 0 like s 210 view s
Tags: software
https://sourceforge.net/projects/damian-pd/
1, install PostgreSQL and the required Ruby gems on Ubuntu 18.04
sudo apt-get update
sudo apt install ruby-dev libffi-dev build-essential
sudo apt-get install postgresql postgresql-contrib
sudo apt-get install libpq-dev
sudo apt install default-jre
sudo apt install hmmer
#sudo apt-get install pgadmin3
sudo gem install pg -v 0.19
sudo gem install axlsx
sudo gem install amatch
#interactive: sudo -u postgres createuser --interactive
#not_interactive: https://medium.com/coding-blocks/creating-user-database-and-adding-access-on-postgresql-8bfcd2f4a91e
#sudo -u postgres psql
#postgres=# create database mydb;
#postgres=# create user myuser with encrypted password 'mypass';
#postgres=# grant all privileges on database mydb to myuser;
sudo -u postgres psql
CREATE USER damian_user WITH PASSWORD 'hamburg_uke';
CREATE DATABASE damian_db WITH OWNER damian_user;
postgres=# \q
2, install blast, tax and pfam
cd databases;
./get_all.sh;
cd tax; ./get_tax.sh;
cd ../pfam; ./get_pfam.sh;
#Taxonomy
#The following taxonomy files are required:
#ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz (the downloaded file must be decompressed and then unpacked using tar.)
#http://s3.amazonaws.com/matrixsciencemisc/prot.av2taxid.gz
#
#curl ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz -O
#curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/Pfam-A.hmm.gz" | gunzip > Pfam-A.hmm.txt
#curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/database_files/pfamA_tax_depth.txt.gz" | gunzip > pfamA_tax_depth.txt
#change settings in config.rb
DB_NAME = 'damian_db'
DB_USER = 'damian_user'
DB_PASS = 'hamburg_uke'
./damian_database.rb --erase_and_rebuild --names databases/tax/names.dmp --nodes databases/tax/nodes.dmp --hmm databases/pfam/Pfam-A.hmm.txt --taxdepth databases/pfam/pfamA_tax_depth.txt
#### download and update the blast-database ####
cd /mnt/nvme0n1p1/REFs/blast/
#wget ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nt.gz
#wget ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz
#perl update_blastdb.pl --decompress nt
#perl update_blastdb.pl --decompress nr
##https://www.ncbi.nlm.nih.gov/books/NBK569850/
#update_blastdb.pl --decompress nt
#update_blastdb.pl --decompress nr
#curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz -O
#curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nt.gz -O
##makeblastdb -in exons_for_blastall.fasta -input_type fasta -dbtype nucl -title exons_for_blastall -parse_seqids -out exons_for_blastall
#makeblastdb -in nt -out nt -parse_seqids -dbtype nucl
#makeblastdb -in nr -out nr -parse_seqids -dbtype prot
##curl ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz -O
##curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/Pfam-A.hmm.gz" | gunzip > Pfam-A.hmm.txt
##curl "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam32.0/database_files/pfamA_tax_depth.txt.gz" | gunzip > pfamA_tax_depth.txt
##or:
##NO_THIS_SCRIPT: ./get_blast.sh
#Standard databases (nr etc.): rRNA/ITS databases Genomic + transcript databases Betacoronavirus
#curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/Betacoronavirus.tar.gz -O
#The contents are the same between https://ftp.ncbi.nlm.nih.gov/blast/db/ and https://ftp.ncbi.nlm.nih.gov/blast/db/v5/ since v5 is the default!
#https://ftp.ncbi.nlm.nih.gov/blast/db/nt-nucl-metadata.json #158
for no in 000 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 020 021 022 023 024 025 026 027 028 029 030 031 032 033 034 035 036 037 038 039 040 041 042 043 044 045 046 047 048 049 050 051 052 053 054 055 056 057 058 059 060 061 062 063 064 065 066 067 068 069 070 071 072 073 074 075 076 077 078 079 080 081 082 083 084 085 086 087 088 089 090 091 092 093 094 095 096 097 098 099 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157; do
curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/nt.${no}.tar.gz -O
done
#https://ftp.ncbi.nlm.nih.gov/blast/db/nr-prot-metadata.json #103
for no in 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102; do
curl ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.${no}.tar.gz -O
done
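#tar reads only one archive per invocation, so extract each volume separately
for f in *.tar.gz; do tar xzf "$f"; done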
##Are these RNA data (transcriptome)? By default the data are treated as RNA data.
#damian_database.rb --erase_and_rebuild --names blast_2020_install/taxdump/names.dmp --nodes blast_2020_install/taxdump/nodes.dmp --hmm pfam/Pfam-A.hmm.txt --taxdepth pfam/pfamA_tax_depth.txt #pfam annotation cannot be updated!!
3, create .ncbirc and set up PATH
#in the file /home/jhuang/.ncbirc
BLASTDB=/mnt/nvme0n1p1/REFs/blast/
#echo "[BLAST]" > /home/jhuang/.ncbirc
#echo "BLASTDB=/media/jhuang/Elements1/BLAST_db_v5/nt_v5/" >> /home/jhuang/.ncbirc
#mv damian_release damian
# add damian into PATH
DAMIAN_LOCATION='/home/jhuang/Tools/damian'
export PATH=$PATH:$DAMIAN_LOCATION
4, generate bowtie2 index and set damian_reference
##human
##Using existing index /ref/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.
#
##Horse (equCab2)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/equCab2/bigZips/chromFa.tar.gz ./
##Cattle NCBI Genome ID: 82 (Bos taurus)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/bosTau8/bigZips/bosTau8.fa.gz ./
##ftp://ftp.ensembl.org/pub/release-95/fasta/bos_taurus/dna/
#
##Sheep NCBI Genome ID: 83 (Ovis aries)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/oviAri4/bigZips/oviAri4.fa.gz ./
#
##Wild boar NCBI Genome ID: 84 (Sus scrofa)
#rsync -a -P rsync://hgdownload.soe.ucsc.edu/goldenPath/susScr11/bigZips/susScr11.fa.gz ./
#
##Salmo salar (Atlantic salmon)
#https://www.ncbi.nlm.nih.gov/genome/369?genome_assembly_id=248466
#https://www.ncbi.nlm.nih.gov/genome/?term=salmo%20salar
#https://www.ncbi.nlm.nih.gov/assembly/?term=salmon+salar
#rsync -avz /ref/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa jhuang@10.162.6.119:/home/jhuang/DATA/
#rsync -a -P salmon_salar_assemblies.tar jhuang@10.162.6.119:/home/jhuang/REFs
#
##Mosquitoes/culex pipiens
#https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lvl=0&id=233155
#https://www.biorxiv.org/content/10.1101/240747v1.full
#aedes mascarensis
#Aedes albopictus
#
#Taxonomy ID: 7176 (Culex quinquefasciatus (southern house mosquito))
#https://www.ncbi.nlm.nih.gov/nuccore/?term=C.+pipiens
#https://www.ncbi.nlm.nih.gov/assembly?LinkName=bioproject_assembly_all&from_uid=18751
#https://www.ncbi.nlm.nih.gov/genome/?term=txid7176[orgn]
#https://www.ncbi.nlm.nih.gov/assembly/GCF_000208785.1/
#https://www.ncbi.nlm.nih.gov/genome/?term=txid263438[orgn]
#
#Taxonomy ID: 7175 (C. pipiens) --> no genome
#https://www.ncbi.nlm.nih.gov/genome/?term=txid7175[orgn]
#https://www.ncbi.nlm.nih.gov/assembly/GCF_000209185.1
#rsync -a -P GCF_000209185_1_CulPip1_0_genomic.fna.gz jhuang@10.162.6.119:/home/jhuang/REFs
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries.Oar_v3.1.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries.Oar_v3.1.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries.Oar_v3.1.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Sus_scrofa.Sscrofa11.1.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Sus_scrofa.Sscrofa11.1.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Sus_scrofa.Sscrofa11.1.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Bos_taurus.ARS-UCD1.2.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Bos_taurus.ARS-UCD1.2.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Bos_taurus.ARS-UCD1.2.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Equus_caballus.EquCab3.0.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Equus_caballus.EquCab3.0.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Equus_caballus.EquCab3.0.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Salmo_salar.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/GCF_000209185_1_CulPip1_0_genomic.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Macaca_mulatta.Mmul_8.0.1.dna.toplevel.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Macaca_mulatta.Mmul_8.0.1.cdna.all.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Macaca_mulatta.Mmul_8.0.1.ncrna.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Ovis_aries_musimon.fa .
rsync -a -P jhuang@10.162.6.119:/home/jhuang/REFs/Cervus_elaphus_hippelaphus.fa .
#damian_reference.rb --add --host hg38 --type both --fasta /mnt/h/jhuang/ref/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa --primary --description 'Homo_sapiens_UCSC_hg38 (dna)'
#damian_reference.rb --add --host wildboar --type both --fasta /home/jhuang/REFs/susScr11.fa --primary --description 'Wild boar NCBI Genome ID: 84 (Sus scrofa) (dna)'
#damian_reference.rb --add --host horse --type both --fasta /home/jhuang/REFs/equCab2.fa --primary --description 'Horse equCab2 (dna)'
#damian_reference.rb --add --host salmon --type both --fasta /home/jhuang/REFs/salmon_salar.fa --primary --description 'Salmon salar RefSeq assembly accession: GCF_000233375.1 (dna)'
##damian_reference.rb --add --host sheep --type both --fasta /home/jhuang/REFs/oviAri4.fa --primary --description 'Sheep NCBI Genome ID: 83 (Ovis aries) (dna)'
##damian_reference.rb --add --host cattle --type both --fasta /home/jhuang/REFs/bosTau8.fa --primary --description 'Cattle NCBI Genome ID: 82 (Bos taurus) (dna)'
##damian_reference.rb --add --host mosquito --type both --fasta /home/jhuang/REFs/GCF_000209185_1_CulPip1_0_genomic.fa --primary --description 'Culex pipiens quinquefasciatus (dna)'
# -- create host indexes from Ensembl files --
ftp://ftp.ensembl.org/pub/release-95/fasta/ovis_aries/dna/
#human and human3
damian_reference.rb --add --host human --type both --fasta ./Homo_sapiens.GRCh38.dna.toplevel.fa --primary --description 'Homo sapiens (dna)'
damian_reference.rb --add --host human --type rna --fasta ./Homo_sapiens.GRCh38.cdna.all.fa --description 'Homo sapiens (cdna)'
damian_reference.rb --add --host human --type rna --fasta ./Homo_sapiens.GRCh38.ncrna.fa --description 'Homo sapiens (ncrna)'
#human3 (for some FASTQ files the 'human' reference removes too many reads, i.e. it filters too strictly; therefore we generate human3 for looser filtering of human reads)
damian_reference.rb --add --host human3 --type both --fasta ./genome.fa --primary --description 'Homo_sapiens_UCSC_hg38 (dna)'
damian_reference.rb --add --host human3 --type rna --fasta ./Homo_sapiens.GRCh38.cdna.all.fa --description 'Homo sapiens (cdna)'
damian_reference.rb --add --host human3 --type rna --fasta ./Homo_sapiens.GRCh38.ncrna.fa --description 'Homo sapiens (ncrna)'
#sheep
damian_reference.rb --add --host sheep --type both --fasta Ovis_aries.Oar_v3.1.dna.toplevel.fa --primary --description 'Ovis aries (dna)'
damian_reference.rb --add --host sheep --type rna --fasta Ovis_aries.Oar_v3.1.cdna.all.fa --description 'Ovis aries (cdna)'
damian_reference.rb --add --host sheep --type rna --fasta Ovis_aries.Oar_v3.1.ncrna.fa --description 'Ovis aries (ncrna)'
#pig
damian_reference.rb --add --host pig --type both --fasta Sus_scrofa.Sscrofa11.1.dna.toplevel.fa --primary --description 'Sus scrofa (dna)'
damian_reference.rb --add --host pig --type rna --fasta Sus_scrofa.Sscrofa11.1.cdna.all.fa --description 'Sus scrofa (cdna)'
damian_reference.rb --add --host pig --type rna --fasta Sus_scrofa.Sscrofa11.1.ncrna.fa --description 'Sus scrofa (ncrna)'
#cow
damian_reference.rb --add --host cow --type both --fasta Bos_taurus.ARS-UCD1.2.dna.toplevel.fa --primary --description 'Bos taurus (dna)'
damian_reference.rb --add --host cow --type rna --fasta Bos_taurus.ARS-UCD1.2.cdna.all.fa --description 'Bos taurus (cdna)'
damian_reference.rb --add --host cow --type rna --fasta Bos_taurus.ARS-UCD1.2.ncrna.fa --description 'Bos taurus (ncrna)'
#horse
damian_reference.rb --add --host horse --type both --fasta ./Equus_caballus.EquCab3.0.dna.toplevel.fa --primary --description 'Equus caballus (dna)'
damian_reference.rb --add --host horse --type rna --fasta ./Equus_caballus.EquCab3.0.cdna.all.fa --description 'Equus caballus (cdna)'
damian_reference.rb --add --host horse --type rna --fasta ./Equus_caballus.EquCab3.0.ncrna.fa --description 'Equus caballus (ncrna)'
#salmo
damian_reference.rb --add --host Salmo_salar --type both --fasta Salmo_salar.fa --primary --description 'Salmo salar (dna)'
#mosquito
damian_reference.rb --add --host Culex_pipiens --type both --fasta GCF_000209185_1_CulPip1_0_genomic.fa --primary --description 'Culex pipiens (dna)'
#macaque
damian_reference.rb --add --host macaque --type both --fasta ./Macaca_mulatta.Mmul_8.0.1.dna.toplevel.fa --primary --description 'Macaca mulatta (dna)'
damian_reference.rb --add --host macaque --type rna --fasta ./Macaca_mulatta.Mmul_8.0.1.cdna.all.fa --description 'Macaca mulatta (cdna)'
damian_reference.rb --add --host macaque --type rna --fasta ./Macaca_mulatta.Mmul_8.0.1.ncrna.fa --description 'Macaca mulatta (ncrna)'
#mouflon
damian_reference.rb --add --host mouflon --type both --fasta ./Ovis_aries_musimon.fa --primary --description 'Ovis aries musimon (dna)'
#reddeer
damian_reference.rb --add --host reddeer --type both --fasta ./Cervus_elaphus_hippelaphus.fa --primary --description 'Cervus elaphus hippelaphus (dna)'
##polar bear
#damian_reference.rb --add --host polarbear --type both --fasta ./Ursus_maritimus.UrsMar_1.0.dna.toplevel.fa --primary --description 'Ursus_maritimus (dna)'
##The gray mouse lemur (Microcebus murinus) is a primate species of the genus of mouse lemurs (Microcebus) within the group of lemurs.
#damian_reference.rb --add --host lemur --type both --fasta ./Mmur3.0.fa --primary --description 'Microcebus murinus (dna)'
5, install and configure mutt
sudo apt install mutt
#in ~/.muttrc
set imap_user = 'xxx@yyy.com'
set imap_pass = 'xxxx'
set from= $imap_user
set use_from=yes
set realname='XXX YYY'
set folder = imaps://imap-mail.outlook.com:993
set spoolfile = "+INBOX"
set postponed="+[hotmail]/Drafts"
set mail_check = 100
set header_cache = "~/.mutt/cache/headers"
set message_cachedir = "~/.mutt/cache/bodies"
set certificate_file = "~/.mutt/certificates"
set smtp_url = "smtp://$imap_user@smtp-mail.outlook.com:587"
set smtp_pass = $imap_pass
set move = no
set imap_keepalive = 900
set record="+Sent"
Test: echo -e "Hi XXX,\n\nPlease find attached the latest results from our DAMIAN analysis.\n\nBest,\nYYY" | mutt -s "New results from DAMIAN" -- "xxx@googlemail.com"
6, intermediate commands
--1-- hmmsearch --domE 0.00001 -o /dev/null --domtblout /home/jhuang/rtpd_files/HD04_cons/idba_ud_assembly/domain.table --noali --cpu 10 /home/jhuang/Tools/damian/databases/pfam/Pfam-A.hmm.txt /home/jhuang/rtpd_files/HD04_cons/idba_ud_assembly/orfs.fasta
--2-- megablast
--3-- blastn or blastp
/home/jhuang/Tools/damian/3rd_party/ncbi-blast/bin/blastp -task blastp -evalue 10E-2 -num_threads 26 -query /tmp/rtpd__565_20190514-28525-1cqkejq -db nr -outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids qcovs qcovhsp
/home/jhuang/Tools/damian/3rd_party/ncbi-blast/bin/blastp -task blastp -evalue 10E-2 -num_threads 10 -query /tmp/rtpd__584_20190515-11072-i8ct4h -db nr -outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids qcovs qcovhsp
/home/jhuang/Tools/damian/3rd_party/ncbi-blast/bin/blastn -task blastn -evalue 10E-2 -num_threads 10 -query /tmp/rtpd__586_20190515-6605-1wfobqe -db nt -outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore staxids qcovs qcovhsp
7, Verifying the installation
#damian.rb --left selftest/r1.fastq.gz --right selftest/r2.fastq.gz --sample testrun --threads 12
seqtk sample -s100 ./240621_M03701_0312_000000000-GHL9N/p20534/7448_7501_S0_R1_001.fastq.gz 0.1 > R1_0.1.fastq
seqtk sample -s100 ./240621_M03701_0312_000000000-GHL9N/p20534/7448_7501_S0_R2_001.fastq.gz 0.1 > R2_0.1.fastq
cd /mnt/nvme0n1p1/REFs/blast
damian.rb --host human3 --type rna -1 R1_0.1.fastq -2 R2_0.1.fastq --sample p20534_7448_7501_S0_megablast --blastn never --blastp never --min_contiglength 500 --threads 64 --force
damian_report.rb
zip -r p20534_7448_7501_S0_megablast.zip p20534_7448_7501_S0_megablast/
echo -e "Hi XXX,\n\nPlease find attached the latest results from our DAMIAN analysis.\n\nBest,\nYYY" | mutt -a "./p20534_7448_7501_S0_megablast.zip" -s "New results from DAMIAN" -- "xxx@googlemail.com"
damian.rb --host human3 --type rna -1 R1_0.1.fastq -2 R2_0.1.fastq --sample p20534_7448_7501_S0_blastn --blastn progressive --blastp never --min_contiglength 500 --threads 64 --force
damian_report.rb
zip -r p20534_7448_7501_S0_blastn.zip p20534_7448_7501_S0_blastn/
echo -e "Hi XXX,\n\nPlease find attached the latest results from our DAMIAN analysis.\n\nBest,\nYYY" | mutt -a "./p20534_7448_7501_S0_blastn.zip" -s "New results from DAMIAN" -- "xxx@googlemail.com"
damian.rb --host human3 --type rna -1 R1_0.1.fastq -2 R2_0.1.fastq --sample p20534_7448_7501_S0_blastp --blastn never --blastp progressive --min_contiglength 500 --threads 64 --force
damian_report.rb
zip -r p20534_7448_7501_S0_blastp.zip p20534_7448_7501_S0_blastp/
echo -e "Hi XXX,\n\nPlease find attached the latest results from our DAMIAN analysis.\n\nBest,\nYYY" | mutt -a "./p20534_7448_7501_S0_blastp.zip" -s "New results from DAMIAN" -- "xxx@googlemail.com"
点赞本文的读者
还没有人对此文章表态
没有评论
© 2023 XGenes.com Impressum