gene_x 0 like s 580 view s
Tags:
nextflow ChIP-seq run for NHDF_p783
# Run the symlink commands below from inside the Raw_Data directory used for ChIP-seq.
# Each link gives one sequencer FASTQ a short sample name (input/ChIP for Donor I/II).
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf859/3_NHDF_Donor_1_p783_input_S5_R1_001.fastq.gz p783_input_DonorI.fastq.gz
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf860/4_NHDF_Donor_2_p783_input_S6_R1_001.fastq.gz p783_input_DonorII.fastq.gz
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf861/5_NHDF_Donor_1_p783_ChIP_S7_R1_001.fastq.gz p783_ChIP_DonorI.fastq.gz
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf862/6_NHDF_Donor_2_p783_ChIP_S8_R1_001.fastq.gz p783_ChIP_DonorII.fastq.gz
# Reference paths for the 'hg38' genome key (presumably an excerpt of the
# pipeline's genome configuration; kept here for reference only):
#'hg38' { bwa = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/"
# blacklist = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/blacklists/hg38-blacklist.bed"
# gtf = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
# }
# Make the pipeline reachable as ./NGI-ChIPseq from the working directory.
ln -s /home/jhuang/Tools/NGI-ChIPseq/ .
# The notes originally carried the conda prompt prefix "(chipseq)" in front of
# the command; bash parses that as a subshell followed by a syntax error, so
# the environment is activated explicitly instead.
# NOTE(review): environment name taken from the prompt prefix - confirm.
conda activate chipseq
nextflow run NGI-ChIPseq/main.nf \
  --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data/*.fastq.gz' \
  --genome hg38 \
  --macsconfig macs.config \
  --singleEnd \
  --blacklist_filtering \
  -profile standard \
  --project Denise_LT_DNA_Bindung \
  --outdir results_LT_DNA_Bindung_hg38 \
  -resume
# Side note: nextflow RNA-seq run for NHDF_p783 (not the topic of this post).
# Run inside Raw_Data (RNA-seq): copy the two p600 d8 FASTQ files that are
# linked as the ctrl_* samples further down.
cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ./
cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ./
# Run inside Raw_Data_p783_RNAseq (RNA-seq): give every input a short sample name.
ln -s ../Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ctrl_DonorI.fastq.gz
ln -s ../Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ctrl_DonorII.fastq.gz
ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf857/1_NHDF_Donor_1_p783_S1_R1_001.fastq.gz p783_DonorI.fastq.gz
ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf858/2_NHDF_Donor_2_p783_S2_R1_001.fastq.gz p783_DonorII.fastq.gz
# Note: MultiQC.html has to be regenerated after ignoring 'Biotype Counts',
# because --fcGroupFeaturesType gene_name cannot produce the real biotype counts.
# The notes originally carried the conda prompt prefix "(rnaseq_2021)" in front
# of the command; bash rejects that as a syntax error, so the environment is
# activated explicitly instead.
# NOTE(review): environment name taken from the prompt prefix - confirm.
# NOTE(review): --reads points at Raw_Data_p783/RNA_seq, while the comment
# above names the link directory Raw_Data_p783_RNAseq - confirm which is right.
conda activate rnaseq_2021
nextflow run rnaseq \
  --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data_p783/RNA_seq/*.fastq.gz' \
  --fasta "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" \
  --gtf "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" \
  --bed12 "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" \
  --singleEnd \
  -profile standard \
  --aligner star \
  --saveReference \
  -resume \
  --saveAlignedIntermediates \
  --skip_rseqc \
  --skip_dupradar \
  --skip_genebody_coverage \
  --skip_preseq \
  --skip_edger \
  --fcGroupFeaturesType gene_name
nextflow ChIP-seq run for the truncated LT-Ag + sT expression data of WaGa and HEK293
# Sequencing run 160719_SN7001212_0156_AC8K76ACXX:
# join the two lane files (L002 + L003) of each HEK293 sample into one FASTQ.
# Concatenated gzip streams form a valid gzip file, so no decompression is needed.
cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L003_R1_001.fastq.gz > HEK293_Input_p197_r1.fastq.gz
cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L003_R1_001.fastq.gz > HEK293_Input_p197_r2.fastq.gz
cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L003_R1_001.fastq.gz > HEK293_Input_p197_r3.fastq.gz
cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L003_R1_001.fastq.gz > HEK293_LT_p197_r1.fastq.gz
cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L003_R1_001.fastq.gz > HEK293_LT_p197_r2.fastq.gz
cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L003_R1_001.fastq.gz > HEK293_LT_p197_r3.fastq.gz
# Sequencing run 140117_SN7001212_0097_AC3ECBACXX:
# same lane merge (L003 + L004) for the WaGa IgG and LT samples.
cat ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_igg/waga_igg_TAGCTT_L003_R1_001.fastq.gz ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_igg/waga_igg_TAGCTT_L004_R1_001.fastq.gz > WaGa_IgG.fastq.gz
cat ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_lt/waga_lt_GGTAGC_L003_R1_001.fastq.gz ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_lt/waga_lt_GGTAGC_L004_R1_001.fastq.gz > WaGa_LT.fastq.gz
# Make the pipeline reachable as ./NGI-ChIPseq from the working directory.
ln -s /home/jhuang/Tools/NGI-ChIPseq/ .
# The notes originally carried the conda prompt prefix "(chipseq)" in front of
# the command; bash rejects that as a syntax error, so the environment is
# activated explicitly instead.
# NOTE(review): environment name taken from the prompt prefix - confirm.
conda activate chipseq
nextflow run NGI-ChIPseq/main.nf \
  --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/LTtr-ChIP/Raw_Data/*.fastq.gz' \
  --genome hg38 \
  --macsconfig macs.config \
  --singleEnd \
  --blacklist_filtering \
  -profile standard \
  --project Denise_LTtr_DNA_Bindung \
  --outdir results_LTtr_DNA_Bindung_hg38 \
  -resume
makeTagDirectory
# Build one HOMER tag directory per sample from the deduplicated, sorted BAM
# files in ../picard (relative to the homer directory created below).
conda activate myperl
# -p keeps a re-run from failing when the directory already exists;
# && skips the cd when the directory could not be created.
mkdir -p results_ChIPseq_K331A_hg38/homer && cd results_ChIPseq_K331A_hg38/homer
#makeTagDirectory <output directory> <input file> -genome hg38
for sample in p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII; do
  makeTagDirectory "${sample}" "../picard/${sample}.dedup.sorted.bam" -genome hg38
done
generate bigwigs
# Reference invocation kept from the original notes:
#makeUCSCfile peaks.txt -f peaks.bed -o auto -noadj -bigWig sample.bw -genome hg38
tag_dirs=(p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII)

# Step 1: write a bigWig track into every tag directory.
for tag_dir in "${tag_dirs[@]}"; do
  makeUCSCfile "${tag_dir}" -pseudo 1 -bigWig /home/jhuang/REFs/hg38.chromSizes \
    -o auto -style chipseq -norm 1e7 -normLength 100 -fsize 1
done

# Step 2: rename each <tag_dir>.ucsc.bigWig to its LT_K331A_* track name.
for tag_dir in "${tag_dirs[@]}"; do
  case "${tag_dir}" in
    p783_ChIP_DonorI) track=LT_K331A_DI ;;
    p783_ChIP_DonorII) track=LT_K331A_DII ;;
    p783_input_DonorI) track=LT_K331A_DI_input ;;
    p783_input_DonorII) track=LT_K331A_DII_input ;;
  esac
  mv "./${tag_dir}/${tag_dir}.ucsc.bigWig" "./${tag_dir}/${track}.bigWig"
done
peak calling, get peaks.txt
#findPeaks <tag directory> -i <input file> -o <output file> -genome hg38
# Call peaks for the two p783 ChIP samples, each against the input of the same donor.
findPeaks p783_ChIP_DonorI -style factor -o auto -i p783_input_DonorI
findPeaks p783_ChIP_DonorII -style factor -o auto -i p783_input_DonorII

# Bring in the previously built tag directories. cp needs -r to copy a
# directory; without it nothing is copied and every ../tagDirectories symlink
# created below would dangle.
# NOTE(review): these commands appear to be run from a different working
# directory (one that has ../reproduce_2023 beside it and a homer/
# subdirectory) - confirm the location.
cp -r ../reproduce_2023/tagDirectories ./
cd homer

# ChIP tag directories; each one has a matching "<name>_Input" directory.
chip_tag_dirs=(
  NHDF_LT_Donor1
  NHDF_LT_Donor2
  'Pfsk-1B_LT+sT_r1'
  'Pfsk-1B_LT+sT_r2'
  'HEK293_LT+sT_r2'
  'HEK293_LT+sT_r3'
)

# Link every ChIP tag directory and its input into the current directory.
for tag_dir in "${chip_tag_dirs[@]}"; do
  ln -s "../tagDirectories/${tag_dir}" ./
  ln -s "../tagDirectories/${tag_dir}_Input" ./
done

# Call peaks for every ChIP tag directory against its own input.
for tag_dir in "${chip_tag_dirs[@]}"; do
  findPeaks "${tag_dir}" -style factor -o auto -i "${tag_dir}_Input"
done
peak calling using getDifferentialPeaksReplicates.pl
# Copy the NHDF LT tag directories (inputs and ChIP) next to the p783 ones.
for tag_dir in NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input NHDF_LT_Donor1 NHDF_LT_Donor2; do
  cp -r "../../reproduce_2023/tagDirectories/${tag_dir}" ./
done
#-annStats annStats.txt
conda activate myperl

# K331A LT in NHDF: written to peaks_K331A_LT.txt first, then renamed.
getDifferentialPeaksReplicates.pl \
  -t p783_ChIP_DonorI p783_ChIP_DonorII \
  -i p783_input_DonorI p783_input_DonorII \
  -genome hg38 -use peaks.txt > peaks_K331A_LT.txt
mv peaks_K331A_LT.txt peaks_NHDF_K331A_LT.txt

# LT in NHDF.
getDifferentialPeaksReplicates.pl \
  -t NHDF_LT_Donor1 NHDF_LT_Donor2 \
  -i NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input \
  -genome hg38 -use peaks.txt > peaks_NHDF_LT.txt

# LT+sT in PFSK-1.
getDifferentialPeaksReplicates.pl \
  -t 'Pfsk-1B_LT+sT_r1' 'Pfsk-1B_LT+sT_r2' \
  -i 'Pfsk-1B_LT+sT_r1_Input' 'Pfsk-1B_LT+sT_r2_Input' \
  -genome hg38 -use peaks.txt > 'peaks_PFSK-1_LT+sT.txt'

# LT+sT in HEK293.
getDifferentialPeaksReplicates.pl \
  -t 'HEK293_LT+sT_r2' 'HEK293_LT+sT_r3' \
  -i 'HEK293_LT+sT_r2_Input' 'HEK293_LT+sT_r3_Input' \
  -genome hg38 -use peaks.txt > 'peaks_HEK293_LT+sT.txt'
merge peaks: tried -d values of 0, 200, 500, 1000, and 2000 (the command below uses 1000)
#http://homer.ucsd.edu/homer/ngs/mergePeaks.html
# Merge the three peak sets with -d 1000; outputs use the "celllines" prefix,
# plus celllines.txt for -venn.
mergePeaks -d 1000 \
  'peaks_PFSK-1_LT+sT.txt' \
  'peaks_HEK293_LT+sT.txt' \
  peaks_NHDF_LT.txt \
  -prefix celllines \
  -venn celllines.txt \
  -matrix celllines
#-- generate bed files --
# Each table row is "<peak file> <bed file>". The awk program reorders the
# whitespace-separated fields 2,3,4,1,6 of every line into tab-separated columns.
while read -r peak_file bed_file; do
  awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' "${peak_file}" > "${bed_file}"
done <<'EOF'
peaks_NHDF_LT.txt peaks_NHDF.bed
peaks_HEK293_LT+sT.txt peaks_HEK293.bed
peaks_PFSK-1_LT+sT.txt peaks_PFSK-1.bed
celllines_peaks_HEK293_LT+sT.txt peaks_HEK293_only.bed
celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt peaks_HEK293_NHDF.bed
celllines_peaks_NHDF_LT.txt peaks_NHDF_only.bed
celllines_peaks_PFSK-1_LT+sT.txt peaks_PFSK-1_only.bed
celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt peaks_PFSK-1_HEK293.bed
celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt peaks_PFSK-1_HEK293_NHDF.bed
celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt peaks_PFSK-1_NHDF.bed
EOF
#-- annotate the peaks --
# Flat list of (peak file, annotated output file) pairs, walked two at a time.
annotation_jobs=(
  peaks_NHDF_LT.txt annotatedPeaks_NHDF.txt
  'peaks_HEK293_LT+sT.txt' annotatedPeaks_HEK293.txt
  'peaks_PFSK-1_LT+sT.txt' annotatedPeaks_PFSK-1.txt
  'celllines_peaks_HEK293_LT+sT.txt' annotatedPeaks_HEK293_only.txt
  'celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt' annotatedPeaks_HEK293_NHDF.txt
  celllines_peaks_NHDF_LT.txt annotatedPeaks_NHDF_only.txt
  'celllines_peaks_PFSK-1_LT+sT.txt' annotatedPeaks_PFSK-1_only.txt
  'celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt' annotatedPeaks_PFSK-1_HEK293.txt
  'celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt' annotatedPeaks_PFSK-1_HEK293_NHDF.txt
  'celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt' annotatedPeaks_PFSK-1_NHDF.txt
)
for ((job = 0; job < ${#annotation_jobs[@]}; job += 2)); do
  annotatePeaks.pl "${annotation_jobs[job]}" hg38 > "${annotation_jobs[job + 1]}"
done
# Write copies of the bed files, minus every line containing "cmd", into a
# sibling directory; each copy gets a trailing underscore in its name.
mkdir ../beds_PFSK-1_HEK293_NHDF
for bed_name in \
  peaks_HEK293_only peaks_PFSK-1_only peaks_NHDF_only \
  peaks_HEK293 peaks_PFSK-1 peaks_NHDF \
  peaks_PFSK-1_HEK293 peaks_PFSK-1_NHDF peaks_HEK293_NHDF \
  peaks_PFSK-1_HEK293_NHDF; do
  grep -v "cmd" "${bed_name}.bed" > "../beds_PFSK-1_HEK293_NHDF/${bed_name}_.bed"
done
#Chr Start End PeakID (cmd=annotatePeaks.pl common_peaks_NHDF.txt hg38) Peak Score Strand
# Bundle the Venn table and all annotated peak tables into one Excel workbook.
~/Tools/csv2xls-0.4/csv_to_xls.py \
  celllines.txt \
  annotatedPeaks_HEK293_only.txt \
  annotatedPeaks_PFSK-1_only.txt \
  annotatedPeaks_NHDF_only.txt \
  annotatedPeaks_HEK293.txt \
  annotatedPeaks_PFSK-1.txt \
  annotatedPeaks_NHDF.txt \
  annotatedPeaks_PFSK-1_HEK293.txt \
  annotatedPeaks_PFSK-1_NHDF.txt \
  annotatedPeaks_HEK293_NHDF.txt \
  annotatedPeaks_PFSK-1_HEK293_NHDF.txt \
  -d$'\t' -o annotatedPeaks_PFSK-1_HEK293_NHDF.xls
#IMPORTANT: DELETE the column 'Strand' marked with '+' in the merged Excel file!
点赞本文的读者
还没有人对此文章表态
没有评论
Sorry, 没有相似文章
© 2023 XGenes.com Impressum