#!/usr/bin/env bash
#
# ATAC-seq processing pipeline for SRA run SRR8102131 against hg19:
#   download -> FASTQ extraction -> QC -> alignment -> peak calling ->
#   duplicate removal -> HINT-ATAC footprinting -> bigWig coverage.
#
# Usage: run from the project root directory; all paths are relative to it.
# Tools expected on PATH: wget, gunzip, prefetch, fastq-dump, fastqc, multiqc,
# bowtie2, samtools, sambamba, macs2, rgt-hint, rgt-motifanalysis,
# bamCoverage, python. Also needs script/sort_mpbs.py in the project.
#
# NOTE: this pipeline contains no adapter-trimming step.
# Run page: https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR8102131

# Every step consumes the previous step's output, so stop at the first failure.
set -euo pipefail

readonly SAMPLE='SRR8102131'   # SRA run accession processed by this script
readonly THREADS=4             # worker threads handed to the multi-threaded tools

# Create every output directory up front so that no tool is asked to write
# into a directory that does not exist yet.
mkdir -p data/raw_data ref \
  analysis/{fastqc,BAM,peak,dupbam,bigwig,footprint/{MPBS,differential}}

# --- Download reference and reads -------------------------------------------
# The URL is quoted so the shell leaves the '*' alone and wget performs the
# FTP-side globbing; -P stores the files under ref/ without changing directory.
wget -c -P ref 'ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg19/ucsc.hg19*'

# bowtie2-build below needs a FASTA file.
# NOTE(review): assumes the bundle ships the genome as ucsc.hg19.fasta.gz --
# confirm against the files actually downloaded into ref/.
if [[ ! -s ref/ucsc.hg19.fasta ]]; then
  # Decompress to a temporary name first so an interrupted run never leaves a
  # truncated FASTA that would be mistaken for a complete one.
  gunzip -c ref/ucsc.hg19.fasta.gz > ref/ucsc.hg19.fasta.tmp
  mv -- ref/ucsc.hg19.fasta.tmp ref/ucsc.hg19.fasta
fi

prefetch "${SAMPLE}"

# Link anything prefetch stored under ~/ncbi/public/sra into the working
# directory. The loop is a no-op when that directory is missing or empty, and
# -f keeps re-runs from failing on links that already exist.
for sra_file in ~/ncbi/public/sra/*; do
  [[ -e "${sra_file}" ]] || continue
  ln -sf -- "${sra_file}" .
done

# --- Extract FASTQ with fastq-dump ------------------------------------------
# $ac/$si/$ri/$rl are fastq-dump defline placeholders, not shell variables,
# hence the plain single quotes.
# shellcheck disable=SC2016
fastq-dump --split-3 \
  --defline-qual '+' \
  --defline-seq '@$ac-$si/$ri length=$rl' \
  --gzip -O data/raw_data "${SAMPLE}"

# --- Quality control with FastQC / MultiQC ----------------------------------
fastqc "data/raw_data/${SAMPLE}_1.fastq.gz" "data/raw_data/${SAMPLE}_2.fastq.gz" \
  -o analysis/fastqc
multiqc analysis/fastqc/ -o analysis/fastqc/

# --- Read mapping with Bowtie 2 ---------------------------------------------
bowtie2-build --threads "${THREADS}" ref/ucsc.hg19.fasta ref/hg19

# --rg-id sets the read-group ID; bowtie2 only emits the @RG header line (and
# applies the extra --rg fields) when --rg-id is given.
# samtools view -F 4 drops unmapped reads before sorting.
bowtie2 -x ref/hg19 -p "${THREADS}" --reorder -X 2000 \
  --rg-id sample_1 --rg "PL:illumina" --rg "SM:${SAMPLE}" \
  -1 <(zcat "data/raw_data/${SAMPLE}_1.fastq.gz") \
  -2 <(zcat "data/raw_data/${SAMPLE}_2.fastq.gz") \
  | samtools view -F 4 -bS - \
  | samtools sort --threads "${THREADS}" -m 2G \
      -o "analysis/BAM/${SAMPLE}_sorted.bam" -
sambamba index -t "${THREADS}" "analysis/BAM/${SAMPLE}_sorted.bam"

# --- Peak calling with MACS2 ------------------------------------------------
# NOTE(review): peaks are called on the BAM *before* duplicate removal, as in
# the original workflow -- confirm this ordering is intended.
macs2 callpeak --shift -100 --extsize 200 --SPMR --nomodel -B -g hs -q 0.01 \
  -t "analysis/BAM/${SAMPLE}_sorted.bam" \
  -n "analysis/peak/${SAMPLE}_peak"

# --- Duplicate removal with sambamba ----------------------------------------
# Raise the open-file limit for markdup; if the limit cannot be raised, warn
# and continue rather than abort.
ulimit -n 10240 \
  || printf 'warning: could not raise open-file limit to 10240\n' >&2
sambamba markdup -r -p -t "${THREADS}" \
  "analysis/BAM/${SAMPLE}_sorted.bam" \
  "analysis/dupbam/${SAMPLE}_markdupba.bam"
sambamba index -t "${THREADS}" "analysis/dupbam/${SAMPLE}_markdupba.bam"

# --- HINT-ATAC footprinting -------------------------------------------------
rgt-hint footprinting --atac-seq --paired-end --organism=hg19 \
  --output-location=analysis/footprint --output-prefix="${SAMPLE}" \
  "analysis/dupbam/${SAMPLE}_markdupba.bam" \
  "analysis/peak/${SAMPLE}_peak_peaks.narrowPeak"

rgt-hint tracks --bc --bigWig --organism=hg19 \
  --output-location=analysis/bigwig --output-prefix="${SAMPLE}_BC" \
  "analysis/dupbam/${SAMPLE}_markdupba.bam" \
  "analysis/peak/${SAMPLE}_peak_peaks.narrowPeak"

# Match motifs inside the predicted footprints.
rgt-motifanalysis matching --organism=hg19 \
  --input-files="analysis/footprint/${SAMPLE}.bed" \
  --output-location=analysis/footprint/MPBS

python script/sort_mpbs.py -f "analysis/footprint/MPBS/${SAMPLE}_mpbs.bed" -l 3

# NOTE(review): only a single condition is passed to the differential step
# here -- confirm that rgt-hint accepts this.
rgt-hint differential --organism=hg19 --bc --nc 2 \
  --mpbs-files="analysis/footprint/MPBS/${SAMPLE}_mpbs.bed" \
  --reads-files="analysis/dupbam/${SAMPLE}_markdupba.bam" \
  --conditions="${SAMPLE}" \
  --output-location=analysis/footprint/differential

# --- Quantitative bigWig coverage track -------------------------------------
bamCoverage -b "analysis/dupbam/${SAMPLE}_markdupba.bam" --ignoreDuplicates \
  --skipNonCoveredRegions \
  --normalizeUsing RPKM \
  --binSize 1 -p max -o "analysis/bigwig/${SAMPLE}.bw"
# NOTE: this code does not perform adapter trimming; readers should add that step themselves.
# Article link: https://pubmed.ncbi.nlm.nih.gov/30808370/
# Original post: https://www.cnblogs.com/mkmkblog/p/14236613.html