#!/usr/bin/env bash
#
# bwa-aln pipeline (notes originally posted 2017-04-28).
#
# Steps, for one paired-end library named "sampleA":
#   1. bwa aln on each read file            -> sampleA_{1,2}.sai
#   2. bwa sampe + awk clean-up             -> sampleA.bam
#   3. Picard FixMateInformation (sorts)    -> sampleA.sort.bam
#   4. samtools calmd (MD/NM tags) + index  -> sampleA.sort.bam(.bai)
#   5. Picard MarkDuplicates                -> sampleA.rmdup.bam
#   6. Picard MergeSamFiles + index         -> sampleA.all.bam(.bai)
#   7. GATK RealignerTargetCreator / IndelRealigner -> sampleA.realn.bam
#
# All file names are relative to the current working directory.
# Requires on PATH: bwa, awk (gawk, for and()), samtools, java.

# Stop at the first failing step (including any stage of a pipeline) so a
# broken intermediate file is never fed to the next tool.
set -euo pipefail

# Scratch directory used for java.io.tmpdir and Picard TMP_DIR.
# -p keeps re-runs from failing when the directory already exists.
mkdir -p tmp

# --- step 1: align each read file independently -----------------------------
bwa aln -m 100000 -t 4 -i 15 -q 10 -f sampleA_1.sai reference.fa sampleA_1.fq.gz
bwa aln -m 100000 -t 4 -i 15 -q 10 -f sampleA_2.sai reference.fa sampleA_2.fq.gz

# --- step 2: pair the alignments and convert to BAM -------------------------
# Unmapped reads (FLAG bit 0x4 set) get MAPQ=0 and CIGAR="*".
# The awk stage splits and joins on TAB only, so spaces inside a field
# (header values, optional tags) are left untouched.
# The .sai/.fq.gz inputs are the files read and written by step 1.
bwa sampe -a 600 \
  -r "@RG\tID:sampleA\tPL:illumina\tPU:sampleA\tLB:sampleA\tSM:sampleA" \
  reference.fa sampleA_1.sai sampleA_2.sai sampleA_1.fq.gz sampleA_2.fq.gz \
  | awk 'BEGIN { FS = OFS = "\t" }
         !/^@/ && and($2, 4) == 4 { $5 = 0; $6 = "*" }
         { print }' \
  | samtools view -bS -T reference.fa - > sampleA.bam

# --- FixMateInformation and coordinate sort ---------------------------------
java -Xmx3g -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit \
  -jar picard.jar FixMateInformation \
  I=sampleA.bam \
  O=sampleA.sort.bam \
  TMP_DIR=tmp \
  SO=coordinate \
  VALIDATION_STRINGENCY=SILENT

# --- calmd: recalculate MD/NM tags ------------------------------------------
# Operates on the sorted BAM and writes to a temporary file first: redirecting
# straight onto sampleA.sort.bam would truncate the input while it is read.
# NOTE(review): '=' bases are only produced with `calmd -e`, which is not
# passed here — confirm whether that was intended.
samtools calmd -b sampleA.sort.bam reference.fa > sampleA.calmd.tmp.bam
mv -- sampleA.calmd.tmp.bam sampleA.sort.bam

# --- index (after calmd, so the index matches the final file) ---------------
samtools index sampleA.sort.bam

# --- remove PCR duplicates --------------------------------------------------
java -Xmx2G -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit \
  -jar picard.jar MarkDuplicates \
  MAX_FILE_HANDLES=1000 \
  REMOVE_DUPLICATES=true \
  I=sampleA.sort.bam \
  O=sampleA.rmdup.bam \
  METRICS_FILE=sampleA.txt \
  TMP_DIR=tmp \
  VALIDATION_STRINGENCY=SILENT

# --- merge ------------------------------------------------------------------
# NOTE(review): sampleA01/02/03.rmdup.bam are not created by the steps above;
# presumably they are the outputs of running those steps once per
# library/lane — confirm before running.
java -Xmx2G -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit \
  -jar picard.jar MergeSamFiles \
  MAX_FILE_HANDLES=1000 \
  I=sampleA01.rmdup.bam \
  I=sampleA02.rmdup.bam \
  I=sampleA03.rmdup.bam \
  O=sampleA.all.bam \
  TMP_DIR=tmp \
  SO=coordinate \
  AS=true \
  VALIDATION_STRINGENCY=SILENT

# --- index ------------------------------------------------------------------
samtools index sampleA.all.bam

# --- realign around indels --------------------------------------------------
# 1) compute the target intervals
java -Xmx4g -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit \
  -jar GenomeAnalysisTK.jar -T RealignerTargetCreator \
  -R reference.fa \
  -I sampleA.all.bam \
  -o sampleA.realn.intervals \
  -B:snps,VCF dbsnp_132.hg19.vcf \
  -B:indels,VCF 1000G_indels_for_realignment.hg19.vcf \
  -l INFO

# 2) realign reads inside those intervals
java -Xmx4g -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit \
  -jar GenomeAnalysisTK.jar -T IndelRealigner \
  -R reference.fa \
  -I sampleA.all.bam \
  -o sampleA.realn.bam \
  -targetIntervals sampleA.realn.intervals \
  -maxInMemory 300000 \
  -l INFO