# bwa-aln
# aln pipeline
# Scratch directory; later steps pass it to Java/Picard as the temp dir.
# -p keeps a re-run from failing when the directory already exists.
mkdir -p tmp || exit 1
# step1
# Run `bwa aln` once per mate to produce sampleA_1.sai and sampleA_2.sai.
# Options (per the bwa manual): -t 4 threads, -q 10 read-trimming quality,
# -i 15 disallows indels within 15 bp of the read ends, -m 100000 caps the
# number of queue entries. Both mates use identical settings, hence the loop.
for mate in 1 2; do
  bwa aln -m 100000 -t 4 -i 15 -q 10 \
    -f "sampleA_${mate}.sai" reference.fa "sampleA_${mate}.fq.gz" || {
    echo "bwa aln failed for mate ${mate}" >&2
    exit 1
  }
done
# step2
# Pair the two .sai files from step1 into SAM records, force unmapped reads
# to MAPQ=0 and CIGAR="*", then convert the stream to BAM.
# No sorting happens in this step; the output is in bwa's read order.
#
# The awk filter splits and joins on tabs only, so header lines (which may
# legitimately contain spaces, e.g. the @PG CL: value) pass through
# untouched. and() is a gawk bit-wise function; 4 is the SAM "read unmapped"
# FLAG bit.
# NOTE: the exit status checked below is that of the last pipeline stage
# (samtools view) only.
bwa sampe -a 600 \
  -r "@RG\tID:sampleA\tPL:illumina\tPU:sampleA\tLB:sampleA\tSM:sampleA" \
  reference.fa sampleA_1.sai sampleA_2.sai sampleA_1.fq.gz sampleA_2.fq.gz \
  | awk 'BEGIN { FS = OFS = "\t" }
         !/^@/ && and($2, 4) == 4 { $5 = 0; $6 = "*" }
         { print }' \
  | samtools view -bS -T reference.fa - > sampleA.bam || {
  echo "bwa sampe / BAM conversion failed" >&2
  exit 1
}
# FixMateInformation and sort
# Reads the unsorted sampleA.bam and writes a coordinate-sorted
# sampleA.sort.bam (SO=coordinate).
java -Xmx3g -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit -jar picard.jar FixMateInformation I=sampleA.bam O=sampleA.sort.bam TMP_DIR=tmp SO=coordinate VALIDATION_STRINGENCY=SILENT || exit 1
# calmd, recalculate MD/NM tags
# (add -e to calmd if matching bases should also be rewritten as '=').
# The input must be the sorted BAM produced above. The result goes to a
# temporary file first because `> sampleA.sort.bam` would truncate the
# input before samtools could read it.
samtools calmd -b sampleA.sort.bam reference.fa > sampleA.calmd.tmp.bam || exit 1
mv sampleA.calmd.tmp.bam sampleA.sort.bam || exit 1
# index
# Built after calmd so the index matches the final sampleA.sort.bam.
samtools index sampleA.sort.bam || exit 1
# remove PCR duplicates
# REMOVE_DUPLICATES=true drops duplicates from the output instead of only
# flagging them; metrics are written to sampleA.txt.
java -Xmx2G -Djava.io.tmpdir=tmp -XX:MaxPermSize=512m -XX:-UseGCOverheadLimit -jar picard.jar MarkDuplicates MAX_FILE_HANDLES=1000 REMOVE_DUPLICATES=true I=sampleA.sort.bam O=sampleA.rmdup.bam METRICS_FILE=sampleA.txt TMP_DIR=tmp VALIDATION_STRINGENCY=SILENT || exit 1
# merge
# Combine the de-duplicated BAMs sampleA01..03 into sampleA.all.bam with
# Picard MergeSamFiles, requesting coordinate sort order.
# NOTE(review): the sampleA0N.rmdup.bam inputs are presumably produced by
# running the preceding steps once per lane/library -- confirm.
java -Xmx2G \
  -Djava.io.tmpdir=tmp \
  -XX:MaxPermSize=512m \
  -XX:-UseGCOverheadLimit \
  -jar picard.jar MergeSamFiles \
  MAX_FILE_HANDLES=1000 \
  I=sampleA01.rmdup.bam \
  I=sampleA02.rmdup.bam \
  I=sampleA03.rmdup.bam \
  O=sampleA.all.bam \
  TMP_DIR=tmp \
  SO=coordinate \
  AS=true \
  VALIDATION_STRINGENCY=SILENT
# index
# Index the merged BAM.
samtools index sampleA.all.bam
# realign
# Two GATK passes over the merged BAM:
#   1) RealignerTargetCreator writes the candidate intervals to
#      sampleA.realn.intervals, using the dbSNP and 1000G indel VCFs
#      given through the -B bindings.
#   2) IndelRealigner consumes those intervals and writes sampleA.realn.bam.
# NOTE(review): the -B:name,VCF binding syntax looks like an early GATK
# release -- confirm against the GenomeAnalysisTK.jar actually deployed.
java -Xmx4g \
  -Djava.io.tmpdir=tmp \
  -XX:MaxPermSize=512m \
  -XX:-UseGCOverheadLimit \
  -jar GenomeAnalysisTK.jar \
  -T RealignerTargetCreator \
  -R reference.fa \
  -I sampleA.all.bam \
  -o sampleA.realn.intervals \
  -B:snps,VCF dbsnp_132.hg19.vcf \
  -B:indels,VCF 1000G_indels_for_realignment.hg19.vcf \
  -l INFO
java -Xmx4g \
  -Djava.io.tmpdir=tmp \
  -XX:MaxPermSize=512m \
  -XX:-UseGCOverheadLimit \
  -jar GenomeAnalysisTK.jar \
  -T IndelRealigner \
  -R reference.fa \
  -I sampleA.all.bam \
  -o sampleA.realn.bam \
  -targetIntervals sampleA.realn.intervals \
  -maxInMemory 300000 \
  -l INFO