|
|
@@ -0,0 +1,69 @@ |
|
|
|
# Single-sample Mutect2 calling pipeline.
#
# Steps, in order: Picard MarkDuplicates -> samtools index -> GATK
# BaseRecalibrator / ApplyBQSR -> Mutect2 -> GetPileupSummaries ->
# CalculateContamination -> LearnReadOrientationModel -> FilterMutectCalls ->
# bcftools norm.
#
# Inputs:
#   sample_id       prefix used for every intermediate and output file name
#   normal_bam      BAM to process (it is the only sample passed to Mutect2)
#   normal_bai      index of normal_bam
#   gatk4_database  resource directory; must contain the reference FASTA,
#                   known-sites VCFs, BED, PoN and ExAC files named below
#   docker          container image (must provide java, /opt/picard.jar,
#                   samtools, gatk and bcftools)
#   cluster         backend-specific instance/cluster spec, passed to runtime
#   disk_size       data disk size, spliced into the dataDisk runtime string
#
# Outputs:
#   raw_vcf         unfiltered Mutect2 calls
#   filtered_vcf    FilterMutectCalls output
#   normalized_vcf  filtered_vcf after `bcftools norm` against the reference
task gatk {
  String sample_id
  File normal_bam
  File normal_bai
  File gatk4_database

  String docker
  String cluster
  String disk_size

  command <<<
    # Abort on the first failing command, on unset variables and on failures
    # inside pipelines.
    set -euo pipefail

    # Stage the inputs into the working directory under predictable names.
    cp -r "${gatk4_database}" ./gatk4_database
    cp "${normal_bam}" "./${sample_id}.bam"
    cp "${normal_bai}" "./${sample_id}.bam.bai"

    # Resource files expected inside the staged database directory.
    hg38_genome="./gatk4_database/GRCh38.d1.vd1.fa"
    dbsnp="./gatk4_database/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
    indel1="./gatk4_database/resources_broad_hg38_v0_Homo_sapiens_assembly38.known_indels.vcf.gz"
    indel2="./gatk4_database/resources_broad_hg38_v0_Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
    bed="./gatk4_database/test.bed"
    pon="./gatk4_database/somatic-hg38_1000g_pon.hg38.vcf.gz"
    EXAC="./gatk4_database/somatic-hg38_small_exac_common_3.hg38.vcf.gz"

    # JVM settings shared by every gatk invocation below.
    gatk_java_opts="-Djava.io.tmpdir=./BQSR -Xms40G -Xmx40G -XX:ParallelGCThreads=2"

    # -p keeps a retry in a reused working directory from failing here.
    mkdir -p markduplicate BQSR Mutect2

    # --- Duplicate marking -------------------------------------------------
    java -Xmx40g -jar /opt/picard.jar MarkDuplicates \
      INPUT="./${sample_id}.bam" \
      OUTPUT="./markduplicate/${sample_id}_marked.bam" \
      METRICS_FILE="./markduplicate/${sample_id}.metrics"
    echo "dedup done"

    samtools index -@ 6 "./markduplicate/${sample_id}_marked.bam"

    # --- Base quality score recalibration ----------------------------------
    gatk --java-options "$gatk_java_opts" BaseRecalibrator \
      -I "./markduplicate/${sample_id}_marked.bam" \
      -R "$hg38_genome" \
      -O "./BQSR/${sample_id}_markdup_bqsr.report" \
      --known-sites "$dbsnp" \
      --known-sites "$indel1" \
      --known-sites "$indel2" \
      -L "$bed"

    gatk --java-options "$gatk_java_opts" ApplyBQSR \
      -I "./markduplicate/${sample_id}_marked.bam" \
      -R "$hg38_genome" \
      -L "$bed" \
      --bqsr-recal-file "./BQSR/${sample_id}_markdup_bqsr.report" \
      -O "./BQSR/${sample_id}_markdup_bqsr.bam"
    echo "BQSR done"

    # --- Somatic calling ---------------------------------------------------
    # NOTE(review): dbSNP is passed as --germline-resource; Mutect2 expects a
    # resource carrying population allele frequencies (e.g. gnomAD) - confirm
    # this is intended before relying on the germline filtering.
    gatk --java-options "$gatk_java_opts" Mutect2 \
      -R "$hg38_genome" \
      -I "./BQSR/${sample_id}_markdup_bqsr.bam" \
      -L "$bed" \
      --pon "$pon" \
      --germline-resource "$dbsnp" \
      --tumor-lod-to-emit 2 \
      --f1r2-tar-gz "Mutect2/${sample_id}.f1r2.tar.gz" \
      -O "Mutect2/${sample_id}_mutect2_raw.vcf"
    echo "Mutect2 done"

    # --- Contamination estimate --------------------------------------------
    # Pileups are restricted to ExAC sites that also fall inside the BED.
    gatk --java-options "$gatk_java_opts" GetPileupSummaries \
      -R "$hg38_genome" \
      -I "./BQSR/${sample_id}_markdup_bqsr.bam" \
      -V "$EXAC" \
      -L "$EXAC" \
      -L "$bed" \
      --interval-set-rule INTERSECTION \
      -O "Mutect2/${sample_id}.getpileupsum.table"

    gatk --java-options "$gatk_java_opts" CalculateContamination \
      -I "Mutect2/${sample_id}.getpileupsum.table" \
      -O "Mutect2/${sample_id}.contamination.table" \
      --tumor-segmentation "Mutect2/${sample_id}.segmentation.table"

    # --- Read-orientation model --------------------------------------------
    gatk --java-options "$gatk_java_opts" LearnReadOrientationModel \
      -I "Mutect2/${sample_id}.f1r2.tar.gz" \
      -O "Mutect2/${sample_id}_f1r2_artifact.tar.gz"

    # --- Filtering and normalization ---------------------------------------
    gatk --java-options "$gatk_java_opts" FilterMutectCalls \
      -V "Mutect2/${sample_id}_mutect2_raw.vcf" \
      -R "$hg38_genome" \
      --contamination-table "Mutect2/${sample_id}.contamination.table" \
      --tumor-segmentation "Mutect2/${sample_id}.segmentation.table" \
      --ob-priors "Mutect2/${sample_id}_f1r2_artifact.tar.gz" \
      -O "Mutect2/${sample_id}_mutect2_filtered.vcf"

    bcftools norm -f "$hg38_genome" -O v \
      -o "Mutect2/${sample_id}_mutect2_filtered_bcfed.vcf" \
      "Mutect2/${sample_id}_mutect2_filtered.vcf"
  >>>

  # cluster / systemDisk / dataDisk are backend-specific runtime keys and are
  # passed through exactly as before.
  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 100"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File raw_vcf = "./Mutect2/${sample_id}_mutect2_raw.vcf"
    File filtered_vcf = "./Mutect2/${sample_id}_mutect2_filtered.vcf"
    # Result of the final bcftools norm step; previously produced but never
    # collected.
    File normalized_vcf = "./Mutect2/${sample_id}_mutect2_filtered_bcfed.vcf"
  }
}
|
|
|
|
|
|
|
|