Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

72 lines
3.9KB

  # Single-sample GATK4 somatic calling task.
  #
  # Steps performed by the command, in order:
  #   1. Picard MarkDuplicates on the input BAM, then samtools index.
  #   2. BaseRecalibrator + ApplyBQSR restricted to the BED intervals.
  #   3. Mutect2 with a panel of normals and a germline resource. The input
  #      BAM is the only -I argument and no matched-normal sample is named,
  #      even though the input is called normal_bam.
  #   4. GetPileupSummaries -> CalculateContamination, and
  #      LearnReadOrientationModel on the f1r2 archive written by Mutect2.
  #   5. FilterMutectCalls using the contamination table, the segmentation
  #      table and the orientation-bias priors.
  #   6. bcftools norm of the filtered VCF against the reference FASTA.
  #
  # Inputs:
  #   sample_id       prefix used for every intermediate and output file name
  #   normal_bam      BAM to process
  #   normal_bai      index of normal_bam
  #   gatk4_database  resource bundle copied recursively into the working
  #                   directory; it must contain the reference FASTA, known
  #                   sites, BED, PoN and germline VCFs named in the command
  #   docker          container image; must provide java, /opt/picard.jar,
  #                   samtools, gatk and bcftools
  #   cluster         backend-specific instance specification
  #   disk_size       data disk size, inserted verbatim into the dataDisk string
  #
  # Outputs:
  #   raw_vcf         unfiltered Mutect2 calls
  #   filtered_vcf    calls after FilterMutectCalls
  #   normalized_vcf  filtered calls after bcftools norm
  task gatk {
    String sample_id
    File normal_bam
    File normal_bai
    File gatk4_database
    String docker
    String cluster
    String disk_size

    command <<<
      set -eo pipefail
      echo "gatk start!"

      # Stage the resource bundle and the BAM/BAI under predictable local names.
      cp -r "${gatk4_database}" ./gatk4_database
      cp "${normal_bam}" "./${sample_id}.bam"
      cp "${normal_bai}" "./${sample_id}.bam.bai"

      # Shell variables below are referenced as $name without braces, because
      # the brace form is reserved for WDL interpolation in this command section.
      hg38_genome="./gatk4_database/GRCh38.d1.vd1.fa"
      dbsnp="./gatk4_database/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
      indel1="./gatk4_database/resources_broad_hg38_v0_Homo_sapiens_assembly38.known_indels.vcf.gz"
      indel2="./gatk4_database/resources_broad_hg38_v0_Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
      bed="./gatk4_database/S07604514_Padded_use.bed"
      pon="./gatk4_database/somatic-hg38_1000g_pon.hg38.vcf.gz"
      EXAC="./gatk4_database/somatic-hg38_small_exac_common_3.hg38.vcf.gz"
      GNOMAD="./gatk4_database/somatic-hg38_af-only-gnomad.hg38.vcf.gz"

      # One JVM option string shared by every GATK invocation; temporary
      # files of all GATK steps go to ./BQSR.
      gatk_java_opts="-Djava.io.tmpdir=./BQSR -Xms40G -Xmx40G -XX:ParallelGCThreads=2"

      mkdir markduplicate BQSR Mutect2

      java -Xmx40g -jar /opt/picard.jar MarkDuplicates \
        INPUT="./${sample_id}.bam" \
        OUTPUT="./markduplicate/${sample_id}_marked.bam" \
        METRICS_FILE="./markduplicate/${sample_id}.metrics"
      echo "dedup done"

      samtools index -@ 6 "./markduplicate/${sample_id}_marked.bam"

      gatk --java-options "$gatk_java_opts" BaseRecalibrator \
        -I "./markduplicate/${sample_id}_marked.bam" \
        -R "$hg38_genome" \
        -O "./BQSR/${sample_id}_markdup_bqsr.report" \
        --known-sites "$dbsnp" \
        --known-sites "$indel1" \
        --known-sites "$indel2" \
        -L "$bed"

      gatk --java-options "$gatk_java_opts" ApplyBQSR \
        -I "./markduplicate/${sample_id}_marked.bam" \
        -R "$hg38_genome" \
        -L "$bed" \
        --bqsr-recal-file "./BQSR/${sample_id}_markdup_bqsr.report" \
        -O "./BQSR/${sample_id}_markdup_bqsr.bam"
      echo "BQSR done"

      gatk --java-options "$gatk_java_opts" Mutect2 \
        -R "$hg38_genome" \
        -I "./BQSR/${sample_id}_markdup_bqsr.bam" \
        -L "$bed" \
        --pon "$pon" \
        --germline-resource "$GNOMAD" \
        --tumor-lod-to-emit 2 \
        --f1r2-tar-gz "Mutect2/${sample_id}.f1r2.tar.gz" \
        -O "Mutect2/${sample_id}_mutect2_raw.vcf"
      echo "Mutect2 done"

      # Pileups are collected only where the common-variant sites and the
      # BED intervals overlap (INTERSECTION of the two -L arguments).
      gatk --java-options "$gatk_java_opts" GetPileupSummaries \
        -R "$hg38_genome" \
        -I "./BQSR/${sample_id}_markdup_bqsr.bam" \
        -V "$EXAC" \
        -L "$EXAC" \
        -L "$bed" \
        --interval-set-rule INTERSECTION \
        -O "Mutect2/${sample_id}.getpileupsum.table"

      gatk --java-options "$gatk_java_opts" CalculateContamination \
        -I "Mutect2/${sample_id}.getpileupsum.table" \
        -O "Mutect2/${sample_id}.contamination.table" \
        --tumor-segmentation "Mutect2/${sample_id}.segmentation.table"

      gatk --java-options "$gatk_java_opts" LearnReadOrientationModel \
        -I "Mutect2/${sample_id}.f1r2.tar.gz" \
        -O "Mutect2/${sample_id}_f1r2_artifact.tar.gz"

      gatk --java-options "$gatk_java_opts" FilterMutectCalls \
        -V "Mutect2/${sample_id}_mutect2_raw.vcf" \
        -R "$hg38_genome" \
        --contamination-table "Mutect2/${sample_id}.contamination.table" \
        --tumor-segmentation "Mutect2/${sample_id}.segmentation.table" \
        --ob-priors "Mutect2/${sample_id}_f1r2_artifact.tar.gz" \
        -O "Mutect2/${sample_id}_mutect2_filtered.vcf"

      bcftools norm -f "$hg38_genome" -O v \
        -o "Mutect2/${sample_id}_mutect2_filtered_bcfed.vcf" \
        "Mutect2/${sample_id}_mutect2_filtered.vcf"
    >>>

    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 100"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File raw_vcf = "./Mutect2/${sample_id}_mutect2_raw.vcf"
      File filtered_vcf = "./Mutect2/${sample_id}_mutect2_filtered.vcf"
      # Final product of the command (bcftools norm); previously generated
      # but never declared, so it was not available to downstream steps.
      File normalized_vcf = "./Mutect2/${sample_id}_mutect2_filtered_bcfed.vcf"
    }
  }