Browse Source

first commit

master
biochenglinliu 1 month ago
commit
73e0fca4db
5 changed files with 108 additions and 0 deletions
  1. +0
    -0
      README.md
  2. +6
    -0
      defaults
  3. +9
    -0
      inputs
  4. +69
    -0
      tasks/gatk.wdl
  5. +24
    -0
      workflow.wdl

+ 0
- 0
README.md View File


+ 6
- 0
defaults View File

@@ -0,0 +1,6 @@
{
"gatk_docker":"registry.cn-shanghai.aliyuncs.com/shaolab2023_docker/gatk4:4.5.0.0_1",
"gatk4_database":"oss://database-shaolab/gatk4_database/",
"gatk_cluster":"OnDemand bcs.es.r.2xlarge img-ubuntu-vpc",
"disk_size":"300"
}

+ 9
- 0
inputs View File

@@ -0,0 +1,9 @@
{
"{{ project_name }}.gatk_docker": "{{ gatk_docker }}",
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.normal_bam": "{{ normal_bam }}",
"{{ project_name }}.normal_bai": "{{ normal_bai }}",
"{{ project_name }}.gatk4_database": "{{ gatk4_database }}",
"{{ project_name }}.gatk_cluster": "{{ gatk_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size }}"
}

+ 69
- 0
tasks/gatk.wdl View File

@@ -0,0 +1,69 @@
# Runs a single-sample Mutect2 pipeline on one BAM:
#   MarkDuplicates -> BaseRecalibrator/ApplyBQSR -> Mutect2 ->
#   GetPileupSummaries/CalculateContamination -> LearnReadOrientationModel ->
#   FilterMutectCalls -> bcftools norm.
#
# Inputs:
#   sample_id       label used to name every intermediate and output file
#   normal_bam      input alignment file
#   normal_bai      index of normal_bam
#   gatk4_database  resource directory (reference FASTA, known sites, PoN,
#                   ExAC sites, BED intervals); copied to ./gatk4_database
#   docker          container image for the runtime section
#   cluster         cluster specification for the runtime section
#   disk_size       data disk size inserted into the dataDisk runtime string
#
# Outputs:
#   raw_vcf         unfiltered Mutect2 calls
#   filtered_vcf    calls after FilterMutectCalls
#   normalized_vcf  filtered calls after bcftools norm (previously produced by
#                   the command but never exported)
task gatk {
    String sample_id
    File normal_bam
    File normal_bai
    File gatk4_database

    String docker
    String cluster
    String disk_size

    command <<<

        set -o pipefail
        set -e

        # Dollar-brace placeholders below are substituted by the WDL engine
        # before bash runs; shell variables therefore use the brace-less form.

        # Stage the inputs under predictable local names.
        cp -r "${gatk4_database}" ./gatk4_database
        cp "${normal_bam}" "./${sample_id}.bam"
        cp "${normal_bai}" "./${sample_id}.bam.bai"

        # Resource files expected inside the staged database directory.
        hg38_genome="./gatk4_database/GRCh38.d1.vd1.fa"
        dbsnp="./gatk4_database/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
        indel1="./gatk4_database/resources_broad_hg38_v0_Homo_sapiens_assembly38.known_indels.vcf.gz"
        indel2="./gatk4_database/resources_broad_hg38_v0_Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
        bed="./gatk4_database/test.bed"
        pon="./gatk4_database/somatic-hg38_1000g_pon.hg38.vcf.gz"
        EXAC="./gatk4_database/somatic-hg38_small_exac_common_3.hg38.vcf.gz"

        # JVM settings shared by every gatk invocation; ./BQSR doubles as the
        # Java temp directory for all steps.
        gatk_java_opts="-Djava.io.tmpdir=./BQSR -Xms40G -Xmx40G -XX:ParallelGCThreads=2"

        # -p keeps a re-run in a reused working directory from aborting here.
        mkdir -p markduplicate BQSR Mutect2

        # JVM options go before -jar, which is the conventional launcher order.
        java -Xmx40g -jar /opt/picard.jar MarkDuplicates "INPUT=./${sample_id}.bam" "OUTPUT=./markduplicate/${sample_id}_marked.bam" "METRICS_FILE=./markduplicate/${sample_id}.metrics"
        echo "dedup done"

        samtools index -@ 6 "./markduplicate/${sample_id}_marked.bam"
        gatk --java-options "$gatk_java_opts" BaseRecalibrator -I "./markduplicate/${sample_id}_marked.bam" -R "$hg38_genome" -O "./BQSR/${sample_id}_markdup_bqsr.report" --known-sites "$dbsnp" --known-sites "$indel1" --known-sites "$indel2" -L "$bed"

        gatk --java-options "$gatk_java_opts" ApplyBQSR -I "./markduplicate/${sample_id}_marked.bam" -R "$hg38_genome" -L "$bed" --bqsr-recal-file "./BQSR/${sample_id}_markdup_bqsr.report" -O "./BQSR/${sample_id}_markdup_bqsr.bam"
        echo "BQSR done"

        # NOTE(review): the dbSNP file is passed as --germline-resource; confirm
        # it carries the allele-frequency annotation Mutect2 expects there.
        gatk --java-options "$gatk_java_opts" Mutect2 -R "$hg38_genome" -I "./BQSR/${sample_id}_markdup_bqsr.bam" -L "$bed" --pon "$pon" --germline-resource "$dbsnp" --tumor-lod-to-emit 2 --f1r2-tar-gz "Mutect2/${sample_id}.f1r2.tar.gz" -O "Mutect2/${sample_id}_mutect2_raw.vcf"
        echo "Mutect2 done"

        # Pileups are restricted to the intersection of the ExAC sites and the BED.
        gatk --java-options "$gatk_java_opts" GetPileupSummaries -R "$hg38_genome" -I "./BQSR/${sample_id}_markdup_bqsr.bam" -V "$EXAC" -L "$EXAC" -L "$bed" --interval-set-rule INTERSECTION -O "Mutect2/${sample_id}.getpileupsum.table"

        gatk --java-options "$gatk_java_opts" CalculateContamination -I "Mutect2/${sample_id}.getpileupsum.table" -O "Mutect2/${sample_id}.contamination.table" --tumor-segmentation "Mutect2/${sample_id}.segmentation.table"

        gatk --java-options "$gatk_java_opts" LearnReadOrientationModel -I "Mutect2/${sample_id}.f1r2.tar.gz" -O "Mutect2/${sample_id}_f1r2_artifact.tar.gz"

        gatk --java-options "$gatk_java_opts" FilterMutectCalls -V "Mutect2/${sample_id}_mutect2_raw.vcf" -R "$hg38_genome" --contamination-table "Mutect2/${sample_id}.contamination.table" --tumor-segmentation "Mutect2/${sample_id}.segmentation.table" --ob-priors "Mutect2/${sample_id}_f1r2_artifact.tar.gz" -O "Mutect2/${sample_id}_mutect2_filtered.vcf"
        bcftools norm -f "$hg38_genome" -O v -o "Mutect2/${sample_id}_mutect2_filtered_bcfed.vcf" "Mutect2/${sample_id}_mutect2_filtered.vcf"

    >>>

    runtime {
        docker : docker
        cluster: cluster
        systemDisk: "cloud_ssd 100"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File raw_vcf = "./Mutect2/${sample_id}_mutect2_raw.vcf"
        File filtered_vcf = "./Mutect2/${sample_id}_mutect2_filtered.vcf"
        # Final product of the command block (bcftools norm output).
        File normalized_vcf = "./Mutect2/${sample_id}_mutect2_filtered_bcfed.vcf"
    }

}



+ 24
- 0
workflow.wdl View File

@@ -0,0 +1,24 @@
import "./tasks/gatk.wdl" as gatk
# Single-call workflow: forwards every workflow-level input to the gatk task.
# The workflow name is a template placeholder filled in before submission.
workflow {{ project_name }} {
    # Runtime settings handed to the task's runtime section.
    String gatk_docker
    String gatk_cluster
    String disk_size

    # Sample data and the resource directory.
    String sample_id
    File normal_bam
    File normal_bai
    File gatk4_database

    call gatk.gatk as gatk {
        input:
            docker = gatk_docker,
            cluster = gatk_cluster,
            disk_size = disk_size,
            sample_id = sample_id,
            normal_bam = normal_bam,
            normal_bai = normal_bai,
            gatk4_database = gatk4_database
    }
}

Loading…
Cancel
Save