{ | { | ||||
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v4.0/", | "{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v4.0/", | ||||
"{{ project_name }}.SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | ||||
"{{ project_name }}.BENCHMARKdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | "{{ project_name }}.BENCHMARKdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | ||||
"{{ project_name }}.gvcf": {{ gvcf_list.split(";") | tojson }}, | |||||
"{{ project_name }}.gvcf_idx": {{ gvcf_idx_list.split(";") | tojson }}, | |||||
"{{ project_name }}.LCL8": "{{ LCL8 }}", | |||||
"{{ project_name }}.disk_size": "500", | "{{ project_name }}.disk_size": "500", | ||||
"{{ project_name }}.LCL5": "{{ LCL5 }}", | |||||
"{{ project_name }}.rename": "{{ rename }}", | |||||
"{{ project_name }}.project": "{{ project }}", | "{{ project_name }}.project": "{{ project }}", | ||||
"{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | "{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | ||||
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | "{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | ||||
"{{ project_name }}.SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2018.08.01", | |||||
"{{ project_name }}.LCL6": "{{ LCL6 }}", | |||||
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.LCL7": "{{ LCL7 }}", | |||||
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | "{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | ||||
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | "{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | ||||
} | |||||
} |
File vcf | File vcf | ||||
File benchmarking_dir | File benchmarking_dir | ||||
File ref_dir | File ref_dir | ||||
String sample = basename(vcf,".splited.vcf") | |||||
String sample = basename(vcf,".vcf") | |||||
String fasta | String fasta | ||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa | export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa | ||||
cat ${vcf} | grep '#' > header | |||||
cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk ' | |||||
BEGIN { OFS = "\t" } | |||||
{ | |||||
for ( i=9; i<=NF; i++ ) { | |||||
split($i,a,":") ;$i = a[1]; | |||||
} | |||||
} | |||||
{ print } | |||||
' > body | |||||
cat header body > filtered.vcf | |||||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz | |||||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf} -c > ${sample}.rtg.vcf.gz | |||||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz | /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz | ||||
if [[ ${sample} =~ "LCL5" ]];then | if [[ ${sample} =~ "LCL5" ]];then |
task merge_family { | task merge_family { | ||||
Array[File] splited_vcf | |||||
File LCL5_vcf_gz | |||||
File LCL5_vcf_idx | |||||
File LCL6_vcf_gz | |||||
File LCL6_vcf_idx | |||||
File LCL7_vcf_gz | |||||
File LCL7_vcf_idx | |||||
File LCL8_vcf_gz | |||||
File LCL8_vcf_idx | |||||
File rename | |||||
String project | String project | ||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
String disk_size | String disk_size | ||||
command <<< | command <<< | ||||
opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL5_vcf_gz} -o LCL5.vcf.gz | |||||
mkdir -p /cromwell_root/tmp/vcf | |||||
cp ${sep=" " splited_vcf} /cromwell_root/tmp/vcf | |||||
for a in /cromwell_root/tmp/vcf/*vcf | |||||
do | |||||
for b in /cromwell_root/tmp/vcf/*vcf | |||||
do | |||||
for c in /cromwell_root/tmp/vcf/*vcf | |||||
do | |||||
for d in /cromwell_root/tmp/vcf/*vcf | |||||
do | |||||
sample_a=$(echo $a | cut -f7 -d_) | |||||
sample_b=$(echo $b | cut -f7 -d_) | |||||
sample_c=$(echo $c | cut -f7 -d_) | |||||
sample_d=$(echo $d | cut -f7 -d_) | |||||
rep_a=$(echo $a | cut -f8 -d_) | |||||
rep_b=$(echo $b | cut -f8 -d_) | |||||
rep_c=$(echo $c | cut -f8 -d_) | |||||
rep_d=$(echo $d | cut -f8 -d_) | |||||
if [ $sample_a == "LCL5" ] && [ $sample_b == "LCL6" ] && [ $sample_c == "LCL7" ] && [ $sample_d == "LCL8" ] && [ $rep_a == $rep_b ] && [ $rep_c == $rep_d ] && [ $rep_b == $rep_c ];then | |||||
cat $a | grep -v '#' > LCL5.body | |||||
cat $b | grep -v '#' | cut -f 10 > LCL6.body | |||||
cat $c | grep -v '#' | cut -f 10 > LCL7.body | |||||
cat $d | grep -v '#' | cut -f 10 > LCL8.body | |||||
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5\tLCL6\tLCL7\tLCL8" > header_name | |||||
cat $a | grep '##' | cat - header_name > header | |||||
paste LCL5.body LCL6.body LCL7.body LCL8.body > family.body | |||||
cat header family.body > ${project}.$rep_a.family.vcf | |||||
fi | |||||
done | |||||
done | |||||
done | |||||
done | |||||
opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL6_vcf_gz} -o LCL6.vcf.gz | |||||
opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL7_vcf_gz} -o LCL7.vcf.gz | |||||
opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL8_vcf_gz} -o LCL8.vcf.gz | |||||
opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfmerge --force-merge-all -o ${project}.family.vcf.gz LCL5_vcf_gz LCL6_vcf_gz LCL7_vcf_gz LCL8_vcf_gz | |||||
gunzip ${project}.family.vcf.gz | |||||
>>> | >>> | ||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | ||||
} | } | ||||
output { | output { | ||||
Array[File] family_vcf = glob("*.family.vcf") | |||||
File merged_vcf = "${project}.family.vcf" | |||||
} | } | ||||
} | } | ||||
import "./tasks/split_gvcf_files.wdl" as split_gvcf_files | |||||
import "./tasks/GVCFtyper.wdl" as GVCFtyper | |||||
import "./tasks/benchmark.wdl" as benchmark | import "./tasks/benchmark.wdl" as benchmark | ||||
import "./tasks/mendelian.wdl" as mendelian | import "./tasks/mendelian.wdl" as mendelian | ||||
import "./tasks/merge_mendelian.wdl" as merge_mendelian | import "./tasks/merge_mendelian.wdl" as merge_mendelian | ||||
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian | import "./tasks/quartet_mendelian.wdl" as quartet_mendelian | ||||
import "./tasks/D5_D6.wdl" as D5_D6 | |||||
import "./tasks/merge_family.wdl" as merge_family | import "./tasks/merge_family.wdl" as merge_family | ||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
Array[File] gvcf | |||||
Array[File] gvcf_idx | |||||
File LCL5 | |||||
File LCL6 | |||||
File LCL7 | |||||
File LCL8 | |||||
String BENCHMARKdocker | String BENCHMARKdocker | ||||
String MENDELIANdocker | String MENDELIANdocker | ||||
String DIYdocker | String DIYdocker | ||||
String SENTIEON_INSTALL_DIR | |||||
String SENTIEONdocker | |||||
String fasta | String fasta | ||||
File ref_dir | File ref_dir | ||||
File benchmarking_dir | File benchmarking_dir | ||||
File rename | |||||
String project | String project | ||||
String BIGcluster_config | String BIGcluster_config | ||||
String SMALLcluster_config | String SMALLcluster_config | ||||
call GVCFtyper.GVCFtyper as GVCFtyper { | |||||
call benchmark.benchmark as LCL5benchmark { | |||||
input: | input: | ||||
vcf=LCL5, | |||||
benchmarking_dir=benchmarking_dir, | |||||
ref_dir=ref_dir, | ref_dir=ref_dir, | ||||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | |||||
fasta=fasta, | fasta=fasta, | ||||
vcf=gvcf, | |||||
vcf_idx=gvcf_idx, | |||||
project=project, | |||||
docker=SENTIEONdocker, | |||||
docker=BENCHMARKdocker, | |||||
cluster_config=BIGcluster_config, | cluster_config=BIGcluster_config, | ||||
disk_size=disk_size | |||||
disk_size=disk_size, | |||||
} | } | ||||
call split_gvcf_files.split_gvcf_files as split_gvcf_files { | |||||
call benchmark.benchmark as LCL6benchmark { | |||||
input: | input: | ||||
merged_gvcf=GVCFtyper.merged_gvcf, | |||||
docker=DIYdocker, | |||||
project=project, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size | |||||
vcf=LCL6, | |||||
benchmarking_dir=benchmarking_dir, | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=BENCHMARKdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size, | |||||
} | } | ||||
call benchmark.benchmark as LCL7benchmark { | |||||
input: | |||||
vcf=LCL7, | |||||
benchmarking_dir=benchmarking_dir, | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=BENCHMARKdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
Array[File] single_gvcf = split_gvcf_files.splited_vcf | |||||
call benchmark.benchmark as LCL8benchmark { | |||||
input: | |||||
vcf=LCL8, | |||||
benchmarking_dir=benchmarking_dir, | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=BENCHMARKdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
scatter (idx in range(length(single_gvcf))) { | |||||
call benchmark.benchmark as benchmark { | |||||
input: | |||||
vcf=single_gvcf[idx], | |||||
benchmarking_dir=benchmarking_dir, | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=BENCHMARKdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
call merge_family.merge_family as merge_family { | |||||
input: | |||||
LCL5_vcf_gz=LCL5benchmark.rtg_vcf, | |||||
LCL5_vcf_idx=LCL5benchmark.rtg_vcf_index, | |||||
LCL6_vcf_gz=LCL6benchmark.rtg_vcf, | |||||
LCL6_vcf_idx=LCL6benchmark.rtg_vcf_index, | |||||
LCL7_vcf_gz=LCL7benchmark.rtg_vcf, | |||||
LCL7_vcf_idx=LCL7benchmark.rtg_vcf_index, | |||||
LCL8_vcf_gz=LCL8benchmark.rtg_vcf, | |||||
LCL8_vcf_idx=LCL8benchmark.rtg_vcf, | |||||
rename=rename, | |||||
project=project, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
call mendelian.mendelian as mendelian { | |||||
input: | |||||
family_vcf=merge_family.merged_vcf, | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=MENDELIANdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size | |||||
} | } | ||||
Boolean sister_tag = read_boolean(split_gvcf_files.sister_tag) | |||||
Boolean quartet_tag = read_boolean(split_gvcf_files.quartet_tag) | |||||
if (sister_tag) { | |||||
call D5_D6.D5_D6 as D5_D6 { | |||||
input: | |||||
splited_vcf=split_gvcf_files.splited_vcf, | |||||
project=project, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
call merge_mendelian.merge_mendelian as merge_mendelian { | |||||
input: | |||||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||||
family_vcf=merge_family.merged_vcf, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size | |||||
} | } | ||||
if (quartet_tag) { | |||||
call merge_family.merge_family as merge_family { | |||||
input: | |||||
splited_vcf=split_gvcf_files.splited_vcf, | |||||
project=project, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size, | |||||
} | |||||
Array[File] family_vcfs = merge_family.family_vcf | |||||
scatter (idx in range(length(family_vcfs))) { | |||||
call mendelian.mendelian as mendelian { | |||||
input: | |||||
family_vcf=family_vcfs[idx], | |||||
ref_dir=ref_dir, | |||||
fasta=fasta, | |||||
docker=MENDELIANdocker, | |||||
cluster_config=BIGcluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge_mendelian.merge_mendelian as merge_mendelian { | |||||
input: | |||||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||||
family_vcf=family_vcfs[idx], | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | |||||
call quartet_mendelian.quartet_mendelian as quartet_mendelian { | |||||
input: | |||||
project_mendelian_summary=merge_mendelian.project_mendelian_summary, | |||||
project=project, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call quartet_mendelian.quartet_mendelian as quartet_mendelian { | |||||
input: | |||||
project_mendelian_summary=merge_mendelian.project_mendelian_summary, | |||||
project=project, | |||||
docker=DIYdocker, | |||||
cluster_config=SMALLcluster_config, | |||||
disk_size=disk_size | |||||
} | } | ||||
} | } | ||||