@@ -1,16 +1,17 @@ | |||
{ | |||
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v4.0/", | |||
"{{ project_name }}.SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics", | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.BENCHMARKdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", | |||
"{{ project_name }}.gvcf": {{ gvcf_list.split(";") | tojson }}, | |||
"{{ project_name }}.gvcf_idx": {{ gvcf_idx_list.split(";") | tojson }}, | |||
"{{ project_name }}.LCL8": "{{ LCL8 }}", | |||
"{{ project_name }}.disk_size": "500", | |||
"{{ project_name }}.LCL5": "{{ LCL5 }}", | |||
"{{ project_name }}.rename": "{{ rename }}", | |||
"{{ project_name }}.project": "{{ project }}", | |||
"{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2018.08.01", | |||
"{{ project_name }}.LCL6": "{{ LCL6 }}", | |||
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.LCL7": "{{ LCL7 }}", | |||
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", | |||
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" | |||
} | |||
} |
@@ -2,7 +2,7 @@ task benchmark { | |||
File vcf | |||
File benchmarking_dir | |||
File ref_dir | |||
String sample = basename(vcf,".splited.vcf") | |||
String sample = basename(vcf,".vcf") | |||
String fasta | |||
String docker | |||
String cluster_config | |||
@@ -18,19 +18,7 @@ task benchmark { | |||
export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa | |||
cat ${vcf} | grep '#' > header | |||
cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk ' | |||
BEGIN { OFS = "\t" } | |||
{ | |||
for ( i=9; i<=NF; i++ ) { | |||
split($i,a,":") ;$i = a[1]; | |||
} | |||
} | |||
{ print } | |||
' > body | |||
cat header body > filtered.vcf | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf} -c > ${sample}.rtg.vcf.gz | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz | |||
if [[ ${sample} =~ "LCL5" ]];then |
@@ -1,46 +1,30 @@ | |||
task merge_family { | |||
Array[File] splited_vcf | |||
File LCL5_vcf_gz | |||
File LCL5_vcf_idx | |||
File LCL6_vcf_gz | |||
File LCL6_vcf_idx | |||
File LCL7_vcf_gz | |||
File LCL7_vcf_idx | |||
File LCL8_vcf_gz | |||
File LCL8_vcf_idx | |||
File rename | |||
String project | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
# Relabel each sample VCF with `rtg vcfannotate --relabel` using the rename file, | |||
# merge the four relabelled VCFs with `rtg vcfmerge`, then gunzip the result so it | |||
# matches the declared output "${project}.family.vcf". | |||
# rtg is called by absolute path, the same path the benchmark task uses. | |||
command <<< | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL5_vcf_gz} -o LCL5.vcf.gz | |||
mkdir -p /cromwell_root/tmp/vcf | |||
cp ${sep=" " splited_vcf} /cromwell_root/tmp/vcf | |||
for a in /cromwell_root/tmp/vcf/*vcf | |||
do | |||
for b in /cromwell_root/tmp/vcf/*vcf | |||
do | |||
for c in /cromwell_root/tmp/vcf/*vcf | |||
do | |||
for d in /cromwell_root/tmp/vcf/*vcf | |||
do | |||
sample_a=$(echo $a | cut -f7 -d_) | |||
sample_b=$(echo $b | cut -f7 -d_) | |||
sample_c=$(echo $c | cut -f7 -d_) | |||
sample_d=$(echo $d | cut -f7 -d_) | |||
rep_a=$(echo $a | cut -f8 -d_) | |||
rep_b=$(echo $b | cut -f8 -d_) | |||
rep_c=$(echo $c | cut -f8 -d_) | |||
rep_d=$(echo $d | cut -f8 -d_) | |||
if [ $sample_a == "LCL5" ] && [ $sample_b == "LCL6" ] && [ $sample_c == "LCL7" ] && [ $sample_d == "LCL8" ] && [ $rep_a == $rep_b ] && [ $rep_c == $rep_d ] && [ $rep_b == $rep_c ];then | |||
cat $a | grep -v '#' > LCL5.body | |||
cat $b | grep -v '#' | cut -f 10 > LCL6.body | |||
cat $c | grep -v '#' | cut -f 10 > LCL7.body | |||
cat $d | grep -v '#' | cut -f 10 > LCL8.body | |||
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5\tLCL6\tLCL7\tLCL8" > header_name | |||
cat $a | grep '##' | cat - header_name > header | |||
paste LCL5.body LCL6.body LCL7.body LCL8.body > family.body | |||
cat header family.body > ${project}.$rep_a.family.vcf | |||
fi | |||
done | |||
done | |||
done | |||
done | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL6_vcf_gz} -o LCL6.vcf.gz | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL7_vcf_gz} -o LCL7.vcf.gz | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL8_vcf_gz} -o LCL8.vcf.gz | |||
# Merge the relabelled files written above (not the raw task inputs). | |||
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfmerge --force-merge-all -o ${project}.family.vcf.gz LCL5.vcf.gz LCL6.vcf.gz LCL7.vcf.gz LCL8.vcf.gz | |||
gunzip ${project}.family.vcf.gz | |||
>>> | |||
@@ -51,7 +35,7 @@ | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
Array[File] family_vcf = glob("*.family.vcf") | |||
File merged_vcf = "${project}.family.vcf" | |||
} | |||
} | |||
@@ -1,29 +1,27 @@ | |||
import "./tasks/split_gvcf_files.wdl" as split_gvcf_files | |||
import "./tasks/GVCFtyper.wdl" as GVCFtyper | |||
import "./tasks/benchmark.wdl" as benchmark | |||
import "./tasks/mendelian.wdl" as mendelian | |||
import "./tasks/merge_mendelian.wdl" as merge_mendelian | |||
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian | |||
import "./tasks/D5_D6.wdl" as D5_D6 | |||
import "./tasks/merge_family.wdl" as merge_family | |||
workflow {{ project_name }} { | |||
Array[File] gvcf | |||
Array[File] gvcf_idx | |||
File LCL5 | |||
File LCL6 | |||
File LCL7 | |||
File LCL8 | |||
String BENCHMARKdocker | |||
String MENDELIANdocker | |||
String DIYdocker | |||
String SENTIEON_INSTALL_DIR | |||
String SENTIEONdocker | |||
String fasta | |||
File ref_dir | |||
File benchmarking_dir | |||
File rename | |||
String project | |||
@@ -31,100 +29,94 @@ workflow {{ project_name }} { | |||
String BIGcluster_config | |||
String SMALLcluster_config | |||
call GVCFtyper.GVCFtyper as GVCFtyper { | |||
call benchmark.benchmark as LCL5benchmark { | |||
input: | |||
vcf=LCL5, | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, | |||
fasta=fasta, | |||
vcf=gvcf, | |||
vcf_idx=gvcf_idx, | |||
project=project, | |||
docker=SENTIEONdocker, | |||
docker=BENCHMARKdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size | |||
disk_size=disk_size, | |||
} | |||
call split_gvcf_files.split_gvcf_files as split_gvcf_files { | |||
call benchmark.benchmark as LCL6benchmark { | |||
input: | |||
merged_gvcf=GVCFtyper.merged_gvcf, | |||
docker=DIYdocker, | |||
project=project, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
vcf=LCL6, | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=BENCHMARKdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size, | |||
} | |||
# Run the benchmark task on the LCL7 input VCF, using the BENCHMARKdocker image | |||
# and the BIGcluster_config cluster configuration. | |||
call benchmark.benchmark as LCL7benchmark { | |||
input: | |||
vcf=LCL7, | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=BENCHMARKdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size, | |||
} | |||
Array[File] single_gvcf = split_gvcf_files.splited_vcf | |||
# Run the benchmark task on the LCL8 input VCF, using the BENCHMARKdocker image | |||
# and the BIGcluster_config cluster configuration. | |||
call benchmark.benchmark as LCL8benchmark { | |||
input: | |||
vcf=LCL8, | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=BENCHMARKdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size, | |||
} | |||
scatter (idx in range(length(single_gvcf))) { | |||
call benchmark.benchmark as benchmark { | |||
input: | |||
vcf=single_gvcf[idx], | |||
benchmarking_dir=benchmarking_dir, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=BENCHMARKdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size, | |||
} | |||
# Merge the four per-sample benchmark outputs into one family VCF. | |||
# Each sample passes its bgzipped VCF (rtg_vcf) together with the matching | |||
# index (rtg_vcf_index) from the corresponding benchmark call. | |||
call merge_family.merge_family as merge_family { | |||
input: | |||
LCL5_vcf_gz=LCL5benchmark.rtg_vcf, | |||
LCL5_vcf_idx=LCL5benchmark.rtg_vcf_index, | |||
LCL6_vcf_gz=LCL6benchmark.rtg_vcf, | |||
LCL6_vcf_idx=LCL6benchmark.rtg_vcf_index, | |||
LCL7_vcf_gz=LCL7benchmark.rtg_vcf, | |||
LCL7_vcf_idx=LCL7benchmark.rtg_vcf_index, | |||
LCL8_vcf_gz=LCL8benchmark.rtg_vcf, | |||
LCL8_vcf_idx=LCL8benchmark.rtg_vcf_index, | |||
rename=rename, | |||
project=project, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size, | |||
} | |||
# Run the mendelian task on the merged family VCF produced by merge_family, | |||
# using the MENDELIANdocker image and the BIGcluster_config cluster configuration. | |||
call mendelian.mendelian as mendelian { | |||
input: | |||
family_vcf=merge_family.merged_vcf, | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=MENDELIANdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size | |||
} | |||
Boolean sister_tag = read_boolean(split_gvcf_files.sister_tag) | |||
Boolean quartet_tag = read_boolean(split_gvcf_files.quartet_tag) | |||
if (sister_tag) { | |||
call D5_D6.D5_D6 as D5_D6 { | |||
input: | |||
splited_vcf=split_gvcf_files.splited_vcf, | |||
project=project, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size, | |||
} | |||
# Pass the D5 and D6 trio VCFs from the mendelian call, plus the merged family VCF | |||
# from merge_family, to the merge_mendelian task. | |||
call merge_mendelian.merge_mendelian as merge_mendelian { | |||
input: | |||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||
family_vcf=merge_family.merged_vcf, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
} | |||
if (quartet_tag) { | |||
call merge_family.merge_family as merge_family { | |||
input: | |||
splited_vcf=split_gvcf_files.splited_vcf, | |||
project=project, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size, | |||
} | |||
Array[File] family_vcfs = merge_family.family_vcf | |||
scatter (idx in range(length(family_vcfs))) { | |||
call mendelian.mendelian as mendelian { | |||
input: | |||
family_vcf=family_vcfs[idx], | |||
ref_dir=ref_dir, | |||
fasta=fasta, | |||
docker=MENDELIANdocker, | |||
cluster_config=BIGcluster_config, | |||
disk_size=disk_size | |||
} | |||
call merge_mendelian.merge_mendelian as merge_mendelian { | |||
input: | |||
D5_trio_vcf=mendelian.D5_trio_vcf, | |||
D6_trio_vcf=mendelian.D6_trio_vcf, | |||
family_vcf=family_vcfs[idx], | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||
call quartet_mendelian.quartet_mendelian as quartet_mendelian { | |||
input: | |||
project_mendelian_summary=merge_mendelian.project_mendelian_summary, | |||
project=project, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
} | |||
call quartet_mendelian.quartet_mendelian as quartet_mendelian { | |||
input: | |||
project_mendelian_summary=merge_mendelian.project_mendelian_summary, | |||
project=project, | |||
docker=DIYdocker, | |||
cluster_config=SMALLcluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||