Browse Source

input vcf

master
LUYAO REN 4 years ago
parent
commit
64b086903b
4 changed files with 105 additions and 140 deletions
  1. +6
    -5
      inputs
  2. +2
    -14
      tasks/benchmark.wdl
  3. +20
    -36
      tasks/merge_family.wdl
  4. +77
    -85
      workflow.wdl

+ 6
- 5
inputs View File

{ {
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v4.0/", "{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v4.0/",
"{{ project_name }}.SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.BENCHMARKdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest", "{{ project_name }}.BENCHMARKdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.gvcf": {{ gvcf_list.split(";") | tojson }},
"{{ project_name }}.gvcf_idx": {{ gvcf_idx_list.split(";") | tojson }},
"{{ project_name }}.LCL8": "{{ LCL8 }}",
"{{ project_name }}.disk_size": "500", "{{ project_name }}.disk_size": "500",
"{{ project_name }}.LCL5": "{{ LCL5 }}",
"{{ project_name }}.rename": "{{ rename }}",
"{{ project_name }}.project": "{{ project }}", "{{ project_name }}.project": "{{ project }}",
"{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc", "{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc",
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", "{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2018.08.01",
"{{ project_name }}.LCL6": "{{ LCL6 }}",
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", "{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.LCL7": "{{ LCL7 }}",
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4", "{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/" "{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/"
}
}

+ 2
- 14
tasks/benchmark.wdl View File

File vcf File vcf
File benchmarking_dir File benchmarking_dir
File ref_dir File ref_dir
String sample = basename(vcf,".splited.vcf")
String sample = basename(vcf,".vcf")
String fasta String fasta
String docker String docker
String cluster_config String cluster_config


export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa


cat ${vcf} | grep '#' > header
cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk '
BEGIN { OFS = "\t" }
{
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[1];
}
}
{ print }
' > body
cat header body > filtered.vcf

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf} -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz /opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz


if [[ ${sample} =~ "LCL5" ]];then if [[ ${sample} =~ "LCL5" ]];then

+ 20
- 36
tasks/merge_family.wdl View File

task merge_family { task merge_family {
Array[File] splited_vcf
File LCL5_vcf_gz
File LCL5_vcf_idx
File LCL6_vcf_gz
File LCL6_vcf_idx
File LCL7_vcf_gz
File LCL7_vcf_idx
File LCL8_vcf_gz
File LCL8_vcf_idx
File rename
String project String project
String docker String docker
String cluster_config String cluster_config
String disk_size String disk_size
command <<< command <<<
# Relabel the sample name(s) in the LCL5 VCF using the rename mapping file and
# write the result to LCL5.vcf.gz. The rtg binary is called by absolute path,
# matching the invocation used in tasks/benchmark.wdl; a relative "opt/..."
# path would be resolved against the task's working directory.
# NOTE(review): confirm the docker image used for this task ships rtg at this path.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL5_vcf_gz} -o LCL5.vcf.gz


mkdir -p /cromwell_root/tmp/vcf

cp ${sep=" " splited_vcf} /cromwell_root/tmp/vcf

for a in /cromwell_root/tmp/vcf/*vcf
do
for b in /cromwell_root/tmp/vcf/*vcf
do
for c in /cromwell_root/tmp/vcf/*vcf
do
for d in /cromwell_root/tmp/vcf/*vcf
do
sample_a=$(echo $a | cut -f7 -d_)
sample_b=$(echo $b | cut -f7 -d_)
sample_c=$(echo $c | cut -f7 -d_)
sample_d=$(echo $d | cut -f7 -d_)
rep_a=$(echo $a | cut -f8 -d_)
rep_b=$(echo $b | cut -f8 -d_)
rep_c=$(echo $c | cut -f8 -d_)
rep_d=$(echo $d | cut -f8 -d_)
if [ $sample_a == "LCL5" ] && [ $sample_b == "LCL6" ] && [ $sample_c == "LCL7" ] && [ $sample_d == "LCL8" ] && [ $rep_a == $rep_b ] && [ $rep_c == $rep_d ] && [ $rep_b == $rep_c ];then
cat $a | grep -v '#' > LCL5.body
cat $b | grep -v '#' | cut -f 10 > LCL6.body
cat $c | grep -v '#' | cut -f 10 > LCL7.body
cat $d | grep -v '#' | cut -f 10 > LCL8.body
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5\tLCL6\tLCL7\tLCL8" > header_name
cat $a | grep '##' | cat - header_name > header
paste LCL5.body LCL6.body LCL7.body LCL8.body > family.body
cat header family.body > ${project}.$rep_a.family.vcf
fi
done
done
done
done
# Relabel the sample name(s) in the LCL6, LCL7 and LCL8 VCFs with the same
# rename mapping file. rtg is called by absolute path, matching tasks/benchmark.wdl.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL6_vcf_gz} -o LCL6.vcf.gz

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL7_vcf_gz} -o LCL7.vcf.gz

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL8_vcf_gz} -o LCL8.vcf.gz

# Merge the four relabeled per-sample files written by the vcfannotate steps.
# The bare words LCL5_vcf_gz ... LCL8_vcf_gz are not interpolated by WDL and
# would be passed to rtg as literal file names.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfmerge --force-merge-all -o ${project}.family.vcf.gz LCL5.vcf.gz LCL6.vcf.gz LCL7.vcf.gz LCL8.vcf.gz

gunzip ${project}.family.vcf.gz


>>> >>>


dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
} }
output { output {
Array[File] family_vcf = glob("*.family.vcf")
File merged_vcf = "${project}.family.vcf"
} }
} }



+ 77
- 85
workflow.wdl View File

import "./tasks/split_gvcf_files.wdl" as split_gvcf_files
import "./tasks/GVCFtyper.wdl" as GVCFtyper
import "./tasks/benchmark.wdl" as benchmark import "./tasks/benchmark.wdl" as benchmark
import "./tasks/mendelian.wdl" as mendelian import "./tasks/mendelian.wdl" as mendelian
import "./tasks/merge_mendelian.wdl" as merge_mendelian import "./tasks/merge_mendelian.wdl" as merge_mendelian
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian import "./tasks/quartet_mendelian.wdl" as quartet_mendelian
import "./tasks/D5_D6.wdl" as D5_D6
import "./tasks/merge_family.wdl" as merge_family import "./tasks/merge_family.wdl" as merge_family




workflow {{ project_name }} { workflow {{ project_name }} {


Array[File] gvcf
Array[File] gvcf_idx
File LCL5
File LCL6
File LCL7
File LCL8


String BENCHMARKdocker String BENCHMARKdocker
String MENDELIANdocker String MENDELIANdocker
String DIYdocker String DIYdocker
String SENTIEON_INSTALL_DIR
String SENTIEONdocker




String fasta String fasta
File ref_dir File ref_dir


File benchmarking_dir File benchmarking_dir
File rename


String project String project


String BIGcluster_config String BIGcluster_config
String SMALLcluster_config String SMALLcluster_config


call GVCFtyper.GVCFtyper as GVCFtyper {
call benchmark.benchmark as LCL5benchmark {
input: input:
vcf=LCL5,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir, ref_dir=ref_dir,
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta, fasta=fasta,
vcf=gvcf,
vcf_idx=gvcf_idx,
project=project,
docker=SENTIEONdocker,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config, cluster_config=BIGcluster_config,
disk_size=disk_size
disk_size=disk_size,
} }


call split_gvcf_files.split_gvcf_files as split_gvcf_files {
call benchmark.benchmark as LCL6benchmark {
input: input:
merged_gvcf=GVCFtyper.merged_gvcf,
docker=DIYdocker,
project=project,
cluster_config=SMALLcluster_config,
disk_size=disk_size
vcf=LCL6,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
} }


# Run the benchmark task on the LCL7 input VCF, using the benchmark docker
# image and the big cluster configuration.
call benchmark.benchmark as LCL7benchmark {
input:
vcf=LCL7,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}


Array[File] single_gvcf = split_gvcf_files.splited_vcf
# Run the benchmark task on the LCL8 input VCF, using the benchmark docker
# image and the big cluster configuration.
call benchmark.benchmark as LCL8benchmark {
input:
vcf=LCL8,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}


scatter (idx in range(length(single_gvcf))) {
call benchmark.benchmark as benchmark {
input:
vcf=single_gvcf[idx],
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}
# Merge the four per-sample outputs of the benchmark calls (LCL5-LCL8) into a
# single family VCF. Each sample contributes its rtg VCF and the matching index.
call merge_family.merge_family as merge_family {
input:
LCL5_vcf_gz=LCL5benchmark.rtg_vcf,
LCL5_vcf_idx=LCL5benchmark.rtg_vcf_index,
LCL6_vcf_gz=LCL6benchmark.rtg_vcf,
LCL6_vcf_idx=LCL6benchmark.rtg_vcf_index,
LCL7_vcf_gz=LCL7benchmark.rtg_vcf,
LCL7_vcf_idx=LCL7benchmark.rtg_vcf_index,
LCL8_vcf_gz=LCL8benchmark.rtg_vcf,
# The index input takes the index output, as for LCL5-LCL7 (was wired to rtg_vcf).
LCL8_vcf_idx=LCL8benchmark.rtg_vcf_index,
rename=rename,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size,
}


call mendelian.mendelian as mendelian {
input:
family_vcf=merge_family.merged_vcf,
ref_dir=ref_dir,
fasta=fasta,
docker=MENDELIANdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
} }


Boolean sister_tag = read_boolean(split_gvcf_files.sister_tag)
Boolean quartet_tag = read_boolean(split_gvcf_files.quartet_tag)

if (sister_tag) {
call D5_D6.D5_D6 as D5_D6 {
input:
splited_vcf=split_gvcf_files.splited_vcf,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size,
}
call merge_mendelian.merge_mendelian as merge_mendelian {
input:
D5_trio_vcf=mendelian.D5_trio_vcf,
D6_trio_vcf=mendelian.D6_trio_vcf,
family_vcf=merge_family.merged_vcf,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
} }


if (quartet_tag) {
call merge_family.merge_family as merge_family {
input:
splited_vcf=split_gvcf_files.splited_vcf,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size,
}

Array[File] family_vcfs = merge_family.family_vcf
scatter (idx in range(length(family_vcfs))) {
call mendelian.mendelian as mendelian {
input:
family_vcf=family_vcfs[idx],
ref_dir=ref_dir,
fasta=fasta,
docker=MENDELIANdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

call merge_mendelian.merge_mendelian as merge_mendelian {
input:
D5_trio_vcf=mendelian.D5_trio_vcf,
D6_trio_vcf=mendelian.D6_trio_vcf,
family_vcf=family_vcfs[idx],
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
}
}

call quartet_mendelian.quartet_mendelian as quartet_mendelian {
input:
project_mendelian_summary=merge_mendelian.project_mendelian_summary,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
}
call quartet_mendelian.quartet_mendelian as quartet_mendelian {
input:
project_mendelian_summary=merge_mendelian.project_mendelian_summary,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
} }
} }



Loading…
Cancel
Save