Selaa lähdekoodia

input vcf

master
LUYAO REN 4 vuotta sitten
vanhempi
commit
64b086903b
4 muutettua tiedostoa jossa 105 lisäystä ja 140 poistoa
  1. +6
    -5
      inputs
  2. +2
    -14
      tasks/benchmark.wdl
  3. +20
    -36
      tasks/merge_family.wdl
  4. +77
    -85
      workflow.wdl

+ 6
- 5
inputs Näytä tiedosto

@@ -1,16 +1,17 @@
{
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v4.0/",
"{{ project_name }}.SENTIEON_INSTALL_DIR": "/opt/sentieon-genomics",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.BENCHMARKdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.gvcf": {{ gvcf_list.split(";") | tojson }},
"{{ project_name }}.gvcf_idx": {{ gvcf_idx_list.split(";") | tojson }},
"{{ project_name }}.LCL8": "{{ LCL8 }}",
"{{ project_name }}.disk_size": "500",
"{{ project_name }}.LCL5": "{{ LCL5 }}",
"{{ project_name }}.rename": "{{ rename }}",
"{{ project_name }}.project": "{{ project }}",
"{{ project_name }}.SMALLcluster_config": "OnDemand bcs.ps.g.xlarge img-ubuntu-vpc",
"{{ project_name }}.BIGcluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.SENTIEONdocker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2018.08.01",
"{{ project_name }}.LCL6": "{{ LCL6 }}",
"{{ project_name }}.MENDELIANdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.LCL7": "{{ LCL7 }}",
"{{ project_name }}.DIYdocker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.4",
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/"
}
}

+ 2
- 14
tasks/benchmark.wdl Näytä tiedosto

@@ -2,7 +2,7 @@ task benchmark {
File vcf
File benchmarking_dir
File ref_dir
String sample = basename(vcf,".splited.vcf")
String sample = basename(vcf,".vcf")
String fasta
String docker
String cluster_config
@@ -18,19 +18,7 @@ task benchmark {

export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

cat ${vcf} | grep '#' > header
cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk '
BEGIN { OFS = "\t" }
{
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[1];
}
}
{ print }
' > body
cat header body > filtered.vcf

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip ${vcf} -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz

if [[ ${sample} =~ "LCL5" ]];then

+ 20
- 36
tasks/merge_family.wdl Näytä tiedosto

@@ -1,46 +1,30 @@
task merge_family {
Array[File] splited_vcf
File LCL5_vcf_gz
File LCL5_vcf_idx
File LCL6_vcf_gz
File LCL6_vcf_idx
File LCL7_vcf_gz
File LCL7_vcf_idx
File LCL8_vcf_gz
File LCL8_vcf_idx
File rename
String project
String docker
String cluster_config
String disk_size
command <<<
# Relabel the LCL5 VCF with the rename mapping file. rtg is called by absolute path
# (as in tasks/benchmark.wdl) so it does not depend on the task's working directory.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL5_vcf_gz} -o LCL5.vcf.gz

mkdir -p /cromwell_root/tmp/vcf

cp ${sep=" " splited_vcf} /cromwell_root/tmp/vcf

for a in /cromwell_root/tmp/vcf/*vcf
do
for b in /cromwell_root/tmp/vcf/*vcf
do
for c in /cromwell_root/tmp/vcf/*vcf
do
for d in /cromwell_root/tmp/vcf/*vcf
do
sample_a=$(echo $a | cut -f7 -d_)
sample_b=$(echo $b | cut -f7 -d_)
sample_c=$(echo $c | cut -f7 -d_)
sample_d=$(echo $d | cut -f7 -d_)
rep_a=$(echo $a | cut -f8 -d_)
rep_b=$(echo $b | cut -f8 -d_)
rep_c=$(echo $c | cut -f8 -d_)
rep_d=$(echo $d | cut -f8 -d_)
if [ $sample_a == "LCL5" ] && [ $sample_b == "LCL6" ] && [ $sample_c == "LCL7" ] && [ $sample_d == "LCL8" ] && [ $rep_a == $rep_b ] && [ $rep_c == $rep_d ] && [ $rep_b == $rep_c ];then
cat $a | grep -v '#' > LCL5.body
cat $b | grep -v '#' | cut -f 10 > LCL6.body
cat $c | grep -v '#' | cut -f 10 > LCL7.body
cat $d | grep -v '#' | cut -f 10 > LCL8.body
echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tLCL5\tLCL6\tLCL7\tLCL8" > header_name
cat $a | grep '##' | cat - header_name > header
paste LCL5.body LCL6.body LCL7.body LCL8.body > family.body
cat header family.body > ${project}.$rep_a.family.vcf
fi
done
done
done
done
# Relabel the LCL6, LCL7 and LCL8 VCFs with the same rename mapping file.
# rtg is called by absolute path (as in tasks/benchmark.wdl) rather than relative to the working directory.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL6_vcf_gz} -o LCL6.vcf.gz

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL7_vcf_gz} -o LCL7.vcf.gz

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfannotate --relabel=${rename} -i ${LCL8_vcf_gz} -o LCL8.vcf.gz

# Merge the four relabeled files written by the vcfannotate steps. The previous arguments were
# bare variable names (no interpolation), which are not files on disk.
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg vcfmerge --force-merge-all -o ${project}.family.vcf.gz LCL5.vcf.gz LCL6.vcf.gz LCL7.vcf.gz LCL8.vcf.gz

gunzip ${project}.family.vcf.gz

>>>

@@ -51,7 +35,7 @@
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
Array[File] family_vcf = glob("*.family.vcf")
File merged_vcf = "${project}.family.vcf"
}
}


+ 77
- 85
workflow.wdl Näytä tiedosto

@@ -1,29 +1,27 @@
import "./tasks/split_gvcf_files.wdl" as split_gvcf_files
import "./tasks/GVCFtyper.wdl" as GVCFtyper
import "./tasks/benchmark.wdl" as benchmark
import "./tasks/mendelian.wdl" as mendelian
import "./tasks/merge_mendelian.wdl" as merge_mendelian
import "./tasks/quartet_mendelian.wdl" as quartet_mendelian
import "./tasks/D5_D6.wdl" as D5_D6
import "./tasks/merge_family.wdl" as merge_family


workflow {{ project_name }} {

Array[File] gvcf
Array[File] gvcf_idx
File LCL5
File LCL6
File LCL7
File LCL8

String BENCHMARKdocker
String MENDELIANdocker
String DIYdocker
String SENTIEON_INSTALL_DIR
String SENTIEONdocker


String fasta
File ref_dir

File benchmarking_dir
File rename

String project

@@ -31,100 +29,94 @@ workflow {{ project_name }} {
String BIGcluster_config
String SMALLcluster_config

call GVCFtyper.GVCFtyper as GVCFtyper {
call benchmark.benchmark as LCL5benchmark {
input:
vcf=LCL5,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
fasta=fasta,
vcf=gvcf,
vcf_idx=gvcf_idx,
project=project,
docker=SENTIEONdocker,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
disk_size=disk_size,
}

call split_gvcf_files.split_gvcf_files as split_gvcf_files {
call benchmark.benchmark as LCL6benchmark {
input:
merged_gvcf=GVCFtyper.merged_gvcf,
docker=DIYdocker,
project=project,
cluster_config=SMALLcluster_config,
disk_size=disk_size
vcf=LCL6,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}

call benchmark.benchmark as LCL7benchmark {
input:
vcf=LCL7,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}

Array[File] single_gvcf = split_gvcf_files.splited_vcf
call benchmark.benchmark as LCL8benchmark {
input:
vcf=LCL8,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}

scatter (idx in range(length(single_gvcf))) {
call benchmark.benchmark as benchmark {
input:
vcf=single_gvcf[idx],
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
fasta=fasta,
docker=BENCHMARKdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size,
}
# Merge the four per-sample VCFs into one family VCF.
# Each *_vcf_gz input takes the rtg_vcf output of the matching benchmark call, and each
# *_vcf_idx input takes that same call's rtg_vcf_index output (LCL8 previously received the
# VCF itself as its index).
call merge_family.merge_family as merge_family {
input:
LCL5_vcf_gz=LCL5benchmark.rtg_vcf,
LCL5_vcf_idx=LCL5benchmark.rtg_vcf_index,
LCL6_vcf_gz=LCL6benchmark.rtg_vcf,
LCL6_vcf_idx=LCL6benchmark.rtg_vcf_index,
LCL7_vcf_gz=LCL7benchmark.rtg_vcf,
LCL7_vcf_idx=LCL7benchmark.rtg_vcf_index,
LCL8_vcf_gz=LCL8benchmark.rtg_vcf,
LCL8_vcf_idx=LCL8benchmark.rtg_vcf_index,
rename=rename,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size,
}

call mendelian.mendelian as mendelian {
input:
family_vcf=merge_family.merged_vcf,
ref_dir=ref_dir,
fasta=fasta,
docker=MENDELIANdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

Boolean sister_tag = read_boolean(split_gvcf_files.sister_tag)
Boolean quartet_tag = read_boolean(split_gvcf_files.quartet_tag)

if (sister_tag) {
call D5_D6.D5_D6 as D5_D6 {
input:
splited_vcf=split_gvcf_files.splited_vcf,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size,
}
call merge_mendelian.merge_mendelian as merge_mendelian {
input:
D5_trio_vcf=mendelian.D5_trio_vcf,
D6_trio_vcf=mendelian.D6_trio_vcf,
family_vcf=merge_family.merged_vcf,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
}

if (quartet_tag) {
call merge_family.merge_family as merge_family {
input:
splited_vcf=split_gvcf_files.splited_vcf,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size,
}

Array[File] family_vcfs = merge_family.family_vcf
scatter (idx in range(length(family_vcfs))) {
call mendelian.mendelian as mendelian {
input:
family_vcf=family_vcfs[idx],
ref_dir=ref_dir,
fasta=fasta,
docker=MENDELIANdocker,
cluster_config=BIGcluster_config,
disk_size=disk_size
}

call merge_mendelian.merge_mendelian as merge_mendelian {
input:
D5_trio_vcf=mendelian.D5_trio_vcf,
D6_trio_vcf=mendelian.D6_trio_vcf,
family_vcf=family_vcfs[idx],
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
}
}

call quartet_mendelian.quartet_mendelian as quartet_mendelian {
input:
project_mendelian_summary=merge_mendelian.project_mendelian_summary,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
}
call quartet_mendelian.quartet_mendelian as quartet_mendelian {
input:
project_mendelian_summary=merge_mendelian.project_mendelian_summary,
project=project,
docker=DIYdocker,
cluster_config=SMALLcluster_config,
disk_size=disk_size
}
}


Loading…
Peruuta
Tallenna