{ | { | ||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | ||||
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.test_name": "{{ test_name }}", | |||||
"{{ project_name }}.disk_size": "150", | "{{ project_name }}.disk_size": "150", | ||||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | ||||
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | "{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | ||||
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | ||||
} | |||||
} | |||||
task merge { | task merge { | ||||
Array[File] family_vcf_gz | Array[File] family_vcf_gz | ||||
Array[File] family_vcf_idx | Array[File] family_vcf_idx | ||||
String test_name | |||||
String sample | String sample | ||||
String docker | String docker | ||||
String cluster_config | String cluster_config | ||||
command <<< | command <<< | ||||
rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.vcf ${sep=" " family_vcf_gz} | |||||
rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz} | |||||
rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.snv.merged.vcf.gz --snps-only --all-samples | |||||
rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.indel.merged.vcf.gz --non-snps-only --all-samples | |||||
zcat ${sample}.indel.merged.vcf.gz | grep '#CHROM' | cut -f10-12 > name | |||||
for i in {10..12}; do zcat ${sample}.snv.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.'| sort | uniq -c | awk '{print $1, substr($1,0,7)}' | sed 's/\s\+/\t/g' | cut -f1 > $i.snv.txt; done | |||||
paste *.snv.txt | cat name - > snv.txt | |||||
for i in {10..12}; do zcat ${sample}.indel.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.'| sort | uniq -c | awk '{print $1, substr($1,0,7)}' | sed 's/\s\+/\t/g' | cut -f1 > $i.indel.txt; done | |||||
paste *.indel.txt | cat name - > indel.txt | |||||
echo 'type' > column | |||||
echo '0,0,0' >> column | |||||
echo '0,0,1' >> column | |||||
echo '0,1,0' >> column | |||||
echo '0,1,1' >> column | |||||
echo '1,0,0' >> column | |||||
echo '1,0,1' >> column | |||||
echo '1,1,0' >> column | |||||
echo '1,1,1' >> column | |||||
paste column snv.txt > ${test_name}.snv.txt | |||||
paste column indel.txt > ${test_name}.indel.txt | |||||
cat ${sample}.merged.vcf | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt | |||||
>>> | >>> | ||||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | ||||
} | } | ||||
output { | output { | ||||
File merged_vcf = "${sample}.merged.vcf" | |||||
File vcf_dup = "${sample}.vcf_dup.txt" | |||||
File merged_vcf = "${sample}.merged.vcf.gz" | |||||
File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi" | |||||
File merged_snv = "${sample}.snv.merged.vcf.gz" | |||||
File merged_snv_idx = "${sample}.snv.merged.vcf.gz.tbi" | |||||
File merged_indel = "${sample}.indel.merged.vcf.gz" | |||||
File merged_indel_idx = "${sample}.indel.merged.vcf.gz.tbi" | |||||
File snv = "${test_name}.snv.txt" | |||||
File indel = "${test_name}.indel.txt" | |||||
} | } | ||||
} | } |
import "./tasks/variantsNorm.wdl" as variantsNorm | import "./tasks/variantsNorm.wdl" as variantsNorm | ||||
import "./tasks/mendelian.wdl" as mendelian | import "./tasks/mendelian.wdl" as mendelian | ||||
import "./tasks/zipIndex.wdl" as zipIndex | |||||
import "./tasks/VCFrename.wdl" as VCFrename | |||||
import "./tasks/mergeSister.wdl" as mergeSister | |||||
import "./tasks/reformVCF.wdl" as reformVCF | |||||
import "./tasks/merge.wdl" as merge | |||||
import "./tasks/votes.wdl" as votes | |||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File inputSamplesFile | File inputSamplesFile | ||||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | ||||
File ref_dir | File ref_dir | ||||
String fasta | String fasta | ||||
String test_name | |||||
String cluster_config | String cluster_config | ||||
String disk_size | String disk_size | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call zipIndex.zipIndex as LCL5zipIndex { | |||||
input: | |||||
vcf=LCL5mendelian.trio_vcf, | |||||
sample="LCL5", | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL6zipIndex { | |||||
input: | |||||
vcf=LCL6mendelian.trio_vcf, | |||||
sample="LCL6", | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFrename.VCFrename as LCL5VCFrename { | |||||
input: | |||||
trio_vcf_gz=LCL5zipIndex.vcf_gz, | |||||
trio_vcf_idx=LCL5zipIndex.vcf_idx, | |||||
mother_name=quartet[7], | |||||
father_name=quartet[6], | |||||
child_name=quartet[4], | |||||
family_name=quartet[8], | |||||
child="LCL5", | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFrename.VCFrename as LCL6VCFrename { | |||||
input: | |||||
trio_vcf_gz=LCL6zipIndex.vcf_gz, | |||||
trio_vcf_idx=LCL6zipIndex.vcf_idx, | |||||
mother_name=quartet[7], | |||||
father_name=quartet[6], | |||||
child_name=quartet[5], | |||||
family_name=quartet[8], | |||||
child="LCL6", | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call mergeSister.mergeSister as mergeSister { | |||||
input: | |||||
LCL5_trio_vcf_gz=LCL5VCFrename.rename_trio_vcf_gz, | |||||
LCL5_trio_vcf_idx=LCL5VCFrename.rename_trio_vcf_idx, | |||||
LCL6_trio_vcf_gz=LCL6VCFrename.rename_trio_vcf_gz, | |||||
LCL6_trio_vcf_idx=LCL6VCFrename.rename_trio_vcf_idx, | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call reformVCF.reformVCF as reformVCF { | |||||
input: | |||||
family_mendelian_info=mergeSister.family_mendelian_info, | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL5familyzipIndex { | |||||
input: | |||||
vcf=reformVCF.LCL5_family_info, | |||||
sample='LCL5', | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL6familyzipIndex { | |||||
input: | |||||
vcf=reformVCF.LCL6_family_info, | |||||
sample='LCL6', | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL7familyzipIndex { | |||||
input: | |||||
vcf=reformVCF.LCL7_family_info, | |||||
sample='LCL7', | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL8familyzipIndex { | |||||
input: | |||||
vcf=reformVCF.LCL8_family_info, | |||||
sample='LCL8', | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | } | ||||
call merge.merge as LCL5merge { | |||||
input: | |||||
family_vcf_gz=LCL5familyzipIndex.vcf_gz, | |||||
family_vcf_idx=LCL5familyzipIndex.vcf_idx, | |||||
sample="LCL5", | |||||
test_name=test_name, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | } |