@@ -1,13 +1,26 @@ | |||
{ | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.test_name": "{{ test_name }}", | |||
"{{ project_name }}.disk_size": "150", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | |||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} | |||
} | |||
@@ -1,6 +1,7 @@ | |||
task merge { | |||
Array[File] family_vcf_gz | |||
Array[File] family_vcf_idx | |||
String test_name | |||
String sample | |||
String docker | |||
String cluster_config | |||
@@ -8,9 +9,34 @@ task merge { | |||
command <<<
    # Merge every per-family VCF into one multi-sample VCF, once uncompressed
    # (read by the duplicate-position check at the end) and once as .vcf.gz.
    rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.vcf ${sep=" " family_vcf_gz}
    rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz}

    # Split the compressed merge into an SNP-only file and a non-SNP file.
    rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.snv.merged.vcf.gz --snps-only --all-samples
    rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.indel.merged.vcf.gz --non-snps-only --all-samples

    # Header row for the count tables: columns 10-12 of the #CHROM line.
    zcat ${sample}.indel.merged.vcf.gz | grep '#CHROM' | cut -f10-12 > name

    # For each of columns 10-12: keep colon-separated fields 2-4 of every data
    # line, drop values containing '.', and count each distinct value. Only the
    # count column (in sorted-value order) is written to <column>.snv.txt.
    for i in {10..12}; do zcat ${sample}.snv.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.'| sort | uniq -c | awk '{print $1, substr($1,0,7)}' | sed 's/\s\+/\t/g' | cut -f1 > $i.snv.txt; done
    paste *.snv.txt | cat name - > snv.txt

    # Same counting for the non-SNP file.
    for i in {10..12}; do zcat ${sample}.indel.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.'| sort | uniq -c | awk '{print $1, substr($1,0,7)}' | sed 's/\s\+/\t/g' | cut -f1 > $i.indel.txt; done
    # Must be written to indel.txt: that is the file pasted with the label
    # column below (it was previously written to index.txt, which nothing reads).
    paste *.indel.txt | cat name - > indel.txt

    # Row-label column placed to the left of both count tables.
    # NOTE(review): the labels line up with the counts only if all eight values
    # occur in every column and sort in this order - confirm against real data.
    echo 'type' > column
    echo '0,0,0' >> column
    echo '0,0,1' >> column
    echo '0,1,0' >> column
    echo '0,1,1' >> column
    echo '1,0,0' >> column
    echo '1,0,1' >> column
    echo '1,1,0' >> column
    echo '1,1,1' >> column
    paste column snv.txt > ${test_name}.snv.txt
    paste column indel.txt > ${test_name}.indel.txt

    # List CHROM_POS keys that occur on more than one data line of the
    # uncompressed merged VCF.
    cat ${sample}.merged.vcf | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt
>>>
@@ -21,7 +47,13 @@ task merge { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File merged_vcf = "${sample}.merged.vcf" | |||
File vcf_dup = "${sample}.vcf_dup.txt" | |||
File merged_vcf = "${sample}.merged.vcf.gz" | |||
File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi" | |||
File merged_snv = "${sample}.snv.merged.vcf.gz" | |||
File merged_snv_idx = "${sample}.snv.merged.vcf.gz.tbi" | |||
File merged_indel = "${sample}.indel.merged.vcf.gz" | |||
File merged_indel_idx = "${sample}.indel.merged.vcf.gz.tbi" | |||
File snv = "${test_name}.snv.txt" | |||
File indel = "${test_name}.indel.txt" | |||
} | |||
} |
@@ -1,11 +1,18 @@ | |||
import "./tasks/variantsNorm.wdl" as variantsNorm | |||
import "./tasks/mendelian.wdl" as mendelian | |||
import "./tasks/zipIndex.wdl" as zipIndex | |||
import "./tasks/VCFrename.wdl" as VCFrename | |||
import "./tasks/mergeSister.wdl" as mergeSister | |||
import "./tasks/reformVCF.wdl" as reformVCF | |||
import "./tasks/merge.wdl" as merge | |||
import "./tasks/votes.wdl" as votes | |||
workflow {{ project_name }} { | |||
File inputSamplesFile | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
File ref_dir | |||
String fasta | |||
String test_name | |||
String cluster_config | |||
String disk_size | |||
@@ -72,5 +79,104 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL5zipIndex { | |||
input: | |||
vcf=LCL5mendelian.trio_vcf, | |||
sample="LCL5", | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL6zipIndex { | |||
input: | |||
vcf=LCL6mendelian.trio_vcf, | |||
sample="LCL6", | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call VCFrename.VCFrename as LCL5VCFrename { | |||
input: | |||
trio_vcf_gz=LCL5zipIndex.vcf_gz, | |||
trio_vcf_idx=LCL5zipIndex.vcf_idx, | |||
mother_name=quartet[7], | |||
father_name=quartet[6], | |||
child_name=quartet[4], | |||
family_name=quartet[8], | |||
child="LCL5", | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call VCFrename.VCFrename as LCL6VCFrename { | |||
input: | |||
trio_vcf_gz=LCL6zipIndex.vcf_gz, | |||
trio_vcf_idx=LCL6zipIndex.vcf_idx, | |||
mother_name=quartet[7], | |||
father_name=quartet[6], | |||
child_name=quartet[5], | |||
family_name=quartet[8], | |||
child="LCL6", | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mergeSister.mergeSister as mergeSister { | |||
input: | |||
LCL5_trio_vcf_gz=LCL5VCFrename.rename_trio_vcf_gz, | |||
LCL5_trio_vcf_idx=LCL5VCFrename.rename_trio_vcf_idx, | |||
LCL6_trio_vcf_gz=LCL6VCFrename.rename_trio_vcf_gz, | |||
LCL6_trio_vcf_idx=LCL6VCFrename.rename_trio_vcf_idx, | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call reformVCF.reformVCF as reformVCF { | |||
input: | |||
family_mendelian_info=mergeSister.family_mendelian_info, | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL5familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL5_family_info, | |||
sample='LCL5', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL6familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL6_family_info, | |||
sample='LCL6', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL7familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL7_family_info, | |||
sample='LCL7', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL8familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL8_family_info, | |||
sample='LCL8', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||
call merge.merge as LCL5merge { | |||
input: | |||
family_vcf_gz=LCL5familyzipIndex.vcf_gz, | |||
family_vcf_idx=LCL5familyzipIndex.vcf_idx, | |||
sample="LCL5", | |||
test_name=test_name, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} |