@@ -56,10 +56,6 @@ def parse_INFO(info): | |||
values.append('1') | |||
elif kv[0] == 'AF': | |||
pass | |||
elif kv[0] == 'POSITIVE_TRAIN_SITE': | |||
pass | |||
elif kv[0] == 'NEGATIVE_TRAIN_SITE': | |||
pass | |||
else: | |||
keys.append(kv[0]) | |||
values.append(kv[1]) |
@@ -5,7 +5,8 @@ import re | |||
import pandas as pd | |||
from operator import itemgetter | |||
from collections import Counter | |||
from itertools import islice | |||
from itertools import islice | |||
from __future__ import division | |||
# input arguments | |||
parser = argparse.ArgumentParser(description="this script is to count voting number") |
@@ -1,4 +1,5 @@ | |||
{ | |||
"{{ project_name }}.LCL6normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
@@ -7,15 +8,21 @@ | |||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL7normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.disk_size": "150", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.LCL6bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.repeat_bed": "oss://pgx-result/renluyao/manuscript/all.repeat.bed", | |||
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL5mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL8mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||
@@ -23,10 +30,16 @@ | |||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL7mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL8merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL7bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | |||
"{{ project_name }}.LCL8bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL8normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} | |||
@@ -0,0 +1,25 @@ | |||
# Annotate a merged, bgzipped VCF with labels taken from a BED file by
# running `rtg vcfannotate --bed-info`.
task bed_annotation {
    # Merged VCF to annotate.
    File merged_vcf
    # Index of merged_vcf. It is not named in the command; declaring it as an
    # input presumably makes the backend stage it with the VCF (confirm).
    File merged_vcf_idx
    # BED file handed to --bed-info.
    File repeat_bed
    # Prefix of the output file name.
    String sample

    # Execution settings consumed by the runtime section.
    String docker
    String cluster_config
    String disk_size

    command <<<
        rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf} -o ${sample}.normed.repeatAnno.vcf.gz
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Annotated VCF written by the command above.
        File repeat_annotated_vcf = "${sample}.normed.repeatAnno.vcf.gz"
    }
}
@@ -0,0 +1,25 @@ | |||
# Flatten a VCF into a tab-separated table with the bundled Python script,
# then keep a fixed subset of its columns in a second, smaller table.
task extract_info {
    # VCF to be converted.
    File vcf
    # Output prefix: file name of `vcf` with a trailing ".vcf" removed.
    String vcf_name = basename(vcf,".vcf")

    # Execution settings consumed by the runtime section.
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Stop at the first failing step. Previously a crash in the Python
        # script could go unnoticed because the task's exit status was the
        # status of the trailing `cut`.
        set -e
        python /opt/extract_vcf_information.py -i ${vcf} -o ${vcf_name}.txt
        # `cut` always emits fields in file order, whatever order they are
        # listed in, so the list is sorted here to show the real column order.
        cut -f3,4,8,11,12,15,18,21,22,23,25,27 ${vcf_name}.txt > ${vcf_name}.essential.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Full table written by the Python script.
        File vcf_info = "${vcf_name}.txt"
        # Column subset selected by `cut`.
        File vcf_needed_info = "${vcf_name}.essential.txt"
    }
}
@@ -1,30 +0,0 @@ | |||
# Restrict a VCF to records on chr1-chr22 and chrX, then normalise the
# variants against the reference FASTA with `bcftools norm`.
task indelNorm {
    # Uncompressed VCF to filter and normalise.
    File vcf
    # Directory holding the reference; `fasta` is the file name inside it.
    File ref_dir
    String fasta
    # Prefix for the intermediate and final VCF names.
    String sampleName

    # Execution settings consumed by the runtime section.
    String docker
    String cluster_config
    String disk_size

    command <<<
        # VCF header lines are exactly those that START with '#'. The patterns
        # are anchored so that a record containing '#' elsewhere (e.g. in
        # INFO) is neither copied into the header nor dropped from the body.
        grep '^#' ${vcf} > header
        grep -v '^#' ${vcf} > body
        # -w keeps e.g. "chr1" from also matching "chr10" or "chr1_..._random".
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
        cat header body.filtered > ${sampleName}.filtered.vcf
        /opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Filtered, normalised VCF produced by bcftools.
        File normed_vcf = "${sampleName}.normed.vcf"
    }
}
@@ -8,7 +8,7 @@ task mergeVCFInfo { | |||
command <<< | |||
rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.info.vcf ${sep=" " vcf_gz} | |||
rtg vcfmerge --force-merge-all -o ${sample}.merged.info.vcf.gz ${sep=" " vcf_gz} | |||
>>> | |||
@@ -19,6 +19,7 @@ task mergeVCFInfo { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File merged_info = "${sample}.merged.info.vcf" | |||
File merged_vcf = "${sample}.merged.info.vcf.gz" | |||
File merged_vcf_idx = "${sample}.merged.info.vcf.gz.tbi" | |||
} | |||
} |
@@ -8,7 +8,6 @@ task reformVCF { | |||
command <<< | |||
python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name} | |||
>>> | |||
@@ -9,7 +9,6 @@ task votes { | |||
command <<< | |||
python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix} | |||
cat ${prefix}_annotated.vcf | cut -f1-9,45 | grep -v 'filtered' | grep -v 'confirm for parents' | grep -v 'pcr-free-speicifc' | grep -v 'pcr-speicifc' | grep -v 'dupVar' > ${prefix}_bechmarking_calls.vcf | |||
>>> | |||
runtime { | |||
@@ -20,6 +19,5 @@ task votes { | |||
} | |||
output { | |||
File annotated_vcf = "${prefix}_annotated.vcf" | |||
File benchmark_call = "${prefix}_bechmarking_calls.vcf" | |||
} | |||
} |
@@ -1,14 +1,13 @@ | |||
task zipIndex { | |||
File vcf | |||
String sample | |||
String family_name | |||
String vcf_name = basename(vcf,".vcf") | |||
String docker | |||
String cluster_config | |||
String disk_size | |||
command <<< | |||
rtg bgzip ${vcf} -c > ${family_name}.${sample}.vcf.gz | |||
rtg index -f vcf ${family_name}.${sample}.vcf.gz | |||
rtg bgzip ${vcf} -c > ${vcf_name}.vcf.gz | |||
rtg index -f vcf ${vcf_name}.vcf.gz | |||
>>> | |||
@@ -19,7 +18,7 @@ task zipIndex { | |||
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" | |||
} | |||
output { | |||
File vcf_gz = "${family_name}.${sample}.vcf.gz" | |||
File vcf_idx = "${family_name}.${sample}.vcf.gz.tbi" | |||
File vcf_gz = "${vcf_name}.vcf.gz" | |||
File vcf_idx = "${vcf_name}.vcf.gz.tbi" | |||
} | |||
} |
@@ -6,11 +6,14 @@ import "./tasks/mergeSister.wdl" as mergeSister | |||
import "./tasks/reformVCF.wdl" as reformVCF | |||
import "./tasks/merge.wdl" as merge | |||
import "./tasks/votes.wdl" as votes | |||
import "./tasks/bed_annotation.wdl" as bed_annotation | |||
import "./tasks/mergeVCFInfo.wdl" as mergeVCFInfo | |||
workflow {{ project_name }} { | |||
File inputSamplesFile | |||
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) | |||
File ref_dir | |||
File repeat_bed | |||
String fasta | |||
String cluster_config | |||
String disk_size | |||
@@ -52,6 +55,30 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL5normZip{ | |||
input: | |||
vcf=LCL5variantsNorm.normed_vcf, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL6normZip{ | |||
input: | |||
vcf=LCL6variantsNorm.normed_vcf, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL7normZip{ | |||
input: | |||
vcf=LCL7variantsNorm.normed_vcf, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL8normZip{ | |||
input: | |||
vcf=LCL8variantsNorm.normed_vcf, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mendelian.mendelian as LCL5mendelian { | |||
input: | |||
child_vcf=LCL5variantsNorm.normed_vcf, | |||
@@ -81,16 +108,12 @@ workflow {{ project_name }} { | |||
call zipIndex.zipIndex as LCL5zipIndex { | |||
input: | |||
vcf=LCL5mendelian.trio_vcf, | |||
sample="LCL5", | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL6zipIndex { | |||
input: | |||
vcf=LCL6mendelian.trio_vcf, | |||
sample="LCL6", | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
@@ -138,36 +161,29 @@ workflow {{ project_name }} { | |||
call zipIndex.zipIndex as LCL5familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL5_family_info, | |||
sample='LCL5', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL6familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL6_family_info, | |||
sample='LCL6', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL7familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL7_family_info, | |||
sample='LCL7', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL8familyzipIndex { | |||
input: | |||
vcf=reformVCF.LCL8_family_info, | |||
sample='LCL8', | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||
### family info merge | |||
call merge.merge as LCL5merge { | |||
input: | |||
family_vcf_gz=LCL5familyzipIndex.vcf_gz, | |||
@@ -235,6 +251,75 @@ workflow {{ project_name }} { | |||
prefix='LCL8_consensus', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||
### vcf original information | |||
call mergeVCFInfo.mergeVCFInfo as LCL5mergeVCF { | |||
input: | |||
vcf_gz=LCL5normZip.vcf_gz, | |||
vcf_idx=LCL5normZip.vcf_idx, | |||
sample='LCL5', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call bed_annotation.bed_annotation as LCL5bedAnnotation { | |||
input: | |||
merged_vcf=LCL5mergeVCF.merged_vcf, | |||
merged_vcf_idx=LCL5mergeVCF.merged_vcf_idx, | |||
repeat_bed=repeat_bed, | |||
sample='LCL5', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mergeVCFInfo.mergeVCFInfo as LCL6mergeVCF { | |||
input: | |||
vcf_gz=LCL6normZip.vcf_gz, | |||
vcf_idx=LCL6normZip.vcf_idx, | |||
sample='LCL6', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call bed_annotation.bed_annotation as LCL6bedAnnotation { | |||
input: | |||
merged_vcf=LCL6mergeVCF.merged_vcf, | |||
merged_vcf_idx=LCL6mergeVCF.merged_vcf_idx, | |||
repeat_bed=repeat_bed, | |||
sample='LCL6', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mergeVCFInfo.mergeVCFInfo as LCL7mergeVCF { | |||
input: | |||
vcf_gz=LCL7normZip.vcf_gz, | |||
vcf_idx=LCL7normZip.vcf_idx, | |||
sample='LCL7', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call bed_annotation.bed_annotation as LCL7bedAnnotation { | |||
input: | |||
merged_vcf=LCL7mergeVCF.merged_vcf, | |||
merged_vcf_idx=LCL7mergeVCF.merged_vcf_idx, | |||
repeat_bed=repeat_bed, | |||
sample='LCL7', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mergeVCFInfo.mergeVCFInfo as LCL8mergeVCF { | |||
input: | |||
vcf_gz=LCL8normZip.vcf_gz, | |||
vcf_idx=LCL8normZip.vcf_idx, | |||
sample='LCL8', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call bed_annotation.bed_annotation as LCL8bedAnnotation { | |||
input: | |||
merged_vcf=LCL8mergeVCF.merged_vcf, | |||
merged_vcf_idx=LCL8mergeVCF.merged_vcf_idx, | |||
repeat_bed=repeat_bed, | |||
sample='LCL8', | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||