Browse Source

info integration

master
LUYAO REN 5 years ago
parent
commit
7cfe6ee9ff
11 changed files with 171 additions and 59 deletions
  1. +0
    -4
      codescripts/extract_vcf_information.py
  2. +2
    -1
      codescripts/high_confidence_call_vote.py
  3. +13
    -0
      inputs
  4. +25
    -0
      tasks/bed_annotation.wdl
  5. +25
    -0
      tasks/extract_info.wdl
  6. +0
    -30
      tasks/indelNorm.wdl
  7. +3
    -2
      tasks/mergeVCFInfo.wdl
  8. +0
    -1
      tasks/reformVCF.wdl
  9. +0
    -2
      tasks/votes.wdl
  10. +5
    -6
      tasks/zipIndex.wdl
  11. +98
    -13
      workflow.wdl

+ 0
- 4
codescripts/extract_vcf_information.py View File

@@ -56,10 +56,6 @@ def parse_INFO(info):
values.append('1')
elif kv[0] == 'AF':
pass
elif kv[0] == 'POSITIVE_TRAIN_SITE':
pass
elif kv[0] == 'NEGATIVE_TRAIN_SITE':
pass
else:
keys.append(kv[0])
values.append(kv[1])

+ 2
- 1
codescripts/high_confidence_call_vote.py View File

@@ -5,7 +5,8 @@ import re
import pandas as pd
from operator import itemgetter
from collections import Counter
from itertools import islice
from itertools import islice
from __future__ import division

# input arguments
parser = argparse.ArgumentParser(description="this script is to count voting number")

+ 13
- 0
inputs View File

@@ -1,4 +1,5 @@
{
"{{ project_name }}.LCL6normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
@@ -7,15 +8,21 @@
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.disk_size": "150",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.LCL6bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.repeat_bed": "oss://pgx-result/renluyao/manuscript/all.repeat.bed",
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL5mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
@@ -23,10 +30,16 @@
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL7mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL7bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL8bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}


+ 25
- 0
tasks/bed_annotation.wdl View File

@@ -0,0 +1,25 @@
# Annotate a merged VCF with the entries of a BED file by running
# `rtg vcfannotate --bed-info`; the result is written to
# <sample>.normed.repeatAnno.vcf.gz.
task bed_annotation {
  # VCF handed to rtg through -i.
  File merged_vcf
  # Index belonging to merged_vcf. It is not referenced in the command.
  # NOTE(review): presumably declared so the index is localized next to the
  # VCF for rtg to find - confirm.
  File merged_vcf_idx
  # BED file handed to rtg through --bed-info.
  File repeat_bed
  # Prefix of the output file name.
  String sample
  # Container image and backend settings consumed by the runtime section.
  String docker
  String cluster_config
  String disk_size

  command <<<

    rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf} -o ${sample}.normed.repeatAnno.vcf.gz

  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    # disk_size is spliced into the data-disk spec mounted at /cromwell_root/.
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Same path that was given to rtg via -o.
    File repeat_annotated_vcf = "${sample}.normed.repeatAnno.vcf.gz"
  }
}

+ 25
- 0
tasks/extract_info.wdl View File

@@ -0,0 +1,25 @@
# Convert a VCF to a tab-delimited table with /opt/extract_vcf_information.py
# and additionally write a 12-column subset of that table.
task extract_info {
  # VCF to be summarised.
  File vcf
  # Output prefix: the file name of `vcf` with a trailing ".vcf" removed.
  String vcf_name = basename(vcf,".vcf")
  # Container image and backend settings consumed by the runtime section.
  String docker
  String cluster_config
  String disk_size

  # The field list given to cut is written in ascending order on purpose:
  # cut always emits the selected fields in the order they occur in the input,
  # no matter how the list is ordered, so this is the real column order of
  # <vcf_name>.essential.txt. cut reads the table directly; no `cat` is needed.
  command <<<

    python /opt/extract_vcf_information.py -i ${vcf} -o ${vcf_name}.txt
    cut -f3,4,8,11,12,15,18,21,22,23,25,27 ${vcf_name}.txt > ${vcf_name}.essential.txt

  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    # disk_size is spliced into the data-disk spec mounted at /cromwell_root/.
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Full table written by extract_vcf_information.py.
    File vcf_info = "${vcf_name}.txt"
    # Column subset produced by cut.
    File vcf_needed_info = "${vcf_name}.essential.txt"
  }
}

+ 0
- 30
tasks/indelNorm.wdl View File

@@ -1,30 +0,0 @@
# Keep only the records on chr1-chr22 and chrX of a VCF, then normalise the
# result against the reference FASTA with `bcftools norm`.
task indelNorm {
  # Uncompressed VCF to filter and normalise.
  File vcf
  # Directory holding the reference; `fasta` is resolved inside it.
  File ref_dir
  # File name of the reference FASTA within ref_dir.
  String fasta
  # Prefix of the intermediate and output file names.
  String sampleName
  # Container image and backend settings consumed by the runtime section.
  String docker
  String cluster_config
  String disk_size

  # Header and body are split on a leading '#'. The pattern is anchored so a
  # record that merely contains '#' somewhere in a field stays in the body
  # instead of being moved into the header. The chromosome filter uses -w so
  # that e.g. "chr1" does not also select "chr10" or "chr1_..." contigs.
  command <<<

    grep '^#' ${vcf} > header
    grep -v '^#' ${vcf} > body
    grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
    cat header body.filtered > ${sampleName}.filtered.vcf

    /opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf

  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    # disk_size is spliced into the data-disk spec mounted at /cromwell_root/.
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    # Normalised VCF written by bcftools norm.
    File normed_vcf = "${sampleName}.normed.vcf"
  }
}

+ 3
- 2
tasks/mergeVCFInfo.wdl View File

@@ -8,7 +8,7 @@ task mergeVCFInfo {
command <<<

rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.info.vcf ${sep=" " vcf_gz}
rtg vcfmerge --force-merge-all -o ${sample}.merged.info.vcf.gz ${sep=" " vcf_gz}
>>>

@@ -19,6 +19,7 @@ task mergeVCFInfo {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File merged_info = "${sample}.merged.info.vcf"
File merged_vcf = "${sample}.merged.info.vcf.gz"
File merged_vcf_idx = "${sample}.merged.info.vcf.gz.tbi"
}
}

+ 0
- 1
tasks/reformVCF.wdl View File

@@ -8,7 +8,6 @@ task reformVCF {
command <<<

python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name}

>>>


+ 0
- 2
tasks/votes.wdl View File

@@ -9,7 +9,6 @@ task votes {
command <<<
python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix}
cat ${prefix}_annotated.vcf | cut -f1-9,45 | grep -v 'filtered' | grep -v 'confirm for parents' | grep -v 'pcr-free-speicifc' | grep -v 'pcr-speicifc' | grep -v 'dupVar' > ${prefix}_bechmarking_calls.vcf
>>>

runtime {
@@ -20,6 +19,5 @@ task votes {
}
output {
File annotated_vcf = "${prefix}_annotated.vcf"
File benchmark_call = "${prefix}_bechmarking_calls.vcf"
}
}

+ 5
- 6
tasks/zipIndex.wdl View File

@@ -1,14 +1,13 @@
task zipIndex {
File vcf
String sample
String family_name
String vcf_name = basename(vcf,".vcf")
String docker
String cluster_config
String disk_size
command <<<
rtg bgzip ${vcf} -c > ${family_name}.${sample}.vcf.gz
rtg index -f vcf ${family_name}.${sample}.vcf.gz
rtg bgzip ${vcf} -c > ${vcf_name}.vcf.gz
rtg index -f vcf ${vcf_name}.vcf.gz

>>>

@@ -19,7 +18,7 @@ task zipIndex {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File vcf_gz = "${family_name}.${sample}.vcf.gz"
File vcf_idx = "${family_name}.${sample}.vcf.gz.tbi"
File vcf_gz = "${vcf_name}.vcf.gz"
File vcf_idx = "${vcf_name}.vcf.gz.tbi"
}
}

+ 98
- 13
workflow.wdl View File

@@ -6,11 +6,14 @@ import "./tasks/mergeSister.wdl" as mergeSister
import "./tasks/reformVCF.wdl" as reformVCF
import "./tasks/merge.wdl" as merge
import "./tasks/votes.wdl" as votes
import "./tasks/bed_annotation.wdl" as bed_annotation
import "./tasks/mergeVCFInfo.wdl" as mergeVCFInfo

workflow {{ project_name }} {
File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
File ref_dir
File repeat_bed
String fasta
String cluster_config
String disk_size
@@ -52,6 +55,30 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL5normZip{
input:
vcf=LCL5variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6normZip{
input:
vcf=LCL6variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL7normZip{
input:
vcf=LCL7variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL8normZip{
input:
vcf=LCL8variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call mendelian.mendelian as LCL5mendelian {
input:
child_vcf=LCL5variantsNorm.normed_vcf,
@@ -81,16 +108,12 @@ workflow {{ project_name }} {
call zipIndex.zipIndex as LCL5zipIndex {
input:
vcf=LCL5mendelian.trio_vcf,
sample="LCL5",
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6zipIndex {
input:
vcf=LCL6mendelian.trio_vcf,
sample="LCL6",
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
@@ -138,36 +161,29 @@ workflow {{ project_name }} {
call zipIndex.zipIndex as LCL5familyzipIndex {
input:
vcf=reformVCF.LCL5_family_info,
sample='LCL5',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6familyzipIndex {
input:
vcf=reformVCF.LCL6_family_info,
sample='LCL6',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL7familyzipIndex {
input:
vcf=reformVCF.LCL7_family_info,
sample='LCL7',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL8familyzipIndex {
input:
vcf=reformVCF.LCL8_family_info,
sample='LCL8',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
}
### family info merge
call merge.merge as LCL5merge {
input:
family_vcf_gz=LCL5familyzipIndex.vcf_gz,
@@ -235,6 +251,75 @@ workflow {{ project_name }} {
prefix='LCL8_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
}
### vcf original information
call mergeVCFInfo.mergeVCFInfo as LCL5mergeVCF {
input:
vcf_gz=LCL5normZip.vcf_gz,
vcf_idx=LCL5normZip.vcf_idx,
sample='LCL5',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL5bedAnnotation {
input:
merged_vcf=LCL5mergeVCF.merged_vcf,
merged_vcf_idx=LCL5mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL5',
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeVCFInfo.mergeVCFInfo as LCL6mergeVCF {
input:
vcf_gz=LCL6normZip.vcf_gz,
vcf_idx=LCL6normZip.vcf_idx,
sample='LCL6',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL6bedAnnotation {
input:
merged_vcf=LCL6mergeVCF.merged_vcf,
merged_vcf_idx=LCL6mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL6',
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeVCFInfo.mergeVCFInfo as LCL7mergeVCF {
input:
vcf_gz=LCL7normZip.vcf_gz,
vcf_idx=LCL7normZip.vcf_idx,
sample='LCL7',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL7bedAnnotation {
input:
merged_vcf=LCL7mergeVCF.merged_vcf,
merged_vcf_idx=LCL7mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL7',
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeVCFInfo.mergeVCFInfo as LCL8mergeVCF {
input:
vcf_gz=LCL8normZip.vcf_gz,
vcf_idx=LCL8normZip.vcf_idx,
sample='LCL8',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL8bedAnnotation {
input:
merged_vcf=LCL8mergeVCF.merged_vcf,
merged_vcf_idx=LCL8mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL8',
cluster_config=cluster_config,
disk_size=disk_size
}
}


Loading…
Cancel
Save