LUYAO REN пре 5 година
родитељ
комит
7cfe6ee9ff
11 измењених фајлова са 171 додато и 59 уклоњено
  1. +0
    -4
      codescripts/extract_vcf_information.py
  2. +2
    -1
      codescripts/high_confidence_call_vote.py
  3. +13
    -0
      inputs
  4. +25
    -0
      tasks/bed_annotation.wdl
  5. +25
    -0
      tasks/extract_info.wdl
  6. +0
    -30
      tasks/indelNorm.wdl
  7. +3
    -2
      tasks/mergeVCFInfo.wdl
  8. +0
    -1
      tasks/reformVCF.wdl
  9. +0
    -2
      tasks/votes.wdl
  10. +5
    -6
      tasks/zipIndex.wdl
  11. +98
    -13
      workflow.wdl

+ 0
- 4
codescripts/extract_vcf_information.py Прегледај датотеку

values.append('1') values.append('1')
elif kv[0] == 'AF': elif kv[0] == 'AF':
pass pass
elif kv[0] == 'POSITIVE_TRAIN_SITE':
pass
elif kv[0] == 'NEGATIVE_TRAIN_SITE':
pass
else: else:
keys.append(kv[0]) keys.append(kv[0])
values.append(kv[1]) values.append(kv[1])

+ 2
- 1
codescripts/high_confidence_call_vote.py Прегледај датотеку

import pandas as pd import pandas as pd
from operator import itemgetter from operator import itemgetter
from collections import Counter from collections import Counter
from itertools import islice
from itertools import islice
from __future__ import division


# input arguments # input arguments
parser = argparse.ArgumentParser(description="this script is to count voting number") parser = argparse.ArgumentParser(description="this script is to count voting number")

+ 13
- 0
inputs Прегледај датотеку

{ {
"{{ project_name }}.LCL6normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", "{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.disk_size": "150", "{{ project_name }}.disk_size": "150",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.LCL6bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.repeat_bed": "oss://pgx-result/renluyao/manuscript/all.repeat.bed",
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", "{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL5mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", "{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", "{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", "{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", "{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL7mergeVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL8merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", "{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL7bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", "{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL8bedAnnotation.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8normZip.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
} }



+ 25
- 0
tasks/bed_annotation.wdl Прегледај датотеку

# Runs `rtg vcfannotate` on a merged VCF, with `--bed-info` pointing at a BED file.
#
# Inputs:
#   sample          - prefix used to build the output file name
#   merged_vcf      - VCF handed to `rtg vcfannotate -i`
#   merged_vcf_idx  - index of merged_vcf; never referenced in the command
#                     (presumably declared so it is localized next to the VCF - TODO confirm)
#   repeat_bed      - BED file handed to `--bed-info`
#   docker, cluster_config, disk_size - values forwarded to the runtime section
# Output:
#   repeat_annotated_vcf - "${sample}.normed.repeatAnno.vcf.gz"
task bed_annotation {
    String sample
    File merged_vcf
    File merged_vcf_idx
    File repeat_bed
    String docker
    String cluster_config
    String disk_size

    command <<<

        rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf} -o ${sample}.normed.repeatAnno.vcf.gz

    >>>

    output {
        File repeat_annotated_vcf = "${sample}.normed.repeatAnno.vcf.gz"
    }

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        # Data disk size comes from the caller; mount point is fixed.
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
}

+ 25
- 0
tasks/extract_info.wdl Прегледај датотеку

# Runs /opt/extract_vcf_information.py on a VCF and derives a reduced table
# containing a fixed subset of its columns.
#
# Inputs:
#   vcf       - VCF passed to the script via `-i`
#   vcf_name  - output prefix; defaults to the VCF file name without ".vcf"
#   docker, cluster_config, disk_size - values forwarded to the runtime section
# Outputs:
#   vcf_info        - "${vcf_name}.txt", the table written by the script
#   vcf_needed_info - "${vcf_name}.essential.txt", selected columns of vcf_info
task extract_info {
    File vcf
    String vcf_name = basename(vcf,".vcf")
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Stop at the first failing step: without this the exit status of the
        # task is that of the final cut, so a crash in the python step that
        # left a partial table behind would be reported as success.
        set -e

        python /opt/extract_vcf_information.py -i ${vcf} -o ${vcf_name}.txt

        # cut emits the selected fields in input order no matter how the list
        # is written, so the list is kept sorted to reflect the real output
        # column order (same fields as before: 3,4,8,11,12,15,18,21,22,23,25,27).
        cut -f3,4,8,11,12,15,18,21,22,23,25,27 ${vcf_name}.txt > ${vcf_name}.essential.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File vcf_info = "${vcf_name}.txt"
        File vcf_needed_info = "${vcf_name}.essential.txt"
    }
}

+ 0
- 30
tasks/indelNorm.wdl Прегледај датотеку

# Restricts a VCF to records on chr1-chr22 and chrX, then runs `bcftools norm`
# on the result using the reference FASTA at "${ref_dir}/${fasta}".
#
# Inputs:
#   vcf         - input VCF (read as plain text by grep)
#   ref_dir     - directory holding the reference FASTA
#   fasta       - file name of the reference FASTA inside ref_dir
#   sampleName  - prefix for the intermediate and output file names
#   docker, cluster_config, disk_size - values forwarded to the runtime section
# Output:
#   normed_vcf  - "${sampleName}.normed.vcf"
task indelNorm {
    File vcf
    File ref_dir
    String fasta
    String sampleName
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Header lines are the ones that START with '#'. The pattern is anchored
        # so that a variant record which merely contains a '#' somewhere (for
        # example inside an INFO value) stays in the body instead of being
        # moved into the header.
        grep '^#' ${vcf} > header
        grep -v '^#' ${vcf} > body

        # Keep only records whose line begins with one of the listed contigs;
        # -w stops chr1 from also matching chr10, chr11, ...
        grep -w '^chr1\|^chr2\|^chr3\|^chr4\|^chr5\|^chr6\|^chr7\|^chr8\|^chr9\|^chr10\|^chr11\|^chr12\|^chr13\|^chr14\|^chr15\|^chr16\|^chr17\|^chr18\|^chr19\|^chr20\|^chr21\|^chr22\|^chrX' body > body.filtered
        cat header body.filtered > ${sampleName}.filtered.vcf

        /opt/hall-lab/bcftools-1.9/bin/bcftools norm -f ${ref_dir}/${fasta} ${sampleName}.filtered.vcf > ${sampleName}.normed.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File normed_vcf = "${sampleName}.normed.vcf"
    }
}

+ 3
- 2
tasks/mergeVCFInfo.wdl Прегледај датотеку

command <<< command <<<


rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.info.vcf ${sep=" " vcf_gz}
rtg vcfmerge --force-merge-all -o ${sample}.merged.info.vcf.gz ${sep=" " vcf_gz}
>>> >>>


dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
} }
output { output {
File merged_info = "${sample}.merged.info.vcf"
File merged_vcf = "${sample}.merged.info.vcf.gz"
File merged_vcf_idx = "${sample}.merged.info.vcf.gz.tbi"
} }
} }

+ 0
- 1
tasks/reformVCF.wdl Прегледај датотеку

command <<< command <<<


python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name} python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name}


>>> >>>



+ 0
- 2
tasks/votes.wdl Прегледај датотеку

command <<< command <<<
python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix} python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix}
cat ${prefix}_annotated.vcf | cut -f1-9,45 | grep -v 'filtered' | grep -v 'confirm for parents' | grep -v 'pcr-free-speicifc' | grep -v 'pcr-speicifc' | grep -v 'dupVar' > ${prefix}_bechmarking_calls.vcf
>>> >>>


runtime { runtime {
} }
output { output {
File annotated_vcf = "${prefix}_annotated.vcf" File annotated_vcf = "${prefix}_annotated.vcf"
File benchmark_call = "${prefix}_bechmarking_calls.vcf"
} }
} }

+ 5
- 6
tasks/zipIndex.wdl Прегледај датотеку

task zipIndex { task zipIndex {
File vcf File vcf
String sample
String family_name
String vcf_name = basename(vcf,".vcf")
String docker String docker
String cluster_config String cluster_config
String disk_size String disk_size
command <<< command <<<
rtg bgzip ${vcf} -c > ${family_name}.${sample}.vcf.gz
rtg index -f vcf ${family_name}.${sample}.vcf.gz
rtg bgzip ${vcf} -c > ${vcf_name}.vcf.gz
rtg index -f vcf ${vcf_name}.vcf.gz


>>> >>>


dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/" dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
} }
output { output {
File vcf_gz = "${family_name}.${sample}.vcf.gz"
File vcf_idx = "${family_name}.${sample}.vcf.gz.tbi"
File vcf_gz = "${vcf_name}.vcf.gz"
File vcf_idx = "${vcf_name}.vcf.gz.tbi"
} }
} }

+ 98
- 13
workflow.wdl Прегледај датотеку

import "./tasks/reformVCF.wdl" as reformVCF import "./tasks/reformVCF.wdl" as reformVCF
import "./tasks/merge.wdl" as merge import "./tasks/merge.wdl" as merge
import "./tasks/votes.wdl" as votes import "./tasks/votes.wdl" as votes
import "./tasks/bed_annotation.wdl" as bed_annotation
import "./tasks/mergeVCFInfo.wdl" as mergeVCFInfo


workflow {{ project_name }} { workflow {{ project_name }} {
File inputSamplesFile File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile) Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
File ref_dir File ref_dir
File repeat_bed
String fasta String fasta
String cluster_config String cluster_config
String disk_size String disk_size
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL5normZip{
input:
vcf=LCL5variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6normZip{
input:
vcf=LCL6variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL7normZip{
input:
vcf=LCL7variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL8normZip{
input:
vcf=LCL8variantsNorm.normed_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call mendelian.mendelian as LCL5mendelian { call mendelian.mendelian as LCL5mendelian {
input: input:
child_vcf=LCL5variantsNorm.normed_vcf, child_vcf=LCL5variantsNorm.normed_vcf,
call zipIndex.zipIndex as LCL5zipIndex { call zipIndex.zipIndex as LCL5zipIndex {
input: input:
vcf=LCL5mendelian.trio_vcf, vcf=LCL5mendelian.trio_vcf,
sample="LCL5",
family_name=quartet[8],
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL6zipIndex { call zipIndex.zipIndex as LCL6zipIndex {
input: input:
vcf=LCL6mendelian.trio_vcf, vcf=LCL6mendelian.trio_vcf,
sample="LCL6",
family_name=quartet[8],
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL5familyzipIndex { call zipIndex.zipIndex as LCL5familyzipIndex {
input: input:
vcf=reformVCF.LCL5_family_info, vcf=reformVCF.LCL5_family_info,
sample='LCL5',
family_name=quartet[8],
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL6familyzipIndex { call zipIndex.zipIndex as LCL6familyzipIndex {
input: input:
vcf=reformVCF.LCL6_family_info, vcf=reformVCF.LCL6_family_info,
sample='LCL6',
family_name=quartet[8],
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL7familyzipIndex { call zipIndex.zipIndex as LCL7familyzipIndex {
input: input:
vcf=reformVCF.LCL7_family_info, vcf=reformVCF.LCL7_family_info,
sample='LCL7',
family_name=quartet[8],
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL8familyzipIndex { call zipIndex.zipIndex as LCL8familyzipIndex {
input: input:
vcf=reformVCF.LCL8_family_info, vcf=reformVCF.LCL8_family_info,
sample='LCL8',
family_name=quartet[8],
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
} }
### family info merge
call merge.merge as LCL5merge { call merge.merge as LCL5merge {
input: input:
family_vcf_gz=LCL5familyzipIndex.vcf_gz, family_vcf_gz=LCL5familyzipIndex.vcf_gz,
prefix='LCL8_consensus', prefix='LCL8_consensus',
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
}
}
### vcf original information
call mergeVCFInfo.mergeVCFInfo as LCL5mergeVCF {
input:
vcf_gz=LCL5normZip.vcf_gz,
vcf_idx=LCL5normZip.vcf_idx,
sample='LCL5',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL5bedAnnotation {
input:
merged_vcf=LCL5mergeVCF.merged_vcf,
merged_vcf_idx=LCL5mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL5',
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeVCFInfo.mergeVCFInfo as LCL6mergeVCF {
input:
vcf_gz=LCL6normZip.vcf_gz,
vcf_idx=LCL6normZip.vcf_idx,
sample='LCL6',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL6bedAnnotation {
input:
merged_vcf=LCL6mergeVCF.merged_vcf,
merged_vcf_idx=LCL6mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL6',
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeVCFInfo.mergeVCFInfo as LCL7mergeVCF {
input:
vcf_gz=LCL7normZip.vcf_gz,
vcf_idx=LCL7normZip.vcf_idx,
sample='LCL7',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL7bedAnnotation {
input:
merged_vcf=LCL7mergeVCF.merged_vcf,
merged_vcf_idx=LCL7mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL7',
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeVCFInfo.mergeVCFInfo as LCL8mergeVCF {
input:
vcf_gz=LCL8normZip.vcf_gz,
vcf_idx=LCL8normZip.vcf_idx,
sample='LCL8',
cluster_config=cluster_config,
disk_size=disk_size
}
call bed_annotation.bed_annotation as LCL8bedAnnotation {
input:
merged_vcf=LCL8mergeVCF.merged_vcf,
merged_vcf_idx=LCL8mergeVCF.merged_vcf_idx,
repeat_bed=repeat_bed,
sample='LCL8',
cluster_config=cluster_config,
disk_size=disk_size
}
} }



Loading…
Откажи
Сачувај