瀏覽代碼

Update: add VEP

master
YaqingLiu 4 年之前
父節點
當前提交
607454ab2f
共有 4 個文件被更改,包括 163 次插入3 次删除
  1. +6
    -0
      defaults
  2. +7
    -1
      inputs
  3. +77
    -0
      tasks/VEP.wdl
  4. +73
    -2
      workflow.wdl

+ 6
- 0
defaults 查看文件

@@ -18,6 +18,11 @@
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02",
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10",
"vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0",
"cache": "oss://pgx-reference-data/ensembl_vep/",
"vep_path": "/opt/vep/ensembl-vep",
"species": "homo_sapiens_merged",
"vcf2maf_path": "/opt/mskcc-vcf2maf",
"tnseq_pon": "",
"tnscope_pon": "",
"disk_size": "200",
@@ -27,5 +32,6 @@
"tnscope": false,
"varscan": false,
"annovar": false,
"vep": false,
"only_pass": true
}

+ 7
- 1
inputs 查看文件

@@ -23,6 +23,11 @@
"{{ project_name }}.annovar_database": "{{ annovar_database }}",
"{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}",
"{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}",
"{{ project_name }}.vep_path": "{{ vep_path }}",
"{{ project_name }}.cache": "{{ cache }}",
"{{ project_name }}.species": "{{ species }}",
"{{ project_name }}.vcf2maf_path": "{{ vcf2maf_path }}",
"{{ project_name }}.vep_docker": "{{ vep_docker }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}",
"{{ project_name }}.haplotyper": {{ haplotyper | tojson }},
@@ -30,5 +35,6 @@
"{{ project_name }}.tnscope": {{ tnscope | tojson }},
"{{ project_name }}.varscan": {{ varscan | tojson }},
"{{ project_name }}.annovar": {{ annovar | tojson }},
"{{ project_name }}.only_pass": {{ only_pass | tojson }}
"{{ project_name }}.only_pass": {{ only_pass | tojson }},
"{{ project_name }}.vep": {{ vep | tojson }}
}

+ 77
- 0
tasks/VEP.wdl 查看文件

@@ -0,0 +1,77 @@
# Annotate one VCF in two passes:
#   * records whose INFO column does not contain SVTYPE=BND are converted to MAF
#     with vcf2maf.pl (which is given the VEP install and cache);
#   * records whose INFO column contains SVTYPE=BND are annotated directly with
#     VEP in tab-delimited format.
#
# Inputs
#   vcf           VCF to annotate
#   sample_id     prefix for the intermediate VCFs written in the working dir
#   basename      vcf file name with the ".vcf" suffix removed; prefix of outputs
#   tumor_id      value passed to vcf2maf --tumor-id
#   normal_id     value passed to vcf2maf --normal-id
#   ref_dir/fasta directory and file name of the reference FASTA
#   vep_path      directory that contains the `vep` script
#   cache         VEP cache directory (passed as --vep-data / --dir)
#   hg            "hg19" or "hg38"; mapped to GRCh37 / GRCh38
#   species       VEP species name
#   only_pass     when true, keep only header lines and records with FILTER == PASS
#   vcf2maf_path  directory that contains vcf2maf.pl
#
# Outputs
#   input_vcf     the (optionally PASS-filtered) VCF that was annotated
#   maf           vcf2maf result for the non-BND records
#   bnd_vep       VEP tab output for the BND records
task vcf2maf {

    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    Boolean only_pass
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size


    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # only_pass is interpolated by WDL to the literal text "true" or "false".
        # (A bare word inside [ ] is always true, so the value must be compared.)
        if [ "${only_pass}" = "true" ]; then
            awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf
        else
            cp ${vcf} ${sample_id}.INPUT.vcf
        fi

        # Define ncbi_build from the genome version supplied by the workflow.
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            # Fail early instead of running the annotators with an empty assembly.
            echo "Unsupported hg value: ${hg} (expected hg19 or hg38)" >&2
            exit 1
        fi

        # Split the VCF: non-BND records go to vcf2maf, BND records go to VEP.
        # Header lines are kept in both files.
        awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.vcf2maf.vcf
        awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.VEP.vcf

        # vcf2maf: the MAF file name must match the `maf` output declared below.
        perl ${vcf2maf_path}/vcf2maf.pl \
            --input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.${hg}.maf \
            --tumor-id ${tumor_id} --normal-id ${normal_id} \
            --ref-fasta ${ref_dir}/${fasta} \
            --vep-path ${vep_path} \
            --vep-data ${cache} \
            --ncbi-build $ncbi_build \
            --species ${species} \
            --vep-fork $nt

        # vep: only annotate the BND
        perl ${vep_path}/vep \
            --input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \
            --fasta ${ref_dir}/${fasta} \
            --dir ${cache} \
            --assembly $ncbi_build \
            --species ${species} \
            --fork $nt \
            --no_progress --no_stats --buffer_size 5000 --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --force_overwrite --offline --pubmed --regulatory
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${sample_id}.INPUT.vcf"
        File maf = "${basename}.${hg}.maf"
        File bnd_vep = "${basename}.BND.VEP.txt"
    }
}

+ 73
- 2
workflow.wdl 查看文件

@@ -10,6 +10,7 @@ import "./tasks/somatic.wdl" as somatic
import "./tasks/processSomatic.wdl" as processSomatic
import "./tasks/somaticFilter.wdl" as somaticFilter
import "./tasks/ANNOVAR.wdl" as ANNOVAR
import "./tasks/VEP.wdl" as VEP


workflow {{ project_name }} {
@@ -25,6 +26,7 @@ workflow {{ project_name }} {
String sentieon_docker
String varscan_docker
String annovar_docker
String vep_docker

File ref_dir
String fasta
@@ -42,6 +44,11 @@ workflow {{ project_name }} {
File? tnseq_pon
File? tnscope_pon

String vep_path
File cache
String species
String vcf2maf_path

String disk_size
String cluster_config

@@ -51,6 +58,7 @@ workflow {{ project_name }} {
Boolean varscan

Boolean annovar
Boolean vep
Boolean only_pass

if (tumor_fastq_1!= "") {
@@ -263,6 +271,27 @@ workflow {{ project_name }} {
disk_size=disk_size
}
}

# Optional annotation of the TNseq calls. The task defined in tasks/VEP.wdl is
# named `vcf2maf`, so it is referenced as VEP.vcf2maf (import alias + task name);
# the call alias TNseq_VEP is unchanged.
if (vep) {
    call VEP.vcf2maf as TNseq_VEP {
        input:
            vcf=TNseq.TNseq_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
}

if (tnscope) {
@@ -303,6 +332,27 @@ workflow {{ project_name }} {
disk_size=disk_size
}
}

# Optional annotation of the TNscope calls. The task defined in tasks/VEP.wdl is
# named `vcf2maf`, so it is referenced as VEP.vcf2maf (import alias + task name);
# the call alias TNscope_VEP is unchanged.
if (vep) {
    call VEP.vcf2maf as TNscope_VEP {
        input:
            vcf=TNscope.TNscope_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
}

if (varscan) {
@@ -325,7 +375,7 @@ workflow {{ project_name }} {
sample=sample_id,
varscan_snp=somatic.varscan_snp,
varscan_indel=somatic.varscan_indel,
docker=varscan_docker,
docker=varscan_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
@@ -340,7 +390,7 @@ workflow {{ project_name }} {
varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc,
varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc,
varscan_indel=somatic.varscan_indel,
docker=varscan_docker,
docker=varscan_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
@@ -357,5 +407,26 @@ workflow {{ project_name }} {
disk_size=disk_size
}
}

# Optional annotation of the VarScan calls. The task defined in tasks/VEP.wdl is
# named `vcf2maf`, so it is referenced as VEP.vcf2maf (import alias + task name);
# the call alias VarScan_VEP is unchanged.
if (vep) {
    call VEP.vcf2maf as VarScan_VEP {
        input:
            vcf=VarScan.VarScan_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
}
}

Loading…
取消
儲存