Przeglądaj źródła

Update: add VEP

master
YaqingLiu 4 lat temu
rodzic
commit
607454ab2f
4 zmienionych plików z 163 dodań i 3 usunięć
  1. +6
    -0
      defaults
  2. +7
    -1
      inputs
  3. +77
    -0
      tasks/VEP.wdl
  4. +73
    -2
      workflow.wdl

+ 6
- 0
defaults Wyświetl plik

"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02", "sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v202010.02",
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3", "varscan_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/varscan2:v2.4.3",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10", "annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2019.10",
"vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0",
"cache": "oss://pgx-reference-data/ensembl_vep/",
"vep_path": "/opt/vep/ensembl-vep",
"species": "homo_sapiens_merged",
"vcf2maf_path": "/opt/mskcc-vcf2maf",
"tnseq_pon": "", "tnseq_pon": "",
"tnscope_pon": "", "tnscope_pon": "",
"disk_size": "200", "disk_size": "200",
"tnscope": false, "tnscope": false,
"varscan": false, "varscan": false,
"annovar": false, "annovar": false,
"vep": false,
"only_pass": true "only_pass": true
} }

+ 7
- 1
inputs Wyświetl plik

"{{ project_name }}.annovar_database": "{{ annovar_database }}", "{{ project_name }}.annovar_database": "{{ annovar_database }}",
"{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}", "{{ project_name }}.tnseq_pon": "{{ tnseq_pon }}",
"{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}", "{{ project_name }}.tnscope_pon": "{{ tnscope_pon }}",
"{{ project_name }}.vep_path": "{{ vep_path }}",
"{{ project_name }}.cache": "{{ cache }}",
"{{ project_name }}.species": "{{ species }}",
"{{ project_name }}.vcf2maf_path": "{{ vcf2maf_path }}",
"{{ project_name }}.vep_docker": "{{ vep_docker }}",
"{{ project_name }}.disk_size": "{{ disk_size }}", "{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.cluster_config": "{{ cluster_config }}", "{{ project_name }}.cluster_config": "{{ cluster_config }}",
"{{ project_name }}.haplotyper": {{ haplotyper | tojson }}, "{{ project_name }}.haplotyper": {{ haplotyper | tojson }},
"{{ project_name }}.tnscope": {{ tnscope | tojson }}, "{{ project_name }}.tnscope": {{ tnscope | tojson }},
"{{ project_name }}.varscan": {{ varscan | tojson }}, "{{ project_name }}.varscan": {{ varscan | tojson }},
"{{ project_name }}.annovar": {{ annovar | tojson }}, "{{ project_name }}.annovar": {{ annovar | tojson }},
"{{ project_name }}.only_pass": {{ only_pass | tojson }}
"{{ project_name }}.only_pass": {{ only_pass | tojson }},
"{{ project_name }}.vep": {{ vep | tojson }}
} }

+ 77
- 0
tasks/VEP.wdl Wyświetl plik

# Annotate a VCF in two passes:
#   * records that are NOT SVTYPE=BND are converted to MAF with vcf2maf.pl
#     (which is given the VEP install path and cache directory);
#   * records that ARE SVTYPE=BND are annotated directly with VEP into a
#     tab-delimited file.
#
# Inputs:
#   vcf            VCF to annotate.
#   sample_id      Prefix for the intermediate VCF files written by the command.
#   basename       Prefix for the final outputs; defaults to the VCF name without ".vcf".
#   tumor_id       Passed to vcf2maf.pl as --tumor-id.
#   normal_id      Passed to vcf2maf.pl as --normal-id.
#   ref_dir/fasta  Reference directory and FASTA file name inside it.
#   vep_path       Directory containing the `vep` executable.
#   cache          VEP cache directory (--vep-data / --dir).
#   hg             Genome build, "hg19" or "hg38"; mapped to GRCh37 / GRCh38.
#   species        VEP species name.
#   only_pass      When true, keep only header lines and records whose FILTER is PASS.
#   vcf2maf_path   Directory containing vcf2maf.pl.
#   docker, cluster_config, disk_size   Runtime settings.
#
# Outputs:
#   input_vcf  The (optionally PASS-filtered) VCF that was annotated.
#   maf        MAF produced by vcf2maf.pl for the non-BND records.
#   bnd_vep    VEP tab output for the BND records.
task vcf2maf {

    File vcf
    String sample_id
    String basename = basename(vcf,".vcf")
    String tumor_id
    String normal_id
    File ref_dir
    String fasta
    String vep_path
    File cache
    String hg
    String species
    Boolean only_pass
    String vcf2maf_path
    String docker
    String cluster_config
    String disk_size


    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # The WDL Boolean is interpolated as the text "true" or "false";
        # compare it explicitly (a bare word inside [ ] is always true).
        if [ "${only_pass}" = "true" ]; then
            awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf
        else
            cp ${vcf} ${sample_id}.INPUT.vcf
        fi

        # Define ncbi_build from the interpolated genome build name
        if [ "${hg}" = "hg19" ]; then
            ncbi_build="GRCh37"
        elif [ "${hg}" = "hg38" ]; then
            ncbi_build="GRCh38"
        else
            # Fail early instead of handing an empty assembly to vcf2maf/VEP
            echo "Unsupported genome build '${hg}': expected hg19 or hg38" >&2
            exit 1
        fi

        # Split the VCF: non-BND records go to vcf2maf, BND records go to VEP
        awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.vcf2maf.vcf
        awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.INPUT.vcf > ${sample_id}.INPUT.VEP.vcf

        # vcf2maf (output name must match the `maf` output declaration below)
        perl ${vcf2maf_path}/vcf2maf.pl \
            --input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.${hg}.maf \
            --tumor-id ${tumor_id} --normal-id ${normal_id} \
            --ref-fasta ${ref_dir}/${fasta} \
            --vep-path ${vep_path} \
            --vep-data ${cache} \
            --ncbi-build $ncbi_build \
            --species ${species} \
            --vep-fork $nt

        # vep: only annotate the BND
        perl ${vep_path}/vep \
            --input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \
            --fasta ${ref_dir}/${fasta} \
            --dir ${cache} \
            --assembly $ncbi_build \
            --species ${species} \
            --fork $nt \
            --no_progress --no_stats --buffer_size 5000 --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --force_overwrite --offline --pubmed --regulatory
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File input_vcf = "${sample_id}.INPUT.vcf"
        File maf = "${basename}.${hg}.maf"
        File bnd_vep = "${basename}.BND.VEP.txt"
    }
}

+ 73
- 2
workflow.wdl Wyświetl plik

import "./tasks/processSomatic.wdl" as processSomatic import "./tasks/processSomatic.wdl" as processSomatic
import "./tasks/somaticFilter.wdl" as somaticFilter import "./tasks/somaticFilter.wdl" as somaticFilter
import "./tasks/ANNOVAR.wdl" as ANNOVAR import "./tasks/ANNOVAR.wdl" as ANNOVAR
import "./tasks/VEP.wdl" as VEP




workflow {{ project_name }} { workflow {{ project_name }} {
String sentieon_docker String sentieon_docker
String varscan_docker String varscan_docker
String annovar_docker String annovar_docker
String vep_docker


File ref_dir File ref_dir
String fasta String fasta
File? tnseq_pon File? tnseq_pon
File? tnscope_pon File? tnscope_pon


String vep_path
File cache
String species
String vcf2maf_path

String disk_size String disk_size
String cluster_config String cluster_config


Boolean varscan Boolean varscan


Boolean annovar Boolean annovar
Boolean vep
Boolean only_pass Boolean only_pass


if (tumor_fastq_1!= "") { if (tumor_fastq_1!= "") {
disk_size=disk_size disk_size=disk_size
} }
} }

# Optional annotation of the TNseq VCF; runs only when `vep` is true.
# The task defined in tasks/VEP.wdl is named `vcf2maf`, so it is called as
# VEP.vcf2maf (namespace.task) and aliased to TNseq_VEP.
if (vep) {
    call VEP.vcf2maf as TNseq_VEP {
        input:
            vcf=TNseq.TNseq_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
} }


if (tnscope) { if (tnscope) {
disk_size=disk_size disk_size=disk_size
} }
} }

# Optional annotation of the TNscope VCF; runs only when `vep` is true.
# The task defined in tasks/VEP.wdl is named `vcf2maf`, so it is called as
# VEP.vcf2maf (namespace.task) and aliased to TNscope_VEP.
if (vep) {
    call VEP.vcf2maf as TNscope_VEP {
        input:
            vcf=TNscope.TNscope_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
} }


if (varscan) { if (varscan) {
sample=sample_id, sample=sample_id,
varscan_snp=somatic.varscan_snp, varscan_snp=somatic.varscan_snp,
varscan_indel=somatic.varscan_indel, varscan_indel=somatic.varscan_indel,
docker=varscan_docker,
docker=varscan_docker,
disk_size=disk_size, disk_size=disk_size,
cluster_config=cluster_config cluster_config=cluster_config
} }
varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc, varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc,
varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc, varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc,
varscan_indel=somatic.varscan_indel, varscan_indel=somatic.varscan_indel,
docker=varscan_docker,
docker=varscan_docker,
disk_size=disk_size, disk_size=disk_size,
cluster_config=cluster_config cluster_config=cluster_config
} }
disk_size=disk_size disk_size=disk_size
} }
} }

# Optional annotation of the VarScan VCF; runs only when `vep` is true.
# The task defined in tasks/VEP.wdl is named `vcf2maf`, so it is called as
# VEP.vcf2maf (namespace.task) and aliased to VarScan_VEP.
if (vep) {
    call VEP.vcf2maf as VarScan_VEP {
        input:
            vcf=VarScan.VarScan_vcf,
            hg=hg,
            only_pass=only_pass,
            sample_id=sample_id,
            tumor_id=sample_id + "_tumor",
            normal_id=sample_id + "_normal",
            ref_dir=ref_dir,
            fasta=fasta,
            vep_path=vep_path,
            cache=cache,
            species=species,
            vcf2maf_path=vcf2maf_path,
            docker=vep_docker,
            cluster_config=cluster_config,
            disk_size=disk_size
    }
}
} }
} }

Ładowanie…
Anuluj
Zapisz