### VEP (Variant Effect Predictor) | |||||
VEP predicts the functional effects of genomic variants. The annotated VCF is then converted to MAF using vcf2maf.
### Getting Started | |||||
We recommend using the choppy system and the Aliyun OSS service. The commands look like this:
``` | |||||
# Activate the choppy environment | |||||
$ open-choppy-env | |||||
# Install the APP | |||||
$ choppy install YaqingLiu/VEP [-f] | |||||
# List the parameters | |||||
$ choppy samples YaqingLiu/VEP-latest [--no-default] | |||||
# Submit your task with the `samples.csv` file and `project name`
$ choppy batch YaqingLiu/VEP-latest samples.csv -p Project [-l project:Label] | |||||
# Query the status of all tasks in the project | |||||
$ choppy query -L project:Label | grep "status" | |||||
``` | |||||
### Output | |||||
Each task produces two files: the VEP-annotated VCF (`.vep.vcf`) and the converted MAF (`.maf`).
{ | |||||
"vep_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/vep:v104.0", | |||||
"fasta": "GRCh38.d1.vd1.fa", | |||||
"ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/", | |||||
"cache": "oss://pgx-reference-data/ensembl_vep/cache/", | |||||
"vep_path": "/opt/vep/ensembl-vep", | |||||
"species": "homo_sapiens_merged", | |||||
"ncbi_build": "GRCh38", | |||||
"disk_size": "200", | |||||
"cluster_config": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc" | |||||
} |
{ | |||||
"{{ project_name }}.vcf": "{{ vcf }}", | |||||
"{{ project_name }}.sample_id": "{{ sample_id }}", | |||||
"{{ project_name }}.tumor_id": "{{ tumor_id }}", | |||||
"{{ project_name }}.normal_id": "{{ normal_id }}", | |||||
"{{ project_name }}.ref_dir": "{{ ref_dir }}", | |||||
"{{ project_name }}.fasta": "{{ fasta }}", | |||||
"{{ project_name }}.vep_path": "{{ vep_path }}", | |||||
"{{ project_name }}.cache": "{{ cache }}", | |||||
"{{ project_name }}.ncbi_build": "{{ ncbi_build }}", | |||||
"{{ project_name }}.species": "{{ species }}", | |||||
"{{ project_name }}.vep_docker": "{{ vep_docker }}", | |||||
"{{ project_name }}.disk_size": "{{ disk_size }}", | |||||
"{{ project_name }}.cluster_config": "{{ cluster_config }}" | |||||
} |
# Annotate a VCF with VEP and convert it to MAF by running vcf2maf.pl.
#
# Inputs:
#   vcf            - input VCF; its file name minus a trailing ".vcf" is the
#                    prefix of both output files
#   tumor_id       - value passed to --tumor-id
#   normal_id      - value passed to --normal-id
#   ref_dir        - directory that contains the reference FASTA
#   fasta          - FASTA file name inside ref_dir (passed as ref_dir/fasta)
#   vep_path       - value passed to --vep-path
#   cache          - directory passed to --vep-data
#   ncbi_build     - value passed to --ncbi-build
#   species        - value passed to --species
#   docker         - container image used by the runtime
#   cluster_config - value of the runtime `cluster` attribute
#   disk_size      - size field inserted into the runtime `dataDisk` string
#
# Outputs:
#   vep_vcf - "<prefix>.vep.vcf" in the task working directory
#   maf     - "<prefix>.maf" in the task working directory
task vcf2maf {
  File vcf
  String basename = basename(vcf, ".vcf")
  String tumor_id
  String normal_id
  File ref_dir
  String fasta
  String vep_path
  File cache
  String ncbi_build
  String species
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e

    # One VEP fork per available core.
    nt=$(nproc)

    # Every interpolated value is double-quoted so that paths or IDs holding
    # spaces or glob characters reach vcf2maf.pl as a single argument.
    # --tmp-dir . : vcf2maf.pl keeps its intermediate VEP-annotated VCF in the
    # tmp dir, which otherwise defaults to the folder of the input VCF; the
    # output section below expects that file in the working directory.
    perl /opt/mskcc-vcf2maf/vcf2maf.pl \
      --input-vcf "${vcf}" --output-maf "${basename}.maf" \
      --tmp-dir . \
      --tumor-id "${tumor_id}" --normal-id "${normal_id}" \
      --ref-fasta "${ref_dir}/${fasta}" \
      --vep-path "${vep_path}" \
      --vep-data "${cache}" \
      --ncbi-build "${ncbi_build}" \
      --species "${species}" \
      --vep-fork "$nt"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File vep_vcf = "${basename}.vep.vcf"
    File maf = "${basename}.maf"
  }
}
import "./tasks/vcf2maf.wdl" as vcf2maf | |||||
# Single-step workflow: forwards its inputs to the imported vcf2maf task.
# The workflow name is a template placeholder filled in before submission.
workflow {{ project_name }} {
  # Per-sample inputs.
  File vcf
  String tumor_id
  String normal_id
  # Declared as a workflow input but not forwarded to the vcf2maf call.
  String sample_id

  # Reference genome and VEP resources.
  File ref_dir
  String fasta
  File cache
  String vep_path
  String species
  String ncbi_build

  # Execution settings.
  String vep_docker
  String disk_size
  String cluster_config

  call vcf2maf.vcf2maf as vcf2maf {
    input:
      # The task names its image input `docker`; the workflow calls it `vep_docker`.
      docker = vep_docker,
      cluster_config = cluster_config,
      disk_size = disk_size,
      vcf = vcf,
      tumor_id = tumor_id,
      normal_id = normal_id,
      ref_dir = ref_dir,
      fasta = fasta,
      cache = cache,
      vep_path = vep_path,
      species = species,
      ncbi_build = ncbi_build
  }
}