|
|
@@ -9,9 +9,8 @@ task VEP { |
|
|
|
String fasta |
|
|
|
String vep_path |
|
|
|
File cache |
|
|
|
String hg |
|
|
|
String ncbi_build |
|
|
|
String species |
|
|
|
Boolean only_pass |
|
|
|
String vcf2maf_path |
|
|
|
String docker |
|
|
|
String cluster_config |
|
|
@@ -23,37 +22,71 @@ task VEP { |
|
|
|
set -e |
|
|
|
nt=$(nproc) |
|
|
|
|
|
|
|
# Define ncbi_build |
|
|
|
if [ hg == "hg19" ]; then |
|
|
|
ncbi_build="GRCh37" |
|
|
|
elif [ hg == "hg38" ]; then |
|
|
|
ncbi_build="GRCh38" |
|
|
|
awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${basename}.PASS.vcf |
|
|
|
|
|
|
|
ncol=`awk -F'\t' '{if($1!~"^#"){print NF}}' ${basename}.PASS.vcf | uniq` |
|
|
|
if [ $ncol -lt 11 ]; then |
|
|
|
vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}" |
|
|
|
else |
|
|
|
vcf2maf_ID="--tumor-id ${tumor_id}" |
|
|
|
fi |
|
|
|
|
|
|
|
nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${basename}.PASS.vcf | wc -l` |
|
|
|
if [ $nrow -lt 5000 ]; then |
|
|
|
buffer_size="--buffer_size 5000" |
|
|
|
else |
|
|
|
buffer_size="--buffer_size 500" |
|
|
|
fi |
|
|
|
|
|
|
|
# Extract the BND variants from VCF |
|
|
|
awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf2maf.vcf |
|
|
|
awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${vcf} > ${sample_id}.INPUT.VEP.vcf |
|
|
|
# awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.PASS.vcf2maf.vcf |
|
|
|
# awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.INPUT.VEP.vcf |
|
|
|
|
|
|
|
# vcf2maf |
|
|
|
# perl ${vcf2maf_path}/vcf2maf.pl \ |
|
|
|
# --input-vcf ${sample_id}.PASS.vcf2maf.vcf --output-maf ${basename}.maf \ |
|
|
|
# --tumor-id ${tumor_id} --normal-id ${normal_id} \ |
|
|
|
# --ref-fasta ${ref_dir}/${fasta} \ |
|
|
|
# --vep-path ${vep_path} \ |
|
|
|
# --vep-data ${cache} \ |
|
|
|
# --ncbi-build ${ncbi_build} \ |
|
|
|
# --species ${species} \ |
|
|
|
# --vep-fork $nt |
|
|
|
|
|
|
|
# vep |
|
|
|
# perl ${vep_path}/vep \ |
|
|
|
# --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf \ |
|
|
|
# --fasta ${ref_dir}/${fasta} \ |
|
|
|
# --dir ${cache} \ |
|
|
|
# --assembly ${ncbi_build} \ |
|
|
|
# --species ${species} \ |
|
|
|
# --fork $nt \ |
|
|
|
# --format vcf --vcf \ |
|
|
|
# --no_progress \ |
|
|
|
# --no_stats \ |
|
|
|
# $buffer_size \ |
|
|
|
# --sift b \ |
|
|
|
# --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --force_overwrite --offline --pubmed --regulatory |
|
|
|
|
|
|
|
perl ${vep_path}/vep --format vcf --vcf \ |
|
|
|
--assembly ${ncbi_build} \ |
|
|
|
--species ${species} \ |
|
|
|
--everything --af_exac \ |
|
|
|
--offline \ |
|
|
|
--cache --dir_cache ${cache} \ |
|
|
|
--fasta ${ref_dir}/${fasta} \ |
|
|
|
--input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf |
|
|
|
|
|
|
|
|
|
|
|
# vcf2maf |
|
|
|
perl ${vcf2maf_path}/vcf2maf.pl \ |
|
|
|
--input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.maf \ |
|
|
|
--tumor-id ${tumor_id} --normal-id ${normal_id} \ |
|
|
|
--inhibit-vep \ |
|
|
|
--input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \ |
|
|
|
$vcf2maf_ID \ |
|
|
|
--ref-fasta ${ref_dir}/${fasta} \ |
|
|
|
--vep-path ${vep_path} \ |
|
|
|
--vep-data ${cache} \ |
|
|
|
--ncbi-build $ncbi_build \ |
|
|
|
--ncbi-build ${ncbi_build} \ |
|
|
|
--species ${species} \ |
|
|
|
--vep-fork $nt |
|
|
|
|
|
|
|
# vep: only annotate the BND |
|
|
|
perl ${vep_path}/vep \ |
|
|
|
--input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \ |
|
|
|
--fasta ${ref_dir}/${fasta} \ |
|
|
|
--dir ${cache} \ |
|
|
|
--assembly $ncbi_build \ |
|
|
|
--species ${species} \ |
|
|
|
--fork $nt \ |
|
|
|
--no_progress --no_stats --buffer_size 5000 --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --force_overwrite --offline --pubmed --regulatory |
|
|
|
>>> |
|
|
|
|
|
|
|
runtime { |
|
|
@@ -64,7 +97,8 @@ task VEP { |
|
|
|
} |
|
|
|
|
|
|
|
output { |
|
|
|
File maf = "${basename}.${hg}.maf" |
|
|
|
File bnd_vep = "${basename}.BND.VEP.txt" |
|
|
|
File input_vcf = "${basename}.PASS.vcf" |
|
|
|
File vep_vcf = "${basename}.PASS.vep.vcf" |
|
|
|
File maf = "${basename}.PASS.maf" |
|
|
|
} |
|
|
|
} |