|
|
@@ -22,20 +22,22 @@ task VEP { |
|
|
|
set -e |
|
|
|
nt=$(nproc) |
|
|
|
|
|
|
|
awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${basename}.PASS.vcf |
|
|
|
awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${sample_id}.vcf |
|
|
|
|
|
|
|
ncol=`awk -F'\t' '{if($1!~"^#"){print NF}}' ${basename}.PASS.vcf | uniq` |
|
|
|
# Inspect the VCF column count to choose the sample-ID arguments |
|
|
|
ncol=`awk -F'\t' '{if($1!~"^#"){print NF}}' ${sample_id}.vcf | uniq` |
|
|
|
if [ $ncol -lt 11 ]; then |
|
|
|
vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}" |
|
|
|
SAMPLE="--tumor-id ${tumor_id} --normal-id ${normal_id}" |
|
|
|
else |
|
|
|
vcf2maf_ID="--tumor-id ${tumor_id}" |
|
|
|
SAMPLE="--tumor-id ${sample_id}" |
|
|
|
fi |
|
|
|
|
|
|
|
nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${basename}.PASS.vcf | wc -l` |
|
|
|
# Set the buffer_size based on the data size |
|
|
|
nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${sample_id}.vcf | wc -l` |
|
|
|
if [ $nrow -lt 5000 ]; then |
|
|
|
buffer_size="--buffer_size 5000" |
|
|
|
else |
|
|
|
buffer_size="--buffer_size 500" |
|
|
|
buffer_size="--buffer_size 1000" |
|
|
|
fi |
|
|
|
|
|
|
|
# Extract the BND variants from VCF |
|
|
@@ -55,7 +57,7 @@ task VEP { |
|
|
|
|
|
|
|
# vep |
|
|
|
# perl ${vep_path}/vep \ |
|
|
|
# --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf \ |
|
|
|
# --input_file ${sample_id}.vcf --output_file ${basename}.PASS.vep.vcf \ |
|
|
|
# --fasta ${ref_dir}/${fasta} \ |
|
|
|
# --dir ${cache} \ |
|
|
|
# --assembly ${ncbi_build} \ |
|
|
@@ -68,6 +70,13 @@ task VEP { |
|
|
|
# --sift b \ |
|
|
|
# --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --force_overwrite --offline --pubmed --regulatory |
|
|
|
|
|
|
|
# vcf2vcf: convert the VCF into a standardized format |
|
|
|
perl ${vcf2maf_path}/vcf2vcf.pl \ |
|
|
|
--input-vcf ${sample_id}.vcf --output-vcf ${basename}.norm.vcf \ |
|
|
|
$SAMPLE \ |
|
|
|
--ref-fasta ${reference} |
|
|
|
|
|
|
|
# VEP annotation |
|
|
|
perl ${vep_path}/vep --format vcf --vcf \ |
|
|
|
--assembly ${ncbi_build} \ |
|
|
|
--species ${species} \ |
|
|
@@ -75,14 +84,14 @@ task VEP { |
|
|
|
--offline \ |
|
|
|
--cache --dir_cache ${cache} \ |
|
|
|
--fasta ${ref_dir}/${fasta} \ |
|
|
|
--input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf |
|
|
|
|
|
|
|
$buffer_size \ |
|
|
|
--input_file ${basename}.norm.vcf --output_file ${basename}.vep.vcf |
|
|
|
|
|
|
|
# vcf2maf |
|
|
|
perl ${vcf2maf_path}/vcf2maf.pl \ |
|
|
|
--inhibit-vep \ |
|
|
|
--input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \ |
|
|
|
$vcf2maf_ID \ |
|
|
|
--input-vcf ${basename}.vep.vcf --output-maf ${basename}.maf \ |
|
|
|
$SAMPLE \ |
|
|
|
--ref-fasta ${ref_dir}/${fasta} \ |
|
|
|
--ncbi-build ${ncbi_build} \ |
|
|
|
--species ${species} \ |
|
|
@@ -97,8 +106,7 @@ task VEP { |
|
|
|
} |
|
|
|
|
|
|
|
output { |
|
|
|
File input_vcf = "${basename}.PASS.vcf" |
|
|
|
File vep_vcf = "${basename}.PASS.vep.vcf" |
|
|
|
File maf = "${basename}.PASS.maf" |
|
|
|
File vep_vcf = "${basename}.vep.vcf" |
|
|
|
File maf = "${basename}.maf" |
|
|
|
} |
|
|
|
} |