Przeglądaj źródła

Update: VEP and ANNOVAR

master
YaqingLiu 3 lat temu
rodzic
commit
0b9bcc926f
2 zmienionych plików z 62 dodań i 28 usunięć
  1. +2
    -2
      tasks/ANNOVAR.wdl
  2. +60
    -26
      tasks/VEP.wdl

+ 2
- 2
tasks/ANNOVAR.wdl Wyświetl plik

@@ -18,8 +18,8 @@ task ANNOVAR {
/installations/annovar/table_annovar.pl ${vcf} \
${annovar_database} -buildver ${hg} \
-out ${basename} -remove \
-protocol refGene,ensGene,knownGene,rmsk,cytoBand,genomicSuperDups,esp6500siv2_all,ALL.sites.2015_08,AFR.sites.2015_08,AMR.sites.2015_08,EAS.sites.2015_08,EUR.sites.2015_08,SAS.sites.2015_08,avsnp147,dbnsfp33a,clinvar_20210501,gnomad_genome,dbscsnv11,dbnsfp31a_interpro \
-operation g,g,g,r,r,r,f,f,f,f,f,f,f,f,f,f,f,f,f \
-protocol refGene,ensGene,knownGene,cytoBand,genomicSuperDups,esp6500siv2_all,ALL.sites.2015_08,AFR.sites.2015_08,AMR.sites.2015_08,EAS.sites.2015_08,EUR.sites.2015_08,SAS.sites.2015_08,avsnp147,dbnsfp33a,clinvar_20210501,gnomad_genome,dbscsnv11,dbnsfp31a_interpro \
-operation g,g,g,r,r,f,f,f,f,f,f,f,f,f,f,f,f,f \
-nastring . -vcfinput -thread $nt
fi


+ 60
- 26
tasks/VEP.wdl Wyświetl plik

@@ -9,9 +9,8 @@ task VEP {
String fasta
String vep_path
File cache
String hg
String ncbi_build
String species
Boolean only_pass
String vcf2maf_path
String docker
String cluster_config
@@ -23,37 +22,71 @@ task VEP {
set -e
nt=$(nproc)

# Define ncbi_build
if [ hg == "hg19" ]; then
ncbi_build="GRCh37"
elif [ hg == "hg38" ]; then
ncbi_build="GRCh38"
awk -F'\t' '{if(($1~"^#")||($1!~"^#" && $7=="PASS")){print $0}}' ${vcf} > ${basename}.PASS.vcf
ncol=`awk -F'\t' '{if($1!~"^#"){print NF}}' ${basename}.PASS.vcf | uniq`
if [ $ncol -lt 11 ]; then
vcf2maf_ID="--tumor-id ${tumor_id} --normal-id ${normal_id}"
else
vcf2maf_ID="--tumor-id ${tumor_id}"
fi

nrow=`awk -F'\t' '{if($1~"^chr"){print $0}}' ${basename}.PASS.vcf | wc -l`
if [ $nrow -lt 5000 ]; then
buffer_size="--buffer_size 5000"
else
buffer_size="--buffer_size 500"
fi

# Extract the BND variants from VCF
awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${vcf} > ${sample_id}.INPUT.vcf2maf.vcf
awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${vcf} > ${sample_id}.INPUT.VEP.vcf
# awk -F'\t' '{if(($1~"^#")||($8!~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.PASS.vcf2maf.vcf
# awk -F'\t' '{if(($1~"^#")||($8~".*SVTYPE=BND.*")){print $0}}' ${sample_id}.PASS.vcf > ${sample_id}.INPUT.VEP.vcf

# vcf2maf
# perl ${vcf2maf_path}/vcf2maf.pl \
# --input-vcf ${sample_id}.PASS.vcf2maf.vcf --output-maf ${basename}.maf \
# --tumor-id ${tumor_id} --normal-id ${normal_id} \
# --ref-fasta ${ref_dir}/${fasta} \
# --vep-path ${vep_path} \
# --vep-data ${cache} \
# --ncbi-build ${ncbi_build} \
# --species ${species} \
# --vep-fork $nt

# vep
# perl ${vep_path}/vep \
# --input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf \
# --fasta ${ref_dir}/${fasta} \
# --dir ${cache} \
# --assembly ${ncbi_build} \
# --species ${species} \
# --fork $nt \
# --format vcf --vcf \
# --no_progress \
# --no_stats \
# $buffer_size \
# --sift b \
# --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --force_overwrite --offline --pubmed --regulatory
perl ${vep_path}/vep --format vcf --vcf \
--assembly ${ncbi_build} \
--species ${species} \
--everything --af_exac \
--offline \
--cache --dir_cache ${cache} \
--fasta ${ref_dir}/${fasta} \
--input_file ${basename}.PASS.vcf --output_file ${basename}.PASS.vep.vcf


# vcf2maf
perl ${vcf2maf_path}/vcf2maf.pl \
--input-vcf ${sample_id}.INPUT.vcf2maf.vcf --output-maf ${basename}.maf \
--tumor-id ${tumor_id} --normal-id ${normal_id} \
--inhibit-vep \
--input-vcf ${basename}.PASS.vep.vcf --output-maf ${basename}.PASS.maf \
$vcf2maf_ID \
--ref-fasta ${ref_dir}/${fasta} \
--vep-path ${vep_path} \
--vep-data ${cache} \
--ncbi-build $ncbi_build \
--ncbi-build ${ncbi_build} \
--species ${species} \
--vep-fork $nt

# vep: only annotate the BND
perl ${vep_path}/vep \
--input_file ${sample_id}.INPUT.VEP.vcf --output_file ${basename}.BND.VEP.txt \
--fasta ${ref_dir}/${fasta} \
--dir ${cache} \
--assembly $ncbi_build \
--species ${species} \
--fork $nt \
--no_progress --no_stats --buffer_size 5000 --sift b --ccds --uniprot --hgvs --symbol --numbers --domains --gene_phenotype --canonical --protein --biotype --uniprot --tsl --variant_class --shift_hgvs 1 --check_existing --total_length --allele_number --no_escape --xref_refseq --failed 1 --tab --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --force_overwrite --offline --pubmed --regulatory
>>>
runtime {
@@ -64,7 +97,8 @@ task VEP {
}

output {
File maf = "${basename}.${hg}.maf"
File bnd_vep = "${basename}.BND.VEP.txt"
File input_vcf = "${basename}.PASS.vcf"
File vep_vcf = "${basename}.PASS.vep.vcf"
File maf = "${basename}.PASS.maf"
}
}

Ładowanie…
Anuluj
Zapisz