|
|
|
|
|
|
|
|
|
|
|
# VQSR: two-pass variant quality score recalibration with Sentieon.
#
# Pass 1 trains a SNP model (VarCal) on the input VCF and applies it
# (ApplyVarCal). Pass 2 trains an INDEL model on the same input VCF and
# applies it on top of the SNP-recalibrated VCF, so the final output carries
# both SNP and INDEL filters.
#
# Inputs:
#   SENTIEON_INSTALL_DIR  Directory whose bin/sentieon is invoked.
#   fasta                 Reference FASTA file name, resolved inside ref_dir.
#   vcf                   VCF to recalibrate.
#   vcf_index             Index for vcf; not referenced in the command
#                         (presumably declared so it is localized next to
#                         vcf - confirm with the backend in use).
#   ref_dir               Directory containing the reference FASTA.
#   dbsnp, dbsnp_Mill, dbsnp_1000G_omni, dbsnp_hapmap, dbsnp_1000G_phase1
#                         Resource VCFs, passed verbatim to --resource.
#   sample                Prefix for every file this task writes.
#   docker, cluster_config, disk_size
#                         Values forwarded to the runtime section.
#
# Output:
#   filtered_vcf          <sample>.vqsrSNPINDEL.hc.recaled.vcf.gz
task VQSR {

  String SENTIEON_INSTALL_DIR
  String fasta
  File vcf
  File vcf_index
  File ref_dir
  String dbsnp
  String dbsnp_Mill
  String dbsnp_1000G_omni
  String dbsnp_hapmap
  String dbsnp_1000G_phase1
  String sample
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # NOTE(review): the license server address is hard-coded; confirm it is
    # reachable from every environment this task runs in.
    export SENTIEON_LICENSE=192.168.0.55:8990

    # ---------------- SNP recalibration ----------------
    # Training/truth resources for the SNP model.
    resource_text="--resource ${dbsnp_1000G_phase1} \
      --resource_param 1000G,known=false,training=true,truth=false,prior=10.0 "
    resource_text="$resource_text --resource ${dbsnp_1000G_omni} \
      --resource_param omni,known=false,training=true,truth=true,prior=12.0 "
    resource_text="$resource_text --resource ${dbsnp} \
      --resource_param dbsnp,known=true,training=false,truth=false,prior=2.0 "
    resource_text="$resource_text --resource ${dbsnp_hapmap} \
      --resource_param hapmap,known=false,training=true,truth=true,prior=15.0"

    # Start from an empty string so a value inherited from the environment
    # cannot leak into the command line.
    annotate_text=""
    annotation_array="DP QD FS SOR MQ MQRankSum ReadPosRankSum"
    for annotation in $annotation_array; do
      annotate_text="$annotate_text --annotation $annotation"
    done

    # resource_text / annotate_text are intentionally unquoted below: they
    # hold several whitespace-separated arguments that must be word-split.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} --algo VarCal -v ${vcf} $resource_text $annotate_text --var_type SNP --plot_file ${sample}.vqsrSNP.hc.plotfile --tranches_file ${sample}.vqsrSNP.hc.tranches ${sample}.vqsrSNP.hc.recal
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} --algo ApplyVarCal -v ${vcf} --var_type SNP --tranches_file ${sample}.vqsrSNP.hc.tranches --sensitivity 99.0 --recal ${sample}.vqsrSNP.hc.recal ${sample}.vqsrSNP.hc.recaled.vcf.gz
    ${SENTIEON_INSTALL_DIR}/bin/sentieon plot vqsr -o ${sample}.vqsrSNP.pdf ${sample}.vqsrSNP.hc.plotfile

    # ---------------- INDEL recalibration ----------------
    # Build a fresh resource list. Appending to the SNP list here would feed
    # the SNP-only training sets (1000G, omni, hapmap) into the INDEL model
    # and pass the dbsnp resource twice.
    resource_text="--resource ${dbsnp_Mill} \
      --resource_param Mills,known=false,training=true,truth=true,prior=12.0 "
    resource_text="$resource_text --resource ${dbsnp} \
      --resource_param dbsnp,known=true,training=false,truth=false,prior=2.0 "

    annotation_array="QD DP FS SOR MQ ReadPosRankSum"
    annotate_text=""
    for annotation in $annotation_array; do
      annotate_text="$annotate_text --annotation $annotation"
    done

    # The INDEL model is trained on the original VCF but applied to the
    # SNP-recalibrated VCF so the final file carries both sets of filters.
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} --algo VarCal -v ${vcf} $resource_text $annotate_text --var_type INDEL --plot_file ${sample}.vqsrINDEL.hc.plotfile --max_gaussians 4 --tranches_file ${sample}.vqsrINDEL.hc.tranches ${sample}.vqsrINDEL.hc.recal
    ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -r ${ref_dir}/${fasta} --algo ApplyVarCal -v ${sample}.vqsrSNP.hc.recaled.vcf.gz --var_type INDEL --recal ${sample}.vqsrINDEL.hc.recal --tranches_file ${sample}.vqsrINDEL.hc.tranches --sensitivity 99.0 ${sample}.vqsrSNPINDEL.hc.recaled.vcf.gz
    ${SENTIEON_INSTALL_DIR}/bin/sentieon plot vqsr -o ${sample}.vqsrINDEL.VQSR.pdf ${sample}.vqsrINDEL.hc.plotfile
  >>>

  runtime {
    dockerTag: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File filtered_vcf = "${sample}.vqsrSNPINDEL.hc.recaled.vcf.gz"
  }
}
|
|
|
|
|
|
|
|
|
|
|
|