Germline & Somatic short variant discovery (SNVs + Indels) for WGS & WES.
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

94 lines
2.6KB

  # TNseq: somatic short-variant calling with the Sentieon driver.
  #
  # Step 1 runs one driver invocation with three algos (TNhaplotyper2,
  # OrientationBias, ContaminationModel); step 2 runs TNfilter on the raw
  # calls; step 3 keeps only header lines and records whose FILTER column
  # is PASS.
  #
  # Works in tumor-only mode when normal_recaled_bam is not supplied;
  # otherwise the normal BAM and normal_name are passed to the callers.
  task TNseq {
    # Prefix used for every output file name.
    String sample
    # Sentieon installation root (the binary is expected at bin/sentieon).
    String SENTIEON_INSTALL_DIR
    # Exported verbatim as the SENTIEON_LICENSE environment variable.
    String SENTIEON_LICENSE

    # Tumor alignments and index.
    File tumor_recaled_bam
    File tumor_recaled_bam_index
    # Optional matched-normal alignments and index.
    File? normal_recaled_bam
    File? normal_recaled_bam_index
    # Sample names passed to --tumor_sample / --normal_sample.
    String tumor_name
    # Only used when normal_recaled_bam is supplied.
    String normal_name

    # Directory holding the reference; the FASTA is ref_dir/fasta.
    File ref_dir
    String fasta

    # Passed as --germline_vcf (TNhaplotyper2) and --vcf (ContaminationModel).
    File germline_resource
    # Not referenced in the command; presumably declared so the index is
    # localized next to germline_resource -- TODO confirm.
    File germline_resource_tbi

    # Optional target intervals and padding around them.
    File? regions
    Int? interval_padding
    # Optional panel-of-normals VCF; indexed in place before use.
    File? pon_vcf

    # Runtime settings forwarded to the runtime section below.
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e
      export SENTIEON_LICENSE=${SENTIEON_LICENSE}
      nt=$(nproc)

      # Optional arguments are assembled as plain strings and expanded
      # unquoted further down on purpose (word splitting turns them into
      # separate argv entries).

      # Quote the test so a path containing whitespace cannot break it.
      # The padding flag is emitted only when interval_padding is set, so
      # the driver never sees a flag with a missing value.
      if [ -n "${regions}" ]; then
        INTERVAL="--interval ${regions} ${"--interval_padding " + interval_padding}"
      else
        INTERVAL=""
      fi

      if [ -n "${pon_vcf}" ]; then
        PON="--pon ${pon_vcf}"
        ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex ${pon_vcf}
      else
        PON=""
      fi

      # Tumor/normal mode when a normal BAM is supplied, tumor-only otherwise.
      if [ -n "${normal_recaled_bam}" ]; then
        INPUT="-i ${tumor_recaled_bam} -i ${normal_recaled_bam}"
        SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}"
      else
        INPUT="-i ${tumor_recaled_bam}"
        SAMPLE="--tumor_sample ${tumor_name}"
      fi

      # Step 1: raw calls, orientation-bias priors and contamination
      # estimates from a single driver invocation.
      ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt -r ${ref_dir}/${fasta} \
        $INPUT $INTERVAL \
        --algo TNhaplotyper2 $SAMPLE \
        --germline_vcf ${germline_resource} \
        $PON \
        ${sample}.TNseq.vcf \
        --algo OrientationBias --tumor_sample ${tumor_name} \
        ${sample}.orientation \
        --algo ContaminationModel $SAMPLE \
        --vcf ${germline_resource} \
        --tumor_segments ${sample}.contamination.segments \
        ${sample}.contamination

      # Step 2: filter the raw calls using the step 1 side outputs.
      ${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt \
        -r ${ref_dir}/${fasta} \
        --algo TNfilter $SAMPLE \
        -v ${sample}.TNseq.vcf \
        --contamination ${sample}.contamination \
        --tumor_segments ${sample}.contamination.segments \
        --orientation_priors ${sample}.orientation \
        ${sample}.TNseq.filter.vcf

      # Step 3: keep header lines plus records whose 7th column is PASS.
      awk -F'\t' '$1 ~ "^#" || $7 == "PASS"' ${sample}.TNseq.filter.vcf > ${sample}.TNseq.filter.PASS.vcf
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File TNseq_pass_vcf = "${sample}.TNseq.filter.PASS.vcf"
      File TNseq_filter_vcf = "${sample}.TNseq.filter.vcf"
      File TNseq_filter_vcf_index = "${sample}.TNseq.filter.vcf.idx"
      File TNseq_vcf = "${sample}.TNseq.vcf"
      File TNseq_vcf_index = "${sample}.TNseq.vcf.idx"
      File contamination = "${sample}.contamination"
      File contamination_segments = "${sample}.contamination.segments"
      File orientation = "${sample}.orientation"
    }
  }