You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

96 lines
3.6KB

  # Task CNVkit
  #
  # Runs a CNVkit copy-number workflow on one tumor BAM, optionally with a
  # matched normal BAM, and produces ratio/segment tables, absolute copy-number
  # calls, summary plots, and one scatter plot per "trusted" gene.
  #
  # Inputs
  #   sample            Prefix used for every output file name.
  #   ref_dir / fasta   Directory and file name of the reference genome FASTA
  #                     (used as ${ref_dir}/${fasta}).
  #   ref_flat          Annotation file passed to `cnvkit.py target --annotate`.
  #   regions           Target regions passed to `cnvkit.py target`.
  #   hrd               Tab-separated file; column 6 of its 2nd line is used as
  #                     the tumor purity for `cnvkit.py call`.
  #   tumor_bam(+index) Tumor alignment. The index is not referenced in the
  #                     command; presumably it is declared so it is localized
  #                     next to the BAM (verify against the backend).
  #   normal_bam(+index) Optional matched normal. When absent, a reference is
  #                     built from the FASTA and bait files only.
  #   docker, cluster_config, disk_size
  #                     Passed through to the runtime section.
  #
  # Outputs
  #   Plots (scatter/diagram/heatmap PDFs), .cnr/.cns and their called
  #   versions, metrics stats, the two genemetrics tables, the list of genes
  #   present in both tables, and every file written under ./gainloss/.
  task CNVkit {
    String sample
    File ref_dir
    String fasta
    File ref_flat
    File regions
    File hrd
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # Accessible regions of the reference genome
      cnvkit.py access "${ref_dir}/${fasta}" -o access.bed

      # Prepare the target bed
      cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed

      # Bin sizing; writes my_baits.target.bed and my_baits.antitarget.bed.
      # normal_bam interpolates to an empty string when it is not supplied,
      # so test for a non-empty value explicitly.
      if [ -n "${normal_bam}" ]; then
        cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
      else
        cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
      fi

      # Tumor coverage
      cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -o "${sample}.T.targetcoverage.cnn"
      cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -o "${sample}.T.antitargetcoverage.cnn"

      if [ -n "${normal_bam}" ]; then
        # Normal coverage, then a reference built from the paired normal.
        cnvkit.py coverage "${normal_bam}" my_baits.target.bed -o "${sample}.N.targetcoverage.cnn"
        cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -o "${sample}.N.antitargetcoverage.cnn"
        # Name the two files explicitly instead of globbing so that stray
        # *.N.*.cnn files in the working directory cannot leak into the reference.
        cnvkit.py reference "${sample}.N.targetcoverage.cnn" "${sample}.N.antitargetcoverage.cnn" \
          --fasta "${ref_dir}/${fasta}" -o reference.cnn
      else
        # With no control sample
        cnvkit.py reference -o reference.cnn -f "${ref_dir}/${fasta}" -t my_baits.target.bed -a my_baits.antitarget.bed
      fi

      # Normalize tumor coverage against the reference and segment
      cnvkit.py fix "${sample}.T.targetcoverage.cnn" "${sample}.T.antitargetcoverage.cnn" reference.cnn -o "${sample}.cnr"
      cnvkit.py segment "${sample}.cnr" -o "${sample}.cns"

      # Check noise
      cnvkit.py metrics "${sample}.cnr" -s "${sample}.cns" > "${sample}.stats"

      # Derive each segment's absolute integer copy number, ploidy must be int value.
      # Purity is column 6 of the second line of the hrd table.
      PURITY=$(awk -F'\t' 'NR==2 {print $6}' "${hrd}")
      if [ -z "$PURITY" ]; then
        # Fail early with a clear message rather than letting cnvkit.py
        # misparse "--purity -o ...".
        echo "ERROR: could not read purity (line 2, column 6) from ${hrd}" >&2
        exit 1
      fi
      cnvkit.py call "${sample}.cns" -y -m clonal --purity "$PURITY" -o "${sample}.call.cns"
      cnvkit.py call "${sample}.cnr" -y -m clonal --purity "$PURITY" -o "${sample}.call.cnr"

      # Plot the results
      cnvkit.py scatter "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.scatter.pdf"
      cnvkit.py diagram "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.diagram.pdf"
      cnvkit.py heatmap "${sample}.cnr" "${sample}.call.cns" -o "${sample}.heatmap.pdf"

      # Genemetrics
      mkdir -p gainloss
      cnvkit.py genemetrics "${sample}.call.cnr" -t 0.2 -m 3 -o "${sample}.ratio_cnv.txt"
      cnvkit.py genemetrics "${sample}.call.cnr" -s "${sample}.call.cns" -t 0.2 -m 3 -o "${sample}.segment_cnv.txt"

      # Filter genes: keep only those reported by both the ratio-based and
      # the segment-based genemetrics runs (header line skipped, column 1).
      tail -n +2 "${sample}.ratio_cnv.txt" | cut -f1 | sort -u > ratio_cnv.txt
      tail -n +2 "${sample}.segment_cnv.txt" | cut -f1 | sort -u > segment_cnv.txt
      comm -12 ratio_cnv.txt segment_cnv.txt > "${sample}.trusted_genes.txt"

      # One scatter plot per trusted gene. Read line by line so gene names
      # are never word-split or glob-expanded by the shell.
      while IFS= read -r gene; do
        if [ -n "$gene" ]; then
          cnvkit.py scatter "${sample}.call.cnr" -s "${sample}.call.cns" -g "$gene" \
            -o "./gainloss/${sample}.$gene.scatter.pdf" < /dev/null
        fi
      done < "${sample}.trusted_genes.txt"
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File scatter_pdf = "${sample}.scatter.pdf"
      File diagram_pdf = "${sample}.diagram.pdf"
      File heatmap_pdf = "${sample}.heatmap.pdf"
      File cnr = "${sample}.cnr"
      File cns = "${sample}.cns"
      File stats = "${sample}.stats"
      File call_cnr = "${sample}.call.cnr"
      File call_cns = "${sample}.call.cns"
      File ratio_cnv = "${sample}.ratio_cnv.txt"
      File segment_cnv = "${sample}.segment_cnv.txt"
      File gainloss_genes = "${sample}.trusted_genes.txt"
      # May be empty when no gene appears in both genemetrics tables.
      Array[File] gainloss = glob("./gainloss/*")
    }
  }