Automated, integrated analysis software for genomics data from cancer patients.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

96 lines
3.6KB

  # CNVkit copy-number workflow for one tumor sample, with an optional normal.
  #
  # Steps: build accessible-region and target/antitarget BEDs, compute coverage,
  # build a reference (from the normal if given, otherwise a flat one), then
  # fix / segment / call, plot, and list genes supported by both bin-level and
  # segment-level gene metrics.
  #
  # Inputs:
  #   sample                     prefix used for every result file name
  #   tumor_bam, tumor_bam_index tumor alignments (index is localized alongside)
  #   normal_bam, normal_bam_index
  #                              optional normal; selects the paired-reference branch
  #   regions                    regions file handed to `cnvkit.py target`
  #   ref_dir, fasta             reference directory and the FASTA file name inside it
  #   ref_flat                   annotation file handed to `cnvkit.py target --annotate`
  #   hrd                        optional tab-separated table; purity is read from
  #                              column 6 of its second line. When omitted (or the
  #                              cell is empty) `cnvkit.py call` runs without --purity.
  #   docker, cluster_config, disk_size
  #                              runtime settings forwarded to the backend
  #
  # Outputs: all files are written under /cromwell_root/tmp/cnvkit, so every
  # output is declared with that absolute prefix.
  task CNVkit {
    String sample
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    File regions
    File ref_dir
    String fasta
    File ref_flat
    File? hrd
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      mkdir -p /cromwell_root/tmp/cnvkit
      cd /cromwell_root/tmp/cnvkit

      cnvkit.py access ${ref_dir}/${fasta} -o access.bed

      # Prepare the target bed
      cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed

      # autobin writes my_baits.target.bed and my_baits.antitarget.bed
      if [ -n "${normal_bam}" ]; then
        cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
      else
        cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
      fi

      # Tumor coverage
      cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
      cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn

      if [ -n "${normal_bam}" ]; then
        # Normal coverage, then a reference built from exactly those two files
        # (named explicitly so no shell brace expansion or globbing is needed).
        cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
        cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
        cnvkit.py reference ${sample}.N.targetcoverage.cnn ${sample}.N.antitargetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
      else
        # With no control sample: flat reference
        cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
      fi

      # Normalize and segment the tumor sample
      cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
      cnvkit.py segment ${sample}.cnr -o ${sample}.cns

      # Check noise
      cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats

      # Derive each segment's absolute integer copy number.
      # Purity comes from column 6 of the second line of the hrd table; the
      # option is only passed when a value was actually read.
      purity_opt=""
      if [ -n "${hrd}" ]; then
        purity=$(awk -F'\t' 'NR==2 {print $6}' "${hrd}")
        if [ -n "$purity" ]; then
          purity_opt="--purity $purity"
        fi
      fi
      cnvkit.py call ${sample}.cns -y -m clonal $purity_opt -o ${sample}.call.cns

      # Plot the results
      cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
      cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
      cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf

      # Genemetrics (unfiltered)
      mkdir -p gainloss
      cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0 -m 0 -o ${sample}.cnv.txt

      # Filter genes: keep those reported by both the bin-level and the
      # segment-level run. comm needs sorted input, hence sort -u.
      cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio-genes.txt
      cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment-genes.txt
      tail -n+2 ${sample}.ratio-genes.txt | cut -f1 | sort -u > ratio-genes.txt
      tail -n+2 ${sample}.segment-genes.txt | cut -f1 | sort -u > segment-genes.txt
      comm -12 ratio-genes.txt segment-genes.txt > trusted_cnv_genes.txt

      # One scatter plot per trusted gene
      for gene in $(cat trusted_cnv_genes.txt)
      do
        cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g "$gene" -o "./gainloss/${sample}.$gene.scatter.pdf"
      done
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File scatter_pdf = "/cromwell_root/tmp/cnvkit/${sample}.scatter.pdf"
      File diagram_pdf = "/cromwell_root/tmp/cnvkit/${sample}.diagram.pdf"
      File heatmap_pdf = "/cromwell_root/tmp/cnvkit/${sample}.heatmap.pdf"
      File cnr = "/cromwell_root/tmp/cnvkit/${sample}.cnr"
      File cns = "/cromwell_root/tmp/cnvkit/${sample}.cns"
      File stats = "/cromwell_root/tmp/cnvkit/${sample}.stats"
      File call_cns = "/cromwell_root/tmp/cnvkit/${sample}.call.cns"
      File genemetrics = "/cromwell_root/tmp/cnvkit/${sample}.cnv.txt"
      File gainloss_genes = "/cromwell_root/tmp/cnvkit/trusted_cnv_genes.txt"
      Array[File] gainloss = glob("/cromwell_root/tmp/cnvkit/gainloss/*")
    }
  }