Automated, integrated analysis software for genomics data from cancer patients.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

96 lines
3.6KB

  # Runs the CNVkit copy-number pipeline for one tumor sample, optionally with a
  # matched normal. All work happens in /cromwell_root/tmp/cnvkit, so every
  # declared output is resolved against that directory.
  #
  # Inputs:
  #   sample            Prefix used for every file name the task generates.
  #   tumor_bam         Tumor alignment. tumor_bam_index is never referenced in
  #                     the command; presumably it is declared so the index is
  #                     localized next to the BAM (TODO confirm).
  #   normal_bam        Optional matched normal (normal_bam_index: as above).
  #                     When absent, the reference is built from the target and
  #                     antitarget BED files plus the FASTA only.
  #   regions           Target regions handed to `cnvkit.py target`.
  #   ref_dir, fasta    Directory and file name of the reference FASTA.
  #   ref_flat          Annotation handed to `cnvkit.py target --annotate`.
  #   hrd               Optional tab-separated file; tumor purity is read from
  #                     column 6 of its second line. When absent,
  #                     `cnvkit.py call` runs without --purity.
  #   docker, cluster_config, disk_size
  #                     Forwarded to the runtime section unchanged.
  task CNVkit {
    String sample
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    File regions
    File ref_dir
    String fasta
    File ref_flat
    File? hrd
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -eo pipefail

      mkdir -p /cromwell_root/tmp/cnvkit
      cd /cromwell_root/tmp/cnvkit

      cnvkit.py access "${ref_dir}/${fasta}" -o access.bed

      # Prepare the target bed
      cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed

      # An undefined optional interpolates to an empty string, so -n tells us
      # whether a matched normal was supplied.
      if [ -n "${normal_bam}" ]; then
        cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
      else
        cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
      fi

      # Tumor coverage
      cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
      cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn

      if [ -n "${normal_bam}" ]; then
        # With a paired normal: build the reference from the normal coverage.
        cnvkit.py coverage "${normal_bam}" my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
        cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
        # The two files are named explicitly rather than globbed so that stale
        # *.N.* files in a reused directory cannot leak into the reference.
        cnvkit.py reference ${sample}.N.targetcoverage.cnn ${sample}.N.antitargetcoverage.cnn \
          --fasta "${ref_dir}/${fasta}" -o reference.cnn
      else
        # With no control sample
        cnvkit.py reference -o reference.cnn -f "${ref_dir}/${fasta}" -t my_baits.target.bed -a my_baits.antitarget.bed
      fi

      # Bias-correct and segment the tumor sample
      cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
      cnvkit.py segment ${sample}.cnr -o ${sample}.cns

      # Check noise
      cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats

      # Derive each segment's absolute integer copy number.
      # NOTE: the shell variable is written as $purity (no braces) on purpose:
      # a braced form would be taken by WDL as a placeholder for an undeclared
      # WDL variable.
      if [ -n "${hrd}" ]; then
        purity=$(awk -F'\t' 'NR==2 {print $6}' "${hrd}")
        if [ -z "$purity" ]; then
          echo "ERROR: no purity value found in column 6 of line 2 of ${hrd}" >&2
          exit 1
        fi
        cnvkit.py call ${sample}.cns -y -m clonal --purity "$purity" -o ${sample}.call.cns
      else
        cnvkit.py call ${sample}.cns -y -m clonal -o ${sample}.call.cns
      fi

      # Plot the results
      cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
      cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
      cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf

      # Genemetrics (unfiltered table)
      cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0 -m 0 -o ${sample}.cnv.txt

      # Filter genes: keep those reported by both the bin-level and the
      # segment-level genemetrics runs.
      cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio-genes.txt
      cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment-genes.txt
      tail -n +2 ${sample}.ratio-genes.txt | cut -f1 | sort -u > ratio-genes.txt
      tail -n +2 ${sample}.segment-genes.txt | cut -f1 | sort -u > segment-genes.txt
      comm -12 ratio-genes.txt segment-genes.txt > trusted_cnv_genes.txt

      # One scatter plot per trusted gene; -p keeps a re-run from failing on an
      # already existing directory.
      mkdir -p gainloss
      for gene in $(cat trusted_cnv_genes.txt)
      do
        cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf
      done
    >>>

    # NOTE(review): cluster / systemDisk / dataDisk are backend-specific runtime
    # keys; the data disk is mounted at /cromwell_root/, which is where the
    # command writes its results.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # Every path is absolute because the command changes into
    # /cromwell_root/tmp/cnvkit before producing any file.
    output {
      File scatter_pdf = "/cromwell_root/tmp/cnvkit/${sample}.scatter.pdf"
      File diagram_pdf = "/cromwell_root/tmp/cnvkit/${sample}.diagram.pdf"
      File heatmap_pdf = "/cromwell_root/tmp/cnvkit/${sample}.heatmap.pdf"
      File cnr = "/cromwell_root/tmp/cnvkit/${sample}.cnr"
      File cns = "/cromwell_root/tmp/cnvkit/${sample}.cns"
      File stats = "/cromwell_root/tmp/cnvkit/${sample}.stats"
      File call_cns = "/cromwell_root/tmp/cnvkit/${sample}.call.cns"
      File genemetrics = "/cromwell_root/tmp/cnvkit/${sample}.cnv.txt"
      File gainloss_genes = "/cromwell_root/tmp/cnvkit/trusted_cnv_genes.txt"
      Array[File] gainloss = glob("/cromwell_root/tmp/cnvkit/gainloss/*")
    }
  }