Automated, integrated analysis software for cancer patients' genomics data.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

96 lines
3.6KB

  # CNVkit copy-number analysis for one tumor sample, with an optional matched normal.
  #
  # Steps performed by the command block:
  #   1. Build the accessible-region, target and antitarget BED files.
  #   2. Compute target/antitarget coverage for the tumor (and the normal, when given).
  #   3. Build a copy-number reference: from the normal coverage when a normal BAM is
  #      supplied, otherwise a flat reference derived from the FASTA and the BED files.
  #   4. Fix, segment, compute metrics and call copy numbers using the purity read
  #      from the `hrd` table.
  #   5. Draw scatter/diagram/heatmap plots and per-gene scatter plots for genes
  #      reported by both the bin-level and the segment-level genemetrics runs.
  #
  # Inputs:
  #   sample            prefix used for every output file name
  #   ref_dir / fasta   directory and file name of the reference genome FASTA
  #   ref_flat          annotation file passed to `cnvkit.py target --annotate`
  #   regions           capture regions passed to `cnvkit.py target`
  #   hrd               tab-separated table; column 6 of its 2nd line is used as purity
  #   tumor_bam         tumor alignments
  #   normal_bam        optional normal alignments
  #   *_bam_index       not referenced by the command; presumably declared so the
  #                     index is localized next to its BAM (TODO confirm)
  #   docker, cluster_config, disk_size   runtime settings
  task CNVkit {
    String sample
    File ref_dir
    String fasta
    File ref_flat
    File regions
    File hrd
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # Accessible regions of the reference genome
      cnvkit.py access "${ref_dir}/${fasta}" -o access.bed

      # Prepare the target bed
      cnvkit.py target "${regions}" --annotate "${ref_flat}" --split --short-names -o my_baits.bed

      # autobin writes my_baits.target.bed and my_baits.antitarget.bed.
      # An undefined optional normal_bam interpolates to an empty string.
      if [ -n "${normal_bam}" ]; then
        cnvkit.py autobin "${tumor_bam}" "${normal_bam}" -t my_baits.bed -g access.bed
      else
        cnvkit.py autobin "${tumor_bam}" -t my_baits.bed -g access.bed
      fi

      # Tumor coverage
      cnvkit.py coverage "${tumor_bam}" my_baits.target.bed -o "${sample}.T.targetcoverage.cnn"
      cnvkit.py coverage "${tumor_bam}" my_baits.antitarget.bed -o "${sample}.T.antitargetcoverage.cnn"

      if [ -n "${normal_bam}" ]; then
        # Normal coverage
        cnvkit.py coverage "${normal_bam}" my_baits.target.bed -o "${sample}.N.targetcoverage.cnn"
        cnvkit.py coverage "${normal_bam}" my_baits.antitarget.bed -o "${sample}.N.antitargetcoverage.cnn"
        # Reference from the paired normal; the two files are named explicitly so the
        # step does not depend on shell brace expansion or on what else is in the cwd.
        cnvkit.py reference "${sample}.N.targetcoverage.cnn" "${sample}.N.antitargetcoverage.cnn" \
          --fasta "${ref_dir}/${fasta}" -o reference.cnn
      else
        # With no control sample: flat reference
        cnvkit.py reference -o reference.cnn -f "${ref_dir}/${fasta}" \
          -t my_baits.target.bed -a my_baits.antitarget.bed
      fi

      # Bias-correct the tumor coverage against the reference, then segment
      cnvkit.py fix "${sample}.T.targetcoverage.cnn" "${sample}.T.antitargetcoverage.cnn" reference.cnn -o "${sample}.cnr"
      cnvkit.py segment "${sample}.cnr" -o "${sample}.cns"

      # Check noise
      cnvkit.py metrics "${sample}.cnr" -s "${sample}.cns" > "${sample}.stats"

      # Purity is the 6th tab-separated field on the 2nd line of the hrd table.
      PURITY=$(awk -F'\t' 'NR==2 {print $6}' "${hrd}")
      if [ -z "$PURITY" ]; then
        # Fail with a clear message instead of handing an empty --purity to cnvkit.
        echo "ERROR: could not read purity (column 6, line 2) from ${hrd}" >&2
        exit 1
      fi

      # Derive absolute integer copy numbers.
      # NOTE(review): -y (male reference) is passed to `call` but not to `reference`;
      # confirm this is intended.
      cnvkit.py call "${sample}.cns" -y -m clonal --purity "$PURITY" -o "${sample}.call.cns"
      cnvkit.py call "${sample}.cnr" -y -m clonal --purity "$PURITY" -o "${sample}.call.cnr"

      # Plot the results
      cnvkit.py scatter "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.scatter.pdf"
      cnvkit.py diagram "${sample}.cnr" -s "${sample}.call.cns" -o "${sample}.diagram.pdf"
      cnvkit.py heatmap "${sample}.cnr" "${sample}.call.cns" -o "${sample}.heatmap.pdf"

      # Genemetrics: once on bin-level ratios only, once with segments
      mkdir -p gainloss
      cnvkit.py genemetrics "${sample}.call.cnr" -t 0.2 -m 3 -o "${sample}.ratio_cnv.txt"
      cnvkit.py genemetrics "${sample}.call.cnr" -s "${sample}.call.cns" -t 0.2 -m 3 -o "${sample}.segment_cnv.txt"

      # Filter genes: keep those present in the first column of both tables
      # (header line skipped).
      tail -n +2 "${sample}.ratio_cnv.txt" | cut -f1 | sort -u > ratio_cnv.txt
      tail -n +2 "${sample}.segment_cnv.txt" | cut -f1 | sort -u > segment_cnv.txt
      comm -12 ratio_cnv.txt segment_cnv.txt > "${sample}.trusted_genes.txt"

      # One scatter plot per trusted gene; read line by line so names are neither
      # word-split nor glob-expanded.
      while IFS= read -r gene; do
        [ -n "$gene" ] || continue
        cnvkit.py scatter "${sample}.call.cnr" -s "${sample}.call.cns" -g "$gene" \
          -o "./gainloss/${sample}.$gene.scatter.pdf"
      done < "${sample}.trusted_genes.txt"
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File scatter_pdf = "${sample}.scatter.pdf"
      File diagram_pdf = "${sample}.diagram.pdf"
      File heatmap_pdf = "${sample}.heatmap.pdf"
      File cnr = "${sample}.cnr"
      File cns = "${sample}.cns"
      File stats = "${sample}.stats"
      File call_cnr = "${sample}.call.cnr"
      File call_cns = "${sample}.call.cns"
      File ratio_cnv = "${sample}.ratio_cnv.txt"
      File segment_cnv = "${sample}.segment_cnv.txt"
      File gainloss_genes = "${sample}.trusted_genes.txt"
      # Per-gene scatter plots written by the loop at the end of the command
      Array[File] gainloss = glob("./gainloss/*")
    }
  }