Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

100 lines
3.9KB

  1. task CNVkit {
  2. String sample
  3. File ref_dir
  4. String fasta
  5. File ref_flat
  6. File regions
  7. File hrd
  8. File tumor_bam
  9. File tumor_bam_index
  10. File? normal_bam
  11. File? normal_bam_index
  12. String docker
  13. String cluster_config
  14. String disk_size
  15. command <<<
  16. set -o pipefail
  17. set -e
  18. nt=$(nproc)
  19. cnvkit.py access ${ref_dir}/${fasta} -o access.bed
  20. # Prepare the target bed
  21. cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed
  22. if [ ${normal_bam} ]; then
  23. cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
  24. else
  25. cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
  26. fi
  27. # For each sample...
  28. cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
  29. cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn
  30. if [ ${normal_bam} ]; then
  31. cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
  32. cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
  33. # With paired or pooled normals
  34. cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
  35. else
  36. # With no control sample
  37. cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
  38. fi
  39. # For each tumor sample...
  40. cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
  41. cnvkit.py segment ${sample}.cnr -o ${sample}.cns
  42. # Check noise
  43. cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats
  44. # Derive each segment's absolute integer copy number, ploidy must be int value
  45. PURITY=`awk -F'\t' '{print $6}' ${hrd} | sed -n '2p'`
  46. cnvkit.py segmetrics ${sample}.cns --ci --sem -o ${sample}.segmetrics.cns
  47. cnvkit.py call --drop-low-coverag --filter ci --filter sem ${sample}.segmetrics.cns -y -m clonal --purity $PURITY -o ${sample}.call.cns
  48. # cnvkit.py call --drop-low-coverag ${sample}.cnr -y -m clonal --purity $PURITY -o ${sample}.cnr
  49. # Plot the results
  50. cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
  51. cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
  52. cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf
  53. # Genemetrics
  54. mkdir gainloss
  55. cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt
  56. cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt
  57. # Filter genes
  58. cat ${sample}.ratio_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > ratio_cnv.txt
  59. cat ${sample}.segment_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > segment_cnv.txt
  60. comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt
  61. # Scatter plot for each gene
  62. touch failed_genes.txt
  63. # for gene in `cat ${sample}.trusted_genes.txt`
  64. # do
  65. # cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> failed_genes.txt
  66. # done
  67. >>>
  68. runtime {
  69. docker: docker
  70. cluster: cluster_config
  71. systemDisk: "cloud_ssd 40"
  72. dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  73. }
  74. output {
  75. File scatter_pdf = "${sample}.scatter.pdf"
  76. File diagram_pdf = "${sample}.diagram.pdf"
  77. File heatmap_pdf = "${sample}.heatmap.pdf"
  78. File cnr = "${sample}.cnr"
  79. File cns = "${sample}.cns"
  80. File stats = "${sample}.stats"
  81. File call_cnr = "${sample}.cnr"
  82. File call_cns = "${sample}.call.cns"
  83. File ratio_cnv = "${sample}.ratio_cnv.txt"
  84. File segment_cnv = "${sample}.segment_cnv.txt"
  85. File trusted_genes = "${sample}.trusted_genes.txt"
  86. File failed_genes = "${sample}.failed_genes.txt"
  87. Array[File] gainloss = glob("./gainloss/*")
  88. }
  89. }