Automated integrated analysis software for genomics data of the cancer patients.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

108 lines
4.4KB

  1. task CNVkit {
  2. String sample
  3. File ref_dir
  4. String fasta
  5. File ref_flat
  6. File regions
  7. File hrd
  8. File tumor_bam
  9. File tumor_bam_index
  10. File? normal_bam
  11. File? normal_bam_index
  12. String docker
  13. String cluster_config
  14. String disk_size
  15. command <<<
  16. set -o pipefail
  17. set -e
  18. nt=$(nproc)
  19. cnvkit.py access ${ref_dir}/${fasta} -o access.bed
  20. # Prepare the target bed
  21. cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed
  22. if [ ${normal_bam} ]; then
  23. cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
  24. else
  25. cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
  26. fi
  27. # For each sample...
  28. cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
  29. cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn
  30. if [ ${normal_bam} ]; then
  31. cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
  32. cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
  33. # With paired or pooled normals
  34. cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
  35. else
  36. # With no control sample
  37. cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
  38. fi
  39. # For each tumor sample...
  40. cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
  41. cnvkit.py segment ${sample}.cnr -o ${sample}.cns
  42. # Check noise
  43. cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats
  44. # Derive each segment's absolute integer copy number, ploidy must be int value
  45. PURITY=`awk -F'\t' '{print $6}' ${hrd} | sed -n '2p'`
  46. cnvkit.py segmetrics ${sample}.cnr -s ${sample}.cns --ci -o ${sample}.segmetrics.cns
  47. cnvkit.py call ${sample}.segmetrics.cns --drop-low-coverage --filter ci -m threshold --purity $PURITY -o ${sample}.call.cns
  48. # Plot the results
  49. cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
  50. cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
  51. cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf
  52. # Genemetrics
  53. cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt
  54. cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt
  55. # Filter genes
  56. cat ${sample}.ratio_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > ratio_cnv.txt
  57. cat ${sample}.segment_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > segment_cnv.txt
  58. comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt
  59. # # Scatter plot for each gene
  60. # mkdir gainloss
  61. # touch failed_genes.txt
  62. # for gene in `cat ${sample}.trusted_genes.txt`
  63. # do
  64. # cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> failed_genes.txt
  65. # done
  66. # Filter by trusted_genes
  67. awk 'NR==FNR {a[$1]=$2;next} NR!=FNR {if(FNR == 1 || (FNR != 1 && $1 in a)) print $0}' ${sample}.trusted_genes.txt ${sample}.ratio_cnv.txt > ${sample}.ratio_cnv.trusted.txt
  68. # Infer absolute CN (not adjust by purity)
  69. cnvkit.py call ${sample}.ratio_cnv.trusted.txt -m threshold -o ${sample}.ratio_cnv.call.txt
  70. awk '{if ($6 != 2) print $0}' ${sample}.ratio_cnv.call.txt > ${sample}.ratio_cnv.call.filter.txt
  71. >>>
  72. runtime {
  73. docker: docker
  74. cluster: cluster_config
  75. systemDisk: "cloud_ssd 40"
  76. dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  77. }
  78. output {
  79. File scatter_pdf = "${sample}.scatter.pdf"
  80. File diagram_pdf = "${sample}.diagram.pdf"
  81. File heatmap_pdf = "${sample}.heatmap.pdf"
  82. File cnr = "${sample}.cnr"
  83. File cns = "${sample}.cns"
  84. File stats = "${sample}.stats"
  85. File call_cns = "${sample}.call.cns"
  86. File ratio_cnv = "${sample}.ratio_cnv.txt"
  87. File segment_cnv = "${sample}.segment_cnv.txt"
  88. File trusted_genes = "${sample}.trusted_genes.txt"
  89. File? failed_genes = "${sample}.failed_genes.txt"
  90. Array[File]? gainloss = glob("./gainloss/*")
  91. File ratio_cnv_trusted = "${sample}.ratio_cnv.trusted.txt"
  92. File ratio_cnv_trusted_call = "${sample}.ratio_cnv.call.txt"
  93. File ratio_cnv_trusted_call_filter = "${sample}.ratio_cnv.call.filter.txt"
  94. }
  95. }