Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

108 lines
4.4KB

  1. task CNVkit {
  2. String sample
  3. File ref_dir
  4. String fasta
  5. File ref_flat
  6. File regions
  7. File hrd
  8. File tumor_bam
  9. File tumor_bam_index
  10. File? normal_bam
  11. File? normal_bam_index
  12. String docker
  13. String cluster_config
  14. String disk_size
  15. command <<<
  16. set -o pipefail
  17. set -e
  18. nt=$(nproc)
  19. cnvkit.py access ${ref_dir}/${fasta} -o access.bed
  20. # Prepare the target bed
  21. cnvkit.py target ${regions} --annotate ${ref_flat} --split --short-names -o my_baits.bed
  22. if [ ${normal_bam} ]; then
  23. cnvkit.py autobin ${tumor_bam} ${normal_bam} -t my_baits.bed -g access.bed
  24. else
  25. cnvkit.py autobin ${tumor_bam} -t my_baits.bed -g access.bed
  26. fi
  27. # For each sample...
  28. cnvkit.py coverage ${tumor_bam} my_baits.target.bed -o ${sample}.T.targetcoverage.cnn
  29. cnvkit.py coverage ${tumor_bam} my_baits.antitarget.bed -o ${sample}.T.antitargetcoverage.cnn
  30. if [ ${normal_bam} ]; then
  31. cnvkit.py coverage ${normal_bam} my_baits.target.bed -o ${sample}.N.targetcoverage.cnn
  32. cnvkit.py coverage ${normal_bam} my_baits.antitarget.bed -o ${sample}.N.antitargetcoverage.cnn
  33. # With paired or pooled normals
  34. cnvkit.py reference *.N.{,anti}targetcoverage.cnn --fasta ${ref_dir}/${fasta} -o reference.cnn
  35. else
  36. # With no control sample
  37. cnvkit.py reference -o reference.cnn -f ${ref_dir}/${fasta} -t my_baits.target.bed -a my_baits.antitarget.bed
  38. fi
  39. # For each tumor sample...
  40. cnvkit.py fix ${sample}.T.targetcoverage.cnn ${sample}.T.antitargetcoverage.cnn reference.cnn -o ${sample}.cnr
  41. cnvkit.py segment ${sample}.cnr -o ${sample}.cns
  42. # Check noise
  43. cnvkit.py metrics ${sample}.cnr -s ${sample}.cns > ${sample}.stats
  44. # Derive each segment's absolute integer copy number, ploidy must be int value
  45. PURITY=`awk -F'\t' '{print $6}' ${hrd} | sed -n '2p'`
  46. cnvkit.py segmetrics ${sample}.cnr -s ${sample}.cns --ci -o ${sample}.segmetrics.cns
  47. cnvkit.py call ${sample}.segmetrics.cns --drop-low-coverage --filter ci -m threshold --purity $PURITY -o ${sample}.call.cns
  48. # Plot the results
  49. cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -o ${sample}.scatter.pdf
  50. cnvkit.py diagram ${sample}.cnr -s ${sample}.call.cns -o ${sample}.diagram.pdf
  51. cnvkit.py heatmap ${sample}.cnr ${sample}.call.cns -o ${sample}.heatmap.pdf
  52. # Genemetrics
  53. cnvkit.py genemetrics ${sample}.cnr -t 0.2 -m 3 -o ${sample}.ratio_cnv.txt
  54. cnvkit.py genemetrics ${sample}.cnr -s ${sample}.call.cns -t 0.2 -m 3 -o ${sample}.segment_cnv.txt
  55. # Filter genes
  56. cat ${sample}.ratio_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > ratio_cnv.txt
  57. cat ${sample}.segment_cnv.txt | tail -n+2 | cut -f1 | sort | uniq > segment_cnv.txt
  58. comm -12 ratio_cnv.txt segment_cnv.txt > ${sample}.trusted_genes.txt
  59. # # Scatter plot for each gene
  60. # mkdir gainloss
  61. # touch failed_genes.txt
  62. # for gene in `cat ${sample}.trusted_genes.txt`
  63. # do
  64. # cnvkit.py scatter ${sample}.cnr -s ${sample}.call.cns -g $gene -o ./gainloss/${sample}.$gene.scatter.pdf || echo $gene >> failed_genes.txt
  65. # done
  66. # Filter by trusted_genes
  67. awk 'NR==FNR {a[$1]=$2;next} NR!=FNR {if(FNR == 1 || (FNR != 1 && $1 in a)) print $0}' ${sample}.trusted_genes.txt ${sample}.ratio_cnv.txt > ${sample}.ratio_cnv.trusted.txt
  68. # Infer absolute CN (not adjust by purity)
  69. cnvkit.py call ${sample}.ratio_cnv.trusted.txt -m threshold -o ${sample}.ratio_cnv.call.txt
  70. awk '{if ($6 != 2) print $0}' ${sample}.ratio_cnv.call.txt > ${sample}.ratio_cnv.call.filter.txt
  71. >>>
  72. runtime {
  73. docker: docker
  74. cluster: cluster_config
  75. systemDisk: "cloud_ssd 40"
  76. dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  77. }
  78. output {
  79. File scatter_pdf = "${sample}.scatter.pdf"
  80. File diagram_pdf = "${sample}.diagram.pdf"
  81. File heatmap_pdf = "${sample}.heatmap.pdf"
  82. File cnr = "${sample}.cnr"
  83. File cns = "${sample}.cns"
  84. File stats = "${sample}.stats"
  85. File call_cns = "${sample}.call.cns"
  86. File ratio_cnv = "${sample}.ratio_cnv.txt"
  87. File segment_cnv = "${sample}.segment_cnv.txt"
  88. File trusted_genes = "${sample}.trusted_genes.txt"
  89. File? failed_genes = "${sample}.failed_genes.txt"
  90. Array[File]? gainloss = glob("./gainloss/*")
  91. File ratio_cnv_trusted = "${sample}.ratio_cnv.trusted.txt"
  92. File ratio_cnv_trusted_call = "${sample}.ratio_cnv.call.txt"
  93. File ratio_cnv_trusted_call_filter = "${sample}.ratio_cnv.call.filter.txt"
  94. }
  95. }