Automated, integrated analysis software for genomic data from cancer patients.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

71 lines
2.2KB

  # Task HRD: builds a Sequenza "seqz" file from a tumor/normal BAM pair, bins it,
  # and runs the R analysis script (~/sequenza.r inside the container image).
  #
  # Inputs:
  #   sample              prefix used for every intermediate and output file name
  #   ref_dir, fasta      reference directory and the FASTA file name inside it
  #   gc                  file passed to `bam2seqz -gc`
  #   tumor_bam           tumor alignment passed to `bam2seqz -t`
  #   tumor_bam_index     not referenced in the command; presumably localized so
  #                       that it sits next to the BAM -- TODO confirm
  #   normal_bam          normal alignment passed to `bam2seqz -n`
  #   normal_bam_index    not referenced in the command (see tumor_bam_index)
  #   docker, cluster_config, disk_size   values forwarded to the runtime section
  #
  # NOTE(review): normal_bam is declared optional, but the command always emits
  # `-n`; if it is left unset the flag has no value. Confirm whether tumor-only
  # runs are meant to be supported.
  task HRD {
    String sample
    File ref_dir
    String fasta
    File gc
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # The output section below uses bare relative file names, so remember the
      # directory the command starts in before changing into the scratch area.
      start_dir=$(pwd)

      HRD_ANALYSIS_PATH="/cromwell_root/tmp"
      # -p: do not abort (set -e) when the directory already exists.
      mkdir -p "$HRD_ANALYSIS_PATH"
      seqz="$HRD_ANALYSIS_PATH/${sample}.seqz.gz"
      small="$HRD_ANALYSIS_PATH/${sample}.small.seqz.gz"

      # bam2seqz: 24 chromosomes are listed and 24 parallel processes requested.
      sequenza-utils bam2seqz -gc ${gc} --fasta ${ref_dir}/${fasta} -n ${normal_bam} -t ${tumor_bam} -o "$seqz" -C chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY --parallel 24

      # Merge the per-chromosome pieces into one file: keep the first line
      # (header) and drop every later line whose first field is "chromosome".
      cd "$HRD_ANALYSIS_PATH"
      zcat "${sample}"_*.seqz.gz | awk 'NR == 1 || $1 != "chromosome"' | bgzip > "$seqz"
      tabix -f -s 1 -b 2 -e 2 -S 1 "$seqz"
      # -f: a missing per-chromosome file or index must not abort the task.
      rm -f "${sample}"_*.seqz.gz "${sample}"_*.seqz.gz.tbi

      # seqz_binning window: WES: 50; WGS: 200
      sequenza-utils seqz_binning --seqz "$seqz" -w 50 -o "$small"

      # Analysis in R.
      # NOTE(review): sequenza.r is assumed to write its results into the
      # directory given as its first argument -- confirm against the script.
      Rscript ~/sequenza.r "$HRD_ANALYSIS_PATH" "${sample}"

      # Make the results visible under the relative names used in the output
      # section. The full (unbinned) seqz file and its index are not declared
      # outputs, so they are skipped.
      if [ "$start_dir" != "$HRD_ANALYSIS_PATH" ]; then
        for f in "$HRD_ANALYSIS_PATH/${sample}"[._]*; do
          [ -f "$f" ] || continue
          case "$f" in
            "$seqz"|"$seqz.tbi") continue ;;
          esac
          cp -f "$f" "$start_dir"/
        done
      fi
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # Every output needs an explicit type; all of them are files named after
    # the sample prefix.
    output {
      File hrd = "${sample}.HRD.txt"
      File alternative_fit = "${sample}_alternative_fit.pdf"
      File alternative_solutions = "${sample}_alternative_solutions.txt"
      File chromosome_depths = "${sample}_chromosome_depths.pdf"
      File chromosome_view = "${sample}_chromosome_view.pdf"
      File CN_bars = "${sample}_CN_bars.pdf"
      File confints_CP = "${sample}_confints_CP.txt"
      File contours_CP = "${sample}_contours_CP.pdf"
      File CP_contours = "${sample}_CP_contours.pdf"
      File gc_plots = "${sample}_gc_plots.pdf"
      File genome_view = "${sample}_genome_view.pdf"
      File model_fit = "${sample}_model_fit.pdf"
      File mutations = "${sample}_mutations.txt"
      File scarHRD_input = "${sample}_scarHRD_input.txt"
      File segments = "${sample}_segments.txt"
      File sequenza_cp_table = "${sample}_sequenza_cp_table.RData"
      File sequenza_extract = "${sample}_sequenza_extract.RData"
      File sequenza_log = "${sample}_sequenza_log.txt"
      File small_seqz = "${sample}.small.seqz.gz"
      File small_seqz_index = "${sample}.small.seqz.gz.tbi"
    }
  }