Automated, integrated analysis software for the genomic data of cancer patients.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

71 lines
2.2KB

  # Task HRD: builds a Sequenza seqz file from a tumor/normal BAM pair, bins it,
  # and runs an R analysis script whose result files are collected as outputs.
  #
  # Inputs:
  #   sample           - sample name; used as the prefix of every produced file
  #   ref_dir, fasta   - the reference FASTA is read from <ref_dir>/<fasta>
  #   gc               - file handed to `sequenza-utils bam2seqz -gc`
  #   tumor_bam        - handed to bam2seqz as -t
  #   normal_bam       - handed to bam2seqz as -n
  #   tumor_bam_index, normal_bam_index
  #                    - not referenced by the command; presumably declared so the
  #                      index is localized next to its BAM (TODO confirm)
  #   docker, cluster_config, disk_size
  #                    - forwarded to the runtime section
  #
  # NOTE(review): normal_bam is declared optional but is always written after
  # `-n`; if it is omitted the flag is left without a value. Confirm whether
  # tumor-only runs are meant to be supported.
  task HRD {
    String sample
    File ref_dir
    String fasta
    File gc
    File tumor_bam
    File tumor_bam_index
    File? normal_bam
    File? normal_bam_index
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      HRD_ANALYSIS_PATH="/cromwell_root/tmp"
      # -p: under `set -e` a plain mkdir would abort if the directory already exists
      mkdir -p $HRD_ANALYSIS_PATH
      seqz=$HRD_ANALYSIS_PATH'/'${sample}'.seqz.gz'
      small=$HRD_ANALYSIS_PATH'/'${sample}'.small.seqz.gz'

      # bam2seqz: 24 chromosomes are listed, and --parallel 24 matches that count
      sequenza-utils bam2seqz -gc ${gc} --fasta ${ref_dir}/${fasta} -n ${normal_bam} -t ${tumor_bam} -o $seqz -C chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY --parallel 24

      # merge the per-chromosome files, keeping only the first header line
      # (later lines whose first column is "chromosome" are dropped)
      cd $HRD_ANALYSIS_PATH
      zcat ${sample}_*.seqz.gz | awk '{if (NR == 1 || (NR != 1 && $1 != "chromosome")) {print $0}}' | bgzip > $seqz
      tabix -f -s 1 -b 2 -e 2 -S 1 $seqz

      # remove the per-chromosome pieces; -f so a missing file cannot abort the task
      rm -f ${sample}_*.seqz.gz ${sample}_*.seqz.gz.tbi

      # seqz_binning: WES: 50; WGS: 200
      sequenza-utils seqz_binning --seqz $seqz -w 50 -o $small

      # analysis in R; ~/sequenza.r is presumably shipped in the docker image (TODO confirm)
      Rscript ~/sequenza.r $HRD_ANALYSIS_PATH ${sample}
    >>>

    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # Every output needs a declared type; all of these are files named after the sample.
    # NOTE(review): the command writes into /cromwell_root/tmp while these paths
    # are relative -- confirm they resolve to that directory on this backend.
    output {
      File hrd = "${sample}.HRD.txt"
      File alternative_fit = "${sample}_alternative_fit.pdf"
      File alternative_solutions = "${sample}_alternative_solutions.txt"
      File chromosome_depths = "${sample}_chromosome_depths.pdf"
      File chromosome_view = "${sample}_chromosome_view.pdf"
      File CN_bars = "${sample}_CN_bars.pdf"
      File confints_CP = "${sample}_confints_CP.txt"
      File contours_CP = "${sample}_contours_CP.pdf"
      File CP_contours = "${sample}_CP_contours.pdf"
      File gc_plots = "${sample}_gc_plots.pdf"
      File genome_view = "${sample}_genome_view.pdf"
      File model_fit = "${sample}_model_fit.pdf"
      File mutations = "${sample}_mutations.txt"
      File scarHRD_input = "${sample}_scarHRD_input.txt"
      File segments = "${sample}_segments.txt"
      File sequenza_cp_table = "${sample}_sequenza_cp_table.RData"
      File sequenza_extract = "${sample}_sequenza_extract.RData"
      File sequenza_log = "${sample}_sequenza_log.txt"
      File small_seqz = "${sample}.small.seqz.gz"
      File small_seqz_index = "${sample}.small.seqz.gz.tbi"
    }
  }