Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

80 linhas
2.4KB

  # Runs the GATK PathSeq pipeline on one paired-end sample:
  #   1. FastqToSam         - converts the two FASTQ files into an unaligned BAM
  #   2. PathSeqFilterSpark - filters reads using the host k-mer file and host BWA image
  #   3. PathSeqBwaSpark    - aligns the remaining reads against the microbe BWA image
  #   4. PathSeqScoreSpark  - scores the alignments with the microbe taxonomy file
  #
  # Inputs:
  #   sample_id          sample name; used as the SM tag and in every output file name
  #   fastq1, fastq2     paired-end FASTQ files
  #   host_image         host BWA image passed to --filter-bwa-image
  #   host_kmer          host k-mer file passed to --kmer-file
  #   microbe_dict       microbe sequence dictionary passed to --microbe-dict
  #   microbe_bwa_image  microbe BWA image passed to --microbe-bwa-image
  #   microbe_taxonomy   taxonomy file passed to --taxonomy-file
  #   disk_size          size inserted into the dataDisk runtime string
  #   docker, cluster    runtime settings forwarded unchanged to the backend
  #
  # Outputs:
  #   pathseq_txt  score table written by PathSeqScoreSpark
  #   pathseq_bam  BAM written by PathSeqScoreSpark --output
  #   pathseq_sbi  the .sbi file expected next to that BAM
  task pathseq {
    String sample_id
    File fastq1
    File fastq2
    File host_image
    File host_kmer
    File microbe_dict
    File microbe_bwa_image
    File microbe_taxonomy
    String disk_size
    String docker
    String cluster

    command <<<
      set -o pipefail
      set -e

      # Create every directory the steps below write into. The unpaired
      # alignment directory must be named bwa_unpair_bam because that is the
      # path PathSeqBwaSpark writes to and PathSeqScoreSpark reads from.
      # -p keeps the script from aborting if a directory already exists.
      mkdir -p \
        ubam \
        filter_log \
        clean_pair_bam \
        clean_unpaired_bam \
        pathseq_result \
        bwa_pair_bam \
        bwa_unpair_bam

      # Step 1: FASTQ pair -> unaligned BAM.
      gatk FastqToSam \
        -F1 ${fastq1} \
        -F2 ${fastq2} \
        -O ./ubam/${sample_id}.bam \
        -SM ${sample_id}

      # Step 2: host / quality filtering.
      time gatk --java-options "-Xmx58g" PathSeqFilterSpark \
        --input ./ubam/${sample_id}.bam \
        --paired-output ./clean_pair_bam/${sample_id}_paired.bam \
        --unpaired-output ./clean_unpaired_bam/${sample_id}_unpaired.bam \
        --min-clipped-read-length 70 \
        --kmer-file ${host_kmer} \
        --filter-bwa-image ${host_image} \
        --filter-metrics filter_log/${sample_id}.log

      # Step 3: alignment of the filtered reads to the microbe reference.
      time gatk --java-options "-Xmx58g" PathSeqBwaSpark \
        --paired-input ./clean_pair_bam/${sample_id}_paired.bam \
        --unpaired-input ./clean_unpaired_bam/${sample_id}_unpaired.bam \
        --paired-output bwa_pair_bam/${sample_id}_bwa_paired.bam \
        --unpaired-output bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
        --microbe-bwa-image ${microbe_bwa_image} \
        --microbe-dict ${microbe_dict}

      # Step 4: taxonomic scoring.
      time gatk --java-options "-Xmx58g" PathSeqScoreSpark \
        --paired-input bwa_pair_bam/${sample_id}_bwa_paired.bam \
        --unpaired-input bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
        --taxonomy-file ${microbe_taxonomy} \
        --scores-output pathseq_result/${sample_id}.pathseq.txt \
        --output pathseq_result/${sample_id}.pathseq_reads.bam \
        --min-score-identity 0.90 \
        --identity-margin 0.02
    >>>

    runtime {
      docker: docker
      cluster: cluster
      # NOTE(review): cluster/systemDisk/dataDisk look like backend-specific
      # keys (presumably Alibaba Cloud BCS) - confirm against the Cromwell config.
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File pathseq_txt = "pathseq_result/${sample_id}.pathseq.txt"
      File pathseq_bam = "pathseq_result/${sample_id}.pathseq_reads.bam"
      # NOTE(review): assumes PathSeqScoreSpark writes a .sbi index beside the
      # BAM by default - confirm for the GATK version in the docker image.
      File pathseq_sbi = "pathseq_result/${sample_id}.pathseq_reads.bam.sbi"
    }
  }