You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

84 lines
2.5KB

  task SentieonFastqToBam {
    # Aligns a paired-end FASTQ pair with `sentieon bwa mem`, coordinate-sorts the
    # result, collects alignment QC metrics, and removes duplicates
    # (LocusCollector + Dedup), all with the Sentieon command-line tools.
    #
    # Outputs: the deduplicated BAM, its index, and every "*_metrics.txt" file
    # written into the working directory.

    # ---- Tool input files and parameters ----
    File fastq1                 # first FASTQ passed to bwa mem
    File fastq2                 # second FASTQ passed to bwa mem
    String sample_id            # used as read-group ID and SM, and as the prefix of every file written
    String Seq_platform         # value of the read-group PL field
    String ref_fasta            # reference FASTA file name, resolved relative to ref_fasta_dir
    File ref_fasta_dir          # directory holding the reference (presumably with its index files -- confirm)
    String SENTIEON_LICENSE     # exported as the SENTIEON_LICENSE environment variable
    String docker
    String cluster_config
    String disk_size

    ## Extra driver parameters
    String qc_driver_args = ""
    String lc_driver_args = "--traverse_param=200000/10000"
    String dedup_driver_args = "--traverse_param=200000/10000"

    ## Extra algo parameters
    String bwa_args = "-Y -M"
    String bwa_chunk_size = "100000000"
    String lc_args = ""
    String bam_option = "--bam_compression 1"
    String out_bam = sample_id + ".dedup.bam"
    String out_bai = sample_id + ".dedup.bam.bai"

    # ---- Tool command ----
    # Paths and sample-derived file names are double-quoted so that whitespace or
    # glob characters in them cannot split or expand the arguments. The option
    # strings (bwa_args, bam_option, *_driver_args, lc_args) are intentionally left
    # unquoted: they may hold several space-separated flags or be empty.
    command <<<
      set -exo pipefail
      export SENTIEON_LICENSE="${SENTIEON_LICENSE}"
      nt=$(nproc)
      reference="${ref_fasta_dir}/${ref_fasta}"
      sorted_bam="${sample_id}.sorted.bam"
      score_file="${sample_id}.score.txt.gz"

      # Step 1: alignment piped straight into sorting.
      sentieon bwa mem -R "@RG\tID:${sample_id}\tSM:${sample_id}\tPL:${Seq_platform}" ${bwa_args} -K ${bwa_chunk_size} -t $nt "$reference" "${fastq1}" "${fastq2}" \
        | sentieon util sort ${bam_option} -i - -r "$reference" -t $nt -o "$sorted_bam" --sam2bam
      ls ./

      # Step 2: QC metrics on the sorted BAM.
      sentieon driver -r "$reference" -t $nt -i "$sorted_bam" ${qc_driver_args} \
        --algo MeanQualityByCycle "${sample_id}.mq_metrics.txt" \
        --algo QualDistribution "${sample_id}.qd_metrics.txt" \
        --algo GCBias --summary "${sample_id}.gc_summary_metrics.txt" "${sample_id}.gc_metrics.txt" \
        --algo AlignmentStat "${sample_id}.aln_metrics.txt" \
        --algo InsertSizeMetricAlgo "${sample_id}.is_metrics.txt"
      ls ./

      # Step 3: collect the score file consumed by Dedup.
      sentieon driver -r "$reference" -t $nt -i "$sorted_bam" ${lc_driver_args} \
        --algo LocusCollector \
        ${lc_args} \
        "$score_file"
      ls ./

      # Step 4: duplicate removal. The task output also expects the index file
      # named by out_bai to exist afterwards (presumably written by Sentieon next
      # to the BAM -- confirm).
      sentieon driver -r "$reference" -t $nt -i "$sorted_bam" ${dedup_driver_args} \
        --algo Dedup \
        --score_info "$score_file" \
        --metrics "${sample_id}.dedup_metrics.txt" \
        ${bam_option} "${out_bam}"
      ls ./
    >>>

    # NOTE(review): cluster / systemDisk / dataDisk look like backend-specific
    # runtime attributes -- confirm against the execution backend in use.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # ---- Tool outputs ----
    output {
      File deduped_bam = out_bam
      File deduped_bam_bai = out_bai
      Array[File] qc_metrics = glob("*_metrics.txt")
    }
  }