您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

82 行
2.7KB

  # Gene-fusion detection for one paired-end RNA-seq sample.
  #
  # Steps performed by the command section, in order:
  #   1. STAR aligns the two FASTQ files with chimeric alignments written into
  #      the main BAM (coordinate-sorted, uncompressed).
  #   2. Arriba (binary at /arriba_v2.1.0/arriba inside the image) calls fusions
  #      from that BAM.
  #   3. samtools indexes the BAM, which is then passed to the plotting script.
  #   4. draw_fusions.R renders the called fusions into a PDF.
  #
  # Inputs:
  #   sample_id       - prefix for every file written under ./output/
  #   fastq1, fastq2  - read files passed to STAR --readFilesIn; they are read
  #                     through `zcat`, so they must be gzip-compressed
  #   STAR_INDEX_DIR  - STAR genome index, passed to --genomeDir
  #   ASSEMBLY_FA     - genome assembly FASTA, passed to arriba -a
  #   ANNOTATION_GTF  - gene annotation, passed to arriba -g and to the plot
  #   disk_size       - size field of the dataDisk runtime attribute
  #   docker, cluster - forwarded unchanged to the runtime section
  #
  # Outputs:
  #   arriba_result   - every ./output/*.tsv (fusions and discarded fusions)
  #   arriba_pdf      - every ./output/*.pdf (fusion plots)
  #
  # NOTE(review): the blacklist, known-fusion, protein-domain and cytoband
  # files are the hg38/GRCh38 v2.1.0 databases bundled in the image, so
  # ASSEMBLY_FA, ANNOTATION_GTF and the STAR index presumably have to be
  # GRCh38 as well - confirm with whoever builds the references.
  task arriba {
    String sample_id
    File fastq1
    File fastq2
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF
    String disk_size
    String docker
    String cluster

    command <<<
      # Abort on the first failing command, including failures inside a pipe.
      set -eo pipefail

      # -p: do not fail when the directory is already present.
      mkdir -p ./output/

      # Alignment. All interpolated values are double-quoted so that paths or
      # sample ids containing whitespace or glob characters stay one argument.
      # The thread count is fixed at 24 and is not derived from `cluster`.
      STAR \
        --runThreadN 24 \
        --genomeDir "${STAR_INDEX_DIR}" \
        --genomeLoad NoSharedMemory \
        --readFilesIn "${fastq1}" "${fastq2}" \
        --readFilesCommand zcat \
        --outSAMtype BAM SortedByCoordinate \
        --outSAMunmapped Within \
        --outBAMcompression 0 \
        --outFilterMultimapNmax 50 \
        --peOverlapNbasesMin 10 \
        --alignSplicedMateMapLminOverLmate 0.5 \
        --alignSJstitchMismatchNmax 5 -1 5 5 \
        --chimSegmentMin 10 \
        --chimOutType WithinBAM HardClip \
        --chimJunctionOverhangMin 10 \
        --chimScoreDropMax 30 \
        --chimScoreJunctionNonGTAG 0 \
        --chimScoreSeparation 1 \
        --chimSegmentReadGapMax 3 \
        --chimMultimapNmax 50 \
        --outFileNamePrefix "./output/${sample_id}."

      # Fusion calling on the STAR output.
      # NOTE(review): -k and -t both point at the known-fusions file; this
      # looks intentional (same file used for known fusions and tags) - confirm.
      /arriba_v2.1.0/arriba \
        -x "./output/${sample_id}.Aligned.sortedByCoord.out.bam" \
        -o "./output/${sample_id}_fusions.tsv" \
        -O "./output/${sample_id}_fusions.discarded.tsv" \
        -a "${ASSEMBLY_FA}" \
        -g "${ANNOTATION_GTF}" \
        -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
        -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
        -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
        -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3

      # Index the BAM before it is handed to the plotting script.
      samtools index -@ 24 \
        "./output/${sample_id}.Aligned.sortedByCoord.out.bam" \
        "./output/${sample_id}.Aligned.sortedByCoord.out.bam.bai"

      # Render the called fusions into a single PDF.
      Rscript /arriba_v2.1.0/draw_fusions.R \
        --fusions="./output/${sample_id}_fusions.tsv" \
        --alignments="./output/${sample_id}.Aligned.sortedByCoord.out.bam" \
        --output="./output/${sample_id}_fusion.pdf" \
        --annotation="${ANNOTATION_GTF}" \
        --cytobands=/arriba_v2.1.0/database/cytobands_hg38_GRCh38_v2.1.0.tsv \
        --proteinDomains=/arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
    >>>

    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      # The BAM and its index are not collected; only tables and plots are.
      Array[File] arriba_result = glob("./output/*.tsv")
      Array[File] arriba_pdf = glob("./output/*.pdf")
    }
  }