用于miRNA-seq二代测序数据分析
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  # TrimAdapt: trim the 3' adapter from single-end miRNA-seq reads with fastp,
  # optionally strip random bases from both read ends, and tabulate the
  # resulting read-length distribution.
  #
  # Inputs
  #   sample_id              prefix used for every file this task writes
  #   in_fastq               input FASTQ handed to fastp via -i
  #   adapter_seq            adapter sequence passed to fastp --adapter_sequence
  #   randomBase_in_adapter  number of bases trimmed from BOTH ends of each
  #                          read in a second fastp pass; 0 skips that pass
  #   sequencing_length      sequencing read length; fastp --length_limit is set
  #                          to sequencing_length - 1
  #   docker                 container image used to run the command
  #   cluster_config         value forwarded to the `cluster` runtime attribute
  #   disk_size              size inserted into the `dataDisk` runtime attribute
  #
  # Outputs
  #   out_fastq              <sample_id>.trimAdapt.fastq.gz
  #   out_log                <sample_id>.trimAdapt.log (stderr of the fastp runs)
  #   out_lengthDistribute   <sample_id>.trimAdapt.lengthDistribute, a
  #                          tab-separated table with header Length / ReadCount
  task TrimAdapt {
    String sample_id
    File in_fastq
    String adapter_seq
    Int randomBase_in_adapter
    Int sequencing_length
    String docker
    String cluster_config
    String disk_size

    command <<<
      set -o pipefail
      set -e

      # Use every core the container exposes.
      nt=$(nproc)

      # Pass 1: adapter trimming. -Q turns off fastp quality filtering and
      # --length_required 0 keeps arbitrarily short reads.
      # NOTE(review): --length_limit (sequencing_length - 1) presumably drops
      # reads that are still full length, i.e. reads in which no adapter was
      # found - confirm against the fastp documentation.
      echo -e "Trim adapter's invariant part." > "${sample_id}.trimAdapt.log"
      fastp --thread "$nt" -Q \
        --length_required 0 \
        --length_limit $(( ${sequencing_length} - 1 )) \
        --adapter_sequence "${adapter_seq}" \
        -i "${in_fastq}" \
        -o "${sample_id}.trimAdapt.fastq.tmp.gz" \
        2>> "${sample_id}.trimAdapt.log"

      if [ "${randomBase_in_adapter}" -gt 0 ]
      then
        # Pass 2: fixed-width trimming of both read ends only; -A, -Q and -L
        # switch off adapter trimming, quality filtering and length filtering.
        echo -e "\nTrim ${randomBase_in_adapter} random base from both sides\n" >> "${sample_id}.trimAdapt.log"
        fastp --thread "$nt" -A -Q -L \
          --trim_front1 "${randomBase_in_adapter}" --trim_tail1 "${randomBase_in_adapter}" \
          -i "${sample_id}.trimAdapt.fastq.tmp.gz" \
          -o "${sample_id}.trimAdapt.fastq.gz" \
          2>> "${sample_id}.trimAdapt.log"
        # The intermediate file is not a declared output; free the disk space.
        rm -f "${sample_id}.trimAdapt.fastq.tmp.gz"
      else
        mv "${sample_id}.trimAdapt.fastq.tmp.gz" "${sample_id}.trimAdapt.fastq.gz"
      fi

      # Read-length histogram: paste folds each 4-line FASTQ record onto one
      # tab-separated line, so field 2 is the sequence. OFS is set to a tab so
      # the data rows use the same delimiter as the header line.
      echo -e "Length\tReadCount" > "${sample_id}.trimAdapt.lengthDistribute"
      zcat "${sample_id}.trimAdapt.fastq.gz" | paste - - - - | cut -f 2 | \
        awk 'BEGIN{OFS="\t"} {a[length($1)]++} END{for(i in a){print i,a[i]}}' | sort -n \
        >> "${sample_id}.trimAdapt.lengthDistribute"
    >>>

    # NOTE(review): cluster / systemDisk / dataDisk are backend-specific runtime
    # attributes (they look like the Alibaba Cloud BatchCompute backend) - confirm.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File out_fastq="${sample_id}.trimAdapt.fastq.gz"
      File out_log="${sample_id}.trimAdapt.log"
      File out_lengthDistribute="${sample_id}.trimAdapt.lengthDistribute"
    }
  }