用于miRNA-seq二代测序数据分析
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

56 lines
1.7KB

  # TrimAdapt
  #
  # Runs fastp on one FASTQ file to remove the given adapter sequence, optionally
  # removes a fixed number of bases from both ends of every read in a second
  # fastp pass, and finally tabulates the read-length distribution of the result.
  #
  # Inputs
  #   sample_ID              Prefix for every file this task writes.
  #   in_fastq               FASTQ handed to fastp via -i (read 1 only).
  #   adapter_seq            Value passed to fastp --adapter_sequence.
  #   randomBase_in_adapter  Number of bases trimmed from the front AND the tail
  #                          of each read in the second pass; 0 skips that pass.
  #   docker                 Container image used for the runtime.
  #   cluster_config         Value of the backend "cluster" runtime attribute.
  #   disk_size              Size inserted into the "dataDisk" runtime attribute.
  #
  # Outputs
  #   out_fastq              <sample_ID>.trimAdapt.fastq.gz
  #   out_log                <sample_ID>.trimAdapt.log (stderr of both fastp runs)
  #   out_lengthDistribute   <sample_ID>.trimAdapt.lengthDistribute, lines of
  #                          "<read length> <count>" sorted numerically.
  task TrimAdapt {
    String sample_ID
    File in_fastq
    String adapter_seq
    Int randomBase_in_adapter
    String docker
    String cluster_config
    String disk_size

    command <<<
      # Abort on the first failing command, including failures inside pipelines.
      set -o pipefail
      set -e

      # Use every core that the container can see.
      nt=$(nproc)

      echo -e "Trim adapter's invariant part." > "${sample_ID}.trimAdapt.log"

      # Pass 1: adapter trimming only. Per the fastp documentation -Q disables
      # quality filtering and -L disables length filtering.
      # Interpolated values are quoted so whitespace or glob characters in a
      # sample name or localized path cannot split the argument.
      fastp --thread "$nt" -Q -L \
        --adapter_sequence "${adapter_seq}" \
        -i "${in_fastq}" \
        -o "${sample_ID}.trimAdapt.fastq.tmp.gz" \
        2>> "${sample_ID}.trimAdapt.log"

      if [ ${randomBase_in_adapter} -gt 0 ]
      then
        echo -e "\nTrim ${randomBase_in_adapter} random base from both sides\n" >> "${sample_ID}.trimAdapt.log"

        # Pass 2: fixed-width trimming on both read ends, with adapter trimming
        # switched off (-A). NOTE(review): this is presumably a separate pass
        # because fastp applies front/tail trimming before adapter trimming
        # within a single run - confirm against the fastp processing order.
        fastp --thread "$nt" -A -Q -L \
          --trim_front1 ${randomBase_in_adapter} --trim_tail1 ${randomBase_in_adapter} \
          -i "${sample_ID}.trimAdapt.fastq.tmp.gz" \
          -o "${sample_ID}.trimAdapt.fastq.gz" \
          2>> "${sample_ID}.trimAdapt.log"

        # The intermediate file is not a declared output; remove it so it does
        # not keep occupying the data disk.
        rm -f "${sample_ID}.trimAdapt.fastq.tmp.gz"
      else
        # Nothing further to trim: the pass-1 result is the final FASTQ.
        mv "${sample_ID}.trimAdapt.fastq.tmp.gz" "${sample_ID}.trimAdapt.fastq.gz"
      fi

      # Length distribution: join each 4-line FASTQ record into one row, keep
      # the sequence column, count the reads per sequence length, sort by length.
      zcat "${sample_ID}.trimAdapt.fastq.gz" | paste - - - - | cut -f 2 | \
        awk '{a[length($1)]++}END{for(i in a){print i,a[i]}}' | sort -n \
        > "${sample_ID}.trimAdapt.lengthDistribute"
    >>>

    # NOTE(review): cluster/systemDisk/dataDisk are backend-specific attributes
    # (they look like the Cromwell Alibaba Cloud BCS backend) - confirm.
    runtime {
      docker: docker
      cluster: cluster_config
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File out_fastq="${sample_ID}.trimAdapt.fastq.gz"
      File out_log="${sample_ID}.trimAdapt.log"
      File out_lengthDistribute="${sample_ID}.trimAdapt.lengthDistribute"
    }
  }