# Align: run bowtie on a FASTQ file against a pre-built index and collect the
# SAM alignments, the reads that did not align, and bowtie's stderr report.
task Align {
    # Prefix used for every file this task writes.
    String sample_ID
    # Reads to align; passed to bowtie as FASTQ input (-q).
    File in_fastq
    # Location holding the bowtie index files, and the index basename inside it.
    File dir_index
    String prefix_index
    # Value handed to bowtie's -e/--maqerr option.
    Int sum_unmatch_quality_limit
    # Container image and backend-specific resource settings.
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -eo pipefail
        threads=$(nproc)
        # stdout is unused; bowtie's summary goes to stderr, which becomes the log.
        bowtie -p $threads \
            ${dir_index}/${prefix_index} \
            --maqerr ${sum_unmatch_quality_limit} \
            -q ${in_fastq} \
            --un ${sample_ID}.matureUnaligned.fastq \
            --sam ${sample_ID}.align2mature.sam \
            2> ${sample_ID}.align2mature.log
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        File out_sam = sample_ID + ".align2mature.sam"
        File out_fastq = sample_ID + ".matureUnaligned.fastq"
        File out_log = sample_ID + ".align2mature.log"
    }
}
# Quantification: count SAM records per reference name and write a two-column,
# tab-separated table (header "ID.miRNA<TAB>ReadCount") sorted by reference name.
#
# Only records whose FLAG column (field 2) equals 0 are counted; header lines
# (starting with '@') are ignored. The count key is the RNAME column (field 3).
task Quantification {
    # Prefix of the output table name.
    String sample_ID
    # SAM file to summarise.
    File in_sam
    # Backend-specific resource settings.
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        printf 'ID.miRNA\tReadCount\n' > "${sample_ID}.matureMiR.readCount"
        # One awk pass reads the file directly and splits on tabs (SAM is
        # tab-delimited). Unlike a `grep -v` stage, awk exits 0 when nothing
        # matches, so a SAM without qualifying records yields a header-only
        # table instead of aborting the task under `set -e -o pipefail`.
        awk -F '\t' '!/^@/ && $2 == 0 { hits[$3]++ } END { for (ref in hits) printf "%s\t%d\n", ref, hits[ref] }' "${in_sam}" \
            | sort >> "${sample_ID}.matureMiR.readCount"
    >>>

    runtime {
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_readCount = "${sample_ID}.matureMiR.readCount"
    }
}
# ReadFilter: run fastp with adapter trimming switched off so that only its
# quality, N-content and minimum-length filters are applied to the input reads.
task ReadFilter {
    # Prefix used for the filtered FASTQ and the log.
    String sample_ID
    # Reads to filter.
    File in_fastq
    # Thresholds forwarded unchanged to the fastp options of the same name.
    Int qualified_quality_phred
    Int unqualified_percent_limit
    Int n_base_limit
    Int length_required
    # Container image and backend-specific resource settings.
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -eo pipefail
        threads=$(nproc)
        # fastp reports its filtering summary on stderr; keep it as the task log.
        fastp -w $threads --disable_adapter_trimming \
            --qualified_quality_phred ${qualified_quality_phred} \
            --unqualified_percent_limit ${unqualified_percent_limit} \
            --n_base_limit ${n_base_limit} \
            --length_required ${length_required} \
            --in1 ${in_fastq} \
            --out1 ${sample_ID}.trimAdapt.filter.fastq.gz \
            2> ${sample_ID}.filter.log
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        File out_fastq = sample_ID + ".trimAdapt.filter.fastq.gz"
        File out_log = sample_ID + ".filter.log"
    }
}
# ReadStats: scrape read counts out of the three upstream logs and write a
# two-column, tab-separated summary (header "Stage<TAB>ReadCount").
#
# Each value is the text after the first ':' on the selected log line, with
# spaces removed. Which occurrence is used (first or last) is noted per value.
# Under `set -e -o pipefail` the task stops if an expected line is absent.
task ReadStats {
    # Prefix of the output file name.
    String sample_ID
    # Log of the adapter-trimming step (may hold more than one report).
    File in_log_trimAdatper
    # Log of the read-filtering step.
    File in_log_readFilter
    # Log of the alignment step.
    File in_log_align_mature
    # Backend-specific resource settings.
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # `grep -m 1` stops at the first hit itself, so no `head` is needed and
        # grep cannot be killed by SIGPIPE (which pipefail would turn into a failure).
        # First 'total reads' line / last 'reads passed filter' line of the trim log.
        Total_input=$(grep -m 1 'total reads' "${in_log_trimAdatper}" | cut -d ':' -f 2 | sed 's/ //g')
        Pass_trimAdatper=$(grep 'reads passed filter' "${in_log_trimAdatper}" | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
        # Integer arithmetic is done by the shell; no external `bc` is required.
        Adapter_dimer=$((Total_input - Pass_trimAdatper))

        Too_short=$(grep 'too short' "${in_log_readFilter}" | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
        Low_quality_singleBase=$(grep -m 1 'low quality' "${in_log_readFilter}" | cut -d ':' -f 2 | sed 's/ //g')
        Low_quality_tooManyN=$(grep -m 1 'too many N' "${in_log_readFilter}" | cut -d ':' -f 2 | sed 's/ //g')
        Low_quality=$((Low_quality_singleBase + Low_quality_tooManyN))
        ForAlign=$(grep -m 1 'reads passed filter' "${in_log_readFilter}" | cut -d ':' -f 2 | sed 's/ //g')

        # The alignment line ends with a parenthesised suffix; keep only the part before '('.
        Align_miRNA_mature=$(grep -m 1 'at least one reported alignment' "${in_log_align_mature}" | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g')

        {
            printf 'Stage\tReadCount\n'
            printf '%s\t%s\n' 'Total Input' "$Total_input"
            printf '%s\t%s\n' 'Adapter Dimer' "$Adapter_dimer"
            printf '%s\t%s\n' 'Too Short' "$Too_short"
            printf '%s\t%s\n' 'Low Quality' "$Low_quality"
            printf '%s\t%s\n' 'For Align' "$ForAlign"
            printf '%s\t%s\n' 'Mature miRNA' "$Align_miRNA_mature"
        } > "${sample_ID}.readStats"
    >>>

    runtime {
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out = "${sample_ID}.readStats"
    }
}
# TrimAdapt: remove the adapter from reads with fastp, optionally trim a fixed
# number of bases from both read ends, and tabulate the resulting read lengths.
#
# Pass 1 trims `adapter_seq` with fastp's quality (-Q) and length (-L) filters
# disabled. If `randomBase_in_adapter` is greater than 0, pass 2 (adapter
# trimming disabled via -A) cuts that many bases from the front and the tail of
# every read; otherwise the pass-1 file is simply renamed. Both passes append
# their stderr report to the same log.
task TrimAdapt {
    # Prefix used for every file this task writes.
    String sample_ID
    # Raw reads.
    File in_fastq
    # Adapter sequence given to fastp --adapter_sequence.
    String adapter_seq
    # Number of bases to cut from each end in the second pass; 0 skips it.
    Int randomBase_in_adapter
    # Container image and backend-specific resource settings.
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        echo -e "Trim adapter's invariant part." > "${sample_ID}.trimAdapt.log"
        # Note the space before each trailing backslash: without it the last
        # flag on a line is glued to the first token of the next line.
        fastp --thread $nt -Q -L \
            --adapter_sequence ${adapter_seq} \
            -i "${in_fastq}" \
            -o "${sample_ID}.trimAdapt.fastq.tmp.gz" \
            2>> "${sample_ID}.trimAdapt.log"

        if [ ${randomBase_in_adapter} -gt 0 ]
        then
            echo -e "\nTrim ${randomBase_in_adapter} random base from both sides\n" >> "${sample_ID}.trimAdapt.log"
            fastp --thread $nt -A -Q -L \
                --trim_front1 ${randomBase_in_adapter} --trim_tail1 ${randomBase_in_adapter} \
                -i "${sample_ID}.trimAdapt.fastq.tmp.gz" \
                -o "${sample_ID}.trimAdapt.fastq.gz" \
                2>> "${sample_ID}.trimAdapt.log"
            # The intermediate file is not a task output; drop it to free disk.
            rm -f "${sample_ID}.trimAdapt.fastq.tmp.gz"
        else
            mv "${sample_ID}.trimAdapt.fastq.tmp.gz" "${sample_ID}.trimAdapt.fastq.gz"
        fi

        # Join each 4-line FASTQ record onto one tab-separated line, keep the
        # sequence column, and emit "<length> <count>" sorted numerically.
        zcat "${sample_ID}.trimAdapt.fastq.gz" | paste - - - - | cut -f 2 | \
            awk '{a[length($1)]++}END{for(i in a){print i,a[i]}}' | sort -n \
            > "${sample_ID}.trimAdapt.lengthDistribute"
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_fastq = "${sample_ID}.trimAdapt.fastq.gz"
        File out_log = "${sample_ID}.trimAdapt.log"
        File out_lengthDistribute = "${sample_ID}.trimAdapt.lengthDistribute"
    }
}