@@ -0,0 +1,42 @@ | |||
# Align reads against a bowtie index and collect the SAM, the reads bowtie
# writes via --un, and bowtie's stderr log.
#
# Inputs:
#   sample_ID                  prefix of every file written by this task
#   in_fastq                   reads to align (bowtie is told they are FASTQ via -q)
#   dir_index, prefix_index    the index is addressed as dir_index/prefix_index
#   sum_unmatch_quality_limit  value handed to bowtie -e
#   docker, cluster_config, disk_size   runtime settings
# Outputs:
#   out_sam    <sample_ID>.align2mature.sam
#   out_fastq  <sample_ID>.matureUnaligned.fastq
#   out_log    <sample_ID>.align2mature.log (bowtie stderr)
task Align {
  String sample_ID
  File in_fastq
  File dir_index
  String prefix_index
  Int sum_unmatch_quality_limit
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # Use every core the container exposes.
    nt=$(nproc)
    # Paths and the sample ID are quoted so that whitespace or glob characters
    # in them cannot split or expand the bowtie arguments.
    # -S is a flag (SAM output); the file name after it is bowtie's positional
    # hits-file argument.
    bowtie --threads "$nt" \
      "${dir_index}/${prefix_index}" \
      -e ${sum_unmatch_quality_limit} \
      -q "${in_fastq}" \
      --un "${sample_ID}.matureUnaligned.fastq" \
      -S "${sample_ID}.align2mature.sam" \
      2> "${sample_ID}.align2mature.log"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_sam = "${sample_ID}.align2mature.sam"
    File out_fastq = "${sample_ID}.matureUnaligned.fastq"
    File out_log = "${sample_ID}.align2mature.log"
  }
}
@@ -0,0 +1,27 @@ | |||
# Count, per reference name (SAM column 3), the alignment records whose FLAG
# (SAM column 2) equals 0, and write them as a two-column TSV with a header.
#
# Inputs:
#   sample_ID       prefix of the output file
#   in_sam          SAM file to summarise
#   cluster_config, disk_size   runtime settings
# Output:
#   out_readCount   <sample_ID>.matureMiR.readCount
#                   (header "ID.miRNA<TAB>ReadCount", then sorted rows)
#
# NOTE(review): unlike the tool-running tasks, no docker image is pinned in
# the runtime section; the command relies on awk and sort being available
# on the backend's default image.
task Quantification {
  String sample_ID
  File in_sam
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    out="${sample_ID}.matureMiR.readCount"
    printf 'ID.miRNA\tReadCount\n' > "$out"
    # One awk pass skips header lines and keeps FLAG == 0 records.
    # The former "grep -v" stage exited 1 whenever it printed nothing
    # (for example a header-only SAM), which pipefail + set -e turned into a
    # task failure; awk exits 0 and simply yields an empty table body.
    awk '!/^@/ && ($2 == 0) { n[$3]++ } END { for (k in n) printf "%s\t%d\n", k, n[k] }' "${in_sam}" \
      | sort >> "$out"
  >>>

  runtime {
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_readCount = "${sample_ID}.matureMiR.readCount"
  }
}
@@ -0,0 +1,41 @@ | |||
# Filter reads with fastp (adapter trimming disabled via -A) and keep
# fastp's stderr as the task log.
#
# Inputs:
#   sample_ID                  prefix of every file written by this task
#   in_fastq                   reads to filter
#   qualified_quality_phred    passed to fastp --qualified_quality_phred
#   unqualified_percent_limit  passed to fastp --unqualified_percent_limit
#   n_base_limit               passed to fastp --n_base_limit
#   length_required            passed to fastp --length_required
#   docker, cluster_config, disk_size   runtime settings
# Outputs:
#   out_fastq  <sample_ID>.trimAdapt.filter.fastq.gz
#   out_log    <sample_ID>.filter.log (fastp stderr)
task ReadFilter {
  String sample_ID
  File in_fastq
  Int qualified_quality_phred
  Int unqualified_percent_limit
  Int n_base_limit
  Int length_required
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # Use every core the container exposes.
    nt=$(nproc)
    # Paths and the sample ID are quoted so that whitespace or glob characters
    # in them cannot split or expand the fastp arguments.
    fastp --thread "$nt" -A \
      --qualified_quality_phred ${qualified_quality_phred} \
      --unqualified_percent_limit ${unqualified_percent_limit} \
      --n_base_limit ${n_base_limit} \
      --length_required ${length_required} \
      -i "${in_fastq}" \
      -o "${sample_ID}.trimAdapt.filter.fastq.gz" \
      2> "${sample_ID}.filter.log"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_fastq = "${sample_ID}.trimAdapt.filter.fastq.gz"
    File out_log = "${sample_ID}.filter.log"
  }
}
@@ -0,0 +1,49 @@ | |||
# Build a per-stage read-count table from the logs of the adapter-trimming,
# read-filtering and alignment steps.
#
# Each value is taken from the text after the first ':' on a matching log
# line, with spaces removed:
#   Total Input    first 'total reads' line of the trim log
#   Adapter Dimer  Total Input minus the last 'reads passed filter' line of
#                  the trim log
#   Too Short      last 'too short' line of the filter log
#   Low Quality    first 'low quality' + first 'too many N' lines of the
#                  filter log
#   For Align      first 'reads passed filter' line of the filter log
#   Mature miRNA   first 'at least one reported alignment' line of the
#                  alignment log, truncated before '('
#
# Inputs:
#   sample_ID             prefix of the output file
#   in_log_trimAdatper    log of the adapter-trimming step
#   in_log_readFilter     log of the read-filtering step
#   in_log_align_mature   log of the alignment step
#   cluster_config, disk_size   runtime settings
# Output:
#   out   <sample_ID>.readStats (TSV with header "Stage<TAB>ReadCount")
#
# NOTE(review): no docker image is pinned for this task, so the command is
# restricted to the shell plus grep/head/tail/cut/sed.
task ReadStats {
  String sample_ID
  File in_log_trimAdatper
  File in_log_readFilter
  File in_log_align_mature
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # A missing pattern makes grep exit 1, which aborts the task because of
    # pipefail + set -e; that is kept so a malformed log is not silently
    # reported as an empty count.
    Total_input=$(grep 'total reads' "${in_log_trimAdatper}" | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
    Pass_trimAdatper=$(grep 'reads passed filter' "${in_log_trimAdatper}" | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
    # Shell arithmetic instead of bc: bc is not guaranteed to be installed on
    # the default image this task runs on.
    Adapter_dimer=$((Total_input - Pass_trimAdatper))
    Too_short=$(grep 'too short' "${in_log_readFilter}" | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
    Low_quality_singleBase=$(grep 'low quality' "${in_log_readFilter}" | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
    Low_quality_tooManyN=$(grep 'too many N' "${in_log_readFilter}" | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
    Low_quality=$((Low_quality_singleBase + Low_quality_tooManyN))
    ForAlign=$(grep 'reads passed filter' "${in_log_readFilter}" | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
    Align_miRNA_mature=$(grep 'at least one reported alignment' "${in_log_align_mature}" | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g')
    # Emit the whole table through a single redirect.
    {
      printf 'Stage\tReadCount\n'
      printf 'Total Input\t%s\n' "$Total_input"
      printf 'Adapter Dimer\t%s\n' "$Adapter_dimer"
      printf 'Too Short\t%s\n' "$Too_short"
      printf 'Low Quality\t%s\n' "$Low_quality"
      printf 'For Align\t%s\n' "$ForAlign"
      printf 'Mature miRNA\t%s\n' "$Align_miRNA_mature"
    } > "${sample_ID}.readStats"
  >>>

  runtime {
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out = "${sample_ID}.readStats"
  }
}
@@ -0,0 +1,55 @@ | |||
# Trim the adapter from reads with fastp, optionally trim a fixed number of
# bases from both read ends in a second fastp pass, and tabulate the
# resulting read-length distribution.
#
# Inputs:
#   sample_ID              prefix of every file written by this task
#   in_fastq               reads to trim
#   adapter_seq            passed to fastp --adapter_sequence
#   randomBase_in_adapter  when > 0, passed to both --trim_front1 and
#                          --trim_tail1 in a second fastp pass; otherwise the
#                          first-pass output is used as the final output
#   docker, cluster_config, disk_size   runtime settings
# Outputs:
#   out_fastq             <sample_ID>.trimAdapt.fastq.gz
#   out_log               <sample_ID>.trimAdapt.log (stderr of all fastp runs)
#   out_lengthDistribute  <sample_ID>.trimAdapt.lengthDistribute
#                         ("length count" per line, sorted numerically)
task TrimAdapt {
  String sample_ID
  File in_fastq
  String adapter_seq
  Int randomBase_in_adapter
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    # Use every core the container exposes.
    nt=$(nproc)
    echo -e "Trim adapter's invariant part." > "${sample_ID}.trimAdapt.log"
    # Pass 1: adapter trimming only (-Q and -L switch off quality and length
    # filtering). The space before the line continuation after -L is required;
    # without it the next option is glued onto -L.
    fastp --thread "$nt" -Q -L \
      --adapter_sequence "${adapter_seq}" \
      -i "${in_fastq}" \
      -o "${sample_ID}.trimAdapt.fastq.tmp.gz" \
      2>> "${sample_ID}.trimAdapt.log"
    if [ ${randomBase_in_adapter} -gt 0 ]
    then
      echo -e "\nTrim ${randomBase_in_adapter} random base from both sides\n" >> "${sample_ID}.trimAdapt.log"
      # Pass 2: fixed-length trimming of both ends, with adapter trimming and
      # all filtering disabled.
      fastp --thread "$nt" -A -Q -L \
        --trim_front1 ${randomBase_in_adapter} --trim_tail1 ${randomBase_in_adapter} \
        -i "${sample_ID}.trimAdapt.fastq.tmp.gz" \
        -o "${sample_ID}.trimAdapt.fastq.gz" \
        2>> "${sample_ID}.trimAdapt.log"
      # The intermediate file is not a declared output; drop it once consumed.
      rm -f "${sample_ID}.trimAdapt.fastq.tmp.gz"
    else
      mv "${sample_ID}.trimAdapt.fastq.tmp.gz" "${sample_ID}.trimAdapt.fastq.gz"
    fi
    # paste joins each 4-line FASTQ record into one row; column 2 is the
    # sequence, whose length is tallied by awk.
    zcat "${sample_ID}.trimAdapt.fastq.gz" | paste - - - - | cut -f 2 | \
      awk '{a[length($1)]++}END{for(i in a){print i,a[i]}}' | sort -n \
      > "${sample_ID}.trimAdapt.lengthDistribute"
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_fastq = "${sample_ID}.trimAdapt.fastq.gz"
    File out_log = "${sample_ID}.trimAdapt.log"
    File out_lengthDistribute = "${sample_ID}.trimAdapt.lengthDistribute"
  }
}