|
|
|
|
|
|
|
|
# ReadStats: task whose shell section extracts read counts from several log
# files and writes them as a two-column table (Stage <TAB> ReadCount) to
# "<sample>.readStats", which is then declared as the task output `out`.
# NOTE(review): this text looks like a two-column diff rendering of two
# revisions of the same task -- every line ends in a stray "|", most
# declarations appear twice, and names differ between the copies
# (sample_ID vs sample_id, Total_input vs n_Total_Sequence, ...). The line
# that opens the command section and the body of runtime { } are not visible
# here. Confirm against the real .wdl file before relying on these notes.
task ReadStats { |
|
|
task ReadStats { |
|
|
|
|
|
|
|
|
# Sample name; used below as the prefix of the ".readStats" output file.
String sample_ID |
|
|
|
|
|
|
|
|
# Same role as sample_ID above, spelled in lower case (other revision, presumably).
String sample_id |
|
|
# Log that is grepped for 'total reads', 'reads passed filter' and
# 'reads failed due to too long'.
# NOTE(review): "trimAdatper" looks like a typo for "trimAdapter", but it is an
# input name that callers must match -- do not rename it in isolation.
File in_log_trimAdatper |
|
|
File in_log_trimAdatper |
|
|
# Log that is grepped for 'too short', 'low quality', 'too many N' and
# 'reads passed filter'.
File in_log_readFilter |
|
|
File in_log_readFilter |
|
|
# Alignment log read only by the Align_miRNA_mature assignment.
File in_log_align_mature |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Alignment logs; each is grepped for 'at least one reported alignment'
# (in_log_align_hg38 is additionally grepped for 'reads that failed to align').
File in_log_align_miRNA |
|
|
|
|
|
File in_log_align_preMiRNA |
|
|
|
|
|
File in_log_align_piRNA |
|
|
|
|
|
File in_log_align_tRNA |
|
|
|
|
|
File in_log_align_RNA |
|
|
|
|
|
File in_log_align_hg38 |
|
|
|
|
|
# Referenced only from commented-out commands in the visible text.
File in_sam_align_RNA |
|
|
|
|
|
|
|
|
# cluster_config and disk_size are not used in any visible line; presumably
# they feed the runtime section, whose body is not shown -- TODO confirm.
String cluster_config |
|
|
String cluster_config |
|
|
String disk_size |
|
|
String disk_size |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Shell section starts somewhere above (its opening line is not visible).
# With pipefail and -e both enabled, any `var=$(... | grep ... | ...)` below
# whose grep matches nothing returns non-zero and stops the script.
# NOTE(review): confirm that every pattern is guaranteed to occur in its log.
set -o pipefail |
|
|
set -o pipefail |
|
|
set -e |
|
|
set -e |
|
|
|
|
|
|
|
|
# Common extraction recipe used throughout: pick one matching line, keep the
# text after the first ':' and delete all spaces.
# ${in_log_...} / ${sample_...} are presumably WDL placeholders filled in with
# the task inputs before the shell runs -- TODO confirm.
# First 'total reads' line of the trimming log.
Total_input=$(cat ${in_log_trimAdatper} | grep 'total reads' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# Same extraction as Total_input, under the other revision's name.
n_Total_Sequence=$(cat ${in_log_trimAdatper} | grep 'total reads' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Last 'reads passed filter' line of the trimming log (tail, not head).
Pass_trimAdatper=$(cat ${in_log_trimAdatper} | grep 'reads passed filter' | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
# Total_input minus Pass_trimAdatper, evaluated by bc.
# NOTE(review): the here-string is unquoted; an empty operand would hand bc a
# malformed expression.
Adapter_dimer=$(bc<<<$Total_input-$Pass_trimAdatper) |
|
|
|
|
|
|
|
|
# Progress marker printed to stdout.
echo "Pass1" |
|
|
|
|
|
# NOTE(review): unlike the neighbouring extractions there is no head/tail
# here, so several matching lines would yield a multi-line value.
n_AdapterNotFound=$(cat ${in_log_trimAdatper} | grep 'reads failed due to too long' | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
# Computed but not written to any output file in the visible text.
n_Total_forCount=$(bc<<<$n_Total_Sequence-$n_AdapterNotFound) |
|
|
|
|
|
|
|
|
# Last 'too short' line of the read-filter log.
Too_short=$(cat ${in_log_readFilter} | grep 'too short' | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# Same extraction as Pass_trimAdatper, under the other revision's name.
n_Pass_trimAdatper=$(cat ${in_log_trimAdatper} | grep 'reads passed filter' | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
# Differs from Adapter_dimer above: n_AdapterNotFound is subtracted as well.
n_Adapter_dimer=$(bc<<<$n_Total_Sequence-$n_AdapterNotFound-$n_Pass_trimAdatper) |
|
|
|
|
|
|
|
|
|
|
|
# Same extraction as Too_short.
n_Too_short=$(cat ${in_log_readFilter} | grep 'too short' | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
# First 'low quality' and first 'too many N' lines of the read-filter log;
# their sum is reported as a single low-quality count.
n_Low_quality_singleBase=$(cat ${in_log_readFilter} | grep 'low quality' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
n_Low_quality_tooManyN=$(cat ${in_log_readFilter} | grep 'too many N' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
n_Low_quality=$(bc<<<$n_Low_quality_singleBase+$n_Low_quality_tooManyN) |
|
|
|
|
|
# First 'reads passed filter' line of the read-filter log.
# Not written to $file_output in the visible text.
n_ForAlign=$(cat ${in_log_readFilter} | grep 'reads passed filter' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# Same three low-quality values as above, under the other revision's names.
Low_quality_singleBase=$(cat ${in_log_readFilter} | grep 'low quality' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
Low_quality_tooManyN=$(cat ${in_log_readFilter} | grep 'too many N' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
Low_quality=$(bc<<<$Low_quality_singleBase+$Low_quality_tooManyN) |
|
|
|
|
|
|
|
|
echo "Pass2" |
|
|
|
|
|
# Alignment counts: first 'at least one reported alignment' line of each log;
# the extra cut on '(' drops the parenthesised part that follows the number.
n_miRNA_mature=$(cat ${in_log_align_miRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
n_miRNA_hairpin=$(cat ${in_log_align_preMiRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
n_piRNA=$(cat ${in_log_align_piRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
n_tRNA=$(cat ${in_log_align_tRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# Same extraction as n_ForAlign, under the other revision's name.
ForAlign=$(cat ${in_log_readFilter} | grep 'reads passed filter' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# n_RNA is consumed only by the commented-out n_otTranscript formula below.
n_RNA=$(cat ${in_log_align_RNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
# Reads with an alignment in the hg38 log; reported as "other from human genome".
# NOTE(review): n_otGenomic and n_notGenomic (below) differ by one letter.
n_otGenomic=$(cat ${in_log_align_hg38} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# Only value taken from in_log_align_mature; reported as "Mature miRNA".
Align_miRNA_mature=$(cat ${in_log_align_mature} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g') |
|
|
|
|
|
|
|
|
# First 'reads that failed to align' line of the hg38 log; reported as
# "not from human genome".
n_notGenomic=$(cat ${in_log_align_hg38} | grep 'reads that failed to align' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g' ) |
|
|
|
|
|
|
|
|
# Report written under the sample_ID spelling: '>' creates/truncates the file
# with the header row, the following '>>' lines append one row each.
echo -e "Stage\tReadCount" > ${sample_ID}.readStats |
|
|
|
|
|
echo -e "Total Input\t$Total_input" >> ${sample_ID}.readStats |
|
|
|
|
|
echo -e "Adapter Dimer\t$Adapter_dimer" >> ${sample_ID}.readStats |
|
|
|
|
|
echo -e "Too Short\t$Too_short" >> ${sample_ID}.readStats |
|
|
|
|
|
echo -e "Low Quality\t$Low_quality" >> ${sample_ID}.readStats |
|
|
|
|
|
echo -e "For Align\t$ForAlign" >> ${sample_ID}.readStats |
|
|
|
|
|
echo -e "Mature miRNA\t$Align_miRNA_mature" >> ${sample_ID}.readStats |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
echo "Pass3" |
|
|
|
|
|
|
|
|
|
|
|
# mkdir -p /cromwell_root/tmp |
|
|
|
|
|
echo "Pass3.1" |
|
|
|
|
|
|
|
|
|
|
|
# Path only: every command that would create or read this file is commented
# out below, as is the mkdir for its directory above.
groupedReadCount=/cromwell_root/tmp/${sample_id}.trimAdapt.filter.align2RNA.grouped.readCount |
|
|
|
|
|
echo "Pass3.2" |
|
|
|
|
|
|
|
|
|
|
|
# cat ${in_sam_align_RNA} | head -n 4 |
|
|
|
|
|
echo "Pass3.3" |
|
|
|
|
|
|
|
|
|
|
|
# cat ${in_sam_align_RNA} | grep -v '@' | awk '($2!=4)' | cut -f 3 | sed 's/.*;//g' | awk '{a[$1]++}END{for(i in a){printf "%s\t%d\n",i,a[i]}}' > $groupedReadCount |
|
|
|
|
|
|
|
|
|
|
|
echo "Pass4" |
|
|
|
|
|
|
|
|
|
|
|
# n_mRNA=$(cat $groupedReadCount | grep '^mRNA' | cut -f 2 ) |
|
|
|
|
|
# n_lncRNA=$(cat $groupedReadCount | grep '^long_non-coding_RNA' | cut -f 2 ) |
|
|
|
|
|
# n_rRNA=$(cat $groupedReadCount | grep '^ribosomal_RNA' | cut -f 2 ) |
|
|
|
|
|
# n_YRNA=$(cat $groupedReadCount | grep '^Y_RNA' | cut -f 2 ) |
|
|
|
|
|
# n_otsmall=$(cat $groupedReadCount | grep -E '^misc_RNA|small|guide_RNA|vault_RNA' | cut -f 2 | awk '{sum+=$1}END{print sum}') |
|
|
|
|
|
# n_otTranscript=$(bc<<<$n_RNA-$n_mRNA-$n_lncRNA-$n_rRNA-$n_YRNA-$n_otsmall) |
|
|
|
|
|
|
|
|
|
|
|
echo "Pass5" |
|
|
|
|
|
|
|
|
|
|
|
# Report written under the sample_id spelling; same header row, then one
# appended row per n_* value. The per-biotype rows stay commented out because
# the variables they print are never assigned in the visible text.
file_output=${sample_id}.readStats |
|
|
|
|
|
echo -e "Stage\tReadCount" > $file_output |
|
|
|
|
|
echo -e "adapter not found\t$n_AdapterNotFound" >> $file_output |
|
|
|
|
|
echo -e "adapter dimer\t$n_Adapter_dimer" >> $file_output |
|
|
|
|
|
echo -e "too short\t$n_Too_short" >> $file_output |
|
|
|
|
|
echo -e "low sequencing quality\t$n_Low_quality" >> $file_output |
|
|
|
|
|
echo -e "mature miRNA\t$n_miRNA_mature" >> $file_output |
|
|
|
|
|
echo -e "hairpin miRNA\t$n_miRNA_hairpin" >> $file_output |
|
|
|
|
|
echo -e "piRNA\t$n_piRNA" >> $file_output |
|
|
|
|
|
echo -e "tRNA\t$n_tRNA" >> $file_output |
|
|
|
|
|
# echo -e "mRNA\t$n_mRNA" >> $file_output |
|
|
|
|
|
# echo -e "lncRNA\t$n_lncRNA" >> $file_output |
|
|
|
|
|
# echo -e "rRNA\t$n_rRNA" >> $file_output |
|
|
|
|
|
# echo -e "YRNA\t$n_YRNA" >> $file_output |
|
|
|
|
|
# echo -e "other small RNA\t$n_otsmall" >> $file_output |
|
|
|
|
|
# echo -e "other from transcriptome\t$n_otTranscript" >> $file_output |
|
|
|
|
|
echo -e "other from human genome\t$n_otGenomic" >> $file_output |
|
|
|
|
|
echo -e "not from human genome\t$n_notGenomic" >> $file_output |
|
|
|
|
|
|
|
|
|
|
|
echo "Pass6" |
|
|
# End of the shell section (one closing marker per revision).
>>> |
|
|
>>> |
|
|
|
|
|
|
|
|
# Runtime settings; the body is not visible in this text.
runtime { |
|
|
runtime { |
|
|
|
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# Task output: the ".readStats" table written by the shell section. Each
# revision names the file after its own spelling of the sample input.
output { |
|
|
output { |
|
|
File out="${sample_ID}.readStats" |
|
|
|
|
|
|
|
|
File out="${sample_id}.readStats" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |