Przeglądaj źródła

错误修正

master
chenziyin 6 lat temu
rodzic
commit
50ebbe1fe2
1 zmienionych plików z 51 dodań i 37 usunięć
  1. +51
    -37
      tasks/ReadStats.wdl

+ 51
- 37
tasks/ReadStats.wdl Wyświetl plik

@@ -20,22 +20,18 @@ task ReadStats {

n_Total_Sequence=$(cat ${in_log_trimAdatper} | grep 'total reads' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')


echo "Pass1"
n_AdapterNotFound=$(cat ${in_log_trimAdatper} | grep 'reads failed due to too long' | cut -d ':' -f 2 | sed 's/ //g')
n_Total_forCount=$(bc<<<$n_Total_Sequence-$n_AdapterNotFound)
n_Total_forCount=$[$n_Total_Sequence-$n_AdapterNotFound]

n_Pass_trimAdatper=$(cat ${in_log_trimAdatper} | grep 'reads passed filter' | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
n_Adapter_dimer=$(bc<<<$n_Total_Sequence-$n_AdapterNotFound-$n_Pass_trimAdatper)
n_Adapter_dimer=$[$n_Total_Sequence-$n_AdapterNotFound-$n_Pass_trimAdatper]
n_Too_short=$(cat ${in_log_readFilter} | grep 'too short' | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
n_Low_quality_singleBase=$(cat ${in_log_readFilter} | grep 'low quality' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
n_Low_quality_tooManyN=$(cat ${in_log_readFilter} | grep 'too many N' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
n_Low_quality=$(bc<<<$n_Low_quality_singleBase+$n_Low_quality_tooManyN)
n_Low_quality=$[$n_Low_quality_singleBase+$n_Low_quality_tooManyN]
n_ForAlign=$(cat ${in_log_readFilter} | grep 'reads passed filter' | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')

echo "Pass2"
n_miRNA_mature=$(cat ${in_log_align_miRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g')
n_miRNA_hairpin=$(cat ${in_log_align_preMiRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g')
n_piRNA=$(cat ${in_log_align_piRNA} | grep 'at least one reported alignment' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g')
@@ -46,29 +42,47 @@ task ReadStats {

n_notGenomic=$(cat ${in_log_align_hg38} | grep 'reads that failed to align' | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g' )

echo "Pass3"
# mkdir -p /cromwell_root/tmp
echo "Pass3.1"

groupedReadCount=/cromwell_root/tmp/${sample_id}.trimAdapt.filter.align2RNA.grouped.readCount
echo "Pass3.2"

# cat ${in_sam_align_RNA} | head -n 4
echo "Pass3.3"

# cat ${in_sam_align_RNA} | grep -v '@' | awk '($2!=4)' | cut -f 3 | sed 's/.*;//g' | awk '{a[$1]++}END{for(i in a){printf "%s\t%d\n",i,a[i]}}' > $groupedReadCount

echo "Pass4"

# n_mRNA=$(cat $groupedReadCount | grep '^mRNA' | cut -f 2 )
# n_lncRNA=$(cat $groupedReadCount | grep '^long_non-coding_RNA' | cut -f 2 )
# n_rRNA=$(cat $groupedReadCount | grep '^ribosomal_RNA' | cut -f 2 )
# n_YRNA=$(cat $groupedReadCount | grep '^Y_RNA' | cut -f 2 )
# n_otsmall=$(cat $groupedReadCount | grep -E '^misc_RNA|small|guide_RNA|vault_RNA' | cut -f 2 | awk '{sum+=$1}END{print sum}')
# n_otTranscript=$(bc<<<$n_RNA-$n_mRNA-$n_lncRNA-$n_rRNA-$n_YRNA-$n_otsmall)

echo "Pass5"
groupedReadCount=${sample_id}.trimAdapt.filter.align2RNA.grouped.readCount
cat ${in_sam_align_RNA} | grep -v '@' | awk '($2!=4)' | cut -f 3 | sed 's/.*;//g' | awk '{a[$1]++}END{for(i in a){printf "%s\t%d\n",i,a[i]}}' > $groupedReadCount


if [ $(cat $groupedReadCount | grep '^mRNA' | wc -l ) -gt 0]
then
n_mRNA=$(cat $groupedReadCount | grep '^mRNA' | cut -f 2 )
else
n_mRNA=0
fi

if [ $(cat $groupedReadCount | grep '^long_non-coding_RNA' | wc -l ) -gt 0 ]
then
n_lncRNA=$(cat $groupedReadCount | grep '^long_non-coding_RNA' | cut -f 2 )
else
n_lncRNA=0
fi

if [ $(cat $groupedReadCount | grep '^ribosomal_RNA' | wc -l ) -gt 0 ]
then
n_rRNA=$(cat $groupedReadCount | grep '^ribosomal_RNA' | cut -f 2 )
else
n_rRNA=0
fi

if [ $(cat $groupedReadCount | grep '^Y_RNA' | wc -l ) -gt 0 ]
then
n_YRNA=$(cat $groupedReadCount | grep '^Y_RNA' | cut -f 2 )
else
n_YRNA=0
fi

if [$(cat $groupedReadCount | grep -E '^misc_RNA|small|guide_RNA|vault_RNA' | wc -l ) -gt 0 ]
then
n_otsmall=$(cat $groupedReadCount | grep -E '^misc_RNA|small|guide_RNA|vault_RNA' | cut -f 2 | awk '{sum+=$1}END{print sum}')
else
n_otsmall=0
fi

n_otTranscript=$[$n_RNA-$n_mRNA-$n_lncRNA-$n_rRNA-$n_YRNA-$n_otsmall]
file_output=${sample_id}.readStats
echo -e "Stage\tReadCount" > $file_output
@@ -80,16 +94,15 @@ task ReadStats {
echo -e "hairpin miRNA\t$n_miRNA_hairpin" >> $file_output
echo -e "piRNA\t$n_piRNA" >> $file_output
echo -e "tRNA\t$n_tRNA" >> $file_output
# echo -e "mRNA\t$n_mRNA" >> $file_output
# echo -e "lncRNA\t$n_lncRNA" >> $file_output
# echo -e "rRNA\t$n_rRNA" >> $file_output
# echo -e "YRNA\t$n_YRNA" >> $file_output
# echo -e "other small RNA\t$n_otsmall" >> $file_output
# echo -e "other from transcriptome\t$n_otTranscript" >> $file_output
echo -e "mRNA\t$n_mRNA" >> $file_output
echo -e "lncRNA\t$n_lncRNA" >> $file_output
echo -e "rRNA\t$n_rRNA" >> $file_output
echo -e "YRNA\t$n_YRNA" >> $file_output
echo -e "other small RNA\t$n_otsmall" >> $file_output
echo -e "other from transcriptome\t$n_otTranscript" >> $file_output
echo -e "other from human genome\t$n_otGenomic" >> $file_output
echo -e "not from human genome\t$n_notGenomic" >> $file_output

echo "Pass6"
>>>

runtime {
@@ -100,5 +113,6 @@ task ReadStats {

output {
File out="${sample_id}.readStats"
File out_groupedCount="${sample_id}.trimAdapt.filter.align2RNA.grouped.readCount"
}
}

Ładowanie…
Anuluj
Zapisz