浏览代码

Initial version

master
chenziyin 6 年前
父节点
当前提交
34454c43ab
共有 5 个文件被更改,包括 214 次插入 和 0 次删除
  1. +42
    -0
      tasks/Align.wdl
  2. +27
    -0
      tasks/Quantification.wdl
  3. +41
    -0
      tasks/ReadFilter.wdl
  4. +49
    -0
      tasks/ReadStats.wdl
  5. +55
    -0
      tasks/TrimAdapt.wdl

+ 42
- 0
tasks/Align.wdl 查看文件

@@ -0,0 +1,42 @@
# Align: run bowtie on one sample's FASTQ against a prebuilt index.
#
# Inputs
#   sample_ID                 - prefix for every file written by this task
#   in_fastq                  - reads to align (passed to bowtie with -q, i.e. FASTQ input)
#   dir_index / prefix_index  - joined as "<dir_index>/<prefix_index>" to form the
#                               bowtie index basename
#   sum_unmatch_quality_limit - value handed to bowtie's -e option
#   docker, cluster_config, disk_size - forwarded to the runtime section
#
# Outputs
#   out_sam   - <sample_ID>.align2mature.sam         (bowtie -S output)
#   out_fastq - <sample_ID>.matureUnaligned.fastq    (file given to bowtie --un)
#   out_log   - <sample_ID>.align2mature.log         (bowtie's stderr)
task Align {
    String sample_ID
    File in_fastq

    File dir_index
    String prefix_index
    Int sum_unmatch_quality_limit

    String docker
    String cluster_config
    String disk_size

    command <<<
        # Abort on the first failing command, including failures inside pipelines.
        set -eo pipefail

        # Use every processor reported by nproc.
        n_threads=$(nproc)

        bowtie \
            --threads $n_threads \
            -e ${sum_unmatch_quality_limit} \
            --un ${sample_ID}.matureUnaligned.fastq \
            ${dir_index}/${prefix_index} \
            -q ${in_fastq} \
            -S ${sample_ID}.align2mature.sam \
            2> ${sample_ID}.align2mature.log
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_sam = "${sample_ID}.align2mature.sam"
        File out_fastq = "${sample_ID}.matureUnaligned.fastq"
        File out_log = "${sample_ID}.align2mature.log"
    }
}

+ 27
- 0
tasks/Quantification.wdl 查看文件

@@ -0,0 +1,27 @@
# Quantification: build a per-reference read-count table from a SAM file.
#
# Inputs
#   sample_ID      - prefix of the output file name
#   in_sam         - SAM file to summarise
#   cluster_config - forwarded to runtime.cluster
#   disk_size      - size inserted into the runtime.dataDisk string
#
# Output
#   out_readCount  - <sample_ID>.matureMiR.readCount, a tab-separated table with the
#                    header "ID.miRNA<TAB>ReadCount" followed by one sorted line per
#                    reference name (SAM column 3).
#
# Only records whose FLAG (SAM column 2) equals 0 are counted; header lines
# (starting with '@') are ignored.
#
# NOTE(review): unlike the sibling tasks, this task sets no docker image in its
# runtime section, so the tools used here must exist on the backend's default image.
task Quantification {
    String sample_ID
    File in_sam

    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        echo -e "ID.miRNA\tReadCount" > ${sample_ID}.matureMiR.readCount

        # A single awk pass skips header lines and tallies FLAG==0 records.
        # This replaces "cat | grep -v '^@' | awk | awk": grep exits 1 when it
        # selects no lines, which under pipefail + set -e aborted the task for a
        # SAM file without alignment records. Now such input yields a table that
        # contains only the header line.
        awk '!/^@/ && $2 == 0 { hits[$3]++ } END { for (ref in hits) printf "%s\t%d\n", ref, hits[ref] }' ${in_sam} \
            | sort >> ${sample_ID}.matureMiR.readCount
    >>>

    runtime {
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_readCount = "${sample_ID}.matureMiR.readCount"
    }
}

+ 41
- 0
tasks/ReadFilter.wdl 查看文件

@@ -0,0 +1,41 @@
# ReadFilter: filter reads with fastp; adapter trimming is switched off (-A).
#
# Inputs
#   sample_ID                 - prefix for the files written by this task
#   in_fastq                  - reads to filter
#   qualified_quality_phred   - value for fastp --qualified_quality_phred
#   unqualified_percent_limit - value for fastp --unqualified_percent_limit
#   n_base_limit              - value for fastp --n_base_limit
#   length_required           - value for fastp --length_required
#   docker, cluster_config, disk_size - forwarded to the runtime section
#
# Outputs
#   out_fastq - <sample_ID>.trimAdapt.filter.fastq.gz  (fastp -o output)
#   out_log   - <sample_ID>.filter.log                 (fastp's stderr)
task ReadFilter {
    String sample_ID
    File in_fastq

    Int qualified_quality_phred
    Int unqualified_percent_limit
    Int n_base_limit
    Int length_required

    String docker
    String cluster_config
    String disk_size

    command <<<
        # Abort on the first failing command, including failures inside pipelines.
        set -eo pipefail

        # Use every processor reported by nproc.
        n_threads=$(nproc)

        fastp \
            -A \
            --thread $n_threads \
            -i ${in_fastq} \
            -o ${sample_ID}.trimAdapt.filter.fastq.gz \
            --qualified_quality_phred ${qualified_quality_phred} \
            --unqualified_percent_limit ${unqualified_percent_limit} \
            --n_base_limit ${n_base_limit} \
            --length_required ${length_required} \
            2> ${sample_ID}.filter.log
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_fastq = "${sample_ID}.trimAdapt.filter.fastq.gz"
        File out_log = "${sample_ID}.filter.log"
    }
}

+ 49
- 0
tasks/ReadStats.wdl 查看文件

@@ -0,0 +1,49 @@
# ReadStats: collect per-stage read counts from the logs of the upstream tasks
# into one tab-separated table.
#
# Inputs
#   sample_ID           - prefix of the output file name
#   in_log_trimAdatper  - log searched for 'total reads' (first match) and
#                         'reads passed filter' (last match)
#   in_log_readFilter   - log searched for 'too short', 'low quality',
#                         'too many N' and 'reads passed filter'
#   in_log_align_mature - log searched for bowtie's "at least one ... alignment" line
#   cluster_config      - forwarded to runtime.cluster
#   disk_size           - size inserted into the runtime.dataDisk string
#
# Output
#   out - <sample_ID>.readStats with the header "Stage<TAB>ReadCount" and the rows
#         Total Input, Adapter Dimer, Too Short, Low Quality, For Align, Mature miRNA.
#
# Every count is the text after the first ':' on the matching log line with spaces
# removed. Because of set -e / pipefail, a log that lacks one of the expected lines
# makes the task fail rather than report an empty value.
#
# NOTE(review): this task sets no docker image in its runtime section, so the
# command is limited to shell built-ins and basic text utilities.
task ReadStats {
    String sample_ID
    File in_log_trimAdatper
    File in_log_readFilter
    File in_log_align_mature
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        Total_input=$(grep 'total reads' ${in_log_trimAdatper} | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')

        # Last 'reads passed filter' line of the log is used (tail -n 1).
        Pass_trimAdapter=$(grep 'reads passed filter' ${in_log_trimAdatper} | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')
        # Shell arithmetic instead of bc: the values are plain integers and this
        # removes the dependency on an external calculator being installed.
        Adapter_dimer=$((Total_input - Pass_trimAdapter))

        Too_short=$(grep 'too short' ${in_log_readFilter} | tail -n 1 | cut -d ':' -f 2 | sed 's/ //g')

        Low_quality_singleBase=$(grep 'low quality' ${in_log_readFilter} | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
        Low_quality_tooManyN=$(grep 'too many N' ${in_log_readFilter} | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')
        Low_quality=$((Low_quality_singleBase + Low_quality_tooManyN))

        ForAlign=$(grep 'reads passed filter' ${in_log_readFilter} | head -n 1 | cut -d ':' -f 2 | sed 's/ //g')

        # Accept both "at least one reported alignment" and "at least one alignment":
        # the wording of this summary line differs between Bowtie 1 releases, and a
        # non-matching grep would abort the task. The trailing "(xx.xx%)" is cut off.
        Align_miRNA_mature=$(grep -E 'at least one (reported )?alignment' ${in_log_align_mature} | head -n 1 | cut -d ':' -f 2 | cut -d '(' -f 1 | sed 's/ //g')

        # Write the whole table through a single redirection.
        {
            echo -e "Stage\tReadCount"
            echo -e "Total Input\t$Total_input"
            echo -e "Adapter Dimer\t$Adapter_dimer"
            echo -e "Too Short\t$Too_short"
            echo -e "Low Quality\t$Low_quality"
            echo -e "For Align\t$ForAlign"
            echo -e "Mature miRNA\t$Align_miRNA_mature"
        } > ${sample_ID}.readStats
    >>>

    runtime {
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out = "${sample_ID}.readStats"
    }
}

+ 55
- 0
tasks/TrimAdapt.wdl 查看文件

@@ -0,0 +1,55 @@
# TrimAdapt: remove the adapter from reads with fastp, optionally trim a fixed
# number of bases from both read ends, and tabulate the resulting read lengths.
#
# Inputs
#   sample_ID             - prefix for every file written by this task
#   in_fastq              - reads to trim
#   adapter_seq           - sequence passed to fastp --adapter_sequence
#   randomBase_in_adapter - when greater than 0, this many bases are trimmed from the
#                           front and the tail of every read in a second fastp pass
#   docker, cluster_config, disk_size - forwarded to the runtime section
#
# Outputs
#   out_fastq            - <sample_ID>.trimAdapt.fastq.gz
#   out_log              - <sample_ID>.trimAdapt.log (stderr of the fastp run(s),
#                          preceded by short section titles)
#   out_lengthDistribute - <sample_ID>.trimAdapt.lengthDistribute, one
#                          "<length> <count>" line per read length, sorted numerically
task TrimAdapt {
    String sample_ID
    File in_fastq

    String adapter_seq
    Int randomBase_in_adapter

    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)

        # Pass 1: adapter trimming only; -Q and -L turn off fastp's quality and
        # length filters.
        # The space before the continuation backslash after -L is required:
        # without it the option is glued to the next line unless that line
        # happens to start with whitespace.
        echo -e "Trim adapter's invariant part." > ${sample_ID}.trimAdapt.log
        fastp --thread $nt -Q -L \
            --adapter_sequence ${adapter_seq} \
            -i ${in_fastq} \
            -o ${sample_ID}.trimAdapt.fastq.tmp.gz \
            2>> ${sample_ID}.trimAdapt.log

        if [ ${randomBase_in_adapter} -gt 0 ]
        then
            # Pass 2: fixed-length trimming at both ends; adapter trimming (-A) and
            # both filters are disabled.
            echo -e "\nTrim ${randomBase_in_adapter} random base from both sides\n" >> ${sample_ID}.trimAdapt.log
            fastp --thread $nt -A -Q -L \
                --trim_front1 ${randomBase_in_adapter} --trim_tail1 ${randomBase_in_adapter} \
                -i ${sample_ID}.trimAdapt.fastq.tmp.gz \
                -o ${sample_ID}.trimAdapt.fastq.gz \
                2>> ${sample_ID}.trimAdapt.log
            # The intermediate file is not a declared output; remove it so it does
            # not occupy the data disk.
            rm -f ${sample_ID}.trimAdapt.fastq.tmp.gz
        else
            mv ${sample_ID}.trimAdapt.fastq.tmp.gz ${sample_ID}.trimAdapt.fastq.gz
        fi

        # Join each 4-line FASTQ record into one tab-separated row, keep the
        # sequence (field 2) and count reads per sequence length.
        zcat ${sample_ID}.trimAdapt.fastq.gz | paste - - - - | cut -f 2 | \
            awk '{a[length($1)]++}END{for(i in a){print i,a[i]}}' | sort -n \
            > ${sample_ID}.trimAdapt.lengthDistribute
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_fastq = "${sample_ID}.trimAdapt.fastq.gz"
        File out_log = "${sample_ID}.trimAdapt.log"
        File out_lengthDistribute = "${sample_ID}.trimAdapt.lengthDistribute"
    }
}

正在加载...
取消
保存