瀏覽代碼

first commit

master
biochenglinliu 3 年之前
當前提交
932264f036
共有 6 個檔案被更改,包括 288 行新增,0 行刪除
  1. +0
    -0
      README.md
  2. +28
    -0
      defaults
  3. +31
    -0
      inputs
  4. +68
    -0
      tasks/fastp.wdl
  5. +79
    -0
      tasks/pathseq.wdl
  6. +82
    -0
      workflow.wdl

+ 0
- 0
README.md 查看文件


+ 28
- 0
defaults 查看文件

@@ -0,0 +1,28 @@
{
"host_image":"oss://database-shao/pathseq_database/pathseq_host.fa.img",
"host_kmer":"oss://database-shao/pathseq_database/pathseq_host.bfi",
"microbe_dict":"oss://database-shao/pathseq_database/pathseq_microbe.dict",
"microbe_bwa_image":"oss://database-shao/pathseq_database/pathseq_microbe.fa.img",
"microbe_taxonomy":"oss://database-shao/pathseq_database/pathseq_taxonomy.db",
"pathseq_docker":"registry.cn-shanghai.aliyuncs.com/shaolab_docker/gatk4:4.2.6.1",
"pathseq_cluster":"OnDemand bcs.ps.r.2xlarge img-ubuntu-vpc",
"disk_size":"300",
"fastp_docker":"registry.cn-shanghai.aliyuncs.com/shaolab_docker/teprof:1.5",
"fastp_cluster":"OnDemand bcs.ps.r.2xlarge img-ubuntu-vpc",
"trim_front1":"0",
"trim_tail1":"0",
"max_len1":"0",
"trim_front2":"0",
"trim_tail2":"0",
"max_len2":"0",
"adapter_sequence":"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2":"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"disable_adapter_trimming":"0",
"length_required":"50",
"length_required1":"20",
"UMI":"0",
"umi_len":"0",
"umi_loc":"umi_loc",
"qualified_quality_phred":"20",
"disable_quality_filtering":"1"
}

+ 31
- 0
inputs 查看文件

@@ -0,0 +1,31 @@
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fastq1": "{{ fastq1 }}",
"{{ project_name }}.fastq2": "{{ fastq2 }}",
"{{ project_name }}.host_image": "{{ host_image }}",
"{{ project_name }}.host_kmer": "{{ host_kmer }}",
"{{ project_name }}.microbe_dict": "{{ microbe_dict }}",
"{{ project_name }}.microbe_bwa_image": "{{ microbe_bwa_image }}",
"{{ project_name }}.microbe_taxonomy": "{{ microbe_taxonomy }}",
"{{ project_name }}.pathseq_docker": "{{ pathseq_docker }}",
"{{ project_name }}.pathseq_cluster": "{{ pathseq_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}"
}

+ 68
- 0
tasks/fastp.wdl 查看文件

@@ -0,0 +1,68 @@
# Pre-processes one paired-end sample with fastp in up to three sequential stages.
# Each stage either runs fastp or just copies its input forward, depending on an
# integer flag:
#   1. disable_quality_filtering : 0 -> copy the raw reads; otherwise run fastp
#                                  with only the front/tail/max_len trimming options.
#   2. UMI                       : 0 -> copy; otherwise run fastp -U with
#                                  umi_loc / umi_len.
#   3. disable_adapter_trimming  : 0 -> run fastp with adapter, length and quality
#                                  options; otherwise copy.
#
# Inputs
#   read1, read2            paired FASTQ files
#   sample_id               prefix used for every intermediate and output file
#   adapter_sequence(_r2)   adapter sequences handed to fastp in stage 3
#   umi_loc, umi_len        handed to fastp --umi_loc / --umi_len in stage 2
#   trim_front*, trim_tail*, max_len*
#                           handed unchanged to every fastp invocation
#   length_required         handed to fastp -l in stage 3
#   qualified_quality_phred handed to fastp -q in stage 3
#   length_required1        handed to fastp -u in stage 3
#   docker, cluster, disk_size
#                           runtime settings (image, instance type, data disk size)
#
# Outputs
#   json, report            fastp JSON / HTML report named after sample_id
#   Trim_R1, Trim_R2        final processed FASTQ pair
#
# NOTE(review): stage 1 runs fastp (without -Q) when disable_quality_filtering is
#   non-zero and skips it when the flag is 0. That reads as inverted relative to the
#   parameter name. It is left as-is because the shipped defaults set the flag to 1;
#   confirm the intent before changing it.
# NOTE(review): fastp's -u option is documented as the unqualified-percent limit,
#   so the name length_required1 looks misleading - confirm against the fastp docs.
# NOTE(review): every fastp invocation writes the same sample_id .json/.html, so a
#   later stage overwrites an earlier report. If all three stages take the copy
#   branch, no report is written and the json/report outputs will be missing.
task fastp {
    File read1
    File read2
    String sample_id
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Stop at the first failing command so a broken stage is not silently
        # followed by the next one (same convention as the pathseq task).
        set -o pipefail
        set -e

        mkdir -p /cromwell_root/tmp/fastp/

        ## 1. Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # The destination must use the interpolated sample id so that
            # stage 2 can find these files.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 8 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 2. UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 8 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 3. Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 8 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}

+ 79
- 0
tasks/pathseq.wdl 查看文件

@@ -0,0 +1,79 @@
# Runs the GATK PathSeq pipeline on one paired-end sample:
#   1. FastqToSam         - converts the FASTQ pair into an unaligned BAM.
#   2. PathSeqFilterSpark - filters the reads using the host k-mer file and the
#                           host BWA image, writing paired / unpaired BAMs plus
#                           a metrics file.
#   3. PathSeqBwaSpark    - aligns the filtered reads against the microbe BWA image.
#   4. PathSeqScoreSpark  - scores the alignments with the taxonomy file and writes
#                           the score table and the annotated reads BAM.
#
# Inputs
#   sample_id                     prefix for all intermediate and output files;
#                                 also used as the sample name (-SM) of the uBAM
#   fastq1, fastq2                paired FASTQ files
#   host_image, host_kmer         host reference files for the filter step
#   microbe_dict, microbe_bwa_image, microbe_taxonomy
#                                 microbe reference files for the align/score steps
#   disk_size, docker, cluster    runtime settings
#
# Outputs
#   pathseq_txt   PathSeq score table
#   pathseq_bam   reads BAM written by PathSeqScoreSpark
#   pathseq_sbi   the .sbi file expected next to pathseq_bam
#
# NOTE(review): every Spark step is started with a fixed 58 GB Java heap; the
#   instance type passed as cluster presumably needs to provide at least that
#   much memory - confirm.
task pathseq {
    String sample_id
    File fastq1
    File fastq2
    File host_image
    File host_kmer
    File microbe_dict
    File microbe_bwa_image
    File microbe_taxonomy

    String disk_size
    String docker
    String cluster

    command <<<
        set -o pipefail
        set -e

        # Create every directory the steps below write into. The unpaired
        # alignment directory must be named bwa_unpair_bam to match the
        # --unpaired-output / --unpaired-input paths used further down.
        mkdir -p ubam \
            filter_log \
            clean_pair_bam \
            clean_unpaired_bam \
            pathseq_result \
            bwa_pair_bam \
            bwa_unpair_bam

        gatk FastqToSam \
            -F1 ${fastq1} \
            -F2 ${fastq2} \
            -O ./ubam/${sample_id}.bam \
            -SM ${sample_id}

        time gatk --java-options "-Xmx58g" PathSeqFilterSpark \
            --input ./ubam/${sample_id}.bam \
            --paired-output ./clean_pair_bam/${sample_id}_paired.bam \
            --unpaired-output ./clean_unpaired_bam/${sample_id}_unpaired.bam \
            --min-clipped-read-length 70 \
            --kmer-file ${host_kmer} \
            --filter-bwa-image ${host_image} \
            --filter-metrics filter_log/${sample_id}.log

        time gatk --java-options "-Xmx58g" PathSeqBwaSpark \
            --paired-input ./clean_pair_bam/${sample_id}_paired.bam \
            --unpaired-input ./clean_unpaired_bam/${sample_id}_unpaired.bam \
            --paired-output bwa_pair_bam/${sample_id}_bwa_paired.bam \
            --unpaired-output bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
            --microbe-bwa-image ${microbe_bwa_image} \
            --microbe-dict ${microbe_dict}

        time gatk --java-options "-Xmx58g" PathSeqScoreSpark \
            --paired-input bwa_pair_bam/${sample_id}_bwa_paired.bam \
            --unpaired-input bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
            --taxonomy-file ${microbe_taxonomy} \
            --scores-output pathseq_result/${sample_id}.pathseq.txt \
            --output pathseq_result/${sample_id}.pathseq_reads.bam \
            --min-score-identity 0.90 \
            --identity-margin 0.02
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File pathseq_txt = "pathseq_result/${sample_id}.pathseq.txt"
        File pathseq_bam = "pathseq_result/${sample_id}.pathseq_reads.bam"
        File pathseq_sbi = "pathseq_result/${sample_id}.pathseq_reads.bam.sbi"
    }
}


+ 82
- 0
workflow.wdl 查看文件

@@ -0,0 +1,82 @@
import "./tasks/fastp.wdl" as fastp
import "./tasks/pathseq.wdl" as pathseq

# Two-step workflow: fastp pre-processing of one paired-end sample, followed by
# GATK PathSeq on the processed reads. The workflow name is a template
# placeholder that is filled in when the project is rendered.
workflow {{ project_name }} {
    # --- sample ---------------------------------------------------------
    String sample_id
    File fastq1
    File fastq2

    # --- shared runtime -------------------------------------------------
    # The same data-disk size is handed to both tasks.
    String disk_size

    # --- fastp runtime and options --------------------------------------
    String fastp_docker
    String fastp_cluster
    String adapter_sequence
    String adapter_sequence_r2
    String umi_loc
    Int UMI
    Int umi_len
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int length_required
    Int length_required1
    Int qualified_quality_phred
    Int disable_adapter_trimming
    Int disable_quality_filtering

    # --- PathSeq runtime and reference files ----------------------------
    String pathseq_docker
    String pathseq_cluster
    File host_image
    File host_kmer
    File microbe_dict
    File microbe_bwa_image
    File microbe_taxonomy

    # Step 1: pre-process the raw FASTQ pair.
    call fastp.fastp as fastp {
        input:
            sample_id=sample_id,
            read1=fastq1,
            read2=fastq2,
            docker=fastp_docker,
            cluster=fastp_cluster,
            disk_size=disk_size,
            adapter_sequence=adapter_sequence,
            adapter_sequence_r2=adapter_sequence_r2,
            UMI=UMI,
            umi_loc=umi_loc,
            umi_len=umi_len,
            trim_front1=trim_front1,
            trim_tail1=trim_tail1,
            max_len1=max_len1,
            trim_front2=trim_front2,
            trim_tail2=trim_tail2,
            max_len2=max_len2,
            length_required=length_required,
            length_required1=length_required1,
            qualified_quality_phred=qualified_quality_phred,
            disable_adapter_trimming=disable_adapter_trimming,
            disable_quality_filtering=disable_quality_filtering
    }

    # Step 2: run PathSeq on the FASTQ pair produced by step 1.
    call pathseq.pathseq as pathseq {
        input:
            sample_id=sample_id,
            fastq1=fastp.Trim_R1,
            fastq2=fastp.Trim_R2,
            host_image=host_image,
            host_kmer=host_kmer,
            microbe_dict=microbe_dict,
            microbe_bwa_image=microbe_bwa_image,
            microbe_taxonomy=microbe_taxonomy,
            docker=pathseq_docker,
            cluster=pathseq_cluster,
            disk_size=disk_size
    }
}

Loading…
取消
儲存