{
  "host_image":"oss://database-shao/pathseq_database/pathseq_host.fa.img",
  "host_kmer":"oss://database-shao/pathseq_database/pathseq_host.bfi",
  "microbe_dict":"oss://database-shao/pathseq_database/pathseq_microbe.dict",
  "microbe_bwa_image":"oss://database-shao/pathseq_database/pathseq_microbe.fa.img",
  "microbe_taxonomy":"oss://database-shao/pathseq_database/pathseq_taxonomy.db",
  "pathseq_docker":"registry.cn-shanghai.aliyuncs.com/shaolab_docker/gatk4:4.2.6.1",
  "pathseq_cluster":"OnDemand bcs.ps.r.2xlarge img-ubuntu-vpc",
  "disk_size":"300",
  "fastp_docker":"registry.cn-shanghai.aliyuncs.com/shaolab_docker/teprof:1.5",
  "fastp_cluster":"OnDemand bcs.ps.r.2xlarge img-ubuntu-vpc",
  "trim_front1":"0",
  "trim_tail1":"0",
  "max_len1":"0",
  "trim_front2":"0",
  "trim_tail2":"0",
  "max_len2":"0",
  "adapter_sequence":"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
  "adapter_sequence_r2":"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
  "disable_adapter_trimming":"0",
  "length_required":"50",
  "length_required1":"20",
  "UMI":"0",
  "umi_len":"0",
  "umi_loc":"umi_loc",
  "qualified_quality_phred":"20",
  "disable_quality_filtering":"1"
}
{
  "{{ project_name }}.sample_id": "{{ sample_id }}",
  "{{ project_name }}.fastq1": "{{ fastq1 }}",
  "{{ project_name }}.fastq2": "{{ fastq2 }}",
  "{{ project_name }}.host_image": "{{ host_image }}",
  "{{ project_name }}.host_kmer": "{{ host_kmer }}",
  "{{ project_name }}.microbe_dict": "{{ microbe_dict }}",
  "{{ project_name }}.microbe_bwa_image": "{{ microbe_bwa_image }}",
  "{{ project_name }}.microbe_taxonomy": "{{ microbe_taxonomy }}",
  "{{ project_name }}.pathseq_docker": "{{ pathseq_docker }}",
  "{{ project_name }}.pathseq_cluster": "{{ pathseq_cluster }}",
  "{{ project_name }}.disk_size": "{{ disk_size }}",
  "{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
  "{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
  "{{ project_name }}.trim_front1": "{{ trim_front1 }}",
  "{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
  "{{ project_name }}.max_len1": "{{ max_len1 }}",
  "{{ project_name }}.trim_front2": "{{ trim_front2 }}",
  "{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
  "{{ project_name }}.max_len2": "{{ max_len2 }}",
  "{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
  "{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
  "{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
  "{{ project_name }}.length_required1": "{{ length_required1 }}",
  "{{ project_name }}.UMI": "{{ UMI }}",
  "{{ project_name }}.umi_loc": "{{ umi_loc }}",
  "{{ project_name }}.umi_len": "{{ umi_len }}",
  "{{ project_name }}.length_required": "{{ length_required }}",
  "{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
  "{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}"
}
# Task fastp: three-stage preprocessing of a paired-end FASTQ pair.
#
# Each stage either runs fastp or copies its input through unchanged,
# depending on an integer switch:
#   stage 1 (disable_quality_filtering): raw reads          -> *.tmp1.gz
#   stage 2 (UMI):                       *.tmp1.gz          -> *.tmp2.gz
#   stage 3 (disable_adapter_trimming):  *.tmp2.gz          -> final FASTQs
#
# Inputs
#   read1 / read2            paired FASTQ files
#   sample_id                prefix for every produced file name
#   adapter_sequence(_r2)    adapters passed to fastp in stage 3
#   umi_loc / umi_len        passed to fastp in stage 2 when UMI is non-zero
#   trim_front*/trim_tail*/max_len*   passed to every fastp invocation
#   length_required          passed as fastp -l in stage 3
#   qualified_quality_phred  passed as fastp -q in stage 3
#   length_required1         passed as fastp -u in stage 3
#   docker / cluster / disk_size      runtime settings
#
# Outputs
#   json, report             fastp JSON / HTML report (each fastp pass writes
#                            the same file names, so the last pass wins)
#   Trim_R1, Trim_R2         processed FASTQ pair
task fastp {
    File read1
    File read2
    String sample_id
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Fail fast (same settings as the pathseq task) so a stage that dies
        # half-way cannot hand a truncated file to the next stage.
        set -e
        set -o pipefail

        mkdir -p /cromwell_root/tmp/fastp/

        ##1.Disable_quality_filtering
        # NOTE(review): a value of 0 copies the reads through and any other
        # value runs fastp; going by the flag name this may be inverted -
        # confirm the intended meaning before changing it.
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # The sample_id placeholder must be interpolated here; the file names
            # have to match what stage 2 reads.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 8 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            # NOTE(review): the trim options are repeated on every fastp pass, so
            # non-zero values are presumably applied more than once - confirm.
            fastp --thread 8 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 8 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
        # NOTE(review): when all three stages take the copy branch no fastp pass
        # runs, so the JSON/HTML report files declared as outputs are never
        # written - decide whether that configuration should be supported.
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}
# Task pathseq: runs the GATK PathSeq pipeline on one paired-end sample.
#
# Steps, in order:
#   1. FastqToSam         FASTQ pair -> unaligned BAM in ./ubam
#   2. PathSeqFilterSpark filter with the host k-mer file and host BWA image
#   3. PathSeqBwaSpark    align remaining reads to the microbe BWA image
#   4. PathSeqScoreSpark  score alignments against the taxonomy file
#
# Inputs
#   sample_id                     file-name prefix and the -SM value for FastqToSam
#   fastq1 / fastq2               paired FASTQ files
#   host_image / host_kmer        host reference files for the filter step
#   microbe_dict / microbe_bwa_image / microbe_taxonomy   microbe reference files
#   docker / cluster / disk_size  runtime settings
#
# Outputs
#   pathseq_txt   scores table
#   pathseq_bam   BAM written by PathSeqScoreSpark --output
#   pathseq_sbi   ".sbi" file expected next to that BAM
#                 (presumably written by the Spark tool itself - confirm)
task pathseq {
    String sample_id
    File fastq1
    File fastq2
    File host_image
    File host_kmer
    File microbe_dict
    File microbe_bwa_image
    File microbe_taxonomy
    String disk_size
    String docker
    String cluster

    command <<<
        set -o pipefail
        set -e

        # Working directories. The unpaired alignment directory must be named
        # bwa_unpair_bam because that is the path the BWA and score steps use.
        mkdir -p ubam filter_log clean_pair_bam clean_unpaired_bam pathseq_result bwa_pair_bam bwa_unpair_bam

        gatk FastqToSam \
            -F1 ${fastq1} \
            -F2 ${fastq2} \
            -O ./ubam/${sample_id}.bam \
            -SM ${sample_id}

        # NOTE(review): the Java heap is fixed at 58g for all Spark steps; the
        # selected cluster instance is assumed to have enough memory - confirm.
        time gatk --java-options "-Xmx58g" PathSeqFilterSpark \
            --input ./ubam/${sample_id}.bam \
            --paired-output ./clean_pair_bam/${sample_id}_paired.bam \
            --unpaired-output ./clean_unpaired_bam/${sample_id}_unpaired.bam \
            --min-clipped-read-length 70 \
            --kmer-file ${host_kmer} \
            --filter-bwa-image ${host_image} \
            --filter-metrics filter_log/${sample_id}.log

        time gatk --java-options "-Xmx58g" PathSeqBwaSpark \
            --paired-input ./clean_pair_bam/${sample_id}_paired.bam \
            --unpaired-input ./clean_unpaired_bam/${sample_id}_unpaired.bam \
            --paired-output bwa_pair_bam/${sample_id}_bwa_paired.bam \
            --unpaired-output bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
            --microbe-bwa-image ${microbe_bwa_image} \
            --microbe-dict ${microbe_dict}

        time gatk --java-options "-Xmx58g" PathSeqScoreSpark \
            --paired-input bwa_pair_bam/${sample_id}_bwa_paired.bam \
            --unpaired-input bwa_unpair_bam/${sample_id}_bwa_unpaired.bam \
            --taxonomy-file ${microbe_taxonomy} \
            --scores-output pathseq_result/${sample_id}.pathseq.txt \
            --output pathseq_result/${sample_id}.pathseq_reads.bam \
            --min-score-identity 0.90 \
            --identity-margin 0.02
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File pathseq_txt = "pathseq_result/${sample_id}.pathseq.txt"
        File pathseq_bam = "pathseq_result/${sample_id}.pathseq_reads.bam"
        File pathseq_sbi = "pathseq_result/${sample_id}.pathseq_reads.bam.sbi"
    }
}
import "./tasks/fastp.wdl" as fastp | |||||
import "./tasks/pathseq.wdl" as pathseq | |||||
# Workflow: preprocess a paired-end sample with the fastp task, then pass the
# processed read pair to the pathseq task.
workflow {{ project_name }} {
    # --- sample ---------------------------------------------------------
    String sample_id
    File fastq1
    File fastq2

    # --- shared runtime -------------------------------------------------
    String disk_size

    # --- fastp: runtime -------------------------------------------------
    String fastp_docker
    String fastp_cluster

    # --- fastp: stage switches (integer flags) --------------------------
    Int disable_quality_filtering
    Int UMI
    Int disable_adapter_trimming

    # --- fastp: per-read trimming ---------------------------------------
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2

    # --- fastp: adapters, UMI and filter thresholds ---------------------
    String adapter_sequence
    String adapter_sequence_r2
    String umi_loc
    Int umi_len
    Int length_required
    Int length_required1
    Int qualified_quality_phred

    # --- pathseq: references and runtime --------------------------------
    File host_image
    File host_kmer
    File microbe_dict
    File microbe_bwa_image
    File microbe_taxonomy
    String pathseq_docker
    String pathseq_cluster

    # Step 1: read preprocessing.
    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = fastq1,
            read2 = fastq2,
            docker = fastp_docker,
            cluster = fastp_cluster,
            disk_size = disk_size,
            disable_quality_filtering = disable_quality_filtering,
            UMI = UMI,
            disable_adapter_trimming = disable_adapter_trimming,
            trim_front1 = trim_front1,
            trim_tail1 = trim_tail1,
            max_len1 = max_len1,
            trim_front2 = trim_front2,
            trim_tail2 = trim_tail2,
            max_len2 = max_len2,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2,
            umi_loc = umi_loc,
            umi_len = umi_len,
            length_required = length_required,
            length_required1 = length_required1,
            qualified_quality_phred = qualified_quality_phred
    }

    # Step 2: PathSeq on the reads produced by step 1.
    call pathseq.pathseq as pathseq {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            host_image = host_image,
            host_kmer = host_kmer,
            microbe_dict = microbe_dict,
            microbe_bwa_image = microbe_bwa_image,
            microbe_taxonomy = microbe_taxonomy,
            docker = pathseq_docker,
            cluster = pathseq_cluster,
            disk_size = disk_size
    }
}