@@ -0,0 +1 @@ | |||
word.docx |
@@ -0,0 +1,31 @@ | |||
{ | |||
"starfusion_database_dir":"oss://pgx-reference-data/reference/starfusion/ctat_genome_lib_build_dir/", | |||
"starfusion_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/starfusion:1.10.0", | |||
"starfusion_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc", | |||
"disk_size":"200", | |||
"fastp_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6", | |||
"fastp_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc", | |||
"trim_front1":"0", | |||
"trim_tail1":"0", | |||
"max_len1":"0", | |||
"trim_front2":"0", | |||
"trim_tail2":"0", | |||
"max_len2":"0", | |||
"adapter_sequence":"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA", | |||
"adapter_sequence_r2":"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT", | |||
"disable_adapter_trimming":"0", | |||
"length_required":"50", | |||
"length_required1":"20", | |||
"UMI":"0", | |||
"umi_len":"0", | |||
"umi_loc":"umi_loc", | |||
"qualified_quality_phred":"20", | |||
"disable_quality_filtering":"1", | |||
"STAR_INDEX_DIR":"oss://pgx-reference-data/reference/arriba_hg38_v37/STAR_index_hg38_GENCODE37/", | |||
"ASSEMBLY_FA":"oss://pgx-result/chenqingwang/arriba/hg38.fa", | |||
"ANNOTATION_GTF":"oss://pgx-result/chenqingwang/arriba/GENCODE37.gtf", | |||
"arriba_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/arriba:2.1.0", | |||
"arriba_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc", | |||
"mixcr_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/mixcr:3", | |||
"mixcr_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc" | |||
} |
@@ -0,0 +1,34 @@ | |||
{ | |||
"{{ project_name }}.sample_id": "{{ sample_id }}", | |||
"{{ project_name }}.fastq1": "{{ fastq1 }}", | |||
"{{ project_name }}.fastq2": "{{ fastq2 }}", | |||
"{{ project_name }}.starfusion_database_dir": "{{ starfusion_database_dir }}", | |||
"{{ project_name }}.starfusion_docker": "{{ starfusion_docker }}", | |||
"{{ project_name }}.starfusion_cluster": "{{ starfusion_cluster }}", | |||
"{{ project_name }}.disk_size": "{{ disk_size }}", | |||
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}", | |||
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}", | |||
"{{ project_name }}.trim_front1": "{{ trim_front1 }}", | |||
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}", | |||
"{{ project_name }}.max_len1": "{{ max_len1 }}", | |||
"{{ project_name }}.trim_front2": "{{ trim_front2 }}", | |||
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}", | |||
"{{ project_name }}.max_len2": "{{ max_len2 }}", | |||
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}", | |||
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}", | |||
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}", | |||
"{{ project_name }}.length_required1": "{{ length_required1 }}", | |||
"{{ project_name }}.UMI": "{{ UMI }}", | |||
"{{ project_name }}.umi_loc": "{{ umi_loc }}", | |||
"{{ project_name }}.umi_len": "{{ umi_len }}", | |||
"{{ project_name }}.length_required": "{{ length_required }}", | |||
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}", | |||
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}", | |||
"{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}", | |||
"{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}", | |||
"{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}", | |||
"{{ project_name }}.arriba_docker": "{{ arriba_docker }}", | |||
"{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}", | |||
"{{ project_name }}.mixcr_docker": "{{ mixcr_docker }}", | |||
"{{ project_name }}.mixcr_cluster": "{{ mixcr_cluster }}" | |||
} |
@@ -0,0 +1,70 @@ | |||
# Aligns a paired-end sample with STAR (chimeric alignments are kept inside the
# BAM) and then calls gene fusions on that BAM with arriba.
#
# Inputs
#   sample_id        prefix of every file written below ./output/
#   fastq1, fastq2   paired FASTQ files; STAR reads them through zcat
#   STAR_INDEX_DIR   handed to STAR as --genomeDir
#   ASSEMBLY_FA      handed to arriba as -a
#   ANNOTATION_GTF   handed to arriba as -g
#   disk_size        size field of the dataDisk runtime attribute
#   docker, cluster  runtime attributes of the same name
#
# Outputs
#   arriba_result    every *.tsv found in ./output/
#   arriba_bam       every *.bam found in ./output/
task arriba {
    String sample_id
    File fastq1
    File fastq2

    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    String docker
    String cluster
    String disk_size

    command <<<
        set -eo pipefail

        mkdir ./output/

        # Step 1: alignment; output is an unsorted, uncompressed BAM.
        STAR \
            --runThreadN 16 \
            --genomeDir ${STAR_INDEX_DIR} \
            --genomeLoad NoSharedMemory \
            --readFilesIn ${fastq1} ${fastq2} \
            --readFilesCommand zcat \
            --outSAMtype BAM Unsorted \
            --outSAMunmapped Within \
            --outBAMcompression 0 \
            --outFilterMultimapNmax 50 \
            --peOverlapNbasesMin 10 \
            --alignSplicedMateMapLminOverLmate 0.5 \
            --alignSJstitchMismatchNmax 5 -1 5 5 \
            --chimSegmentMin 10 \
            --chimOutType WithinBAM HardClip \
            --chimJunctionOverhangMin 10 \
            --chimScoreDropMax 30 \
            --chimScoreJunctionNonGTAG 0 \
            --chimScoreSeparation 1 \
            --chimSegmentReadGapMax 3 \
            --chimMultimapNmax 50 \
            --outFileNamePrefix ./output/${sample_id}.

        # Step 2: fusion calling on the BAM produced above. The same
        # known-fusions file is supplied to both -k and -t.
        /arriba_v2.1.0/arriba \
            -x ./output/${sample_id}.Aligned.out.bam \
            -o ./output/${sample_id}_fusions.tsv \
            -O ./output/${sample_id}_fusions.discarded.tsv \
            -a ${ASSEMBLY_FA} \
            -g ${ANNOTATION_GTF} \
            -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
            -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        Array[File] arriba_result = glob("./output/*.tsv")
        Array[File] arriba_bam = glob("./output/*.bam")
    }
}
@@ -0,0 +1,68 @@ | |||
# Pre-processes a paired-end sample with fastp in up to three chained passes.
# Every pass either runs fastp or copies its input through unchanged, so the
# next pass always finds its expected input file:
#   pass 1 (tmp1)   plain trimming; skipped when disable_quality_filtering is 0
#   pass 2 (tmp2)   UMI extraction (-U); skipped when UMI is 0
#   pass 3 (final)  adapter trimming plus -l/-q/-u filtering; runs only when
#                   disable_adapter_trimming is 0
#
# Inputs
#   read1, read2               paired FASTQ files
#   sample_id                  prefix of all intermediate and final file names
#   trim_front1/2, trim_tail1/2, max_len1/2
#                              forwarded to the fastp options of the same name
#                              in every pass that runs
#   umi_loc, umi_len           forwarded to --umi_loc / --umi_len in pass 2
#   adapter_sequence, adapter_sequence_r2
#                              forwarded to the fastp options of the same name
#   length_required            forwarded to fastp -l in pass 3
#   qualified_quality_phred    forwarded to fastp -q in pass 3
#   length_required1           forwarded to fastp -u in pass 3
#   disk_size, docker, cluster runtime attributes
#
# Outputs
#   json, report               fastp JSON/HTML report; each pass that runs
#                              overwrites them, so they describe the last one
#   Trim_R1, Trim_R2           final FASTQ pair
#
# NOTE(review): pass 1 is skipped when disable_quality_filtering is 0, which
# reads as inverted relative to the flag name - confirm the intended meaning
# before changing it; behaviour is kept as-is here.
# NOTE(review): the JSON/HTML reports are written only by fastp itself. If all
# three passes take the copy branch, the json/report outputs will not exist.
task fastp {
    File read1
    File read2
    String sample_id
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Abort on the first failing command, as the other tasks of this
        # pipeline do, instead of carrying on with missing intermediates.
        set -o pipefail
        set -e

        mkdir -p /cromwell_root/tmp/fastp/

        ##1.Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # The destination must carry the interpolated sample id: passes 2
            # and 3 look the tmp1 files up under that exact name.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 16 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 16 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 16 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}
@@ -0,0 +1,36 @@ | |||
# Runs the MiXCR "analyze shotgun" pipeline on a paired-end RNA sample
# (species hs, productive sequences only).
#
# Inputs
#   sample_id        names the output directory and the output file prefix
#   fastq1, fastq2   paired FASTQ files
#   disk_size        size field of the dataDisk runtime attribute
#   docker, cluster  runtime attributes of the same name
#
# Outputs
#   mixcr_result     every file in the output directory whose name starts
#                    with the sample id
task mixcr {
    String sample_id
    File fastq1
    File fastq2

    String disk_size
    String docker
    String cluster

    command <<<
        set -eo pipefail

        mkdir ${sample_id}_mixcr_output

        mixcr analyze shotgun \
            --species hs \
            -t 16 \
            --starting-material rna \
            --only-productive \
            ${fastq1} ${fastq2} ${sample_id}_mixcr_output/${sample_id}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        Array[File] mixcr_result = glob("${sample_id}_mixcr_output/${sample_id}*")
    }
}
@@ -0,0 +1,50 @@ | |||
# Runs STAR-Fusion with FusionInspector in "validate" mode and coding-effect
# examination on a paired-end sample, then archives the FusionInspector
# directory as a .tgz next to the other results.
#
# Inputs
#   sample_id        top-level result directory name and archive name prefix
#   fastq1, fastq2   paired FASTQ files (--left_fq / --right_fq)
#   database_dir     handed to STAR-Fusion as --genome_lib_dir
#   disk_size        size field of the dataDisk runtime attribute
#   docker, cluster  runtime attributes of the same name
#
# Outputs
#   One Array[File] per file extension collected from <sample_id>/output,
#   plus the *.tsv files inside FusionInspector-validate.
task starfusion {
    String sample_id
    File fastq1
    File fastq2
    File database_dir

    String disk_size
    String docker
    String cluster

    command <<<
        set -eo pipefail

        mkdir -p ${sample_id}/output

        STAR-Fusion \
            --genome_lib_dir ${database_dir} \
            --left_fq ${fastq1} \
            --right_fq ${fastq2} \
            --CPU 16 \
            --output_dir ${sample_id}/output \
            --FusionInspector validate \
            --examine_coding_effect

        # Bundle the whole FusionInspector directory into a single archive.
        tar -zcvf ${sample_id}/output/${sample_id}_FusionInspector-validate.tgz \
            ${sample_id}/output/FusionInspector-validate
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        Array[File] starfusion_result = glob("${sample_id}/output/*.tsv")
        Array[File] FusionInspector_result = glob("${sample_id}/output/FusionInspector-validate/*.tsv")
        Array[File] starfusion_bam = glob("${sample_id}/output/*.bam")
        Array[File] starfusion_tab = glob("${sample_id}/output/*.tab")
        Array[File] starfusion_junction = glob("${sample_id}/output/*.junction")
        Array[File] starfusion_out = glob("${sample_id}/output/*.out")
        Array[File] starfusion_tgz = glob("${sample_id}/output/*.tgz")
    }
}
@@ -0,0 +1,107 @@ | |||
import "./tasks/starfusion.wdl" as starfusion | |||
import "./tasks/fastp.wdl" as fastp | |||
import "./tasks/arriba.wdl" as arriba | |||
import "./tasks/mixcr.wdl" as mixcr | |||
# Per-sample workflow: reads are pre-processed once by fastp, and the resulting
# FASTQ pair (Trim_R1 / Trim_R2) is fed to three independent calls -
# STAR-Fusion, arriba and MiXCR. The workflow name is filled in by the
# template engine.
workflow {{ project_name }} {
    # --- sample ---
    String sample_id
    File fastq1
    File fastq2

    # --- shared by every task ---
    String disk_size

    # --- fastp ---
    String fastp_docker
    String fastp_cluster
    String adapter_sequence
    String adapter_sequence_r2
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    # --- STAR-Fusion ---
    File starfusion_database_dir
    String starfusion_docker
    String starfusion_cluster

    # --- arriba ---
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF
    String arriba_docker
    String arriba_cluster

    # --- MiXCR ---
    String mixcr_docker
    String mixcr_cluster

    # Read pre-processing; every downstream call consumes its outputs.
    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = fastq1,
            read2 = fastq2,
            docker = fastp_docker,
            cluster = fastp_cluster,
            disk_size = disk_size,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2,
            umi_loc = umi_loc,
            umi_len = umi_len,
            UMI = UMI,
            trim_front1 = trim_front1,
            trim_tail1 = trim_tail1,
            max_len1 = max_len1,
            trim_front2 = trim_front2,
            trim_tail2 = trim_tail2,
            max_len2 = max_len2,
            length_required = length_required,
            length_required1 = length_required1,
            qualified_quality_phred = qualified_quality_phred,
            disable_adapter_trimming = disable_adapter_trimming,
            disable_quality_filtering = disable_quality_filtering
    }

    # Fusion detection with STAR-Fusion.
    call starfusion.starfusion as starfusion {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            database_dir = starfusion_database_dir,
            docker = starfusion_docker,
            cluster = starfusion_cluster,
            disk_size = disk_size
    }

    # Fusion detection with arriba.
    call arriba.arriba as arriba {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            STAR_INDEX_DIR = STAR_INDEX_DIR,
            ASSEMBLY_FA = ASSEMBLY_FA,
            ANNOTATION_GTF = ANNOTATION_GTF,
            docker = arriba_docker,
            cluster = arriba_cluster,
            disk_size = disk_size
    }

    # MiXCR shotgun analysis.
    call mixcr.mixcr as mixcr {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            docker = mixcr_docker,
            cluster = mixcr_cluster,
            disk_size = disk_size
    }
}