word.docx |
{ | |||||
"starfusion_database_dir":"oss://database-breastsurg/starfusion/ctat_genome_lib_build_dir/", | |||||
"starfusion_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/starfusion:1.10.0", | |||||
"starfusion_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc", | |||||
"disk_size":"300", | |||||
"trim_galore_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/trim_galore:0.6.7", | |||||
"trim_galore_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc", | |||||
"STAR_INDEX_DIR":"oss://database-breastsurg/arriba_hg38_v37/STAR_index_hg38_GENCODE37/", | |||||
"ASSEMBLY_FA":"oss://database-breastsurg/arriba/hg38.fa", | |||||
"ANNOTATION_GTF":"oss://database-breastsurg/arriba/GENCODE37.gtf", | |||||
"arriba_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/arriba:2.1.0", | |||||
"arriba_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc" | |||||
} |
{ | |||||
"{{ project_name }}.sample_id": "{{ sample_id }}", | |||||
"{{ project_name }}.fastq1": "{{ fastq1 }}", | |||||
"{{ project_name }}.fastq2": "{{ fastq2 }}", | |||||
"{{ project_name }}.starfusion_database_dir": "{{ starfusion_database_dir }}", | |||||
"{{ project_name }}.starfusion_docker": "{{ starfusion_docker }}", | |||||
"{{ project_name }}.starfusion_cluster": "{{ starfusion_cluster }}", | |||||
"{{ project_name }}.disk_size": "{{ disk_size }}", | |||||
"{{ project_name }}.trim_galore_docker": "{{ trim_galore_docker }}", | |||||
"{{ project_name }}.trim_galore_cluster": "{{ trim_galore_cluster }}", | |||||
"{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}", | |||||
"{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}", | |||||
"{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}", | |||||
"{{ project_name }}.arriba_docker": "{{ arriba_docker }}", | |||||
"{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}" | |||||
} |
# Task: arriba
# Aligns paired-end reads with STAR (chimeric alignments kept inside the BAM),
# calls fusions with arriba, indexes the BAM and renders the calls with
# draw_fusions.R.
#
# Inputs:
#   sample_id       prefix of every file written below ./output/
#   fastq1, fastq2  paired FASTQ files; STAR reads them through `zcat`
#   STAR_INDEX_DIR  passed to STAR --genomeDir
#   ASSEMBLY_FA     passed to arriba -a
#   ANNOTATION_GTF  passed to arriba -g and draw_fusions.R --annotation
#   disk_size       size field of the dataDisk runtime string
#   docker, cluster runtime image and cluster specification
#   threads         optional; threads for STAR and `samtools index`
#                   (default 16, the value that used to be hard-coded)
#
# Outputs:
#   arriba_result   every ./output/*.tsv (fusions and discarded fusions)
#   arriba_pdf      every ./output/*.pdf
#
# The command references /arriba_v2.1.0/... paths, so the image must provide
# arriba and its database files at that location.
task arriba {
  String sample_id
  File fastq1
  File fastq2
  File STAR_INDEX_DIR
  File ASSEMBLY_FA
  File ANNOTATION_GTF
  String disk_size
  String docker
  String cluster
  Int threads = 16

  command <<<
    set -o pipefail
    set -e

    # -p keeps the step idempotent if the directory already exists.
    mkdir -p ./output/

    STAR \
      --runThreadN ${threads} \
      --genomeDir "${STAR_INDEX_DIR}" \
      --genomeLoad NoSharedMemory \
      --readFilesIn "${fastq1}" "${fastq2}" \
      --readFilesCommand zcat \
      --outSAMtype BAM SortedByCoordinate \
      --outSAMunmapped Within \
      --outBAMcompression 0 \
      --outFilterMultimapNmax 50 \
      --peOverlapNbasesMin 10 \
      --alignSplicedMateMapLminOverLmate 0.5 \
      --alignSJstitchMismatchNmax 5 -1 5 5 \
      --chimSegmentMin 10 \
      --chimOutType WithinBAM HardClip \
      --chimJunctionOverhangMin 10 \
      --chimScoreDropMax 30 \
      --chimScoreJunctionNonGTAG 0 \
      --chimScoreSeparation 1 \
      --chimSegmentReadGapMax 3 \
      --chimMultimapNmax 50 \
      --outFileNamePrefix "./output/${sample_id}."

    # The same known-fusions file is intentionally given to both -k and -t.
    /arriba_v2.1.0/arriba \
      -x "./output/${sample_id}.Aligned.sortedByCoord.out.bam" \
      -o "./output/${sample_id}_fusions.tsv" -O "./output/${sample_id}_fusions.discarded.tsv" \
      -a "${ASSEMBLY_FA}" \
      -g "${ANNOTATION_GTF}" \
      -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
      -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
      -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
      -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3

    # The BAM is indexed before draw_fusions.R is given it as --alignments.
    samtools index -@ ${threads} \
      "./output/${sample_id}.Aligned.sortedByCoord.out.bam" \
      "./output/${sample_id}.Aligned.sortedByCoord.out.bam.bai"

    Rscript /arriba_v2.1.0/draw_fusions.R \
      --fusions="./output/${sample_id}_fusions.tsv" \
      --alignments="./output/${sample_id}.Aligned.sortedByCoord.out.bam" \
      --output="./output/${sample_id}_fusion.pdf" \
      --annotation="${ANNOTATION_GTF}" \
      --cytobands=/arriba_v2.1.0/database/cytobands_hg38_GRCh38_v2.1.0.tsv \
      --proteinDomains=/arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    Array[File] arriba_result = glob("./output/*.tsv")
    Array[File] arriba_pdf = glob("./output/*.pdf")
  }
}
# Task: starfusion
# Runs STAR-Fusion with FusionInspector in `validate` mode and coding-effect
# examination, then prefixes the result files with the sample id so that the
# output globs can collect them.
#
# Inputs:
#   sample_id       used as working sub-directory name and as file-name prefix
#   fastq1, fastq2  paired FASTQ files (--left_fq / --right_fq)
#   database_dir    passed to STAR-Fusion --genome_lib_dir
#   docker, cluster runtime image and cluster specification
#   disk_size       size field of the dataDisk runtime string
#   threads         optional; value for STAR-Fusion --CPU (default 16, the
#                   value that used to be hard-coded)
#
# Outputs:
#   starfusion_result  files matching <sample_id>_star-fusion*
#   finspector_result  files matching <sample_id>_finspector*
#   starfusion_out     files ending in .out inside the output directory
#   starfusion_tgz     the archived FusionInspector-validate directory
task starfusion {
  String sample_id
  File fastq1
  File fastq2
  File database_dir
  String docker
  String cluster
  String disk_size
  Int threads = 16

  command <<<
    set -o pipefail
    set -e

    mkdir -p "${sample_id}/output"

    STAR-Fusion --genome_lib_dir "${database_dir}" \
      --left_fq "${fastq1}" \
      --right_fq "${fastq2}" \
      --CPU ${threads} \
      --output_dir "${sample_id}/output" \
      --FusionInspector validate \
      --examine_coding_effect

    # Prefix the STAR-Fusion prediction tables with the sample id.
    mv "${sample_id}/output/star-fusion.fusion_predictions.tsv" "${sample_id}/output/${sample_id}_star-fusion.fusion_predictions.tsv"
    mv "${sample_id}/output/star-fusion.fusion_predictions.abridged.tsv" "${sample_id}/output/${sample_id}_star-fusion.fusion_predictions.abridged.tsv"
    mv "${sample_id}/output/star-fusion.fusion_predictions.abridged.coding_effect.tsv" "${sample_id}/output/${sample_id}_star-fusion.fusion_predictions.abridged.coding_effect.tsv"

    # Copy (not move) the FusionInspector reports: the originals stay in place
    # so that the directory archived below is complete.
    cp "${sample_id}/output/FusionInspector-validate/finspector.FusionInspector.fusions.tsv" "${sample_id}/output/${sample_id}_finspector.FusionInspector.fusions.tsv"
    cp "${sample_id}/output/FusionInspector-validate/finspector.FusionInspector.fusions.abridged.tsv" "${sample_id}/output/${sample_id}_finspector.FusionInspector.fusions.abridged.tsv"
    cp "${sample_id}/output/FusionInspector-validate/finspector.FusionInspector.fusions.abridged.tsv.annotated" "${sample_id}/output/${sample_id}_finspector.FusionInspector.fusions.abridged.tsv.annotated"
    cp "${sample_id}/output/FusionInspector-validate/finspector.FusionInspector.fusions.abridged.tsv.annotated.coding_effect" "${sample_id}/output/${sample_id}_finspector.FusionInspector.fusions.abridged.tsv.annotated.coding_effect"
    cp "${sample_id}/output/FusionInspector-validate/finspector.fusion_inspector_web.html" "${sample_id}/output/${sample_id}_finspector.fusion_inspector_web.html"

    tar -zcvf "${sample_id}/output/${sample_id}_FusionInspector-validate.tgz" "${sample_id}/output/FusionInspector-validate"

    # NOTE(review): the renamed BAM ends in ".out.bam" and is matched by none
    # of the output globs below ("*.out" does not match it), so it is not
    # exported by this task. Confirm whether that is intended.
    mv "${sample_id}/output/Aligned.out.bam" "${sample_id}/output/${sample_id}_Aligned.out.bam"
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    Array[File] starfusion_result = glob("${sample_id}/output/${sample_id}_star-fusion*")
    Array[File] finspector_result = glob("${sample_id}/output/${sample_id}_finspector*")
    Array[File] starfusion_out = glob("${sample_id}/output/*.out")
    Array[File] starfusion_tgz = glob("${sample_id}/output/*.tgz")
  }
}
# Task: trim_galore
# Copies the paired reads to <sample_id>_R1/_R2.fastq.gz so that Trim Galore's
# derived output names are predictable, trims them in paired mode with FastQC
# enabled, and moves the validated reads to the task's working directory.
#
# Inputs:
#   read1, read2    paired FASTQ files; copied to *.fastq.gz names
#   sample_id       prefix of every produced file
#   docker, cluster runtime image and cluster specification
#   disk_size       size field of the dataDisk runtime string
#   threads         optional; value for trim_galore --cores (default 16, the
#                   value that used to be hard-coded)
#
# Outputs:
#   txt_R1, txt_R2    trimming reports
#   zip_R1, zip_R2    FastQC archives of the validated reads
#   Trim_R1, Trim_R2  trimmed reads, renamed to <sample_id>_R{1,2}.fastq.gz
task trim_galore {
  File read1
  File read2
  String sample_id
  String docker
  String cluster
  String disk_size
  Int threads = 16

  command <<<
    # Fail on the first error, as the arriba and starfusion tasks do; without
    # this a failed copy would only surface later (or not at all).
    set -o pipefail
    set -e

    mkdir -p input1
    cp "${read1}" "./input1/${sample_id}_R1.fastq.gz"
    cp "${read2}" "./input1/${sample_id}_R2.fastq.gz"

    mkdir -p clean_fastq
    # --gzip is the documented spelling of the option formerly written -gzip.
    trim_galore --paired --cores ${threads} --fastqc --gzip --stringency 3 --phred33 --length 50 \
      --output_dir ./clean_fastq/ \
      "./input1/${sample_id}_R1.fastq.gz" "./input1/${sample_id}_R2.fastq.gz"

    mv "./clean_fastq/${sample_id}_R1_val_1.fq.gz" "./${sample_id}_R1.fastq.gz"
    mv "./clean_fastq/${sample_id}_R2_val_2.fq.gz" "./${sample_id}_R2.fastq.gz"
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File txt_R1 = "./clean_fastq/${sample_id}_R1.fastq.gz_trimming_report.txt"
    File txt_R2 = "./clean_fastq/${sample_id}_R2.fastq.gz_trimming_report.txt"
    File zip_R1 = "./clean_fastq/${sample_id}_R1_val_1_fastqc.zip"
    File zip_R2 = "./clean_fastq/${sample_id}_R2_val_2_fastqc.zip"
    File Trim_R1 = "${sample_id}_R1.fastq.gz"
    File Trim_R2 = "${sample_id}_R2.fastq.gz"
  }
}
import "./tasks/starfusion.wdl" as starfusion | |||||
import "./tasks/trim_galore.wdl" as trim_galore | |||||
import "./tasks/arriba.wdl" as arriba | |||||
# Workflow template ({{ project_name }} is substituted when the template is
# rendered). Reads are trimmed once by trim_galore; the trimmed pair is then
# handed to two independent fusion callers, arriba and starfusion.
workflow {{ project_name }} {
  # Sample and raw reads
  String sample_id
  File fastq1
  File fastq2

  # Shared by all three tasks
  String disk_size

  # trim_galore runtime
  String trim_galore_docker
  String trim_galore_cluster

  # STAR-Fusion resources and runtime
  File starfusion_database_dir
  String starfusion_docker
  String starfusion_cluster

  # arriba resources and runtime
  File STAR_INDEX_DIR
  File ASSEMBLY_FA
  File ANNOTATION_GTF
  String arriba_docker
  String arriba_cluster

  call trim_galore.trim_galore as trim_galore {
    input:
      sample_id = sample_id,
      read1 = fastq1,
      read2 = fastq2,
      disk_size = disk_size,
      docker = trim_galore_docker,
      cluster = trim_galore_cluster
  }

  # Both callers below depend only on the trimmed reads, not on each other.
  call arriba.arriba as arriba {
    input:
      sample_id = sample_id,
      fastq1 = trim_galore.Trim_R1,
      fastq2 = trim_galore.Trim_R2,
      STAR_INDEX_DIR = STAR_INDEX_DIR,
      ASSEMBLY_FA = ASSEMBLY_FA,
      ANNOTATION_GTF = ANNOTATION_GTF,
      disk_size = disk_size,
      docker = arriba_docker,
      cluster = arriba_cluster
  }

  call starfusion.starfusion as starfusion {
    input:
      sample_id = sample_id,
      fastq1 = trim_galore.Trim_R1,
      fastq2 = trim_galore.Trim_R2,
      database_dir = starfusion_database_dir,
      disk_size = disk_size,
      docker = starfusion_docker,
      cluster = starfusion_cluster
  }
}