@@ -0,0 +1 @@ | |||
word.docx |
@@ -0,0 +1,13 @@ | |||
{ | |||
"starfusion_database_dir":"oss://database-breastsurg/starfusion/ctat_genome_lib_build_dir/", | |||
"starfusion_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/starfusion:1.10.0", | |||
"starfusion_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc", | |||
"disk_size":"300", | |||
"trim_galore_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/trim_galore:0.6.7", | |||
"trim_galore_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc", | |||
"STAR_INDEX_DIR":"oss://database-breastsurg/arriba_hg38_v37/STAR_index_hg38_GENCODE37/", | |||
"ASSEMBLY_FA":"oss://database-breastsurg/arriba/hg38.fa", | |||
"ANNOTATION_GTF":"oss://database-breastsurg/arriba/GENCODE37.gtf", | |||
"arriba_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/arriba:2.1.0", | |||
"arriba_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc" | |||
} |
@@ -0,0 +1,16 @@ | |||
{ | |||
"{{ project_name }}.sample_id": "{{ sample_id }}", | |||
"{{ project_name }}.fastq1": "{{ fastq1 }}", | |||
"{{ project_name }}.fastq2": "{{ fastq2 }}", | |||
"{{ project_name }}.starfusion_database_dir": "{{ starfusion_database_dir }}", | |||
"{{ project_name }}.starfusion_docker": "{{ starfusion_docker }}", | |||
"{{ project_name }}.starfusion_cluster": "{{ starfusion_cluster }}", | |||
"{{ project_name }}.disk_size": "{{ disk_size }}", | |||
"{{ project_name }}.trim_galore_docker": "{{ trim_galore_docker }}", | |||
"{{ project_name }}.trim_galore_cluster": "{{ trim_galore_cluster }}", | |||
"{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}", | |||
"{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}", | |||
"{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}", | |||
"{{ project_name }}.arriba_docker": "{{ arriba_docker }}", | |||
"{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}" | |||
} |
@@ -0,0 +1,80 @@ | |||
# Aligns paired-end reads with STAR (chimeric alignments kept inside the BAM),
# calls gene fusions with arriba 2.1.0 and renders them with draw_fusions.R.
#
# Inputs:
#   sample_id       - prefix of every file written under ./output/
#   fastq1, fastq2  - read files; STAR reads them through zcat
#   STAR_INDEX_DIR  - passed to STAR --genomeDir
#   ASSEMBLY_FA     - passed to arriba -a
#   ANNOTATION_GTF  - passed to arriba -g and draw_fusions.R --annotation
#   disk_size       - size field of the dataDisk runtime attribute
#   docker, cluster - forwarded unchanged to the runtime section
# Outputs:
#   arriba_result   - every ./output/*.tsv (fusions and discarded fusions)
#   arriba_pdf      - every ./output/*.pdf
task arriba {
  String sample_id
  File fastq1
  File fastq2
  File STAR_INDEX_DIR
  File ASSEMBLY_FA
  File ANNOTATION_GTF
  String disk_size
  String docker
  String cluster

  command <<<
    # Abort on the first failing command, including failures inside pipelines.
    set -eo pipefail
    mkdir -p ./output/

    # Shell-side shorthands. They are referenced without braces on purpose:
    # the brace form is reserved for WDL interpolation in this command block.
    threads=16
    bam="./output/${sample_id}.Aligned.sortedByCoord.out.bam"
    fusions="./output/${sample_id}_fusions.tsv"
    db=/arriba_v2.1.0/database

    # Alignment. Chimeric reads are written into the main BAM (WithinBAM),
    # which is the file arriba consumes below.
    STAR \
      --runThreadN "$threads" \
      --genomeDir "${STAR_INDEX_DIR}" \
      --genomeLoad NoSharedMemory \
      --readFilesIn "${fastq1}" "${fastq2}" \
      --readFilesCommand zcat \
      --outSAMtype BAM SortedByCoordinate \
      --outSAMunmapped Within \
      --outBAMcompression 0 \
      --outFilterMultimapNmax 50 \
      --peOverlapNbasesMin 10 \
      --alignSplicedMateMapLminOverLmate 0.5 \
      --alignSJstitchMismatchNmax 5 -1 5 5 \
      --chimSegmentMin 10 \
      --chimOutType WithinBAM HardClip \
      --chimJunctionOverhangMin 10 \
      --chimScoreDropMax 30 \
      --chimScoreJunctionNonGTAG 0 \
      --chimScoreSeparation 1 \
      --chimSegmentReadGapMax 3 \
      --chimMultimapNmax 50 \
      --outFileNamePrefix "./output/${sample_id}."

    # Fusion calling. The known-fusions list is supplied to both -k and -t.
    /arriba_v2.1.0/arriba \
      -x "$bam" \
      -o "$fusions" \
      -O "./output/${sample_id}_fusions.discarded.tsv" \
      -a "${ASSEMBLY_FA}" \
      -g "${ANNOTATION_GTF}" \
      -b "$db/blacklist_hg38_GRCh38_v2.1.0.tsv.gz" \
      -k "$db/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz" \
      -t "$db/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz" \
      -p "$db/protein_domains_hg38_GRCh38_v2.1.0.gff3"

    # The BAM is indexed before it is handed to draw_fusions.R.
    samtools index -@ "$threads" "$bam" "$bam.bai"

    Rscript /arriba_v2.1.0/draw_fusions.R \
      --fusions="$fusions" \
      --alignments="$bam" \
      --output="./output/${sample_id}_fusion.pdf" \
      --annotation="${ANNOTATION_GTF}" \
      --cytobands="$db/cytobands_hg38_GRCh38_v2.1.0.tsv" \
      --proteinDomains="$db/protein_domains_hg38_GRCh38_v2.1.0.gff3"
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    Array[File] arriba_result=glob("./output/*.tsv")
    Array[File] arriba_pdf=glob("./output/*.pdf")
  }
}
@@ -0,0 +1,56 @@ | |||
# Runs STAR-Fusion with FusionInspector in "validate" mode and coding-effect
# annotation, then prefixes the result files with the sample id.
#
# Inputs:
#   sample_id       - output directory name and prefix of the renamed results
#   fastq1, fastq2  - passed to --left_fq / --right_fq
#   database_dir    - passed to --genome_lib_dir
#   disk_size       - size field of the dataDisk runtime attribute
#   docker, cluster - forwarded unchanged to the runtime section
# Outputs:
#   starfusion_result - <sample_id>/output/<sample_id>_star-fusion*
#   finspector_result - <sample_id>/output/<sample_id>_finspector*
#   starfusion_out    - <sample_id>/output/*.out
#   starfusion_tgz    - <sample_id>/output/*.tgz
task starfusion {
  String sample_id
  File fastq1
  File fastq2
  File database_dir
  String docker
  String cluster
  String disk_size

  command <<<
    # Abort on the first failing command, including failures inside pipelines.
    set -eo pipefail

    # Shell-side shorthands. They are referenced without braces on purpose:
    # the brace form is reserved for WDL interpolation in this command block.
    out_dir="${sample_id}/output"
    fi_dir="$out_dir/FusionInspector-validate"
    mkdir -p "$out_dir"

    STAR-Fusion \
      --genome_lib_dir "${database_dir}" \
      --left_fq "${fastq1}" \
      --right_fq "${fastq2}" \
      --CPU 16 \
      --output_dir "$out_dir" \
      --FusionInspector validate \
      --examine_coding_effect

    # Rename the STAR-Fusion prediction tables in place with a sample prefix.
    for name in \
      star-fusion.fusion_predictions.tsv \
      star-fusion.fusion_predictions.abridged.tsv \
      star-fusion.fusion_predictions.abridged.coding_effect.tsv
    do
      mv "$out_dir/$name" "$out_dir/${sample_id}_$name"
    done

    # Copy (not move) the FusionInspector reports next to the predictions, so
    # the directory archived below still contains them.
    for name in \
      finspector.FusionInspector.fusions.tsv \
      finspector.FusionInspector.fusions.abridged.tsv \
      finspector.FusionInspector.fusions.abridged.tsv.annotated \
      finspector.FusionInspector.fusions.abridged.tsv.annotated.coding_effect \
      finspector.fusion_inspector_web.html
    do
      cp "$fi_dir/$name" "$out_dir/${sample_id}_$name"
    done

    tar -zcvf "$out_dir/${sample_id}_FusionInspector-validate.tgz" "$fi_dir"

    # NOTE(review): the renamed BAM ends in ".bam", so none of the output
    # globs below pick it up. Confirm whether it is meant to be delivered.
    mv "$out_dir/Aligned.out.bam" "$out_dir/${sample_id}_Aligned.out.bam"
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    Array[File] starfusion_result=glob("${sample_id}/output/${sample_id}_star-fusion*")
    Array[File] finspector_result=glob("${sample_id}/output/${sample_id}_finspector*")
    Array[File] starfusion_out=glob("${sample_id}/output/*.out")
    Array[File] starfusion_tgz=glob("${sample_id}/output/*.tgz")
  }
}
@@ -0,0 +1,35 @@ | |||
# Adapter/quality trimming of one paired-end sample with Trim Galore, with
# FastQC run on the trimmed reads.
#
# Inputs:
#   read1, read2    - raw read files; copied to ./input1/ under
#                     <sample_id>_R1.fastq.gz / <sample_id>_R2.fastq.gz so the
#                     names of the files Trim Galore writes are predictable
#   sample_id       - prefix of every output file
#   disk_size       - size field of the dataDisk runtime attribute
#   docker, cluster - forwarded unchanged to the runtime section
# Outputs:
#   txt_R1, txt_R2   - trimming reports in ./clean_fastq/
#   zip_R1, zip_R2   - FastQC archives of the trimmed reads in ./clean_fastq/
#   Trim_R1, Trim_R2 - trimmed reads, moved to the working directory as
#                      <sample_id>_R1.fastq.gz / <sample_id>_R2.fastq.gz
task trim_galore {
  File read1
  File read2
  String sample_id
  String docker
  String cluster
  String disk_size

  command <<<
    # Abort on the first failing command so a broken copy or trimming run is
    # reported at the step that failed, matching the other tasks.
    set -eo pipefail

    mkdir -p input1 clean_fastq
    cp "${read1}" "./input1/${sample_id}_R1.fastq.gz"
    cp "${read2}" "./input1/${sample_id}_R2.fastq.gz"

    trim_galore \
      --paired \
      --cores 16 \
      --fastqc \
      --gzip \
      --stringency 3 \
      --phred33 \
      --length 50 \
      --output_dir ./clean_fastq/ \
      "./input1/${sample_id}_R1.fastq.gz" \
      "./input1/${sample_id}_R2.fastq.gz"

    # Give the validated pairs back their plain fastq.gz names in the working
    # directory; reports and FastQC archives stay in ./clean_fastq/.
    mv "./clean_fastq/${sample_id}_R1_val_1.fq.gz" "./${sample_id}_R1.fastq.gz"
    mv "./clean_fastq/${sample_id}_R2_val_2.fq.gz" "./${sample_id}_R2.fastq.gz"
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File txt_R1 = "./clean_fastq/${sample_id}_R1.fastq.gz_trimming_report.txt"
    File txt_R2 = "./clean_fastq/${sample_id}_R2.fastq.gz_trimming_report.txt"
    File zip_R1 = "./clean_fastq/${sample_id}_R1_val_1_fastqc.zip"
    File zip_R2 = "./clean_fastq/${sample_id}_R2_val_2_fastqc.zip"
    File Trim_R1 = "${sample_id}_R1.fastq.gz"
    File Trim_R2 = "${sample_id}_R2.fastq.gz"
  }
}
@@ -0,0 +1,62 @@ | |||
import "./tasks/starfusion.wdl" as starfusion | |||
import "./tasks/trim_galore.wdl" as trim_galore | |||
import "./tasks/arriba.wdl" as arriba | |||
# Fusion-calling workflow template: reads are trimmed once with trim_galore,
# and the trimmed pair is then handed to both STAR-Fusion and arriba.
workflow {{ project_name }} {
  # Sample
  String sample_id
  File fastq1
  File fastq2

  # Disk size shared by all three tasks
  String disk_size

  # trim_galore
  String trim_galore_docker
  String trim_galore_cluster

  # STAR-Fusion
  File starfusion_database_dir
  String starfusion_docker
  String starfusion_cluster

  # arriba
  File STAR_INDEX_DIR
  File ASSEMBLY_FA
  File ANNOTATION_GTF
  String arriba_docker
  String arriba_cluster

  # Step 1: trim the raw reads.
  call trim_galore.trim_galore as trim_galore {
    input:
      sample_id = sample_id,
      read1     = fastq1,
      read2     = fastq2,
      disk_size = disk_size,
      docker    = trim_galore_docker,
      cluster   = trim_galore_cluster
  }

  # Step 2a: STAR-Fusion on the trimmed reads.
  call starfusion.starfusion as starfusion {
    input:
      sample_id    = sample_id,
      fastq1       = trim_galore.Trim_R1,
      fastq2       = trim_galore.Trim_R2,
      database_dir = starfusion_database_dir,
      disk_size    = disk_size,
      docker       = starfusion_docker,
      cluster      = starfusion_cluster
  }

  # Step 2b: arriba on the same trimmed reads.
  call arriba.arriba as arriba {
    input:
      sample_id      = sample_id,
      fastq1         = trim_galore.Trim_R1,
      fastq2         = trim_galore.Trim_R2,
      STAR_INDEX_DIR = STAR_INDEX_DIR,
      ASSEMBLY_FA    = ASSEMBLY_FA,
      ANNOTATION_GTF = ANNOTATION_GTF,
      disk_size      = disk_size,
      docker         = arriba_docker,
      cluster        = arriba_cluster
  }
}