ソースを参照

first commit

master
biochenglinliu 2年前
コミット
5345a803fb
7個のファイルの変更263行の追加0行の削除
  1. +1
    -0
      README.md
  2. +13
    -0
      defaults
  3. +16
    -0
      inputs
  4. +80
    -0
      tasks/arriba.wdl
  5. +56
    -0
      tasks/starfusion.wdl
  6. +35
    -0
      tasks/trim_galore.wdl
  7. +62
    -0
      workflow.wdl

+ 1
- 0
README.md ファイルの表示

@@ -0,0 +1 @@
word.docx

+ 13
- 0
defaults ファイルの表示

@@ -0,0 +1,13 @@
{
"starfusion_database_dir":"oss://database-breastsurg/starfusion/ctat_genome_lib_build_dir/",
"starfusion_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/starfusion:1.10.0",
"starfusion_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc",
"disk_size":"300",
"trim_galore_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/trim_galore:0.6.7",
"trim_galore_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc",
"STAR_INDEX_DIR":"oss://database-breastsurg/arriba_hg38_v37/STAR_index_hg38_GENCODE37/",
"ASSEMBLY_FA":"oss://database-breastsurg/arriba/hg38.fa",
"ANNOTATION_GTF":"oss://database-breastsurg/arriba/GENCODE37.gtf",
"arriba_docker":"registry.cn-shanghai.aliyuncs.com/breastsurg_docker/arriba:2.1.0",
"arriba_cluster":"OnDemand bcs.es.r.4xlarge img-ubuntu-vpc"
}

+ 16
- 0
inputs ファイルの表示

@@ -0,0 +1,16 @@
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fastq1": "{{ fastq1 }}",
"{{ project_name }}.fastq2": "{{ fastq2 }}",
"{{ project_name }}.starfusion_database_dir": "{{ starfusion_database_dir }}",
"{{ project_name }}.starfusion_docker": "{{ starfusion_docker }}",
"{{ project_name }}.starfusion_cluster": "{{ starfusion_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.trim_galore_docker": "{{ trim_galore_docker }}",
"{{ project_name }}.trim_galore_cluster": "{{ trim_galore_cluster }}",
"{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}",
"{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}",
"{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}",
"{{ project_name }}.arriba_docker": "{{ arriba_docker }}",
"{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}"
}

+ 80
- 0
tasks/arriba.wdl ファイルの表示

@@ -0,0 +1,80 @@
# Task: arriba
#
# Runs STAR on the paired reads (chimeric alignments are written into the
# coordinate-sorted BAM), runs arriba on that BAM, indexes the BAM with
# samtools and finally runs draw_fusions.R to write a PDF.
#
# Inputs
#   sample_id        prefix of every file written under ./output/
#   fastq1, fastq2   paired reads handed to STAR (read through zcat)
#   STAR_INDEX_DIR   value of STAR --genomeDir
#   ASSEMBLY_FA      value of arriba -a
#   ANNOTATION_GTF   value of arriba -g and draw_fusions.R --annotation
#   disk_size        size inserted into the dataDisk runtime string
#   docker, cluster  forwarded unchanged to the runtime section
#
# Outputs
#   arriba_result    all *.tsv files found in ./output/
#   arriba_pdf       all *.pdf files found in ./output/
task arriba {
    # Sample and reads
    String sample_id
    File fastq1
    File fastq2

    # Reference data
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    # Runtime settings
    String disk_size
    String docker
    String cluster

    command <<<
        set -eo pipefail

        mkdir ./output/

        # Step 1: alignment, chimeric reads are kept inside the main BAM.
        STAR \
            --runThreadN 16 \
            --genomeDir ${STAR_INDEX_DIR} \
            --genomeLoad NoSharedMemory \
            --readFilesIn ${fastq1} ${fastq2} \
            --readFilesCommand zcat \
            --outSAMtype BAM SortedByCoordinate \
            --outSAMunmapped Within \
            --outBAMcompression 0 \
            --outFilterMultimapNmax 50 \
            --peOverlapNbasesMin 10 \
            --alignSplicedMateMapLminOverLmate 0.5 \
            --alignSJstitchMismatchNmax 5 -1 5 5 \
            --chimSegmentMin 10 \
            --chimOutType WithinBAM HardClip \
            --chimJunctionOverhangMin 10 \
            --chimScoreDropMax 30 \
            --chimScoreJunctionNonGTAG 0 \
            --chimScoreSeparation 1 \
            --chimSegmentReadGapMax 3 \
            --chimMultimapNmax 50 \
            --outFileNamePrefix ./output/${sample_id}.

        # Step 2: fusion calling; the database files ship inside the image.
        /arriba_v2.1.0/arriba \
            -x ./output/${sample_id}.Aligned.sortedByCoord.out.bam \
            -o ./output/${sample_id}_fusions.tsv \
            -O ./output/${sample_id}_fusions.discarded.tsv \
            -a ${ASSEMBLY_FA} \
            -g ${ANNOTATION_GTF} \
            -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
            -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3

        # Step 3: index the BAM, then draw the fusions into a PDF.
        samtools index -@ 16 \
            ./output/${sample_id}.Aligned.sortedByCoord.out.bam \
            ./output/${sample_id}.Aligned.sortedByCoord.out.bam.bai

        Rscript /arriba_v2.1.0/draw_fusions.R \
            --fusions=./output/${sample_id}_fusions.tsv \
            --alignments=./output/${sample_id}.Aligned.sortedByCoord.out.bam \
            --output=./output/${sample_id}_fusion.pdf \
            --annotation=${ANNOTATION_GTF} \
            --cytobands=/arriba_v2.1.0/database/cytobands_hg38_GRCh38_v2.1.0.tsv \
            --proteinDomains=/arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] arriba_result = glob("./output/*.tsv")
        Array[File] arriba_pdf = glob("./output/*.pdf")
    }
}



+ 56
- 0
tasks/starfusion.wdl ファイルの表示

@@ -0,0 +1,56 @@
# Task: starfusion
#
# Runs STAR-Fusion on the paired reads with FusionInspector in "validate"
# mode and coding-effect examination, then prefixes the result files with the
# sample id and archives the FusionInspector-validate directory.
#
# Inputs
#   sample_id        name of the working directory and prefix of the results
#   fastq1, fastq2   reads passed as --left_fq / --right_fq
#   database_dir     value of --genome_lib_dir
#   disk_size        size inserted into the dataDisk runtime string
#   docker, cluster  forwarded unchanged to the runtime section
#
# Outputs
#   starfusion_result  files matching <sample_id>_star-fusion* in the output dir
#   finspector_result  files matching <sample_id>_finspector* in the output dir
#   starfusion_out     files matching *.out in the output dir
#   starfusion_tgz     files matching *.tgz in the output dir
task starfusion {
    # Sample and reads
    String sample_id
    File fastq1
    File fastq2

    # Reference data
    File database_dir

    # Runtime settings
    String disk_size
    String docker
    String cluster

    command <<<
        set -eo pipefail

        mkdir -p ${sample_id}/output

        STAR-Fusion \
            --genome_lib_dir ${database_dir} \
            --left_fq ${fastq1} \
            --right_fq ${fastq2} \
            --CPU 16 \
            --output_dir ${sample_id}/output \
            --FusionInspector validate \
            --examine_coding_effect

        # Rename the prediction tables in place so they carry the sample id.
        for f in \
            star-fusion.fusion_predictions.tsv \
            star-fusion.fusion_predictions.abridged.tsv \
            star-fusion.fusion_predictions.abridged.coding_effect.tsv
        do
            mv ${sample_id}/output/$f ${sample_id}/output/${sample_id}_$f
        done

        # Copy (not move) the FusionInspector files: the originals stay in the
        # directory that is archived below.
        for f in \
            finspector.FusionInspector.fusions.tsv \
            finspector.FusionInspector.fusions.abridged.tsv \
            finspector.FusionInspector.fusions.abridged.tsv.annotated \
            finspector.FusionInspector.fusions.abridged.tsv.annotated.coding_effect \
            finspector.fusion_inspector_web.html
        do
            cp ${sample_id}/output/FusionInspector-validate/$f ${sample_id}/output/${sample_id}_$f
        done

        tar -zcvf ${sample_id}/output/${sample_id}_FusionInspector-validate.tgz ${sample_id}/output/FusionInspector-validate
        mv ${sample_id}/output/Aligned.out.bam ${sample_id}/output/${sample_id}_Aligned.out.bam
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] starfusion_result = glob("${sample_id}/output/${sample_id}_star-fusion*")
        Array[File] finspector_result = glob("${sample_id}/output/${sample_id}_finspector*")
        Array[File] starfusion_out = glob("${sample_id}/output/*.out")
        Array[File] starfusion_tgz = glob("${sample_id}/output/*.tgz")
    }
}



+ 35
- 0
tasks/trim_galore.wdl ファイルの表示

@@ -0,0 +1,35 @@
# Task: trim_galore
#
# Trims a pair of FASTQ files with Trim Galore (paired mode, FastQC enabled)
# and exposes the trimmed reads, the trimming reports and the FastQC archives.
#
# Inputs
#   read1, read2     paired FASTQ files
#   sample_id        used to name the staged copies and every output file
#   disk_size        size inserted into the dataDisk runtime string
#   docker, cluster  forwarded unchanged to the runtime section
#
# Outputs
#   txt_R1, txt_R2    trimming reports written to ./clean_fastq/
#   zip_R1, zip_R2    FastQC zip archives written to ./clean_fastq/
#   Trim_R1, Trim_R2  trimmed reads, moved to the working directory as
#                     <sample_id>_R1.fastq.gz / <sample_id>_R2.fastq.gz
task trim_galore {
    File read1
    File read2
    String sample_id
    String docker
    String cluster
    String disk_size

    command <<<
        # Fail at the first broken step, like the arriba and starfusion tasks;
        # otherwise only the exit status of the final mv would be reported.
        set -o pipefail
        set -e

        # Stage the reads under sample_id-based names.
        # NOTE(review): presumably done so the Trim Galore output names that
        # the mv commands and the output section expect are predictable - confirm.
        mkdir -p input1
        cp ${read1} ./input1/${sample_id}_R1.fastq.gz
        cp ${read2} ./input1/${sample_id}_R2.fastq.gz

        mkdir -p clean_fastq
        trim_galore --paired --cores 16 --fastqc --gzip --stringency 3 --phred33 --length 50 --output_dir ./clean_fastq/ ./input1/${sample_id}_R1.fastq.gz ./input1/${sample_id}_R2.fastq.gz

        # Publish the validated pairs under the plain sample names.
        mv ./clean_fastq/${sample_id}_R1_val_1.fq.gz ./${sample_id}_R1.fastq.gz
        mv ./clean_fastq/${sample_id}_R2_val_2.fq.gz ./${sample_id}_R2.fastq.gz
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File txt_R1 = "./clean_fastq/${sample_id}_R1.fastq.gz_trimming_report.txt"
        File txt_R2 = "./clean_fastq/${sample_id}_R2.fastq.gz_trimming_report.txt"
        File zip_R1 = "./clean_fastq/${sample_id}_R1_val_1_fastqc.zip"
        File zip_R2 = "./clean_fastq/${sample_id}_R2_val_2_fastqc.zip"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}

+ 62
- 0
workflow.wdl ファイルの表示

@@ -0,0 +1,62 @@
import "./tasks/starfusion.wdl" as starfusion
import "./tasks/trim_galore.wdl" as trim_galore
import "./tasks/arriba.wdl" as arriba

# Workflow: trims the input read pair once, then hands the trimmed reads to
# two fusion callers (STAR-Fusion and arriba). The workflow name is a template
# placeholder that is filled in before submission.
workflow {{ project_name }} {
    # Sample-level inputs
    String sample_id
    File fastq1
    File fastq2

    # Disk size shared by all three tasks
    String disk_size

    # trim_galore settings
    String trim_galore_docker
    String trim_galore_cluster

    # STAR-Fusion settings
    File starfusion_database_dir
    String starfusion_docker
    String starfusion_cluster

    # arriba settings
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF
    String arriba_docker
    String arriba_cluster

    # Trim the raw reads.
    call trim_galore.trim_galore as trim_galore {
        input:
            sample_id = sample_id,
            read1 = fastq1,
            read2 = fastq2,
            disk_size = disk_size,
            docker = trim_galore_docker,
            cluster = trim_galore_cluster
    }

    # Fusion calling with STAR-Fusion on the trimmed reads.
    call starfusion.starfusion as starfusion {
        input:
            sample_id = sample_id,
            fastq1 = trim_galore.Trim_R1,
            fastq2 = trim_galore.Trim_R2,
            database_dir = starfusion_database_dir,
            disk_size = disk_size,
            docker = starfusion_docker,
            cluster = starfusion_cluster
    }

    # Fusion calling with arriba on the same trimmed reads.
    call arriba.arriba as arriba {
        input:
            sample_id = sample_id,
            fastq1 = trim_galore.Trim_R1,
            fastq2 = trim_galore.Trim_R2,
            STAR_INDEX_DIR = STAR_INDEX_DIR,
            ASSEMBLY_FA = ASSEMBLY_FA,
            ANNOTATION_GTF = ANNOTATION_GTF,
            disk_size = disk_size,
            docker = arriba_docker,
            cluster = arriba_cluster
    }
}

読み込み中…
キャンセル
保存