Browse Source

second commit

master
biolcl 3 years ago
parent
commit
d1c275ca05
4 changed files with 116 additions and 0 deletions
  1. +8
    -0
      defaults
  2. +11
    -0
      inputs
  3. +67
    -0
      tasks/arriba.wdl
  4. +30
    -0
      workflow.wdl

+ 8
- 0
defaults View File

@@ -0,0 +1,8 @@
{
"STAR_INDEX_DIR":"oss://pgx-reference-data/reference/arriba/STAR_index_hg38_GENCODE37",
"ASSEMBLY_FA":"oss://pgx-reference-data/reference/arriba/hg38.fa",
"ANNOTATION_GTF":"oss://pgx-reference-data/reference/arriba/GENCODE37.gtf",
"arriba_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/arriba:2.1.0",
"arriba_cluster":"OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"disk_size":"200"
}

+ 11
- 0
inputs View File

@@ -0,0 +1,11 @@
{
"{{ project_name }}.fastq1": "{{ fastq1 }}",
"{{ project_name }}.fastq2": "{{ fastq2 }}",
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.arriba_docker": "{{ arriba_docker }}",
"{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}",
"{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}",
"{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}",
"{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size }}"
}

+ 67
- 0
tasks/arriba.wdl View File

@@ -0,0 +1,67 @@
# Aligns a paired-end sample with STAR and streams the unsorted BAM directly
# into arriba for fusion calling. A copy of the BAM stream is saved with tee,
# so the task emits both the fusion tables (*.tsv) and the alignment (*.bam).
task arriba {
    # Prefix used for every file written under ./output/.
    String sample_id

    # Paired read files; STAR is told to read them through zcat.
    File fastq1
    File fastq2

    # Reference inputs: passed to STAR --genomeDir, arriba -a and arriba -g.
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    # Execution settings, consumed only by the runtime section below.
    String docker
    String cluster
    String disk_size

    command <<<
        # Abort on the first failing command, including any stage of a pipeline.
        set -e -o pipefail

        mkdir ./output/

        # Three-stage pipeline:
        #   1. STAR emits the unsorted BAM on stdout (--outStd BAM_Unsorted).
        #   2. tee stores a copy of that stream as the sample BAM.
        #   3. arriba consumes the same stream from /dev/stdin.
        # NOTE(review): -k and -t are given the same known_fusions file;
        # confirm this is intended.
        STAR \
            --runThreadN 4 \
            --genomeDir ${STAR_INDEX_DIR} \
            --genomeLoad NoSharedMemory \
            --readFilesIn ${fastq1} ${fastq2} \
            --readFilesCommand zcat \
            --outStd BAM_Unsorted \
            --outSAMtype BAM Unsorted \
            --outSAMunmapped Within \
            --outBAMcompression 0 \
            --outFilterMultimapNmax 50 \
            --peOverlapNbasesMin 10 \
            --alignSplicedMateMapLminOverLmate 0.5 \
            --alignSJstitchMismatchNmax 5 -1 5 5 \
            --chimSegmentMin 10 \
            --chimOutType WithinBAM HardClip \
            --chimJunctionOverhangMin 10 \
            --chimScoreDropMax 30 \
            --chimScoreJunctionNonGTAG 0 \
            --chimScoreSeparation 1 \
            --chimSegmentReadGapMax 3 \
            --chimMultimapNmax 50 \
        | tee ./output/${sample_id}.Aligned.out.bam \
        | /arriba_v2.1.0/arriba \
            -x /dev/stdin \
            -o ./output/${sample_id}_fusions.tsv \
            -O ./output/${sample_id}_fusions.discarded.tsv \
            -a ${ASSEMBLY_FA} \
            -g ${ANNOTATION_GTF} \
            -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
            -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        # Data disk size comes from the disk_size input; mounted at /cromwell_root/.
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Every .tsv written to ./output/ (the -o and -O targets above).
        Array[File] arriba_result = glob("./output/*.tsv")
        # Every .bam written to ./output/ (the tee copy above).
        Array[File] arriba_bam = glob("./output/*.bam")
    }
}



+ 30
- 0
workflow.wdl View File

@@ -0,0 +1,30 @@
import "./tasks/arriba.wdl" as arriba

# Entry-point workflow: declares the per-sample inputs and forwards each of
# them unchanged to the arriba task imported under the `arriba` namespace.
workflow run_arriba {
    # Sample identifier and its paired read files.
    String sample_id
    File fastq1
    File fastq2

    # Reference inputs forwarded to the task.
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    # Execution settings; mapped onto the task's docker / cluster / disk_size.
    String arriba_docker
    String arriba_cluster
    String disk_size

    call arriba.arriba as arriba {
        input:
            sample_id = sample_id,
            fastq1 = fastq1,
            fastq2 = fastq2,
            STAR_INDEX_DIR = STAR_INDEX_DIR,
            ASSEMBLY_FA = ASSEMBLY_FA,
            ANNOTATION_GTF = ANNOTATION_GTF,
            docker = arriba_docker,
            cluster = arriba_cluster,
            disk_size = disk_size
    }
}


Loading…
Cancel
Save