hace 4 años · d1c275ca05
--- a/defaults
+++ b/defaults
@@ -0,0 +1,8 @@
 {   
    "STAR_INDEX_DIR":"oss://pgx-reference-data/reference/arriba/STAR_index_hg38_GENCODE37",
    "ASSEMBLY_FA":"oss://pgx-reference-data/reference/arriba/hg38.fa",
    "ANNOTATION_GTF":"oss://pgx-reference-data/reference/arriba/GENCODE37.gtf",
    "arriba_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/arriba:2.1.0",
    "arriba_cluster":"OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "disk_size":"200"
 }
--- a/inputs
+++ b/inputs
@@ -0,0 +1,11 @@
 {
    "{{ project_name }}.fastq1": "{{ fastq1 }}",
    "{{ project_name }}.fastq2": "{{ fastq2 }}",
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.arriba_docker": "{{ arriba_docker }}",
    "{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}",
    "{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}",
    "{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}",
    "{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}",
    "{{ project_name }}.disk_size": "{{ disk_size }}"
 }
--- a/tasks/arriba.wdl
+++ b/tasks/arriba.wdl
@@ -0,0 +1,67 @@
 task arriba {
    # Aligns paired-end reads with STAR (chimeric alignments kept inside the
    # BAM) and streams the alignments directly into arriba to call fusions.
    #
    # Inputs
    #   sample_id        prefix of every file written to ./output/
    #   fastq1, fastq2   read files; STAR decompresses them with zcat, so they
    #                    must be gzip-compressed
    #   STAR_INDEX_DIR   STAR genome index passed to --genomeDir
    #   ASSEMBLY_FA      assembly FASTA passed to arriba -a
    #   ANNOTATION_GTF   gene annotation passed to arriba -g
    #   docker, cluster  runtime image and cluster specification
    #   disk_size        data-disk size, spliced into the dataDisk runtime string
    #
    # Outputs
    #   arriba_result    every *.tsv in ./output/ (fusions and discarded fusions)
    #   arriba_bam       every *.bam in ./output/ (the copy of STAR's output)
    String sample_id
    File fastq1
    File fastq2
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    String docker
    String cluster
    String disk_size

    command <<<
        # Abort on the first failing command, including any stage of a pipeline.
        set -e
        set -o pipefail

        # -p keeps a pre-existing directory from aborting the script under set -e.
        mkdir -p ./output/

        # STAR writes an unsorted, uncompressed BAM to stdout; tee stores a copy
        # on disk while arriba consumes the same stream from /dev/stdin.
        # Interpolated values are quoted so that unusual characters in a path or
        # sample id cannot split an argument.
        # NOTE(review): -k and -t both point at the known_fusions file, as in the
        # original command; presumably intentional, confirm against arriba docs.
        # NOTE(review): the /arriba_v2.1.0/ paths are expected to exist inside the
        # docker image; this task does not stage them.
        STAR \
            --runThreadN 4 \
            --genomeDir "${STAR_INDEX_DIR}" \
            --genomeLoad NoSharedMemory \
            --readFilesIn "${fastq1}" "${fastq2}" \
            --readFilesCommand zcat \
            --outStd BAM_Unsorted \
            --outSAMtype BAM Unsorted \
            --outSAMunmapped Within \
            --outBAMcompression 0 \
            --outFilterMultimapNmax 50 \
            --peOverlapNbasesMin 10 \
            --alignSplicedMateMapLminOverLmate 0.5 \
            --alignSJstitchMismatchNmax 5 -1 5 5 \
            --chimSegmentMin 10 \
            --chimOutType WithinBAM HardClip \
            --chimJunctionOverhangMin 10 \
            --chimScoreDropMax 30 \
            --chimScoreJunctionNonGTAG 0 \
            --chimScoreSeparation 1 \
            --chimSegmentReadGapMax 3 \
            --chimMultimapNmax 50 \
        | tee "./output/${sample_id}.Aligned.out.bam" \
        | /arriba_v2.1.0/arriba \
            -x /dev/stdin \
            -o "./output/${sample_id}_fusions.tsv" \
            -O "./output/${sample_id}_fusions.discarded.tsv" \
            -a "${ASSEMBLY_FA}" \
            -g "${ANNOTATION_GTF}" \
            -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
            -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
    >>>

    runtime {
        docker : docker
        cluster : cluster
        # systemDisk/dataDisk are backend-specific keys; the data disk is
        # requested with the caller-supplied size and the /cromwell_root/ path.
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] arriba_result=glob("./output/*.tsv")
        Array[File] arriba_bam=glob("./output/*.bam")
    }

 }


--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,30 @@
 import "./tasks/arriba.wdl" as arriba

 workflow run_arriba {
    # Single-sample workflow: forwards every workflow input to one call of
    # the arriba task imported under the "arriba" namespace.

    # Sample identity and its paired read files.
    String sample_id
    File fastq1
    File fastq2

    # Reference data handed through to the task unchanged.
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    # Execution settings; the task knows the first two as docker/cluster.
    String arriba_docker
    String arriba_cluster
    String disk_size

    call arriba.arriba as arriba {
        input:
            docker = arriba_docker,
            cluster = arriba_cluster,
            disk_size = disk_size,
            sample_id = sample_id,
            fastq1 = fastq1,
            fastq2 = fastq2,
            STAR_INDEX_DIR = STAR_INDEX_DIR,
            ASSEMBLY_FA = ASSEMBLY_FA,
            ANNOTATION_GTF = ANNOTATION_GTF
    }
 }