4 年前 · 8f0b2c2fde
--- a/.DS_Store
+++ b/.DS_Store
--- a/conf/fastq_screen.conf
+++ b/conf/fastq_screen.conf
 # This is an example configuration file for FastQ Screen
 ############################
 ## Bowtie, Bowtie 2 or BWA #
 ############################
 ## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set 
 ## this value to tell the program where to find your chosen aligner.  Uncomment 
 ## the relevant line below and set the appropriate location.  Please note, 
 ## this path should INCLUDE the executable filename.
 #BOWTIE	/usr/local/bin/bowtie/bowtie
 #BOWTIE2 /usr/local/bowtie2/bowtie2
 #BWA /usr/local/bwa/bwa
 ############################################
 ## Bismark (for bisulfite sequencing only) #
 ############################################
 ## If the Bismark binary is not in your PATH then you can set this value to 
 ## tell the program where to find it.  Uncomment the line below and set the 
 ## appropriate location. Please note, this path should INCLUDE the executable 
 ## filename.
 #BISMARK	/usr/local/bin/bismark/bismark
 ############
 ## Threads #
 ############
 ## Genome aligners can be made to run across multiple CPU cores to speed up 
 ## searches.  Set this value to the number of cores you want for mapping reads.
 THREADS		32
 ##############
 ## DATABASES #
 ##############
 ## This section enables you to configure multiple genomes databases (aligner index 
 ## files) to search against in your screen.  For each genome you need to provide a 
 ## database name (which can't contain spaces) and the location of the aligner index 
 ## files.
 ##
 ## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
 ## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
 ## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.) 
 ## are found in a folder named 'GRCh37'.
 ##
 ## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in 
 ## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices.  The index 
 ## used will be the one compatible with the chosen aligner (as specified using the 
 ## --aligner flag).  
 ##
 ## The entries shown below are only suggested examples, you can add as many DATABASE 
 ## sections as required, and you can comment out or remove as many of the existing 
 ## entries as desired.  We suggest including genomes and sequences that may be sources 
 ## of contamination either because they were run on your sequencer previously, or may 
 ## have contaminated your sample during the library preparation step.
 ##
 ## Human - sequences available from
 ## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
 #DATABASE	Human	/data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
 ##
 ## Mouse - sequence available from
 ## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
 #DATABASE	Mouse	/data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37
 ##
 ## Ecoli- sequence available from EMBL accession U00096.2
 #DATABASE	Ecoli	/data/public/Genomes/Ecoli/Ecoli
 ##
 ## PhiX - sequence available from Refseq accession NC_001422.1
 #DATABASE	PhiX	/data/public/Genomes/PhiX/phi_plus_SNPs
 ##
 ## Adapters - sequence derived from the FastQC contaminants file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
 #DATABASE	Adapters	/data/public/Genomes/Contaminants/Contaminants
 ##
 ## Vector - Sequence taken from the UniVec database
 ## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
 #DATABASE	Vectors		/data/public/Genomes/Vectors/Vectors
 ##
 ## Databases screened by this pipeline.  Every index basename below lives
 ## under /cromwell_root/tmp/fastq_screen_reference/.  The fastq_screen task
 ## (tasks/fastqscreen.wdl) copies its reference directory into
 ## /cromwell_root/tmp/ before running, so these paths resolve only when that
 ## directory is named "fastq_screen_reference".
 ## NOTE(review): the index file names (genome, mouse, ERCC, ...) are assumed
 ## to match the contents of the reference directory - confirm against it.
 DATABASE	Human	/cromwell_root/tmp/fastq_screen_reference/genome
 DATABASE	Mouse	/cromwell_root/tmp/fastq_screen_reference/mouse
 DATABASE	ERCC	/cromwell_root/tmp/fastq_screen_reference/ERCC
 DATABASE	EColi	/cromwell_root/tmp/fastq_screen_reference/ecoli
 DATABASE	Adapter	/cromwell_root/tmp/fastq_screen_reference/adapters
 DATABASE	Vector	/cromwell_root/tmp/fastq_screen_reference/vector
 DATABASE	rRNA	/cromwell_root/tmp/fastq_screen_reference/rRNARef
 DATABASE	Virus	/cromwell_root/tmp/fastq_screen_reference/viral
 DATABASE	Yeast	/cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify
 DATABASE	Mitoch	/cromwell_root/tmp/fastq_screen_reference/Human_mitoch
 DATABASE	Phix	/cromwell_root/tmp/fastq_screen_reference/phix
--- a/defaults
+++ b/defaults
 {   
    "fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
    "fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "trim_front1": "0",
    "trim_tail1": "0",
    "max_len1": "0",
    "trim_front2": "0",
    "trim_tail2": "0",
    "max_len2": "0",
    "adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
    "adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
    "disable_adapter_trimming": "0",
    "length_required": "50",
    "length_required1": "20",
    "UMI": "0",
    "umi_len": "0",
    "umi_loc": "umi_loc",
    "qualified_quality_phred": "20",
    "disable_quality_filtering": "1",
    "hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
    "hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "idx_prefix": "genome_snp_tran",
    "idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
    "fasta": "GRCh38.d1.vd1.fa",
    "pen_cansplice":"0",
    "pen_noncansplice":"3",
    "pen_intronlen":"G,-8,1",
    "min_intronlen":"30",
    "max_intronlen":"500000",
    "maxins":"500",
    "minins":"0",
    "samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
    "samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "insert_size":"8000",
    "gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
    "stringtie_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
    "stringtie_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "minimum_length_allowed_for_the_predicted_transcripts":"200",
    "minimum_isoform_abundance":"0.01",
    "Junctions_no_spliced_reads":"10",
    "maximum_fraction_of_muliplelocationmapped_reads":"0.95",
    "fastqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:0.11.8",
    "fastqc_disk_size": "150",
    "qualimapBAMqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
    "qualimapBAMqc_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
    "qualimapBAMqc_disk_size": "500",
    "qualimapRNAseq_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
    "qualimapRNAseq_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
    "qualimapRNAseq_disk_size": "500",
    "fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
    "fastqscreen_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
    "fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
    "fastqscreen_disk_size": "200",
    "multiqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "multiqc_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
    "multiqc_disk_size": "100",
    "ballgown_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pgx-ballgown:0.0.1",
    "ballgown_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "count_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/count:v1.0",
    "count_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "count_length": "150",
 	"disk_size": "200"
    }
--- a/inputs
+++ b/inputs
 {
 	"{{ project_name }}.read": "{{ read }}",
 	"{{ project_name }}.sample_id": "{{ sample_id }}",
 	"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
 	"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
 	"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
 	"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
 	"{{ project_name }}.max_len1": "{{ max_len1 }}",
 	"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
 	"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
 	"{{ project_name }}.max_len2": "{{ max_len2 }}",
 	"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
 	"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
 	"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
 	"{{ project_name }}.length_required1": "{{ length_required1 }}",
 	"{{ project_name }}.UMI": "{{ UMI }}",
 	"{{ project_name }}.umi_loc": "{{ umi_loc }}",
 	"{{ project_name }}.umi_len": "{{ umi_len }}",
 	"{{ project_name }}.length_required": "{{ length_required }}",
 	"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
 	"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
 	"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
 	"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
 	"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
 	"{{ project_name }}.idx": "{{ idx }}",
 	"{{ project_name }}.fasta": "{{ fasta }}",
 	"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
 	"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
 	"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
 	"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
 	"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
 	"{{ project_name }}.maxins": "{{ maxins }}",
 	"{{ project_name }}.minins": "{{ minins }}",
 	"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
 	"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
 	"{{ project_name }}.insert_size": "{{ insert_size }}",
 	"{{ project_name }}.gtf": "{{ gtf }}",
 	"{{ project_name }}.stringtie_docker": "{{ stringtie_docker }}",
 	"{{ project_name }}.stringtie_cluster": "{{ stringtie_cluster }}",
 	"{{ project_name }}.minimum_length_allowed_for_the_predicted_transcripts": "{{ minimum_length_allowed_for_the_predicted_transcripts }}",
 	"{{ project_name }}.minimum_isoform_abundance": "{{ minimum_isoform_abundance }}",
 	"{{ project_name }}.Junctions_no_spliced_reads": "{{ Junctions_no_spliced_reads }}",
 	"{{ project_name }}.maximum_fraction_of_muliplelocationmapped_reads": "{{ maximum_fraction_of_muliplelocationmapped_reads }}",
 	"{{ project_name }}.fastqc_cluster_config": "{{ fastqc_cluster_config }}",
 	"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
 	"{{ project_name }}.fastqc_disk_size": "{{ fastqc_disk_size }}",
 	"{{ project_name }}.qualimapBAMqc_docker": "{{ qualimapBAMqc_docker }}",
 	"{{ project_name }}.qualimapBAMqc_cluster_config": "{{ qualimapBAMqc_cluster_config }}",
 	"{{ project_name }}.qualimapBAMqc_disk_size": "{{ qualimapBAMqc_disk_size }}",
 	"{{ project_name }}.qualimapRNAseq_docker": "{{ qualimapRNAseq_docker }}",
 	"{{ project_name }}.qualimapRNAseq_cluster_config": "{{ qualimapRNAseq_cluster_config }}",
 	"{{ project_name }}.qualimapRNAseq_disk_size": "{{ qualimapRNAseq_disk_size }}",
 	"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}",
 	"{{ project_name }}.fastqscreen_cluster_config": "{{ fastqscreen_cluster_config }}",
 	"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}",
 	"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}",
 	"{{ project_name }}.fastqscreen_disk_size": "{{ fastqscreen_disk_size }}",
 	"{{ project_name }}.multiqc_cluster_config": "{{ multiqc_cluster_config }}",
 	"{{ project_name }}.multiqc_docker": "{{ multiqc_docker }}",
 	"{{ project_name }}.multiqc_disk_size": "{{ multiqc_disk_size }}",
 	"{{ project_name }}.ballgown_docker": "{{ ballgown_docker }}",
 	"{{ project_name }}.ballgown_cluster": "{{ ballgown_cluster }}",
 	"{{ project_name }}.disk_size": "{{ disk_size if disk_size != '' else 200}}",
 	"{{ project_name }}.count_docker": "{{ count_docker }}",
 	"{{ project_name }}.count_cluster": "{{ count_cluster }}",
 	"{{ project_name }}.count_length": "{{ count_length }}"
 }
--- a/manifest.json
+++ b/manifest.json
 {
  "name": "RNA-Seq QC for Quartet",
  "short_name": "quartet-rnaseq-qc",
  "description": "RNA Sequencing Quality Control Pipeline for Quartet.",
  "home": "http://choppy.3steps.cn/lizhihui/quartet-rnaseq-qc",
  "hidden": false,
  "icons": [
    {
      "src": "",
      "type": "image/png",
      "sizes": "192x192"
    },
    {
      "src": "",
      "type": "image/png",
      "sizes": "192x192"
    }
  ],
  "category": "Pipeline",
  "source": "PGx"
 }
--- a/schema.json
+++ b/schema.json
 {
  "formMode":"batch",
  "fields":[
    {
      "tmplType":"sample_id",
      "type":"number",
      "label":"Sample ID",
      "name":"sample_id",
      "question":"How many samples? For auto generating sample id.",
      "placeholder":"How many samples? For auto generating sample id.",
      "model":"sample_id",
      "disabled":false,
      "min":1,
      "max":100,
      "config":{
        "rules":[{"type":"number","required":true,"message":"Please input a number."}]
      }
    },
    {
      "tmplType":"file",
      "label":"Read1 Files",
      "model":"read1",
      "readOnly":false,
      "multiple":true,
      "filterType":"fastq|fastq.gz|fq|fq.gz",
      "placeholder":"Select files for read1.",
      "config":{
        "rules":[
          {
            "type":"array",
            "required":true,
            "message":"Please select files for read1."
          }
        ]
      }
    },
    {
      "tmplType":"file",
      "label":"Read2 Files",
      "model":"read2",
      "readOnly":false,
      "multiple":true,
      "filterType":"fastq|fastq.gz|fq|fq.gz",
      "placeholder":"Select files for read2.",
      "config":{
        "rules":[
          {
            "type":"array",
            "required":true,
            "message":"Please select files for read2."
          }
        ]
      }
    },
    {
      "tmplType":"file",
      "label":"FastqScreen Conf File",
      "model":"fastq_screen_conf",
      "readOnly":false,
      "multiple":false,
      "filterType":"conf",
      "placeholder":"Select file for fastq screen.",
      "config":{
        "initialValue": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
        "rules":[{"required":true,"message":"Please select file for fastq screen."}]
      }
    },
    {
      "tmplType":"file",
      "label":"References of Fastq Screen",
      "model":"screen_ref_dir",
      "readOnly":false,
      "filterType":"",
      "multiple":false,
      "placeholder":"Select directory for Fastq Screen References.",
      "config":{
        "initialValue": "oss://pgx-reference-data/fastq_screen_reference/",
        "rules":[
          {
            "required":true,
            "message":"Select directory for Fastq Screen References."
          }
        ]
      }
    },
    {
      "tmplType":"file",
      "label":"Index Directory",
      "model":"idx",
      "readOnly":false,
      "filterType":"",
      "multiple":false,
      "placeholder":"Select directory for idx.",
      "config":{
        "initialValue": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
        "rules":[{"required":true,"message":"Please select directory for idx."}]
      }
    },
    {
      "tmplType":"file",
      "label":"GTF File",
      "model":"gtf",
      "readOnly":false,
      "multiple":false,
      "filterType":"gtf",
      "placeholder":"Select file for gtf.",
      "config":{
         "initialValue": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
         "rules":[{"required":true,"message":"Please select file for gtf."}]
      }
    },
    {
      "tmplType":"input",
      "type":"string",
      "label":"Adapter Sequence",
      "name":"adapter_sequence",
      "placeholder":"Please enter adapter sequence.",
      "model":"adapter_sequence",
      "disabled":false,
      "min":1,
      "max":100,
      "config":{
        "initialValue":"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
        "rules":[
          {
            "type":"string",
            "required":false,
            "message":"Please input an adapter sequence."
          }
        ]
      }
    },
    {
      "tmplType":"input",
      "type":"string",
      "label":"Adapter Sequence R2",
      "name":"adapter_sequence_r2",
      "placeholder":"Please enter adapter sequence.",
      "model":"adapter_sequence_r2",
      "disabled":false,
      "min":1,
      "max":100,
      "config":{
        "initialValue":"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
        "rules":[
          {
            "type":"string",
            "required":false,
            "message":"Please input an adapter sequence."
          }
        ]
      }
    },
    {
      "tmplType":"actions",
      "buttons":[
        {"type":"cancel","buttonType":"default","buttonLabel":"Previous Step"},
        {
          "type":"submit",
          "buttonType":"primary",
          "buttonLabel":"Next Step",
          "validate":true
        }
      ]
    }
  ]
 }
--- a/tasks/.DS_Store
+++ b/tasks/.DS_Store
--- a/tasks/ballgown.wdl
+++ b/tasks/ballgown.wdl
 # Task ballgown: turns one sample's Ballgown input files into a single
 # expression table named <sample_id>.txt.
 #
 # Inputs:
 #   gene_abundance - only its file name is used: the basename with the
 #                    ".gene.abundance.txt" suffix removed becomes sample_id.
 #   ballgown       - files copied into /cromwell_root/tmp/<sample_id>, the
 #                    directory handed to the image's `ballgown` command.
 #   docker         - container image for the task.
 #   cluster        - value passed through to the `cluster` runtime attribute.
 #   disk_size      - size inserted into the `dataDisk` runtime string.
 #
 # Post-processing done with sed on the resulting table:
 #   - every double-quote character is removed;
 #   - on line 1 only, "FPKM" followed by any single character is replaced by
 #     "GENE_ID" plus a tab.
 #
 # Output:
 #   mat_expression - the post-processed <sample_id>.txt.
 task ballgown {
    File gene_abundance
    Array[File] ballgown
    String sample_id=basename(gene_abundance, ".gene.abundance.txt")
    String docker
    String cluster
    String disk_size
    command <<<
      mkdir -p /cromwell_root/tmp/${sample_id}
      cp -r ${sep=" " ballgown} /cromwell_root/tmp/${sample_id}
      ballgown /cromwell_root/tmp/${sample_id} ${sample_id}.txt
      sed -i 's/"//g' ${sample_id}.txt
      sed -i '1s/FPKM./GENE_ID\t/g' ${sample_id}.txt
    >>>
    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
      File mat_expression = "${sample_id}.txt"
    }
 }
--- a/tasks/count.wdl
+++ b/tasks/count.wdl
 # Task count: produces gene-level and transcript-level count matrices for
 # one sample from its Ballgown input files.
 #
 # Inputs:
 #   gene_abundance - only its file name is used: the basename with the
 #                    ".gene.abundance.txt" suffix removed becomes sample_id.
 #   ballgown       - files copied into /cromwell_root/tmp/ballgown/<sample_id>.
 #   count_length   - value passed to the image's `count` command as `-l`.
 #   docker         - container image for the task.
 #   cluster        - value passed through to the `cluster` runtime attribute.
 #   disk_size      - size inserted into the `dataDisk` runtime string.
 #
 # The `count` command is pointed at the parent directory
 # (/cromwell_root/tmp/ballgown), not at the per-sample directory.  Afterwards
 # "gene_id" on line 1 of the gene matrix is rewritten to "GENE_ID".
 #
 # Outputs:
 #   mat_expression_genecount       - <sample_id>_gene_count_matrix.csv
 #   mat_expression_transcriptcount - <sample_id>_transcript_count_matrix.csv
 task count {
    File gene_abundance
    Array[File] ballgown
    String sample_id=basename(gene_abundance, ".gene.abundance.txt")
    String docker
    String cluster
    String disk_size
    Int count_length
    command <<<
      mkdir -p /cromwell_root/tmp/ballgown/${sample_id}
      cp -r ${sep=" " ballgown} /cromwell_root/tmp/ballgown/${sample_id}
      count -i /cromwell_root/tmp/ballgown -l ${count_length} -g ${sample_id}_gene_count_matrix.csv -t ${sample_id}_transcript_count_matrix.csv
      sed -i '1s/gene_id/GENE_ID/g' ${sample_id}_gene_count_matrix.csv
    >>>
    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
      File mat_expression_genecount = "${sample_id}_gene_count_matrix.csv"
      File mat_expression_transcriptcount = "${sample_id}_transcript_count_matrix.csv"
    }
 }
--- a/tasks/fastp.wdl
+++ b/tasks/fastp.wdl
 # Task fastp: staged preprocessing of a single read file with fastp.
 #
 # The command runs three stages in order.  Each stage either calls fastp or
 # copies its input through unchanged, depending on an Int flag:
 #   1. disable_quality_filtering == 0 -> copy the raw read;
 #      otherwise run fastp with only the trim_front1/trim_tail1/max_len1
 #      options.
 #      NOTE(review): the flag name suggests the opposite polarity - confirm
 #      the intended meaning before relying on it.
 #   2. UMI == 0 -> copy; otherwise run fastp -U with umi_loc and umi_len.
 #   3. disable_adapter_trimming == 0 -> run fastp with -l length_required,
 #      -q qualified_quality_phred, -u length_required1 and adapter_sequence;
 #      otherwise copy.
 #
 # Every fastp call writes the same <sample_id>.json and <sample_id>.html, so
 # the reports describe the last stage that actually ran fastp.  If no stage
 # runs fastp, those two declared outputs are never created.
 #
 # Inputs:
 #   read             - gzipped FASTQ; sample_id is its basename with a
 #                      trailing ".fastq.gz" or ".fq.gz" removed.
 #   adapter_sequence - adapter passed to fastp in stage 3.
 #   docker, cluster, disk_size - runtime settings.
 #   adapter_sequence_r2, trim_front2, trim_tail2, max_len2 - accepted so that
 #                      callers keep working, but not used by the command.
 #
 # Outputs:
 #   json   - <sample_id>.json
 #   report - <sample_id>.html
 #   Trim   - <sample_id>.fastq.gz, the result of stage 3.
 task fastp {
    File read
    # Derived from the declared input `read`.
    String sample_id=sub(basename(read), "\\.(fastq|fq)\\.gz$", "")
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering
    command <<<
       set -o pipefail
       set -e
       mkdir -p /cromwell_root/tmp/fastp/
       ##1.Disable_quality_filtering
       if [ "${disable_quality_filtering}" == 0 ]
       then
          cp ${read} /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz
       else
          fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} -i ${read} -o /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
       fi
       ##2.UMI
       if [ "${UMI}" == 0 ]
       then
          cp /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz
       else
          fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} -i /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
       fi
       ##3.Trim
       if [ "${disable_adapter_trimming}" == 0 ]
       then
          fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence}  --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1}  -i /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz -o ${sample_id}.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
       else
          cp /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz ${sample_id}.fastq.gz
       fi
    >>>
    runtime {
       docker: docker
       cluster: cluster
       systemDisk: "cloud_ssd 40"
       dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
       File json = "${sample_id}.json"
       File report = "${sample_id}.html"
       File Trim = "${sample_id}.fastq.gz"
    }
 }
--- a/tasks/fastqc.wdl
+++ b/tasks/fastqc.wdl
 # Task fastqc: runs FastQC on one read file and collects its HTML report and
 # zip archive.
 #
 # Inputs:
 #   read           - read file handed to fastqc.
 #   docker         - container image for the task.
 #   cluster_config - value passed through to the `cluster` runtime attribute.
 #   disk_size      - size inserted into the `dataDisk` runtime string.
 #
 # fastqc is run with as many threads as `nproc` reports and writes into the
 # task's working directory.
 #
 # Outputs (names derived from the basename of `read`, with a trailing
 # ".fastq.gz" or ".fq.gz" replaced):
 #   read_html - <name>_fastqc.html
 #   read_zip  - <name>_fastqc.zip
 task fastqc {
 	File read
 	String docker
 	String cluster_config
 	String disk_size
 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		fastqc -t $nt -o ./ ${read}
 	>>>
 	runtime {
 		docker:docker
    	cluster: cluster_config
    	systemDisk: "cloud_ssd 40"
    	dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File read_html = sub(basename(read), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
 		File read_zip = sub(basename(read), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
 	}
 }
--- a/tasks/fastqscreen.wdl
+++ b/tasks/fastqscreen.wdl
 # Task fastq_screen: screens one read file against the databases listed in a
 # FastQ Screen configuration file, using bowtie2 as the aligner.
 #
 # Inputs:
 #   read              - read file to screen.
 #   screen_ref_dir    - reference directory; copied recursively into
 #                       /cromwell_root/tmp/ before the screen runs.
 #   fastq_screen_conf - configuration file passed via --conf.
 #   docker            - container image for the task.
 #   cluster_config    - value passed through to the `cluster` runtime attribute.
 #   disk_size         - size inserted into the `dataDisk` runtime string.
 #
 # readname strips only a ".fastq.gz" suffix from the basename of `read`.
 # NOTE(review): a ".fq.gz" input would keep its suffix in readname; the
 # declared output names assume the ".fastq.gz" case - confirm the callers.
 #
 # fastq_screen is run with --top 100000 and with --threads set to the value
 # reported by `nproc`.
 #
 # Outputs:
 #   png, txt, html - <readname>_screen.png / .txt / .html
 task fastq_screen {
 	File read
 	File screen_ref_dir
 	File fastq_screen_conf
 	String readname = basename(read,".fastq.gz")
 	String docker
 	String cluster_config
 	String disk_size
 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		mkdir -p /cromwell_root/tmp
 		cp -r ${screen_ref_dir} /cromwell_root/tmp/
 		#sed -i "s#/cromwell_root/fastq_screen_reference#${screen_ref_dir}#g" ${fastq_screen_conf}
 		fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read}
 	>>>
 	runtime {
 		docker:docker
    	cluster: cluster_config
    	systemDisk: "cloud_ssd 40"
    	dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File png = "${readname}_screen.png"
 		File txt = "${readname}_screen.txt"
 		File html = "${readname}_screen.html"
 	}
 }
--- a/tasks/hisat2.wdl
+++ b/tasks/hisat2.wdl
 # Task hisat2: aligns one file of unpaired reads with HISAT2 and writes a
 # SAM file plus the reads that failed to align.
 #
 # Inputs:
 #   idx        - index location; the index basename used is idx/idx_prefix.
 #   idx_prefix - basename of the HISAT2 index files inside idx.
 #   Trim       - read file to align; sample_id is its basename with the
 #                ".fastq.gz" suffix removed.
 #   pen_cansplice, pen_noncansplice, pen_intronlen, min_intronlen,
 #   max_intronlen, maxins, minins - forwarded to the same-named hisat2 options.
 #   docker, cluster, disk_size - runtime settings.
 #
 # The task receives exactly one read file, so it is passed as unpaired input
 # (-U).  HISAT2 refuses to run when -1 is given without a matching -2.
 # Unaligned reads are captured with --un-gz, the unpaired counterpart of
 # --un-conc-gz, so the declared `unmapreads` file is actually written.
 #
 # Outputs:
 #   sam        - <sample_id>.sam
 #   unmapreads - <sample_id>_un.fq.gz
 task hisat2 {
   File idx
   File Trim
   String idx_prefix
   String sample_id=basename(Trim, ".fastq.gz")
   String docker
   String cluster
   String disk_size
   String pen_intronlen
   Int pen_cansplice
   Int pen_noncansplice
   Int min_intronlen
   Int max_intronlen
   Int maxins
   Int minins
   command <<<
      nt=$(nproc)
      hisat2 -t -p $nt -x ${idx}/${idx_prefix} --pen-cansplice ${pen_cansplice} --pen-noncansplice ${pen_noncansplice} --pen-intronlen ${pen_intronlen} --min-intronlen ${min_intronlen} --max-intronlen ${max_intronlen} --maxins ${maxins} --minins ${minins} --un-gz ${sample_id}_un.fq.gz -U ${Trim} -S ${sample_id}.sam
   >>>
   runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
   }
   output {
      File sam = "${sample_id}.sam"
      File unmapreads = "${sample_id}_un.fq.gz"
   }
 }
--- a/tasks/multiqc.wdl
+++ b/tasks/multiqc.wdl
 # Task multiqc: gathers FastQC, FastQ Screen and Qualimap results under
 # /cromwell_root/tmp/ and runs MultiQC over that directory.
 #
 # Inputs:
 #   read1_zip, read2_zip - files copied into /cromwell_root/tmp/fastqc.
 #   txt1, txt2           - files copied into /cromwell_root/tmp/fastqscreen.
 #   bamqc_zip            - gzipped tar archives, each extracted into
 #                          /cromwell_root/tmp/bamqc.
 #   rnaseq_zip           - gzipped tar archives, each extracted into
 #                          /cromwell_root/tmp/rnaseq.
 #   docker               - container image for the task.
 #   cluster_config       - value passed through to the `cluster` runtime attribute.
 #   disk_size            - size inserted into the `dataDisk` runtime string.
 #
 # After MultiQC finishes, four tables from multiqc_data/ are duplicated into
 # the working directory so they can be exposed as individual outputs.
 #
 # Outputs:
 #   multiqc_html          - multiqc_report.html
 #   multiqc_txt           - every file matched by multiqc_data/*
 #   multiqc_fastq_screen  - multiqc_fastq_screen.txt
 #   multiqc_fastqc        - multiqc_fastqc.txt
 #   multiqc_general_stats - multiqc_general_stats.txt
 #   bamqc_genome_results  - multiqc_qualimap_bamqc_genome_results.txt
 task multiqc {
 	Array[File] read1_zip
 	Array[File] read2_zip
 	Array[File] txt1
 	Array[File] txt2
 	Array[File] bamqc_zip
 	Array[File] rnaseq_zip
 	String docker
 	String cluster_config
 	String disk_size
 	command <<<
 		set -o pipefail
 		set -e
 		mkdir -p /cromwell_root/tmp/fastqc
 		mkdir -p /cromwell_root/tmp/fastqscreen
 		mkdir -p /cromwell_root/tmp/bamqc
 		mkdir -p /cromwell_root/tmp/rnaseq
 		cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
 		cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen
 		for i in ${sep=" " bamqc_zip}
 		do
 		  tar -zxvf $i -C /cromwell_root/tmp/bamqc
 		done
 		for i in ${sep=" " rnaseq_zip}
 		do
 		  tar -zxvf $i -C /cromwell_root/tmp/rnaseq
 		done
 		multiqc /cromwell_root/tmp/
 		cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt
 		cat multiqc_data/multiqc_fastqc.txt > multiqc_fastqc.txt
 		cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt
 		cat multiqc_data/multiqc_qualimap_bamqc_genome_results.txt > multiqc_qualimap_bamqc_genome_results.txt
 	>>>
 	runtime {
 		docker:docker
 		cluster:cluster_config
 		systemDisk:"cloud_ssd 40"
 		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File multiqc_html = "multiqc_report.html"
 		Array[File] multiqc_txt = glob("multiqc_data/*")
 		File multiqc_fastq_screen = "multiqc_fastq_screen.txt"
 		File multiqc_fastqc = "multiqc_fastqc.txt"
 		File multiqc_general_stats = "multiqc_general_stats.txt"
 		File bamqc_genome_results = "multiqc_qualimap_bamqc_genome_results.txt"
 	}
 }
--- a/tasks/qualimapBAMqc.wdl
+++ b/tasks/qualimapBAMqc.wdl
 # Task qualimapBAMqc: runs Qualimap `bamqc` on one BAM file and packages the
 # report directory as a gzipped tar archive.
 #
 # Inputs:
 #   bam            - BAM file to analyse; bamname is its basename with the
 #                    ".bam" suffix removed and is used as the output directory.
 #   docker         - container image for the task.
 #   cluster_config - value passed through to the `cluster` runtime attribute.
 #   disk_size      - size inserted into the `dataDisk` runtime string.
 #
 # Qualimap is called with PDF:HTML output, -nt set to the value reported by
 # `nproc`, and a fixed Java memory size of 32G.
 #
 # Output:
 #   bamqc_gz - <bamname>_bamqc_qualimap.tar.gz
 task qualimapBAMqc {
 	File bam
 	String bamname = basename(bam,".bam")
 	String docker
 	String cluster_config
 	String disk_size
 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		/opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=32G 
 		tar -zcvf ${bamname}_bamqc_qualimap.tar.gz ${bamname}
 	>>>
 	runtime {
 		docker:docker
 		cluster:cluster_config
 		systemDisk:"cloud_ssd 40"
 		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File bamqc_gz = "${bamname}_bamqc_qualimap.tar.gz"
 	}
 }
--- a/tasks/qualimapRNAseq.wdl
+++ b/tasks/qualimapRNAseq.wdl
 # Task qualimapRNAseq: runs Qualimap `rnaseq` on one BAM file with a GTF
 # annotation and packages the report directory as a gzipped tar archive.
 #
 # Inputs:
 #   bam            - BAM file to analyse; bamname is its basename with the
 #                    ".bam" suffix removed and is used as the output directory.
 #   gtf            - annotation file passed via -gtf.
 #   docker         - container image for the task.
 #   cluster_config - value passed through to the `cluster` runtime attribute.
 #   disk_size      - size inserted into the `dataDisk` runtime string.
 #
 # Qualimap is called with HTML output and a fixed Java memory size of 10G.
 # The shell variable `nt` is computed but not passed to the qualimap command.
 # NOTE(review): the -pe flag is always set; confirm that the BAM files this
 # task receives really come from paired-end data.
 #
 # Output:
 #   rnaseq_gz - <bamname>_rnaseq_qualimap.tar.gz
 task qualimapRNAseq {
 	File bam
 	File gtf
 	String bamname = basename(bam,".bam")
 	String docker
 	String cluster_config
 	String disk_size
 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		/opt/qualimap/qualimap rnaseq -bam ${bam} -outformat HTML -outdir ${bamname} -gtf ${gtf} -pe --java-mem-size=10G
 		tar -zcvf ${bamname}_rnaseq_qualimap.tar.gz ${bamname}
 	>>>
 	runtime {
 		docker:docker
 		cluster:cluster_config
 		systemDisk:"cloud_ssd 40"
 		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File rnaseq_gz = "${bamname}_rnaseq_qualimap.tar.gz"
 	}
 }
--- a/tasks/samtools.wdl
+++ b/tasks/samtools.wdl
 # Task samtools: converts a SAM file to BAM, sorts and indexes it, writes a
 # subsampled BAM, and extracts the insert-size rows from `samtools stats`.
 #
 # Inputs:
 #   sam         - alignment file; sample_id is its basename with the ".sam"
 #                 suffix removed and prefixes every file this task writes.
 #   insert_size - value passed to `samtools stats -i`.
 #   docker      - container image for the task.
 #   cluster     - value passed through to the `cluster` runtime attribute.
 #   disk_size   - size inserted into the `dataDisk` runtime string.
 #
 # Derived file names (String declarations below): <sample_id>.bam,
 # <sample_id>.sorted.bam, <sample_id>.percent.bam, <sample_id>.sorted.bam.bai
 # and <sample_id>.ins_size.
 #
 # Command steps, in order:
 #   1. view -bS        : SAM -> BAM.
 #   2. sort -m 1000000000 : sort the BAM into sorted_bam.
 #   3. index           : index sorted_bam; the output section expects the
 #                        index at <sample_id>.sorted.bam.bai.
 #   4. view -bs 42.1   : subsample sorted_bam into percent_bam (per the
 #                        samtools manual the integer part of -s is the seed
 #                        and the fractional part the fraction kept).
 #   5. stats | grep ^IS | cut -f 2- : keep the rows starting with "IS" and
 #                        drop their first column, writing <sample_id>.ins_size.
 #
 # Outputs:
 #   out_bam       - sorted BAM
 #   out_percent   - subsampled BAM
 #   out_bam_index - index of the sorted BAM
 #   out_ins_size  - insert-size table
 task samtools {
    File sam
    String sample_id=basename(sam, ".sam")
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String docker
    String cluster
    String disk_size
    Int insert_size
    command <<<
       set -o pipefail
       set -e
       /opt/conda/bin/samtools view -bS ${sam} > ${bam}
       /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
       /opt/conda/bin/samtools index ${sorted_bam}
       /opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}
       /opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${sample_id}.ins_size
    >>>
    runtime {
       docker: docker
       cluster: cluster
       systemDisk: "cloud_ssd 40"
       dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
      File out_bam = sorted_bam
      File out_percent = percent_bam
      File out_bam_index = sorted_bam_index
      File out_ins_size = ins_size
    }
 }
--- a/tasks/stringtie.wdl
+++ b/tasks/stringtie.wdl
 # Task stringtie: runs StringTie on one sorted BAM file against a reference
 # GTF and collects the abundance tables and Ballgown files it writes.
 #
 # Inputs:
 #   bam - alignment file; sample_id is its basename with the ".sorted.bam"
 #         suffix removed.
 #   gtf - reference annotation passed via -G.
 #   minimum_isoform_abundance                            -> stringtie -f
 #   minimum_length_allowed_for_the_predicted_transcripts -> stringtie -m
 #   Junctions_no_spliced_reads                           -> stringtie -a
 #   maximum_fraction_of_muliplelocationmapped_reads      -> stringtie -M
 #   docker, cluster, disk_size - runtime settings.
 #
 # StringTie is run with -e and -B, with -p set to the value reported by
 # `nproc`.  The assembled GTF goes to ballgown/<sample_id>/<sample_id>.gtf;
 # the *.ctab files listed in the output section are expected in that same
 # directory.
 #
 # Outputs:
 #   covered_transcripts - <sample_id>.cov.ref.gtf (stringtie -C)
 #   gene_abundance      - <sample_id>.gene.abundance.txt (stringtie -A)
 #   ballgown            - the GTF plus e2t, e_data, i2t, i_data and t_data
 #                         .ctab files under ballgown/<sample_id>/
 task stringtie {
    File bam
    File gtf
    String docker
    String sample_id=basename(bam, ".sorted.bam")
    String cluster
    String disk_size
    Int minimum_length_allowed_for_the_predicted_transcripts
    Int Junctions_no_spliced_reads
    Float minimum_isoform_abundance
    Float maximum_fraction_of_muliplelocationmapped_reads
    command <<<
 	nt=$(nproc)
 	mkdir ballgown
 	/opt/conda/bin/stringtie -e -B -p $nt -f ${minimum_isoform_abundance} -m ${minimum_length_allowed_for_the_predicted_transcripts} -a ${Junctions_no_spliced_reads} -M ${maximum_fraction_of_muliplelocationmapped_reads} -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam}
    >>>
    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
      File covered_transcripts = "${sample_id}.cov.ref.gtf"
      File gene_abundance = "${sample_id}.gene.abundance.txt"
      Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
    }
 }
--- a/workflow.wdl
+++ b/workflow.wdl
 import "./tasks/fastp.wdl" as fastp
 import "./tasks/hisat2.wdl" as hisat2
 import "./tasks/samtools.wdl" as samtools
 import "./tasks/stringtie.wdl" as stringtie
 import "./tasks/fastqc.wdl" as fastqc
 import "./tasks/fastqscreen.wdl" as fastqscreen
 import "./tasks/qualimapBAMqc.wdl" as qualimapBAMqc
 import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq
 import "./tasks/ballgown.wdl" as ballgown
 import "./tasks/count.wdl" as count
 # Workflow {{ project_name }}: per-sample RNA-seq QC and quantification.
 #
 # Call graph (each arrow is a file passed between tasks):
 #   read -> fastp -> Trim
 #     Trim -> fastqc
 #     Trim -> fastqscreen (with screen_ref_dir and fastq_screen_conf)
 #     Trim -> hisat2 -> sam -> samtools
 #       samtools.out_percent -> qualimapBAMqc, qualimapRNAseq (with gtf)
 #       samtools.out_bam     -> stringtie (with gtf)
 #         stringtie.ballgown + stringtie.gene_abundance -> ballgown, count
 #
 # The workflow name is a template placeholder filled in when the pipeline is
 # rendered; the same placeholder prefixes every key of the inputs file.
 #
 # multiqc_cluster_config, multiqc_docker and multiqc_disk_size are declared
 # because the inputs file supplies them, but no multiqc call is made here.
 workflow {{ project_name }} {
 	# Input files and reference data
 	File read
 	File idx
 	File screen_ref_dir
 	File fastq_screen_conf
 	File gtf
 	# String parameters: images, cluster specs and free-form tool options
 	String fastp_docker
 	String adapter_sequence
 	# Must be declared here because the fastp call below forwards it.
 	String adapter_sequence_r2
 	String fastp_cluster
 	String umi_loc
 	String idx_prefix
 	String pen_intronlen
 	String fastqc_cluster_config
 	String fastqc_docker
 	String fastqscreen_docker
 	String fastqscreen_cluster_config
 	String hisat2_docker
 	String hisat2_cluster
 	String qualimapBAMqc_docker
 	String qualimapBAMqc_cluster_config
 	String qualimapRNAseq_docker
 	String qualimapRNAseq_cluster_config
 	String samtools_docker
 	String samtools_cluster
 	String stringtie_docker
 	String stringtie_cluster
 	String multiqc_cluster_config
 	String multiqc_docker
 	# Integer parameters
 	Int multiqc_disk_size
 	Int trim_front1
 	Int trim_tail1
 	Int max_len1
 	Int trim_front2
 	Int trim_tail2
 	Int max_len2
 	Int disable_adapter_trimming
 	Int length_required
 	Int umi_len
 	Int UMI
 	Int qualified_quality_phred
 	Int length_required1
 	Int disable_quality_filtering
 	Int pen_cansplice
 	Int pen_noncansplice
 	Int min_intronlen
 	Int max_intronlen
 	Int maxins
 	Int minins
 	Int fastqc_disk_size
 	Int fastqscreen_disk_size
 	Int qualimapBAMqc_disk_size
 	Int qualimapRNAseq_disk_size
 	Int insert_size
 	Int minimum_length_allowed_for_the_predicted_transcripts
 	Int Junctions_no_spliced_reads
 	Int count_length
 	# Float parameters
 	Float minimum_isoform_abundance
 	Float maximum_fraction_of_muliplelocationmapped_reads
 	# Settings for the ballgown and count tasks, plus the shared disk size
 	String ballgown_docker
 	String ballgown_cluster
 	String disk_size
 	String count_docker
 	String count_cluster
 	# Read preprocessing; produces fastp.Trim used by every downstream branch.
 	call fastp.fastp as fastp {
 		input:
 		read = read,
 		docker = fastp_docker,
 		cluster = fastp_cluster,
 		disk_size = disk_size,
 		adapter_sequence = adapter_sequence,
 		adapter_sequence_r2 = adapter_sequence_r2,
 		umi_loc = umi_loc,
 		trim_front1 = trim_front1,
 		trim_tail1 = trim_tail1,
 		max_len1 = max_len1,
 		trim_front2 = trim_front2,
 		trim_tail2 = trim_tail2,
 		max_len2 = max_len2,
 		disable_adapter_trimming = disable_adapter_trimming,
 		length_required = length_required,
 		umi_len = umi_len,
 		UMI = UMI,
 		qualified_quality_phred = qualified_quality_phred,
 		length_required1 = length_required1,
 		disable_quality_filtering = disable_quality_filtering
 	}
 	# Read-level QC on the preprocessed reads.
 	call fastqc.fastqc as fastqc {
 		input:
 		read = fastp.Trim,
 		docker = fastqc_docker,
 		cluster_config = fastqc_cluster_config,
 		disk_size = fastqc_disk_size
 	}
 	# Contamination screen on the preprocessed reads.
 	call fastqscreen.fastq_screen as fastqscreen {
 		input:
 		read = fastp.Trim,
 		screen_ref_dir = screen_ref_dir,
 		fastq_screen_conf = fastq_screen_conf,
 		docker = fastqscreen_docker,
 		cluster_config = fastqscreen_cluster_config,
 		disk_size = fastqscreen_disk_size
 	}
 	# Alignment of the preprocessed reads.
 	call hisat2.hisat2 as hisat2 {
 		input:
 		idx = idx,
 		idx_prefix = idx_prefix,
 		Trim = fastp.Trim,
 		docker = hisat2_docker,
 		cluster = hisat2_cluster,
 		disk_size = disk_size,
 		pen_intronlen = pen_intronlen,
 		pen_cansplice = pen_cansplice,
 		pen_noncansplice = pen_noncansplice,
 		min_intronlen = min_intronlen,
 		max_intronlen = max_intronlen,
 		maxins = maxins,
 		minins = minins
 	}
 	# SAM -> sorted/indexed BAM, subsampled BAM and insert-size table.
 	call samtools.samtools as samtools {
 		input:
 		sam = hisat2.sam,
 		docker = samtools_docker,
 		cluster = samtools_cluster,
 		disk_size = disk_size,
 		insert_size = insert_size
 	}
 	# Both Qualimap tasks read the subsampled BAM (out_percent).
 	call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
 		input:
 		bam = samtools.out_percent,
 		docker = qualimapBAMqc_docker,
 		cluster_config = qualimapBAMqc_cluster_config,
 		disk_size = qualimapBAMqc_disk_size
 	}
 	call qualimapRNAseq.qualimapRNAseq as qualimapRNAseq {
 		input:
 		bam = samtools.out_percent,
 		docker = qualimapRNAseq_docker,
 		cluster_config = qualimapRNAseq_cluster_config,
 		disk_size = qualimapRNAseq_disk_size,
 		gtf = gtf
 	}
 	# Quantification reads the full sorted BAM (out_bam).
 	call stringtie.stringtie as stringtie {
 		input:
 		gtf = gtf,
 		bam = samtools.out_bam,
 		docker = stringtie_docker,
 		cluster = stringtie_cluster,
 		disk_size = disk_size,
 		minimum_length_allowed_for_the_predicted_transcripts = minimum_length_allowed_for_the_predicted_transcripts,
 		Junctions_no_spliced_reads = Junctions_no_spliced_reads,
 		minimum_isoform_abundance = minimum_isoform_abundance,
 		maximum_fraction_of_muliplelocationmapped_reads = maximum_fraction_of_muliplelocationmapped_reads
 	}
 	# Expression table built from StringTie's Ballgown files.
 	call ballgown.ballgown as ballgown {
 		input:
 		docker = ballgown_docker,
 		cluster = ballgown_cluster,
 		ballgown = stringtie.ballgown,
 		gene_abundance = stringtie.gene_abundance,
 		disk_size = disk_size
 	}
 	# Gene and transcript count matrices built from the same files.
 	call count.count as count {
 		input:
 		docker = count_docker,
 		cluster = count_cluster,
 		ballgown = stringtie.ballgown,
 		disk_size = disk_size,
 		gene_abundance = stringtie.gene_abundance,
 		count_length = count_length
 	}
 }