4 年前 · 94a980c182
--- a/.DS_Store
+++ b/.DS_Store
--- a/conf/fastq_screen.conf
+++ b/conf/fastq_screen.conf
@@ -0,0 +1,94 @@
 # This is an example configuration file for FastQ Screen

 ############################
 ## Bowtie, Bowtie 2 or BWA #
 ############################
 ## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set 
 ## this value to tell the program where to find your chosen aligner.  Uncomment 
 ## the relevant line below and set the appropriate location.  Please note, 
 ## this path should INCLUDE the executable filename.

 #BOWTIE	/usr/local/bin/bowtie/bowtie
 #BOWTIE2 /usr/local/bowtie2/bowtie2
 #BWA /usr/local/bwa/bwa



 ############################################
 ## Bismark (for bisulfite sequencing only) #
 ############################################
 ## If the Bismark binary is not in your PATH then you can set this value to 
 ## tell the program where to find it.  Uncomment the line below and set the 
 ## appropriate location. Please note, this path should INCLUDE the executable 
 ## filename.

 #BISMARK	/usr/local/bin/bismark/bismark



 ############
 ## Threads #
 ############
 ## Genome aligners can be made to run across multiple CPU cores to speed up 
 ## searches.  Set this value to the number of cores you want for mapping reads.

 THREADS		32



 ##############
 ## DATABASES #
 ##############
 ## This section enables you to configure multiple genome databases (aligner index 
 ## files) to search against in your screen.  For each genome you need to provide a 
 ## database name (which can't contain spaces) and the location of the aligner index 
 ## files.
 ##
 ## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
 ## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
 ## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.) 
 ## are found in a folder named 'GRCh37'.
 ##
 ## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in 
 ## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices.  The index 
 ## used will be the one compatible with the chosen aligner (as specified using the 
 ## --aligner flag).  
 ##
 ## The entries shown below are only suggested examples, you can add as many DATABASE 
 ## sections as required, and you can comment out or remove as many of the existing 
 ## entries as desired.  We suggest including genomes and sequences that may be sources 
 ## of contamination either because they were run on your sequencer previously, or may 
 ## have contaminated your sample during the library preparation step.
 ##
 ## Human - sequences available from
 ## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
 #DATABASE	Human	/data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
 ##
 ## Mouse - sequence available from
 ## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
 #DATABASE	Mouse	/data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37
 ##
 ## Ecoli- sequence available from EMBL accession U00096.2
 #DATABASE	Ecoli	/data/public/Genomes/Ecoli/Ecoli
 ##
 ## PhiX - sequence available from Refseq accession NC_001422.1
 #DATABASE	PhiX	/data/public/Genomes/PhiX/phi_plus_SNPs
 ##
 ## Adapters - sequence derived from the FastQC contaminants file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
 #DATABASE	Adapters	/data/public/Genomes/Contaminants/Contaminants
 ##
 ## Vector - Sequence taken from the UniVec database
 ## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
 #DATABASE	Vectors		/data/public/Genomes/Vectors/Vectors

 DATABASE	Human	/cromwell_root/tmp/fastq_screen_reference/genome
 DATABASE	Mouse	/cromwell_root/tmp/fastq_screen_reference/mouse
 DATABASE	ERCC	/cromwell_root/tmp/fastq_screen_reference/ERCC
 DATABASE	EColi	/cromwell_root/tmp/fastq_screen_reference/ecoli
 DATABASE	Adapter	/cromwell_root/tmp/fastq_screen_reference/adapters
 DATABASE	Vector	/cromwell_root/tmp/fastq_screen_reference/vector
 DATABASE	rRNA	/cromwell_root/tmp/fastq_screen_reference/rRNARef
 DATABASE	Virus	/cromwell_root/tmp/fastq_screen_reference/viral
 DATABASE	Yeast	/cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify
 DATABASE	Mitoch	/cromwell_root/tmp/fastq_screen_reference/Human_mitoch
 DATABASE	Phix	/cromwell_root/tmp/fastq_screen_reference/phix
--- a/defaults
+++ b/defaults
@@ -0,0 +1,61 @@
 {   
    "fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
    "fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "trim_front1": "0",
    "trim_tail1": "0",
    "max_len1": "0",
    "trim_front2": "0",
    "trim_tail2": "0",
    "max_len2": "0",
    "adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
    "adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
    "disable_adapter_trimming": "0",
    "length_required": "50",
    "length_required1": "20",
    "UMI": "0",
    "umi_len": "0",
    "umi_loc": "umi_loc",
    "qualified_quality_phred": "20",
    "disable_quality_filtering": "1",
    "hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
    "hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "idx_prefix": "genome_snp_tran",
    "idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
    "fasta": "GRCh38.d1.vd1.fa",
    "pen_cansplice":"0",
    "pen_noncansplice":"3",
    "pen_intronlen":"G,-8,1",
    "min_intronlen":"30",
    "max_intronlen":"500000",
    "maxins":"500",
    "minins":"0",
    "samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
    "samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "insert_size":"8000",
    "gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
    "stringtie_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
    "stringtie_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "minimum_length_allowed_for_the_predicted_transcripts":"200",
    "minimum_isoform_abundance":"0.01",
    "Junctions_no_spliced_reads":"10",
    "maximum_fraction_of_muliplelocationmapped_reads":"0.95",
    "fastqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
    "fastqc_disk_size": "150",
    "qualimapBAMqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
    "qualimapBAMqc_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
    "qualimapBAMqc_disk_size": "500",
    "qualimapRNAseq_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
    "qualimapRNAseq_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
    "qualimapRNAseq_disk_size": "500",
    "fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
    "fastqscreen_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
    "fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
    "fastqscreen_disk_size": "200",
    "multiqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
    "multiqc_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
    "multiqc_disk_size": "100",
    "ballgown_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pgx-ballgown:0.0.1",
    "ballgown_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc"
    }
--- a/inputs
+++ b/inputs
@@ -0,0 +1,65 @@
 {
 	"{{ project_name }}.read1": "{{ read1 }}",
 	"{{ project_name }}.read2": "{{ read2 }}",
 	"{{ project_name }}.sample_id": "{{ sample_id }}",
 	"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
 	"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
 	"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
 	"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
 	"{{ project_name }}.max_len1": "{{ max_len1 }}",
 	"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
 	"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
 	"{{ project_name }}.max_len2": "{{ max_len2 }}",
 	"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
 	"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
 	"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
 	"{{ project_name }}.length_required1": "{{ length_required1 }}",
 	"{{ project_name }}.UMI": "{{ UMI }}",
 	"{{ project_name }}.umi_loc": "{{ umi_loc }}",
 	"{{ project_name }}.umi_len": "{{ umi_len }}",
 	"{{ project_name }}.length_required": "{{ length_required }}",
 	"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
 	"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
 	"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
 	"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
 	"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
 	"{{ project_name }}.idx": "{{ idx }}",
 	"{{ project_name }}.fasta": "{{ fasta }}",
 	"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
 	"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
 	"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
 	"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
 	"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
 	"{{ project_name }}.maxins": "{{ maxins }}",
 	"{{ project_name }}.minins": "{{ minins }}",
 	"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
 	"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
 	"{{ project_name }}.insert_size": "{{ insert_size }}",
 	"{{ project_name }}.gtf": "{{ gtf }}",
 	"{{ project_name }}.stringtie_docker": "{{ stringtie_docker }}",
 	"{{ project_name }}.stringtie_cluster": "{{ stringtie_cluster }}",
 	"{{ project_name }}.minimum_length_allowed_for_the_predicted_transcripts": "{{ minimum_length_allowed_for_the_predicted_transcripts }}",
 	"{{ project_name }}.minimum_isoform_abundance": "{{ minimum_isoform_abundance }}",
 	"{{ project_name }}.Junctions_no_spliced_reads": "{{ Junctions_no_spliced_reads }}",
 	"{{ project_name }}.maximum_fraction_of_muliplelocationmapped_reads": "{{ maximum_fraction_of_muliplelocationmapped_reads }}",
 	"{{ project_name }}.fastqc_cluster_config": "{{ fastqc_cluster_config }}",
 	"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
 	"{{ project_name }}.fastqc_disk_size": "{{ fastqc_disk_size }}",
 	"{{ project_name }}.qualimapBAMqc_docker": "{{ qualimapBAMqc_docker }}",
 	"{{ project_name }}.qualimapBAMqc_cluster_config": "{{ qualimapBAMqc_cluster_config }}",
 	"{{ project_name }}.qualimapBAMqc_disk_size": "{{ qualimapBAMqc_disk_size }}",
 	"{{ project_name }}.qualimapRNAseq_docker": "{{ qualimapRNAseq_docker }}",
 	"{{ project_name }}.qualimapRNAseq_cluster_config": "{{ qualimapRNAseq_cluster_config }}",
 	"{{ project_name }}.qualimapRNAseq_disk_size": "{{ qualimapRNAseq_disk_size }}",
 	"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}",
 	"{{ project_name }}.fastqscreen_cluster_config": "{{ fastqscreen_cluster_config }}",
 	"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}",
 	"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}",
 	"{{ project_name }}.fastqscreen_disk_size": "{{ fastqscreen_disk_size }}",
 	"{{ project_name }}.multiqc_cluster_config": "{{ multiqc_cluster_config }}",
 	"{{ project_name }}.multiqc_docker": "{{ multiqc_docker }}",
 	"{{ project_name }}.multiqc_disk_size": "{{ multiqc_disk_size }}",
 	"{{ project_name }}.ballgown_docker": "{{ ballgown_docker }}",
 	"{{ project_name }}.ballgown_cluster": "{{ ballgown_cluster }}",
 	"{{ project_name }}.disk_size": "{{ disk_size if disk_size != '' else 200}}"
 }
--- a/tasks/.DS_Store
+++ b/tasks/.DS_Store
--- a/tasks/ballgown.wdl
+++ b/tasks/ballgown.wdl
@@ -0,0 +1,25 @@
 # Task: ballgown
 #
 # Stages a set of per-sample files into /cromwell_root/tmp/<sample_id> and runs
 # the container's `ballgown` executable on that directory, writing the result
 # to <sample_id>.txt in the task working directory.
 #
 # Inputs:
 #   gene_abundance - declared (and therefore localized) but not referenced by
 #                    the command below.
 #   ballgown       - files copied into the per-sample directory; presumably the
 #                    `ballgown` output array of the stringtie task (confirm
 #                    against the calling workflow).
 #   sample_id      - used for the staging directory name and the output name.
 #   docker         - container image for the task.
 #   cluster        - value passed through to the `cluster` runtime attribute.
 #   disk_size      - spliced into the `dataDisk` runtime string.
 #
 # Output:
 #   mat_expression - "<sample_id>.txt" as written by the `ballgown` command.
 task ballgown {
    File gene_abundance
    Array[File] ballgown
    String sample_id
    String docker
    String cluster
    String disk_size

    command <<<
      mkdir -p /cromwell_root/tmp/${sample_id}
      cp -r ${sep=" " ballgown} /cromwell_root/tmp/${sample_id}
      ballgown /cromwell_root/tmp/${sample_id} ${sample_id}.txt
    >>>
    
    # Backend-specific runtime attributes; dataDisk is assembled as
    # "cloud_ssd <disk_size> /cromwell_root/".
    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    
    output {
      File mat_expression = "${sample_id}.txt"
    }
 }
--- a/tasks/fastp.wdl
+++ b/tasks/fastp.wdl
@@ -0,0 +1,68 @@
 # Task: fastp
 #
 # Three-stage paired-end read preprocessing built on fastp. Each stage either
 # runs fastp or simply copies its input forward, depending on an integer flag.
 # Intermediate files live in /cromwell_root/tmp/fastp/.
 #
 #   Stage 1 (disable_quality_filtering):
 #     == 0 -> copy read1/read2 to <sample_id>_R{1,2}.fastq.tmp1.gz
 #     else -> run fastp with only the trim_front/trim_tail/max_len options
 #   Stage 2 (UMI):
 #     == 0 -> copy tmp1 to tmp2
 #     else -> run fastp with -U, --umi_loc and --umi_len
 #   Stage 3 (disable_adapter_trimming):
 #     == 0 -> run fastp with -l/-q/-u and the explicit adapter sequences,
 #             writing the final <sample_id>_R{1,2}.fastq.gz
 #     else -> copy tmp2 to the final <sample_id>_R{1,2}.fastq.gz
 #
 # Inputs:
 #   sample_id                 - prefix for every file this task writes.
 #   read1, read2              - paired FASTQ inputs.
 #   adapter_sequence[_r2]     - adapters passed to fastp in stage 3.
 #   umi_loc, umi_len, UMI     - UMI handling (stage 2).
 #   trim_front*/trim_tail*/max_len* - per-read trimming options, passed to
 #                               every fastp invocation.
 #   length_required           - passed as `-l` in stage 3.
 #   qualified_quality_phred   - passed as `-q` in stage 3.
 #   length_required1          - passed as `-u` in stage 3 (despite its name).
 #   docker, cluster, disk_size - runtime settings.
 #
 # Outputs:
 #   json, report              - <sample_id>.json / <sample_id>.html, written
 #                               by whichever fastp invocation ran last.
 #   Trim_R1, Trim_R2          - final FASTQ pair.
 #
 # Fix: the stage-1 copy branch used "{sample_id}" without the leading "$", so
 # WDL did not interpolate it and the files were written under the literal name
 # "{sample_id}_R1.fastq.tmp1.gz"; stage 2 then could not find its inputs.
 #
 # NOTE(review): when every stage takes its copy branch no fastp run happens,
 # so <sample_id>.json and <sample_id>.html are never created and the declared
 # outputs cannot be satisfied. Confirm whether that combination is expected.
 task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        mkdir -p /cromwell_root/tmp/fastp/

        ##1.Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    # Backend-specific runtime attributes; dataDisk is assembled as
    # "cloud_ssd <disk_size> /cromwell_root/".
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
 }
--- a/tasks/fastqc.wdl
+++ b/tasks/fastqc.wdl
@@ -0,0 +1,28 @@
 # Task: fastqc
 #
 # Runs FastQC once per mate of a read pair, writing reports into the task
 # working directory. The thread count is taken from `nproc` inside the
 # container.
 #
 # Inputs:
 #   read1, read2   - FASTQ files to analyse.
 #   docker         - container image for the task.
 #   cluster_config - value passed through to the `cluster` runtime attribute.
 #   disk_size      - spliced into the `dataDisk` runtime string.
 #
 # Outputs:
 #   read{1,2}_html / read{1,2}_zip - report paths derived from each input's
 #   basename by replacing a trailing ".fastq.gz" or ".fq.gz" with
 #   "_fastqc.html" / "_fastqc.zip". Inputs with any other extension leave the
 #   basename unchanged by `sub`, so the derived path would not point at a
 #   FastQC report.
 task fastqc {
 	File read1
 	File read2
 	String docker
 	String cluster_config
 	String disk_size

 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		fastqc -t $nt -o ./ ${read1}
 		fastqc -t $nt -o ./ ${read2}
 	>>>

 	# Backend-specific runtime attributes; dataDisk is assembled as
 	# "cloud_ssd <disk_size> /cromwell_root/".
 	runtime {
 		docker:docker
    	cluster: cluster_config
    	systemDisk: "cloud_ssd 40"
    	dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File read1_html = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
 		File read1_zip = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
 		File read2_html = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
 		File read2_zip = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
 	}
 }
--- a/tasks/fastqscreen.wdl
+++ b/tasks/fastqscreen.wdl
@@ -0,0 +1,37 @@
 # Task: fastq_screen
 #
 # Screens each mate of a read pair with FastQ Screen using bowtie2.
 # The reference directory is first copied under /cromwell_root/tmp/; the
 # repository's conf/fastq_screen.conf lists its DATABASE paths under
 # /cromwell_root/tmp/fastq_screen_reference/, so the copied directory
 # presumably needs to be named "fastq_screen_reference" (confirm against the
 # conf file actually supplied).
 #
 # Inputs:
 #   read1, read2      - FASTQ files to screen.
 #   screen_ref_dir    - directory of aligner indices, copied recursively.
 #   fastq_screen_conf - configuration file passed via --conf.
 #   docker            - container image for the task.
 #   cluster_config    - value passed through to the `cluster` runtime attribute.
 #   disk_size         - spliced into the `dataDisk` runtime string.
 #
 # Derived values:
 #   read1name, read2name - input basenames with a ".fastq.gz" suffix removed;
 #                          used to build the expected output file names.
 #
 # Outputs:
 #   png{1,2}, txt{1,2}, html{1,2} - "<readNname>_screen.{png,txt,html}".
 task fastq_screen {
 	File read1
 	File read2
 	File screen_ref_dir
 	File fastq_screen_conf
 	String read1name = basename(read1,".fastq.gz")
 	String read2name = basename(read2,".fastq.gz")
 	String docker
 	String cluster_config
 	String disk_size

 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		mkdir -p /cromwell_root/tmp
 		cp -r ${screen_ref_dir} /cromwell_root/tmp/
 		#sed -i "s#/cromwell_root/fastq_screen_reference#${screen_ref_dir}#g" ${fastq_screen_conf}
 		fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read1}
 		fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read2}
 	>>>

 	# Backend-specific runtime attributes; dataDisk is assembled as
 	# "cloud_ssd <disk_size> /cromwell_root/".
 	runtime {
 		docker:docker
    	cluster: cluster_config
    	systemDisk: "cloud_ssd 40"
    	dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
 	}
 	output {
 		File png1 = "${read1name}_screen.png"
 		File txt1 = "${read1name}_screen.txt"
 		File html1 = "${read1name}_screen.html"
 		File png2 = "${read2name}_screen.png"
 		File txt2 = "${read2name}_screen.txt"
 		File html2 = "${read2name}_screen.html"
 	}
 }
--- a/tasks/hisat2.wdl
+++ b/tasks/hisat2.wdl
@@ -0,0 +1,35 @@
 # Task: hisat2
 #
 # Aligns a trimmed read pair with HISAT2 and writes a SAM file plus the
 # gzip-compressed read pairs that did not align concordantly.
 # The thread count is taken from `nproc` inside the container.
 #
 # Inputs:
 #   idx               - index location; the index basename is "<idx>/<idx_prefix>".
 #   Trim_R1, Trim_R2  - paired FASTQ inputs (-1 / -2).
 #   idx_prefix        - index basename inside `idx`.
 #   sample_id         - prefix for the output files.
 #   pen_intronlen     - passed verbatim to --pen-intronlen (a String because
 #                       the value is a function spec such as "G,-8,1").
 #   pen_cansplice, pen_noncansplice, min_intronlen, max_intronlen,
 #   maxins, minins    - passed to the HISAT2 options of the same name.
 #   docker, cluster, disk_size - runtime settings.
 #
 # Outputs:
 #   sam               - "<sample_id>.sam"
 #   unmapread_1p/2p   - "<sample_id>_un.fq.1.gz" / "<sample_id>_un.fq.2.gz",
 #                       the names expected from `--un-conc-gz <sample_id>_un.fq.gz`.
 task hisat2 {
   File idx
   File Trim_R1
   File Trim_R2
   String idx_prefix
   String sample_id
   String docker
   String cluster
   String disk_size
   String pen_intronlen
   Int pen_cansplice
   Int pen_noncansplice
   Int min_intronlen
   Int max_intronlen
   Int maxins
   Int minins
   
   command <<<
      nt=$(nproc)
      hisat2 -t -p $nt -x ${idx}/${idx_prefix} --pen-cansplice ${pen_cansplice} --pen-noncansplice ${pen_noncansplice} --pen-intronlen ${pen_intronlen} --min-intronlen ${min_intronlen} --max-intronlen ${max_intronlen} --maxins ${maxins} --minins ${minins} --un-conc-gz ${sample_id}_un.fq.gz -1 ${Trim_R1} -2 ${Trim_R2} -S ${sample_id}.sam 
   >>>
   
   # Backend-specific runtime attributes; dataDisk is assembled as
   # "cloud_ssd <disk_size> /cromwell_root/".
   runtime { 
 		docker: docker 
 		cluster: cluster
 		systemDisk: "cloud_ssd 40"
 		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
   }

   output {
      File sam = "${sample_id}.sam"
      File unmapread_1p = "${sample_id}_un.fq.1.gz"
      File unmapread_2p = "${sample_id}_un.fq.2.gz"
   }
 }
--- a/tasks/multiqc.wdl
+++ b/tasks/multiqc.wdl
@@ -0,0 +1,61 @@
 # Task: multiqc
 #
 # Gathers QC artefacts from upstream tasks into per-tool directories under
 # /cromwell_root/tmp/, runs MultiQC over that tree, and then copies four of
 # the generated tables out of multiqc_data/ into the working directory so
 # they can be exposed as individual outputs.
 #
 # Inputs:
 #   read1_zip, read2_zip - files copied into tmp/fastqc.
 #   txt1, txt2           - files copied into tmp/fastqscreen.
 #   bamqc_zip            - archives extracted with `tar -zx` into tmp/bamqc.
 #   rnaseq_zip           - archives extracted with `tar -zx` into tmp/rnaseq.
 #                          (Despite the ".zip" naming these must be gzip'd
 #                          tarballs, matching how the qualimap tasks in this
 #                          repository create them with `tar -zcvf`.)
 #   docker, cluster_config, disk_size - runtime settings.
 #
 # Outputs:
 #   multiqc_html          - "multiqc_report.html"
 #   multiqc_txt           - every file matched by multiqc_data/*
 #   multiqc_fastq_screen, multiqc_fastqc, multiqc_general_stats,
 #   bamqc_genome_results  - the four tables copied out by the `cat` commands.
 #
 # Because `set -e` is active, the task fails if any of those four tables is
 # missing from multiqc_data/.
 task multiqc {

 	Array[File] read1_zip
 	Array[File] read2_zip

 	Array[File] txt1
 	Array[File] txt2

 	Array[File] bamqc_zip
 	Array[File] rnaseq_zip

 	String docker
 	String cluster_config
 	String disk_size

 	command <<<
 		set -o pipefail
 		set -e
 		mkdir -p /cromwell_root/tmp/fastqc
 		mkdir -p /cromwell_root/tmp/fastqscreen
 		mkdir -p /cromwell_root/tmp/bamqc
 		mkdir -p /cromwell_root/tmp/rnaseq

 		cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
 		cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen
 		for i in ${sep=" " bamqc_zip}
 		do
 		  tar -zxvf $i -C /cromwell_root/tmp/bamqc
 		done
 		
 		for i in ${sep=" " rnaseq_zip}
 		do
 		  tar -zxvf $i -C /cromwell_root/tmp/rnaseq
 		done
 		

 		multiqc /cromwell_root/tmp/
 		cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt
 		cat multiqc_data/multiqc_fastqc.txt > multiqc_fastqc.txt
 		cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt
 		cat multiqc_data/multiqc_qualimap_bamqc_genome_results.txt > multiqc_qualimap_bamqc_genome_results.txt

 	
 	>>>

 	# Backend-specific runtime attributes; dataDisk is assembled as
 	# "cloud_ssd <disk_size> /cromwell_root/".
 	runtime {
 		docker:docker
 		cluster:cluster_config
 		systemDisk:"cloud_ssd 40"
 		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
 	}

 	output {
 		File multiqc_html = "multiqc_report.html"
 		Array[File] multiqc_txt = glob("multiqc_data/*")
 		File multiqc_fastq_screen = "multiqc_fastq_screen.txt"
 		File multiqc_fastqc = "multiqc_fastqc.txt"
 		File multiqc_general_stats = "multiqc_general_stats.txt"
 		File bamqc_genome_results = "multiqc_qualimap_bamqc_genome_results.txt"
 	}
 }
--- a/tasks/qualimapBAMqc.wdl
+++ b/tasks/qualimapBAMqc.wdl
@@ -0,0 +1,28 @@
 # Task: qualimapBAMqc
 #
 # Runs `qualimap bamqc` on a BAM file and packs the report directory into a
 # single archive. The thread count (-nt) is taken from `nproc`; the Java heap
 # is fixed at 32G, so the chosen machine type must have at least that much
 # memory available.
 #
 # Inputs:
 #   bam            - BAM file to analyse.
 #   docker         - container image for the task.
 #   cluster_config - value passed through to the `cluster` runtime attribute.
 #   disk_size      - spliced into the `dataDisk` runtime string.
 #
 # Derived values:
 #   bamname        - basename of `bam` with a ".bam" suffix removed.
 #
 # Output:
 #   bamqc_zip      - "<bamname>_bamqc_qualimap.zip". Note the file is created
 #                    with `tar -zcvf`, i.e. it is a gzip'd tarball despite
 #                    the ".zip" extension.
 task qualimapBAMqc {
 	File bam
 	String bamname = basename(bam,".bam")
 	String docker
 	String cluster_config
 	String disk_size

 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		/opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname}_bamqc --java-mem-size=32G 
 		tar -zcvf ${bamname}_bamqc_qualimap.zip ${bamname}_bamqc
 		
 	>>>

 	# Backend-specific runtime attributes; dataDisk is assembled as
 	# "cloud_ssd <disk_size> /cromwell_root/".
 	runtime {
 		docker:docker
 		cluster:cluster_config
 		systemDisk:"cloud_ssd 40"
 		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
 	}

 	output {
 		File bamqc_zip = "${bamname}_bamqc_qualimap.zip"
 		
 	}
 }
--- a/tasks/qualimapRNAseq.wdl
+++ b/tasks/qualimapRNAseq.wdl
@@ -0,0 +1,29 @@
 # Task: qualimapRNAseq
 #
 # Runs `qualimap rnaseq` in paired-end mode (-pe) on a BAM file against a GTF
 # annotation and packs the report directory into a single archive. The Java
 # heap is fixed at 10G.
 #
 # Inputs:
 #   bam            - BAM file to analyse.
 #   gtf            - annotation passed via -gtf.
 #   docker         - container image for the task.
 #   cluster_config - value passed through to the `cluster` runtime attribute.
 #   disk_size      - spliced into the `dataDisk` runtime string.
 #
 # Derived values:
 #   bamname        - basename of `bam` with a ".bam" suffix removed.
 #
 # Output:
 #   rnaseq_zip     - "<bamname>_RNAseq_qualimap.zip". Note the file is created
 #                    with `tar -zcvf`, i.e. it is a gzip'd tarball despite
 #                    the ".zip" extension.
 #
 # NOTE(review): `nt` is computed from `nproc` in the command but is not passed
 # to qualimap here.
 task qualimapRNAseq {
 	File bam
 	File gtf
 	String bamname = basename(bam,".bam")
 	String docker
 	String cluster_config
 	String disk_size

 	command <<<
 		set -o pipefail
 		set -e
 		nt=$(nproc)
 		/opt/qualimap/qualimap rnaseq -bam ${bam} -outformat HTML -outdir ${bamname}_RNAseq -gtf ${gtf} -pe --java-mem-size=10G
 		tar -zcvf ${bamname}_RNAseq_qualimap.zip ${bamname}_RNAseq
 		
 	>>>

 	# Backend-specific runtime attributes; dataDisk is assembled as
 	# "cloud_ssd <disk_size> /cromwell_root/".
 	runtime {
 		docker:docker
 		cluster:cluster_config
 		systemDisk:"cloud_ssd 40"
 		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
 	}

 	output {
 		File rnaseq_zip = "${bamname}_RNAseq_qualimap.zip"
 		
 	}
 }
--- a/tasks/samtools.wdl
+++ b/tasks/samtools.wdl
@@ -0,0 +1,39 @@
 # Task: samtools
 #
 # Post-processes an aligner SAM file:
 #   1. converts SAM to BAM,
 #   2. coordinate-sorts it (sort is given `-m 1000000000`),
 #   3. indexes the sorted BAM,
 #   4. writes a subsampled BAM using `view -s 42.1` (per the samtools manual
 #      the integer part is the random seed and the fractional part the
 #      fraction of reads kept, i.e. seed 42, 10%),
 #   5. extracts the "IS" (insert size) rows from `samtools stats`, dropping
 #      the first column, into <sample_id>.ins_size.
 #
 # Inputs:
 #   sam         - SAM file to process.
 #   sample_id   - prefix for every file this task writes.
 #   insert_size - passed to `samtools stats -i`.
 #   docker, cluster, disk_size - runtime settings.
 #
 # Derived values (all "<sample_id>" + suffix):
 #   bam, sorted_bam, percent_bam, sorted_bam_index, ins_size
 #
 # Outputs:
 #   out_bam       - sorted BAM
 #   out_percent   - subsampled BAM
 #   out_bam_index - "<sample_id>.sorted.bam.bai", the default index name
 #                   produced by `samtools index` for the sorted BAM
 #   out_ins_size  - insert-size table
 #
 # With `set -o pipefail` and `set -e` active, the final pipeline fails the
 # task if `grep ^IS` matches nothing.
 task samtools {
    File sam
    String sample_id
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String docker
    String cluster
    String disk_size
    Int insert_size

    command <<<
       set -o pipefail
       set -e
       /opt/conda/bin/samtools view -bS ${sam} > ${bam}
       /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
       /opt/conda/bin/samtools index ${sorted_bam}
       /opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}
       /opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${sample_id}.ins_size
    >>>

    # Backend-specific runtime attributes; dataDisk is assembled as
    # "cloud_ssd <disk_size> /cromwell_root/".
    runtime {
       docker: docker
       cluster: cluster
       systemDisk: "cloud_ssd 40"
       dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File out_bam = sorted_bam
      File out_percent = percent_bam
      File out_bam_index = sorted_bam_index
      File out_ins_size = ins_size
    }

 }

--- a/tasks/stringtie.wdl
+++ b/tasks/stringtie.wdl
@@ -0,0 +1,33 @@
 # Task: stringtie
 #
 # Runs StringTie in expression-estimation mode (-e) with Ballgown table output
 # (-B) against a reference annotation. The thread count is taken from `nproc`.
 #
 # Inputs:
 #   bam        - alignment file to quantify.
 #   gtf        - reference annotation passed via -G.
 #   sample_id  - prefix for every file this task writes.
 #   minimum_isoform_abundance                             - passed as -f
 #   minimum_length_allowed_for_the_predicted_transcripts  - passed as -m
 #   Junctions_no_spliced_reads                            - passed as -a
 #   maximum_fraction_of_muliplelocationmapped_reads       - passed as -M
 #   docker, cluster, disk_size - runtime settings.
 #
 # Outputs:
 #   covered_transcripts - "<sample_id>.cov.ref.gtf" (-C)
 #   gene_abundance      - "<sample_id>.gene.abundance.txt" (-A)
 #   ballgown            - the assembled GTF plus the five *.ctab tables under
 #                         ballgown/<sample_id>/
 #   genecount           - "<sample_id>_genecount.csv"
 #
 # Fix: the `genecount` output used "{sample_id}" without the leading "$", so
 # WDL did not interpolate it and the output pointed at a file literally named
 # "{sample_id}_genecount.csv".
 #
 # NOTE(review): nothing in the command below visibly creates
 # <sample_id>_genecount.csv. Unless the container's stringtie wrapper produces
 # it, this output still cannot be satisfied; confirm whether a gene-count step
 # is missing or the output should be dropped.
 task stringtie {
    File bam
    File gtf
    String docker
    String sample_id
    String cluster
    String disk_size
    Int minimum_length_allowed_for_the_predicted_transcripts
    Int Junctions_no_spliced_reads
    Float minimum_isoform_abundance
    Float maximum_fraction_of_muliplelocationmapped_reads

    command <<<
 	nt=$(nproc)
 	mkdir ballgown
 	/opt/conda/bin/stringtie -e -B -p $nt -f ${minimum_isoform_abundance} -m ${minimum_length_allowed_for_the_predicted_transcripts} -a ${Junctions_no_spliced_reads} -M ${maximum_fraction_of_muliplelocationmapped_reads} -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam}
    >>>

    # Backend-specific runtime attributes; dataDisk is assembled as
    # "cloud_ssd <disk_size> /cromwell_root/".
    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
      File covered_transcripts = "${sample_id}.cov.ref.gtf"
      File gene_abundance = "${sample_id}.gene.abundance.txt"
      Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
      File genecount = "${sample_id}_genecount.csv"
    }
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,135 @@
 # Workflow template (the workflow name is filled in from the Jinja variable
 # `project_name`).
 #
 # Pipeline as wired below:
 #   fastp -> hisat2 -> samtools -> qualimapBAMqc (on the subsampled BAM)
 #
 # Fix: the workflow calls `qualimapBAMqc.qualimapBAMqc`, but only
 # qualimapRNAseq was imported, so the `qualimapBAMqc` namespace was undefined.
 # The missing import is added; the existing imports are kept unchanged.
 #
 # NOTE(review): many declared inputs (gtf, screen_ref_dir, fastq_screen_conf,
 # the fastqc/fastqscreen/stringtie/multiqc/ballgown/qualimapRNAseq settings)
 # are not consumed by any call below. They are left in place because the
 # `inputs` template supplies values for them.
 import "./tasks/fastp.wdl" as fastp
 import "./tasks/hisat2.wdl" as hisat2
 import "./tasks/samtools.wdl" as samtools
 import "./tasks/qualimapBAMqc.wdl" as qualimapBAMqc
 import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq

 workflow {{ project_name }} {
 	# --- Files ---
 	File read1
 	File read2
 	File idx
 	File screen_ref_dir
 	File fastq_screen_conf
 	File gtf

 	# --- Strings: sample identity, images, cluster specs, tool options ---
 	String sample_id
 	String fastp_docker
 	String adapter_sequence
 	String adapter_sequence_r2
 	String fastp_cluster
 	String umi_loc
 	String idx_prefix
 	String pen_intronlen
 	String fastqc_cluster_config
 	String fastqc_docker
 	String fastqscreen_docker
 	String fastqscreen_cluster_config
 	String hisat2_docker
 	String hisat2_cluster
 	String qualimapBAMqc_docker
 	String qualimapBAMqc_cluster_config
 	String qualimapRNAseq_docker
 	String qualimapRNAseq_cluster_config
 	String samtools_docker
 	String samtools_cluster
 	String stringtie_docker
 	String stringtie_cluster
 	String multiqc_cluster_config
 	String multiqc_docker

 	# --- Integers: trimming, filtering, alignment and disk settings ---
 	Int multiqc_disk_size
 	Int trim_front1
 	Int trim_tail1
 	Int max_len1
 	Int trim_front2
 	Int trim_tail2
 	Int max_len2
 	Int disable_adapter_trimming
 	Int length_required
 	Int umi_len
 	Int UMI
 	Int qualified_quality_phred
 	Int length_required1
 	Int disable_quality_filtering
 	Int pen_cansplice
 	Int pen_noncansplice
 	Int min_intronlen
 	Int max_intronlen
 	Int maxins
 	Int minins
 	Int fastqc_disk_size
 	Int fastqscreen_disk_size
 	Int qualimapBAMqc_disk_size
 	Int qualimapRNAseq_disk_size
 	Int insert_size
 	Int minimum_length_allowed_for_the_predicted_transcripts
 	Int Junctions_no_spliced_reads

 	# --- Floats ---
 	Float minimum_isoform_abundance
 	Float maximum_fraction_of_muliplelocationmapped_reads

 	String ballgown_docker
 	String ballgown_cluster
 	# Shared data-disk size used by the fastp, hisat2 and samtools calls.
 	String disk_size

 	# Read preprocessing (quality / UMI / adapter stages).
 	call fastp.fastp as fastp {
 		input: 
 		sample_id=sample_id, 
 		read1 = read1, 
 		read2 = read2,
 		docker = fastp_docker,
 		cluster = fastp_cluster,
 		disk_size = disk_size,
 		adapter_sequence = adapter_sequence,
 		adapter_sequence_r2 = adapter_sequence_r2,
 		umi_loc = umi_loc,
 		trim_front1 = trim_front1,
 		trim_tail1 = trim_tail1, 
 		max_len1  = max_len1,
 		trim_front2  = trim_front2,
 		trim_tail2   = trim_tail2,
 		max_len2  = max_len2,
 		disable_adapter_trimming = disable_adapter_trimming,
 		length_required = length_required,
 		umi_len = umi_len,
 		UMI = UMI,
 		qualified_quality_phred = qualified_quality_phred,
 		length_required1 = length_required1,
 		disable_quality_filtering = disable_quality_filtering
 		}

 	# Alignment of the trimmed pair.
 	call hisat2.hisat2 as hisat2 {
 		input: 
 		sample_id = sample_id, 
 		idx = idx, 
 		idx_prefix = idx_prefix, 
 		Trim_R1 = fastp.Trim_R1, 
 		Trim_R2 = fastp.Trim_R2,
 		docker = hisat2_docker,
 		cluster = hisat2_cluster,
 		disk_size = disk_size,
 		pen_intronlen = pen_intronlen,
 		pen_cansplice = pen_cansplice,
 		pen_noncansplice = pen_noncansplice,
 		min_intronlen = min_intronlen,
 		max_intronlen = max_intronlen,
 		maxins = maxins,
 		minins = minins
 	}

 	# SAM -> sorted/indexed BAM, subsampled BAM and insert-size table.
 	call samtools.samtools as samtools {
 		input: 
 		sample_id = sample_id, 
 		sam = hisat2.sam,
 		docker = samtools_docker,
 		cluster = samtools_cluster,
 		disk_size = disk_size,
 		insert_size = insert_size
 	}

 	# BAM QC runs on the subsampled BAM (samtools.out_percent), not the full one.
 	call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
 		input:
 		bam = samtools.out_percent,
 		docker = qualimapBAMqc_docker,
 		cluster_config = qualimapBAMqc_cluster_config,
 		disk_size = qualimapBAMqc_disk_size
 	}


 }