@@ -0,0 +1,94 @@ | |||
# This is an example configuration file for FastQ Screen | |||
############################ | |||
## Bowtie, Bowtie 2 or BWA # | |||
############################ | |||
## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set | |||
## this value to tell the program where to find your chosen aligner. Uncomment | |||
## the relevant line below and set the appropriate location. Please note, | |||
## this path should INCLUDE the executable filename. | |||
#BOWTIE /usr/local/bin/bowtie/bowtie | |||
#BOWTIE2 /usr/local/bowtie2/bowtie2 | |||
#BWA /usr/local/bwa/bwa | |||
############################################ | |||
## Bismark (for bisulfite sequencing only) # | |||
############################################ | |||
## If the Bismark binary is not in your PATH then you can set this value to | |||
## tell the program where to find it. Uncomment the line below and set the | |||
## appropriate location. Please note, this path should INCLUDE the executable | |||
## filename. | |||
#BISMARK /usr/local/bin/bismark/bismark | |||
############ | |||
## Threads # | |||
############ | |||
## Genome aligners can be made to run across multiple CPU cores to speed up | |||
## searches. Set this value to the number of cores you want for mapping reads. | |||
THREADS 32 | |||
############## | |||
## DATABASES # | |||
############## | |||
## This section enables you to configure multiple genome databases (aligner index
## files) to search against in your screen. For each genome you need to provide a | |||
## database name (which can't contain spaces) and the location of the aligner index | |||
## files. | |||
## | |||
## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g: | |||
## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37 | |||
## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.) | |||
## are found in a folder named 'GRCh37'. | |||
## | |||
## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in | |||
## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices. The index
## used will be the one compatible with the chosen aligner (as specified using the | |||
## --aligner flag). | |||
## | |||
## The entries shown below are only suggested examples, you can add as many DATABASE | |||
## sections as required, and you can comment out or remove as many of the existing | |||
## entries as desired. We suggest including genomes and sequences that may be sources | |||
## of contamination either because they were run on your sequencer previously, or may
## have contaminated your sample during the library preparation step. | |||
## | |||
## Human - sequences available from | |||
## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/ | |||
#DATABASE Human /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37 | |||
## | |||
## Mouse - sequence available from | |||
## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/ | |||
#DATABASE Mouse /data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37 | |||
## | |||
## Ecoli- sequence available from EMBL accession U00096.2 | |||
#DATABASE Ecoli /data/public/Genomes/Ecoli/Ecoli | |||
## | |||
## PhiX - sequence available from Refseq accession NC_001422.1 | |||
#DATABASE PhiX /data/public/Genomes/PhiX/phi_plus_SNPs | |||
## | |||
## Adapters - sequence derived from the FastQC contaminants file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
#DATABASE Adapters /data/public/Genomes/Contaminants/Contaminants | |||
## | |||
## Vector - Sequence taken from the UniVec database | |||
## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html | |||
#DATABASE Vectors /data/public/Genomes/Vectors/Vectors | |||
DATABASE Human /cromwell_root/tmp/fastq_screen_reference/genome | |||
DATABASE Mouse /cromwell_root/tmp/fastq_screen_reference/mouse | |||
DATABASE ERCC /cromwell_root/tmp/fastq_screen_reference/ERCC | |||
DATABASE EColi /cromwell_root/tmp/fastq_screen_reference/ecoli | |||
DATABASE Adapter /cromwell_root/tmp/fastq_screen_reference/adapters | |||
DATABASE Vector /cromwell_root/tmp/fastq_screen_reference/vector | |||
DATABASE rRNA /cromwell_root/tmp/fastq_screen_reference/rRNARef | |||
DATABASE Virus /cromwell_root/tmp/fastq_screen_reference/viral | |||
DATABASE Yeast /cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify | |||
DATABASE Mitoch /cromwell_root/tmp/fastq_screen_reference/Human_mitoch | |||
DATABASE Phix /cromwell_root/tmp/fastq_screen_reference/phix |
@@ -0,0 +1,61 @@ | |||
{ | |||
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6", | |||
"fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc", | |||
"trim_front1": "0", | |||
"trim_tail1": "0", | |||
"max_len1": "0", | |||
"trim_front2": "0", | |||
"trim_tail2": "0", | |||
"max_len2": "0", | |||
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA", | |||
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT", | |||
"disable_adapter_trimming": "0", | |||
"length_required": "50", | |||
"length_required1": "20", | |||
"UMI": "0", | |||
"umi_len": "0", | |||
"umi_loc": "umi_loc", | |||
"qualified_quality_phred": "20", | |||
"disable_quality_filtering": "1", | |||
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2", | |||
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc", | |||
"idx_prefix": "genome_snp_tran", | |||
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/", | |||
"fasta": "GRCh38.d1.vd1.fa", | |||
"pen_cansplice":"0", | |||
"pen_noncansplice":"3", | |||
"pen_intronlen":"G,-8,1", | |||
"min_intronlen":"30", | |||
"max_intronlen":"500000", | |||
"maxins":"500", | |||
"minins":"0", | |||
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1", | |||
"samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||
"insert_size":"8000", | |||
"gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf", | |||
"stringtie_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4", | |||
"stringtie_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc", | |||
"minimum_length_allowed_for_the_predicted_transcripts":"200", | |||
"minimum_isoform_abundance":"0.01", | |||
"Junctions_no_spliced_reads":"10", | |||
"maximum_fraction_of_muliplelocationmapped_reads":"0.95", | |||
"fastqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc", | |||
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5", | |||
"fastqc_disk_size": "150", | |||
"qualimapBAMqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0", | |||
"qualimapBAMqc_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"qualimapBAMqc_disk_size": "500", | |||
"qualimapRNAseq_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0", | |||
"qualimapRNAseq_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc", | |||
"qualimapRNAseq_disk_size": "500", | |||
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0", | |||
"fastqscreen_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc", | |||
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/", | |||
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf", | |||
"fastqscreen_disk_size": "200", | |||
"multiqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc", | |||
"multiqc_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8", | |||
"multiqc_disk_size": "100", | |||
"ballgown_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pgx-ballgown:0.0.1", | |||
"ballgown_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc" | |||
} |
@@ -0,0 +1,65 @@ | |||
{ | |||
"{{ project_name }}.read1": "{{ read1 }}", | |||
"{{ project_name }}.read2": "{{ read2 }}", | |||
"{{ project_name }}.sample_id": "{{ sample_id }}", | |||
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}", | |||
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}", | |||
"{{ project_name }}.trim_front1": "{{ trim_front1 }}", | |||
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}", | |||
"{{ project_name }}.max_len1": "{{ max_len1 }}", | |||
"{{ project_name }}.trim_front2": "{{ trim_front2 }}", | |||
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}", | |||
"{{ project_name }}.max_len2": "{{ max_len2 }}", | |||
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}", | |||
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}", | |||
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}", | |||
"{{ project_name }}.length_required1": "{{ length_required1 }}", | |||
"{{ project_name }}.UMI": "{{ UMI }}", | |||
"{{ project_name }}.umi_loc": "{{ umi_loc }}", | |||
"{{ project_name }}.umi_len": "{{ umi_len }}", | |||
"{{ project_name }}.length_required": "{{ length_required }}", | |||
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}", | |||
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}", | |||
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}", | |||
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}", | |||
"{{ project_name }}.idx_prefix": "{{ idx_prefix }}", | |||
"{{ project_name }}.idx": "{{ idx }}", | |||
"{{ project_name }}.fasta": "{{ fasta }}", | |||
"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}", | |||
"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}", | |||
"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}", | |||
"{{ project_name }}.min_intronlen": "{{ min_intronlen }}", | |||
"{{ project_name }}.max_intronlen": "{{ max_intronlen }}", | |||
"{{ project_name }}.maxins": "{{ maxins }}", | |||
"{{ project_name }}.minins": "{{ minins }}", | |||
"{{ project_name }}.samtools_docker": "{{ samtools_docker }}", | |||
"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}", | |||
"{{ project_name }}.insert_size": "{{ insert_size }}", | |||
"{{ project_name }}.gtf": "{{ gtf }}", | |||
"{{ project_name }}.stringtie_docker": "{{ stringtie_docker }}", | |||
"{{ project_name }}.stringtie_cluster": "{{ stringtie_cluster }}", | |||
"{{ project_name }}.minimum_length_allowed_for_the_predicted_transcripts": "{{ minimum_length_allowed_for_the_predicted_transcripts }}", | |||
"{{ project_name }}.minimum_isoform_abundance": "{{ minimum_isoform_abundance }}", | |||
"{{ project_name }}.Junctions_no_spliced_reads": "{{ Junctions_no_spliced_reads }}", | |||
"{{ project_name }}.maximum_fraction_of_muliplelocationmapped_reads": "{{ maximum_fraction_of_muliplelocationmapped_reads }}", | |||
"{{ project_name }}.fastqc_cluster_config": "{{ fastqc_cluster_config }}", | |||
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}", | |||
"{{ project_name }}.fastqc_disk_size": "{{ fastqc_disk_size }}", | |||
"{{ project_name }}.qualimapBAMqc_docker": "{{ qualimapBAMqc_docker }}", | |||
"{{ project_name }}.qualimapBAMqc_cluster_config": "{{ qualimapBAMqc_cluster_config }}", | |||
"{{ project_name }}.qualimapBAMqc_disk_size": "{{ qualimapBAMqc_disk_size }}", | |||
"{{ project_name }}.qualimapRNAseq_docker": "{{ qualimapRNAseq_docker }}", | |||
"{{ project_name }}.qualimapRNAseq_cluster_config": "{{ qualimapRNAseq_cluster_config }}", | |||
"{{ project_name }}.qualimapRNAseq_disk_size": "{{ qualimapRNAseq_disk_size }}", | |||
"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}", | |||
"{{ project_name }}.fastqscreen_cluster_config": "{{ fastqscreen_cluster_config }}", | |||
"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}", | |||
"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}", | |||
"{{ project_name }}.fastqscreen_disk_size": "{{ fastqscreen_disk_size }}", | |||
"{{ project_name }}.multiqc_cluster_config": "{{ multiqc_cluster_config }}", | |||
"{{ project_name }}.multiqc_docker": "{{ multiqc_docker }}", | |||
"{{ project_name }}.multiqc_disk_size": "{{ multiqc_disk_size }}", | |||
"{{ project_name }}.ballgown_docker": "{{ ballgown_docker }}", | |||
"{{ project_name }}.ballgown_cluster": "{{ ballgown_cluster }}", | |||
"{{ project_name }}.disk_size": "{{ disk_size if disk_size != '' else 200}}" | |||
} |
@@ -0,0 +1,25 @@ | |||
# Stages a set of files into a per-sample directory and runs the image's
# `ballgown` command on that directory, writing <sample_id>.txt.
#
# Inputs:
#   gene_abundance - accepted from the caller; not referenced by the command.
#   ballgown       - files copied into /cromwell_root/tmp/<sample_id>
#                    (presumably the StringTie Ballgown tables -- confirm
#                    against the caller).
#   sample_id      - names the staging directory and the output file.
#   docker/cluster - runtime image and instance specification.
#   disk_size      - size of the data disk mounted at /cromwell_root/.
# Output:
#   mat_expression - <sample_id>.txt written by the `ballgown` command.
task ballgown {
    File gene_abundance
    Array[File] ballgown
    String sample_id
    String docker
    String cluster
    String disk_size

    command <<<
        # Fail fast, consistent with the other QC tasks: without this a failed
        # copy would only surface as a confusing error from the next command.
        set -o pipefail
        set -e

        mkdir -p /cromwell_root/tmp/${sample_id}
        cp -r ${sep=" " ballgown} /cromwell_root/tmp/${sample_id}
        ballgown /cromwell_root/tmp/${sample_id} ${sample_id}.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File mat_expression = "${sample_id}.txt"
    }
}
@@ -0,0 +1,68 @@ | |||
# Pre-processes one paired-end sample with fastp in up to three sequential
# passes. Each pass either runs fastp or simply copies its input forward:
#
#   1. tmp1: plain copy when disable_quality_filtering is 0, otherwise a fastp
#            pass that applies only the front/tail/max-length trimming options.
#            NOTE(review): the flag name suggests the opposite polarity --
#            confirm the intended meaning with the pipeline owner.
#   2. tmp2: plain copy when UMI is 0, otherwise a fastp pass with -U using
#            umi_loc / umi_len.
#   3. final: fastp adapter/quality/length pass when disable_adapter_trimming
#            is 0, otherwise a plain copy.
#
# Every fastp pass writes the same <sample_id>.json / <sample_id>.html, so the
# report of the last executed pass is the one that is kept.
# NOTE(review): if all three steps take the copy branch, no JSON/HTML report is
# written and the `json` / `report` outputs cannot be resolved.
#
# Outputs:
#   json, report     - fastp JSON and HTML reports.
#   Trim_R1, Trim_R2 - processed reads, <sample_id>_R1/_R2.fastq.gz.
task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Abort on the first failing step instead of running later passes on
        # missing intermediates.
        set -o pipefail
        set -e

        mkdir -p /cromwell_root/tmp/fastp/

        ##1.Disable_quality_filtering
        if [ "${disable_quality_filtering}" = 0 ]
        then
            # The destination must use the interpolated sample id: step 2 reads
            # <sample_id>_R?.fastq.tmp1.gz from this directory.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 4 \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i ${read1} -I ${read2} \
                -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz \
                -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        if [ "${UMI}" = 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz \
                -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz \
                -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz \
                -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        if [ "${disable_adapter_trimming}" = 0 ]
        then
            fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} \
                --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} \
                --detect_adapter_for_pe \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz \
                -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz \
                -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}
@@ -0,0 +1,28 @@ | |||
# Runs FastQC on each mate of a read pair, writing reports into the task's
# working directory.
#
# Inputs:
#   read1, read2   - FASTQ files to inspect.
#   docker         - runtime image.
#   cluster_config - instance specification.
#   disk_size      - size of the data disk mounted at /cromwell_root/.
# Outputs:
#   read{1,2}_html / read{1,2}_zip - report names derived from each input's
#   basename with a trailing .fastq.gz / .fq.gz replaced by _fastqc.html/.zip.
task fastqc {
    File read1
    File read2
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        threads=$(nproc)
        # One FastQC invocation per mate, read1 first.
        for fq in ${read1} ${read2}
        do
            fastqc -t $threads -o ./ $fq
        done
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File read1_html = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File read1_zip = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
        File read2_html = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File read2_zip = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    }
}
@@ -0,0 +1,37 @@ | |||
# Screens each mate of a read pair against the reference set with FastQ Screen
# (bowtie2 aligner, first 100000 reads via --top).
#
# Inputs:
#   read1, read2      - FASTQ files to screen.
#   screen_ref_dir    - directory of aligner indices; copied under
#                       /cromwell_root/tmp/ before screening.
#   fastq_screen_conf - FastQ Screen configuration file passed via --conf
#                       (presumably its DATABASE paths point below
#                       /cromwell_root/tmp/ -- verify against the conf in use).
#   docker, cluster_config, disk_size - runtime image, instance spec, data disk.
# Outputs:
#   png/txt/html 1 and 2 - <read name>_screen.{png,txt,html} for each mate.
task fastq_screen {
    File read1
    File read2
    File screen_ref_dir
    File fastq_screen_conf
    # Strip either .fastq.gz or .fq.gz, matching the pattern the fastqc task
    # uses, so the declared outputs line up with the report names for both
    # spellings of the extension.
    String read1name = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "")
    String read2name = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "")
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)
        mkdir -p /cromwell_root/tmp
        cp -r ${screen_ref_dir} /cromwell_root/tmp/
        #sed -i "s#/cromwell_root/fastq_screen_reference#${screen_ref_dir}#g" ${fastq_screen_conf}
        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read1}
        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read2}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File png1 = "${read1name}_screen.png"
        File txt1 = "${read1name}_screen.txt"
        File html1 = "${read1name}_screen.html"
        File png2 = "${read2name}_screen.png"
        File txt2 = "${read2name}_screen.txt"
        File html2 = "${read2name}_screen.html"
    }
}
@@ -0,0 +1,35 @@ | |||
# Aligns a trimmed read pair with HISAT2 and emits the alignments as SAM.
#
# Inputs:
#   idx, idx_prefix  - index location and basename, joined as <idx>/<idx_prefix>
#                      for the -x option.
#   Trim_R1, Trim_R2 - mate 1 / mate 2 FASTQ files.
#   sample_id        - prefix of every output file.
#   pen_* / *_intronlen / maxins / minins - forwarded unchanged to the HISAT2
#                      options of the same name.
#   docker, cluster, disk_size - runtime image, instance spec, data disk.
# Outputs:
#   sam                        - <sample_id>.sam
#   unmapread_1p, unmapread_2p - <sample_id>_un.fq.1.gz / <sample_id>_un.fq.2.gz,
#                                the files requested through --un-conc-gz.
task hisat2 {
    String sample_id
    File idx
    String idx_prefix
    File Trim_R1
    File Trim_R2

    Int pen_cansplice
    Int pen_noncansplice
    String pen_intronlen
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins

    String docker
    String cluster
    String disk_size

    command <<<
        cores=$(nproc)
        hisat2 -t -p $cores \
            -x ${idx}/${idx_prefix} \
            --pen-cansplice ${pen_cansplice} \
            --pen-noncansplice ${pen_noncansplice} \
            --pen-intronlen ${pen_intronlen} \
            --min-intronlen ${min_intronlen} \
            --max-intronlen ${max_intronlen} \
            --maxins ${maxins} \
            --minins ${minins} \
            --un-conc-gz ${sample_id}_un.fq.gz \
            -1 ${Trim_R1} \
            -2 ${Trim_R2} \
            -S ${sample_id}.sam
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File sam = "${sample_id}.sam"
        File unmapread_1p = "${sample_id}_un.fq.1.gz"
        File unmapread_2p = "${sample_id}_un.fq.2.gz"
    }
}
@@ -0,0 +1,61 @@ | |||
# Gathers per-sample QC artefacts into one staging tree and runs MultiQC on it.
#
# Inputs:
#   read1_zip, read2_zip - FastQC archives, copied to .../tmp/fastqc.
#   txt1, txt2           - FastQ Screen text reports, copied to .../tmp/fastqscreen.
#   bamqc_zip            - gzip-compressed tarballs extracted into .../tmp/bamqc.
#   rnaseq_zip           - gzip-compressed tarballs extracted into .../tmp/rnaseq.
#   docker, cluster_config, disk_size - runtime image, instance spec, data disk.
# Outputs:
#   multiqc_html - multiqc_report.html
#   multiqc_txt  - everything under multiqc_data/
#   multiqc_fastq_screen, multiqc_fastqc, multiqc_general_stats,
#   bamqc_genome_results - four tables copied out of multiqc_data/ into the
#                          working directory.
task multiqc {
    Array[File] read1_zip
    Array[File] read2_zip
    Array[File] txt1
    Array[File] txt2
    Array[File] bamqc_zip
    Array[File] rnaseq_zip
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e

        # Staging layout scanned by MultiQC.
        mkdir -p /cromwell_root/tmp/fastqc
        mkdir -p /cromwell_root/tmp/fastqscreen
        mkdir -p /cromwell_root/tmp/bamqc
        mkdir -p /cromwell_root/tmp/rnaseq

        cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
        cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen

        for archive in ${sep=" " bamqc_zip}
        do
            tar -zxvf $archive -C /cromwell_root/tmp/bamqc
        done

        for archive in ${sep=" " rnaseq_zip}
        do
            tar -zxvf $archive -C /cromwell_root/tmp/rnaseq
        done

        multiqc /cromwell_root/tmp/

        # Surface selected tables next to the HTML report.
        for table in multiqc_fastq_screen.txt multiqc_fastqc.txt multiqc_general_stats.txt multiqc_qualimap_bamqc_genome_results.txt
        do
            cat multiqc_data/$table > $table
        done
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File multiqc_html = "multiqc_report.html"
        Array[File] multiqc_txt = glob("multiqc_data/*")
        File multiqc_fastq_screen = "multiqc_fastq_screen.txt"
        File multiqc_fastqc = "multiqc_fastqc.txt"
        File multiqc_general_stats = "multiqc_general_stats.txt"
        File bamqc_genome_results = "multiqc_qualimap_bamqc_genome_results.txt"
    }
}
@@ -0,0 +1,28 @@ | |||
# Runs Qualimap `bamqc` on a BAM file and packs the report directory into a
# single archive.
#
# Inputs:
#   bam            - alignment file to assess.
#   docker, cluster_config, disk_size - runtime image, instance spec, data disk.
# Output:
#   bamqc_zip      - <bam basename>_bamqc_qualimap.zip; despite the extension it
#                    is created with `tar -z`, i.e. a gzip-compressed tarball.
task qualimapBAMqc {
    File bam
    String docker
    String cluster_config
    String disk_size

    # BAM file name without directory and without the .bam suffix.
    String bamname = basename(bam, ".bam")

    command <<<
        set -o pipefail
        set -e
        threads=$(nproc)
        /opt/qualimap/qualimap bamqc \
            -bam ${bam} \
            -outformat PDF:HTML \
            -nt $threads \
            -outdir ${bamname}_bamqc \
            --java-mem-size=32G
        tar -zcvf ${bamname}_bamqc_qualimap.zip ${bamname}_bamqc
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File bamqc_zip = "${bamname}_bamqc_qualimap.zip"
    }
}
@@ -0,0 +1,29 @@ | |||
# Runs Qualimap `rnaseq` (paired-end mode, HTML report) on a BAM file against a
# GTF annotation and packs the report directory into a single archive.
#
# Inputs:
#   bam            - alignment file to assess.
#   gtf            - annotation passed via -gtf.
#   docker, cluster_config, disk_size - runtime image, instance spec, data disk.
# Output:
#   rnaseq_zip     - <bam basename>_RNAseq_qualimap.zip; despite the extension
#                    it is created with `tar -z`, i.e. a gzip-compressed tarball.
task qualimapRNAseq {
    File bam
    File gtf
    String bamname = basename(bam,".bam")
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        # No thread count is computed here: the command line below takes no
        # thread option, so the former nproc lookup was dead code.
        /opt/qualimap/qualimap rnaseq -bam ${bam} -outformat HTML -outdir ${bamname}_RNAseq -gtf ${gtf} -pe --java-mem-size=10G
        tar -zcvf ${bamname}_RNAseq_qualimap.zip ${bamname}_RNAseq
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File rnaseq_zip = "${bamname}_RNAseq_qualimap.zip"
    }
}
@@ -0,0 +1,39 @@ | |||
# Converts a SAM file to BAM, sorts and indexes it, writes a subsampled BAM and
# extracts the insert-size rows from `samtools stats`.
#
# Inputs:
#   sam         - alignments in SAM format.
#   sample_id   - prefix of every file this task writes.
#   insert_size - value passed to `samtools stats -i`.
#   docker, cluster, disk_size - runtime image, instance spec, data disk.
# Outputs:
#   out_bam       - <sample_id>.sorted.bam
#   out_percent   - <sample_id>.percent.bam, produced by `samtools view -bs 42.1`
#   out_bam_index - <sample_id>.sorted.bam.bai
#   out_ins_size  - <sample_id>.ins_size, the IS rows of `samtools stats`
#                   with the leading tag column removed.
task samtools {
    File sam
    String sample_id
    Int insert_size
    String docker
    String cluster
    String disk_size

    # File names derived from the sample id.
    String bam = "${sample_id}.bam"
    String sorted_bam = "${sample_id}.sorted.bam"
    String percent_bam = "${sample_id}.percent.bam"
    String sorted_bam_index = "${sample_id}.sorted.bam.bai"
    String ins_size = "${sample_id}.ins_size"

    command <<<
        set -o pipefail
        set -e

        # SAM -> BAM -> coordinate-sorted BAM plus index.
        /opt/conda/bin/samtools view -bS ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
        /opt/conda/bin/samtools index ${sorted_bam}

        # Subsampled copy of the sorted BAM.
        /opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}

        # Keep only the IS rows of the stats report, dropping the tag column.
        /opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} \
            | grep ^IS \
            | cut -f 2- > ${ins_size}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_percent = percent_bam
        File out_bam_index = sorted_bam_index
        File out_ins_size = ins_size
    }
}
@@ -0,0 +1,33 @@ | |||
# Quantifies expression with StringTie against a reference annotation
# (-e, with -B so the Ballgown table files are written next to the -o GTF).
#
# Inputs:
#   bam       - alignments to quantify.
#   gtf       - reference annotation passed via -G.
#   sample_id - prefix / directory name for every output.
#   minimum_isoform_abundance                            -> -f
#   minimum_length_allowed_for_the_predicted_transcripts -> -m
#   Junctions_no_spliced_reads                           -> -a
#   maximum_fraction_of_muliplelocationmapped_reads      -> -M
#   docker, cluster, disk_size - runtime image, instance spec, data disk.
# Outputs:
#   covered_transcripts - <sample_id>.cov.ref.gtf (-C)
#   gene_abundance      - <sample_id>.gene.abundance.txt (-A)
#   ballgown            - the output GTF and the five .ctab tables under
#                         ballgown/<sample_id>/
#   genecount           - <sample_id>_genecount.csv
task stringtie {
    File bam
    File gtf
    String docker
    String sample_id
    String cluster
    String disk_size
    Int minimum_length_allowed_for_the_predicted_transcripts
    Int Junctions_no_spliced_reads
    Float minimum_isoform_abundance
    Float maximum_fraction_of_muliplelocationmapped_reads

    command <<<
        # Fail fast, consistent with the other tasks in this pipeline.
        set -o pipefail
        set -e
        nt=$(nproc)
        # -p: do not fail if the directory is already present.
        mkdir -p ballgown
        /opt/conda/bin/stringtie -e -B -p $nt -f ${minimum_isoform_abundance} -m ${minimum_length_allowed_for_the_predicted_transcripts} -a ${Junctions_no_spliced_reads} -M ${maximum_fraction_of_muliplelocationmapped_reads} -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File covered_transcripts = "${sample_id}.cov.ref.gtf"
        File gene_abundance = "${sample_id}.gene.abundance.txt"
        Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
        # The sample id must be interpolated; the path previously contained a
        # literal "{sample_id}".
        # NOTE(review): none of the options on the command line above names
        # this file, so a step that writes <sample_id>_genecount.csv still has
        # to be added (or this output dropped) before the task can succeed.
        File genecount = "${sample_id}_genecount.csv"
    }
}
@@ -0,0 +1,135 @@ | |||
import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
import "./tasks/qualimapBAMqc.wdl" as qualimapBAMqc
import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq
# Per-sample RNA-seq workflow: fastp -> hisat2 -> samtools -> qualimapBAMqc.
#
# The workflow name is filled in by the template engine ({{ project_name }}).
# Inputs below are grouped by the stage they are named after. Only the fastp,
# hisat2, samtools and qualimapBAMqc groups (plus sample_id, read1, read2 and
# disk_size) are consumed by the calls in this workflow; the remaining inputs
# are declared but not passed to any call here.
workflow {{ project_name }} {
    # --- sample ---
    String sample_id
    File read1
    File read2
    String disk_size

    # --- fastp ---
    String fastp_docker
    String fastp_cluster
    String adapter_sequence
    String adapter_sequence_r2
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    # --- hisat2 ---
    String hisat2_docker
    String hisat2_cluster
    File idx
    String idx_prefix
    String pen_intronlen
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins

    # --- samtools ---
    String samtools_docker
    String samtools_cluster
    Int insert_size

    # --- qualimap bamqc ---
    String qualimapBAMqc_docker
    String qualimapBAMqc_cluster_config
    Int qualimapBAMqc_disk_size

    # --- qualimap rnaseq ---
    String qualimapRNAseq_docker
    String qualimapRNAseq_cluster_config
    Int qualimapRNAseq_disk_size

    # --- fastqc ---
    String fastqc_docker
    String fastqc_cluster_config
    Int fastqc_disk_size

    # --- fastq_screen ---
    String fastqscreen_docker
    String fastqscreen_cluster_config
    Int fastqscreen_disk_size
    File screen_ref_dir
    File fastq_screen_conf

    # --- stringtie ---
    String stringtie_docker
    String stringtie_cluster
    File gtf
    Int minimum_length_allowed_for_the_predicted_transcripts
    Int Junctions_no_spliced_reads
    Float minimum_isoform_abundance
    Float maximum_fraction_of_muliplelocationmapped_reads

    # --- multiqc ---
    String multiqc_docker
    String multiqc_cluster_config
    Int multiqc_disk_size

    # --- ballgown ---
    String ballgown_docker
    String ballgown_cluster

    # Read pre-processing.
    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = read1,
            read2 = read2,
            docker = fastp_docker,
            cluster = fastp_cluster,
            disk_size = disk_size,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2,
            umi_loc = umi_loc,
            trim_front1 = trim_front1,
            trim_tail1 = trim_tail1,
            max_len1 = max_len1,
            trim_front2 = trim_front2,
            trim_tail2 = trim_tail2,
            max_len2 = max_len2,
            disable_adapter_trimming = disable_adapter_trimming,
            length_required = length_required,
            umi_len = umi_len,
            UMI = UMI,
            qualified_quality_phred = qualified_quality_phred,
            length_required1 = length_required1,
            disable_quality_filtering = disable_quality_filtering
    }

    # Alignment of the processed reads.
    call hisat2.hisat2 as hisat2 {
        input:
            sample_id = sample_id,
            idx = idx,
            idx_prefix = idx_prefix,
            Trim_R1 = fastp.Trim_R1,
            Trim_R2 = fastp.Trim_R2,
            docker = hisat2_docker,
            cluster = hisat2_cluster,
            disk_size = disk_size,
            pen_intronlen = pen_intronlen,
            pen_cansplice = pen_cansplice,
            pen_noncansplice = pen_noncansplice,
            min_intronlen = min_intronlen,
            max_intronlen = max_intronlen,
            maxins = maxins,
            minins = minins
    }

    # SAM -> sorted/indexed BAM, subsampled BAM and insert-size table.
    call samtools.samtools as samtools {
        input:
            sample_id = sample_id,
            sam = hisat2.sam,
            docker = samtools_docker,
            cluster = samtools_cluster,
            disk_size = disk_size,
            insert_size = insert_size
    }

    # BAM QC runs on the subsampled BAM (samtools.out_percent).
    call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
        input:
            bam = samtools.out_percent,
            docker = qualimapBAMqc_docker,
            cluster_config = qualimapBAMqc_cluster_config,
            disk_size = qualimapBAMqc_disk_size
    }
}