Zhihui пре 4 година
родитељ
комит
94a980c182
16 измењених фајлова са 738 додато и 0 уклоњено
  1. BIN
      .DS_Store
  2. +94
    -0
      conf/fastq_screen.conf
  3. +61
    -0
      defaults
  4. +65
    -0
      inputs
  5. BIN
      tasks/.DS_Store
  6. +25
    -0
      tasks/ballgown.wdl
  7. +68
    -0
      tasks/fastp.wdl
  8. +28
    -0
      tasks/fastqc.wdl
  9. +37
    -0
      tasks/fastqscreen.wdl
  10. +35
    -0
      tasks/hisat2.wdl
  11. +61
    -0
      tasks/multiqc.wdl
  12. +28
    -0
      tasks/qualimapBAMqc.wdl
  13. +29
    -0
      tasks/qualimapRNAseq.wdl
  14. +39
    -0
      tasks/samtools.wdl
  15. +33
    -0
      tasks/stringtie.wdl
  16. +135
    -0
      workflow.wdl

+ 94
- 0
conf/fastq_screen.conf Прегледај датотеку

@@ -0,0 +1,94 @@
# This is an example configuration file for FastQ Screen

############################
## Bowtie, Bowtie 2 or BWA #
############################
## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set
## this value to tell the program where to find your chosen aligner. Uncomment
## the relevant line below and set the appropriate location. Please note,
## this path should INCLUDE the executable filename.

#BOWTIE /usr/local/bin/bowtie/bowtie
#BOWTIE2 /usr/local/bowtie2/bowtie2
#BWA /usr/local/bwa/bwa



############################################
## Bismark (for bisulfite sequencing only) #
############################################
## If the Bismark binary is not in your PATH then you can set this value to
## tell the program where to find it. Uncomment the line below and set the
## appropriate location. Please note, this path should INCLUDE the executable
## filename.

#BISMARK /usr/local/bin/bismark/bismark



############
## Threads #
############
## Genome aligners can be made to run across multiple CPU cores to speed up
## searches. Set this value to the number of cores you want for mapping reads.

THREADS 32



##############
## DATABASES #
##############
## This section enables you to configure multiple genomes databases (aligner index
## files) to search against in your screen. For each genome you need to provide a
## database name (which can't contain spaces) and the location of the aligner index
## files.
##
## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.)
## are found in a folder named 'GRCh37'.
##
## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in
## the SAME FOLDER, a SINLGE path may be provided to ALL the of indices. The index
## used will be the one compatible with the chosen aligner (as specified using the
## --aligner flag).
##
## The entries shown below are only suggested examples, you can add as many DATABASE
## sections as required, and you can comment out or remove as many of the existing
## entries as desired. We suggest including genomes and sequences that may be sources
## of contamination either because they where run on your sequencer previously, or may
## have contaminated your sample during the library preparation step.
##
## Human - sequences available from
## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
#DATABASE Human /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
##
## Mouse - sequence available from
## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
#DATABASE Mouse /data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37
##
## Ecoli- sequence available from EMBL accession U00096.2
#DATABASE Ecoli /data/public/Genomes/Ecoli/Ecoli
##
## PhiX - sequence available from Refseq accession NC_001422.1
#DATABASE PhiX /data/public/Genomes/PhiX/phi_plus_SNPs
##
## Adapters - sequence derived from the FastQC contaminats file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
#DATABASE Adapters /data/public/Genomes/Contaminants/Contaminants
##
## Vector - Sequence taken from the UniVec database
## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
#DATABASE Vectors /data/public/Genomes/Vectors/Vectors

DATABASE Human /cromwell_root/tmp/fastq_screen_reference/genome
DATABASE Mouse /cromwell_root/tmp/fastq_screen_reference/mouse
DATABASE ERCC /cromwell_root/tmp/fastq_screen_reference/ERCC
DATABASE EColi /cromwell_root/tmp/fastq_screen_reference/ecoli
DATABASE Adapter /cromwell_root/tmp/fastq_screen_reference/adapters
DATABASE Vector /cromwell_root/tmp/fastq_screen_reference/vector
DATABASE rRNA /cromwell_root/tmp/fastq_screen_reference/rRNARef
DATABASE Virus /cromwell_root/tmp/fastq_screen_reference/viral
DATABASE Yeast /cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify
DATABASE Mitoch /cromwell_root/tmp/fastq_screen_reference/Human_mitoch
DATABASE Phix /cromwell_root/tmp/fastq_screen_reference/phix

+ 61
- 0
defaults Прегледај датотеку

@@ -0,0 +1,61 @@
{
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
"fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"trim_front1": "0",
"trim_tail1": "0",
"max_len1": "0",
"trim_front2": "0",
"trim_tail2": "0",
"max_len2": "0",
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"disable_adapter_trimming": "0",
"length_required": "50",
"length_required1": "20",
"UMI": "0",
"umi_len": "0",
"umi_loc": "umi_loc",
"qualified_quality_phred": "20",
"disable_quality_filtering": "1",
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"idx_prefix": "genome_snp_tran",
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
"fasta": "GRCh38.d1.vd1.fa",
"pen_cansplice":"0",
"pen_noncansplice":"3",
"pen_intronlen":"G,-8,1",
"min_intronlen":"30",
"max_intronlen":"500000",
"maxins":"500",
"minins":"0",
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
"samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"insert_size":"8000",
"gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
"stringtie_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
"stringtie_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"minimum_length_allowed_for_the_predicted_transcripts":"200",
"minimum_isoform_abundance":"0.01",
"Junctions_no_spliced_reads":"10",
"maximum_fraction_of_muliplelocationmapped_reads":"0.95",
"fastqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
"fastqc_disk_size": "150",
"qualimapBAMqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"qualimapBAMqc_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"qualimapBAMqc_disk_size": "500",
"qualimapRNAseq_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"qualimapRNAseq_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"qualimapRNAseq_disk_size": "500",
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"fastqscreen_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"fastqscreen_disk_size": "200",
"multiqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"multiqc_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"multiqc_disk_size": "100",
"ballgown_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pgx-ballgown:0.0.1",
"ballgown_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc"
}

+ 65
- 0
inputs Прегледај датотеку

@@ -0,0 +1,65 @@
{
"{{ project_name }}.read1": "{{ read1 }}",
"{{ project_name }}.read2": "{{ read2 }}",
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
"{{ project_name }}.idx": "{{ idx }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
"{{ project_name }}.maxins": "{{ maxins }}",
"{{ project_name }}.minins": "{{ minins }}",
"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
"{{ project_name }}.insert_size": "{{ insert_size }}",
"{{ project_name }}.gtf": "{{ gtf }}",
"{{ project_name }}.stringtie_docker": "{{ stringtie_docker }}",
"{{ project_name }}.stringtie_cluster": "{{ stringtie_cluster }}",
"{{ project_name }}.minimum_length_allowed_for_the_predicted_transcripts": "{{ minimum_length_allowed_for_the_predicted_transcripts }}",
"{{ project_name }}.minimum_isoform_abundance": "{{ minimum_isoform_abundance }}",
"{{ project_name }}.Junctions_no_spliced_reads": "{{ Junctions_no_spliced_reads }}",
"{{ project_name }}.maximum_fraction_of_muliplelocationmapped_reads": "{{ maximum_fraction_of_muliplelocationmapped_reads }}",
"{{ project_name }}.fastqc_cluster_config": "{{ fastqc_cluster_config }}",
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
"{{ project_name }}.fastqc_disk_size": "{{ fastqc_disk_size }}",
"{{ project_name }}.qualimapBAMqc_docker": "{{ qualimapBAMqc_docker }}",
"{{ project_name }}.qualimapBAMqc_cluster_config": "{{ qualimapBAMqc_cluster_config }}",
"{{ project_name }}.qualimapBAMqc_disk_size": "{{ qualimapBAMqc_disk_size }}",
"{{ project_name }}.qualimapRNAseq_docker": "{{ qualimapRNAseq_docker }}",
"{{ project_name }}.qualimapRNAseq_cluster_config": "{{ qualimapRNAseq_cluster_config }}",
"{{ project_name }}.qualimapRNAseq_disk_size": "{{ qualimapRNAseq_disk_size }}",
"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}",
"{{ project_name }}.fastqscreen_cluster_config": "{{ fastqscreen_cluster_config }}",
"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}",
"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}",
"{{ project_name }}.fastqscreen_disk_size": "{{ fastqscreen_disk_size }}",
"{{ project_name }}.multiqc_cluster_config": "{{ multiqc_cluster_config }}",
"{{ project_name }}.multiqc_docker": "{{ multiqc_docker }}",
"{{ project_name }}.multiqc_disk_size": "{{ multiqc_disk_size }}",
"{{ project_name }}.ballgown_docker": "{{ ballgown_docker }}",
"{{ project_name }}.ballgown_cluster": "{{ ballgown_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size if disk_size != '' else 200}}"
}


+ 25
- 0
tasks/ballgown.wdl Прегледај датотеку

@@ -0,0 +1,25 @@
task ballgown {
File gene_abundance
Array[File] ballgown
String sample_id
String docker
String cluster
String disk_size

command <<<
mkdir -p /cromwell_root/tmp/${sample_id}
cp -r ${sep=" " ballgown} /cromwell_root/tmp/${sample_id}
ballgown /cromwell_root/tmp/${sample_id} ${sample_id}.txt
>>>
runtime {
docker: docker
cluster: cluster
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File mat_expression = "${sample_id}.txt"
}
}

+ 68
- 0
tasks/fastp.wdl Прегледај датотеку

@@ -0,0 +1,68 @@
task fastp {
String sample_id
File read1
File read2
String adapter_sequence
String adapter_sequence_r2
String docker
String cluster
String disk_size
String umi_loc
Int trim_front1
Int trim_tail1
Int max_len1
Int trim_front2
Int trim_tail2
Int max_len2
Int disable_adapter_trimming
Int length_required
Int umi_len
Int UMI
Int qualified_quality_phred
Int length_required1
Int disable_quality_filtering
command <<<
mkdir -p /cromwell_root/tmp/fastp/
##1.Disable_quality_filtering
if [ "${disable_quality_filtering}" == 0 ]
then
cp ${read1} /cromwell_root/tmp/fastp/{sample_id}_R1.fastq.tmp1.gz
cp ${read2} /cromwell_root/tmp/fastp/{sample_id}_R2.fastq.tmp1.gz
else
fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
fi

##2.UMI
if [ "${UMI}" == 0 ]
then
cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
else
fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
fi

##3.Trim
if [ "${disable_adapter_trimming}" == 0 ]
then
fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
else
cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
fi
>>>
runtime {
docker: docker
cluster: cluster
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File json = "${sample_id}.json"
File report = "${sample_id}.html"
File Trim_R1 = "${sample_id}_R1.fastq.gz"
File Trim_R2 = "${sample_id}_R2.fastq.gz"
}
}

+ 28
- 0
tasks/fastqc.wdl Прегледај датотеку

@@ -0,0 +1,28 @@
task fastqc {
File read1
File read2
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
fastqc -t $nt -o ./ ${read1}
fastqc -t $nt -o ./ ${read2}
>>>

runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File read1_html = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
File read1_zip = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
File read2_html = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
File read2_zip = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
}
}

+ 37
- 0
tasks/fastqscreen.wdl Прегледај датотеку

@@ -0,0 +1,37 @@
task fastq_screen {
File read1
File read2
File screen_ref_dir
File fastq_screen_conf
String read1name = basename(read1,".fastq.gz")
String read2name = basename(read2,".fastq.gz")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
mkdir -p /cromwell_root/tmp
cp -r ${screen_ref_dir} /cromwell_root/tmp/
#sed -i "s#/cromwell_root/fastq_screen_reference#${screen_ref_dir}#g" ${fastq_screen_conf}
fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read1}
fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read2}
>>>

runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File png1 = "${read1name}_screen.png"
File txt1 = "${read1name}_screen.txt"
File html1 = "${read1name}_screen.html"
File png2 = "${read2name}_screen.png"
File txt2 = "${read2name}_screen.txt"
File html2 = "${read2name}_screen.html"
}
}

+ 35
- 0
tasks/hisat2.wdl Прегледај датотеку

@@ -0,0 +1,35 @@
task hisat2 {
File idx
File Trim_R1
File Trim_R2
String idx_prefix
String sample_id
String docker
String cluster
String disk_size
String pen_intronlen
Int pen_cansplice
Int pen_noncansplice
Int min_intronlen
Int max_intronlen
Int maxins
Int minins
command <<<
nt=$(nproc)
hisat2 -t -p $nt -x ${idx}/${idx_prefix} --pen-cansplice ${pen_cansplice} --pen-noncansplice ${pen_noncansplice} --pen-intronlen ${pen_intronlen} --min-intronlen ${min_intronlen} --max-intronlen ${max_intronlen} --maxins ${maxins} --minins ${minins} --un-conc-gz ${sample_id}_un.fq.gz -1 ${Trim_R1} -2 ${Trim_R2} -S ${sample_id}.sam
>>>
runtime {
docker: docker
cluster: cluster
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File sam = "${sample_id}.sam"
File unmapread_1p = "${sample_id}_un.fq.1.gz"
File unmapread_2p = "${sample_id}_un.fq.2.gz"
}
}

+ 61
- 0
tasks/multiqc.wdl Прегледај датотеку

@@ -0,0 +1,61 @@
task multiqc {

Array[File] read1_zip
Array[File] read2_zip

Array[File] txt1
Array[File] txt2

Array[File] bamqc_zip
Array[File] rnaseq_zip

String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
mkdir -p /cromwell_root/tmp/fastqc
mkdir -p /cromwell_root/tmp/fastqscreen
mkdir -p /cromwell_root/tmp/bamqc
mkdir -p /cromwell_root/tmp/rnaseq

cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen
for i in ${sep=" " bamqc_zip}
do
tar -zxvf $i -C /cromwell_root/tmp/bamqc
done
for i in ${sep=" " rnaseq_zip}
do
tar -zxvf $i -C /cromwell_root/tmp/rnaseq
done

multiqc /cromwell_root/tmp/
cat multiqc_data/multiqc_fastq_screen.txt > multiqc_fastq_screen.txt
cat multiqc_data/multiqc_fastqc.txt > multiqc_fastqc.txt
cat multiqc_data/multiqc_general_stats.txt > multiqc_general_stats.txt
cat multiqc_data/multiqc_qualimap_bamqc_genome_results.txt > multiqc_qualimap_bamqc_genome_results.txt

>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File multiqc_html = "multiqc_report.html"
Array[File] multiqc_txt = glob("multiqc_data/*")
File multiqc_fastq_screen = "multiqc_fastq_screen.txt"
File multiqc_fastqc = "multiqc_fastqc.txt"
File multiqc_general_stats = "multiqc_general_stats.txt"
File bamqc_genome_results = "multiqc_qualimap_bamqc_genome_results.txt"
}
}

+ 28
- 0
tasks/qualimapBAMqc.wdl Прегледај датотеку

@@ -0,0 +1,28 @@
task qualimapBAMqc {
File bam
String bamname = basename(bam,".bam")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
/opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname}_bamqc --java-mem-size=32G
tar -zcvf ${bamname}_bamqc_qualimap.zip ${bamname}_bamqc
>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File bamqc_zip = "${bamname}_bamqc_qualimap.zip"
}
}

+ 29
- 0
tasks/qualimapRNAseq.wdl Прегледај датотеку

@@ -0,0 +1,29 @@
task qualimapRNAseq {
File bam
File gtf
String bamname = basename(bam,".bam")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
/opt/qualimap/qualimap rnaseq -bam ${bam} -outformat HTML -outdir ${bamname}_RNAseq -gtf ${gtf} -pe --java-mem-size=10G
tar -zcvf ${bamname}_RNAseq_qualimap.zip ${bamname}_RNAseq
>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File rnaseq_zip = "${bamname}_RNAseq_qualimap.zip"
}
}

+ 39
- 0
tasks/samtools.wdl Прегледај датотеку

@@ -0,0 +1,39 @@
task samtools {
File sam
String sample_id
String bam = sample_id + ".bam"
String sorted_bam = sample_id + ".sorted.bam"
String percent_bam = sample_id + ".percent.bam"
String sorted_bam_index = sample_id + ".sorted.bam.bai"
String ins_size = sample_id + ".ins_size"
String docker
String cluster
String disk_size
Int insert_size

command <<<
set -o pipefail
set -e
/opt/conda/bin/samtools view -bS ${sam} > ${bam}
/opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
/opt/conda/bin/samtools index ${sorted_bam}
/opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}
/opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${sample_id}.ins_size
>>>

runtime {
docker: docker
cluster: cluster
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File out_bam = sorted_bam
File out_percent = percent_bam
File out_bam_index = sorted_bam_index
File out_ins_size = ins_size
}

}


+ 33
- 0
tasks/stringtie.wdl Прегледај датотеку

@@ -0,0 +1,33 @@
task stringtie {
File bam
File gtf
String docker
String sample_id
String cluster
String disk_size
Int minimum_length_allowed_for_the_predicted_transcripts
Int Junctions_no_spliced_reads
Float minimum_isoform_abundance
Float maximum_fraction_of_muliplelocationmapped_reads

command <<<
nt=$(nproc)
mkdir ballgown
/opt/conda/bin/stringtie -e -B -p $nt -f ${minimum_isoform_abundance} -m ${minimum_length_allowed_for_the_predicted_transcripts} -a ${Junctions_no_spliced_reads} -M ${maximum_fraction_of_muliplelocationmapped_reads} -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam}
>>>
runtime {
docker: docker
cluster: cluster
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File covered_transcripts = "${sample_id}.cov.ref.gtf"
File gene_abundance = "${sample_id}.gene.abundance.txt"
Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
File genecount = "{sample_id}_genecount.csv"
}
}

+ 135
- 0
workflow.wdl Прегледај датотеку

@@ -0,0 +1,135 @@
import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq

workflow {{ project_name }} {
File read1
File read2
File idx
File screen_ref_dir
File fastq_screen_conf
File gtf
String sample_id
String fastp_docker
String adapter_sequence
String adapter_sequence_r2
String fastp_cluster
String umi_loc
String idx_prefix
String pen_intronlen
String fastqc_cluster_config
String fastqc_docker
String fastqscreen_docker
String fastqscreen_cluster_config
String hisat2_docker
String hisat2_cluster
String qualimapBAMqc_docker
String qualimapBAMqc_cluster_config
String qualimapRNAseq_docker
String qualimapRNAseq_cluster_config
String samtools_docker
String samtools_cluster
String stringtie_docker
String stringtie_cluster
String multiqc_cluster_config
String multiqc_docker
Int multiqc_disk_size
Int trim_front1
Int trim_tail1
Int max_len1
Int trim_front2
Int trim_tail2
Int max_len2
Int disable_adapter_trimming
Int length_required
Int umi_len
Int UMI
Int qualified_quality_phred
Int length_required1
Int disable_quality_filtering
Int pen_cansplice
Int pen_noncansplice
Int min_intronlen
Int max_intronlen
Int maxins
Int minins
Int fastqc_disk_size
Int fastqscreen_disk_size
Int qualimapBAMqc_disk_size
Int qualimapRNAseq_disk_size
Int insert_size
Int minimum_length_allowed_for_the_predicted_transcripts
Int Junctions_no_spliced_reads
Float minimum_isoform_abundance
Float maximum_fraction_of_muliplelocationmapped_reads
String ballgown_docker
String ballgown_cluster
String disk_size

call fastp.fastp as fastp {
input:
sample_id=sample_id,
read1 = read1,
read2 = read2,
docker = fastp_docker,
cluster = fastp_cluster,
disk_size = disk_size,
adapter_sequence = adapter_sequence,
adapter_sequence_r2 = adapter_sequence_r2,
umi_loc = umi_loc,
trim_front1 = trim_front1,
trim_tail1 = trim_tail1,
max_len1 = max_len1,
trim_front2 = trim_front2,
trim_tail2 = trim_tail2,
max_len2 = max_len2,
disable_adapter_trimming = disable_adapter_trimming,
length_required = length_required,
umi_len = umi_len,
UMI = UMI,
qualified_quality_phred = qualified_quality_phred,
length_required1 = length_required1,
disable_quality_filtering = disable_quality_filtering
}

call hisat2.hisat2 as hisat2 {
input:
sample_id = sample_id,
idx = idx,
idx_prefix = idx_prefix,
Trim_R1 = fastp.Trim_R1,
Trim_R2 = fastp.Trim_R2,
docker = hisat2_docker,
cluster = hisat2_cluster,
disk_size = disk_size,
pen_intronlen = pen_intronlen,
pen_cansplice = pen_cansplice,
pen_noncansplice = pen_noncansplice,
min_intronlen = min_intronlen,
max_intronlen = max_intronlen,
maxins = maxins,
minins = minins
}

call samtools.samtools as samtools {
input:
sample_id = sample_id,
sam = hisat2.sam,
docker = samtools_docker,
cluster = samtools_cluster,
disk_size = disk_size,
insert_size = insert_size
}
call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
input:
bam = samtools.out_percent,
docker = qualimapBAMqc_docker,
cluster_config = qualimapBAMqc_cluster_config,
disk_size = qualimapBAMqc_disk_size
}


}

Loading…
Откажи
Сачувај