Browse Source

first commit

master
LUYAO REN 5 years ago
commit
e898856d66
10 changed files with 303 additions and 0 deletions
  1. +0
    -0
      README.md
  2. +94
    -0
      fastq_screen.conf
  3. +1
    -0
      inputSamplesFileExamples.tsv
  4. +25
    -0
      inputs
  5. BIN
      pictures/.DS_Store
  6. +28
    -0
      tasks/fastqc.wdl
  7. +36
    -0
      tasks/fastqscreen.wdl
  8. +44
    -0
      tasks/multiqc.wdl
  9. +27
    -0
      tasks/qualimap.wdl
  10. +48
    -0
      workflow.wdl

+ 0
- 0
README.md View File


+ 94
- 0
fastq_screen.conf View File

@@ -0,0 +1,94 @@
# This is an example configuration file for FastQ Screen

############################
## Bowtie, Bowtie 2 or BWA #
############################
## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set
## this value to tell the program where to find your chosen aligner. Uncomment
## the relevant line below and set the appropriate location. Please note,
## this path should INCLUDE the executable filename.

#BOWTIE /usr/local/bin/bowtie/bowtie
#BOWTIE2 /usr/local/bowtie2/bowtie2
#BWA /usr/local/bwa/bwa



############################################
## Bismark (for bisulfite sequencing only) #
############################################
## If the Bismark binary is not in your PATH then you can set this value to
## tell the program where to find it. Uncomment the line below and set the
## appropriate location. Please note, this path should INCLUDE the executable
## filename.

#BISMARK /usr/local/bin/bismark/bismark



############
## Threads #
############
## Genome aligners can be made to run across multiple CPU cores to speed up
## searches. Set this value to the number of cores you want for mapping reads.

THREADS 32



##############
## DATABASES #
##############
## This section enables you to configure multiple genomes databases (aligner index
## files) to search against in your screen. For each genome you need to provide a
## database name (which can't contain spaces) and the location of the aligner index
## files.
##
## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.)
## are found in a folder named 'GRCh37'.
##
## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in
## the SAME FOLDER, a SINLGE path may be provided to ALL the of indices. The index
## used will be the one compatible with the chosen aligner (as specified using the
## --aligner flag).
##
## The entries shown below are only suggested examples, you can add as many DATABASE
## sections as required, and you can comment out or remove as many of the existing
## entries as desired. We suggest including genomes and sequences that may be sources
## of contamination either because they where run on your sequencer previously, or may
## have contaminated your sample during the library preparation step.
##
## Human - sequences available from
## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
#DATABASE Human /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
##
## Mouse - sequence available from
## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
#DATABASE Mouse /data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37
##
## Ecoli- sequence available from EMBL accession U00096.2
#DATABASE Ecoli /data/public/Genomes/Ecoli/Ecoli
##
## PhiX - sequence available from Refseq accession NC_001422.1
#DATABASE PhiX /data/public/Genomes/PhiX/phi_plus_SNPs
##
## Adapters - sequence derived from the FastQC contaminats file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
#DATABASE Adapters /data/public/Genomes/Contaminants/Contaminants
##
## Vector - Sequence taken from the UniVec database
## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
#DATABASE Vectors /data/public/Genomes/Vectors/Vectors

DATABASE Human /cromwell_root/tmp/fastq_screen_reference/genome
DATABASE Mouse /cromwell_root/tmp/fastq_screen_reference/mouse
DATABASE ERCC /cromwell_root/tmp/fastq_screen_reference/ERCC
DATABASE EColi /cromwell_root/tmp/fastq_screen_reference/ecoli
DATABASE Adapter /cromwell_root/tmp/fastq_screen_reference/adapters
DATABASE Vector /cromwell_root/tmp/fastq_screen_reference/vector
DATABASE rRNA /cromwell_root/tmp/fastq_screen_reference/rRNARef
DATABASE Virus /cromwell_root/tmp/fastq_screen_reference/viral
DATABASE Yeast /cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify
DATABASE Mitoch /cromwell_root/tmp/fastq_screen_reference/Human_mitoch
DATABASE Phix /cromwell_root/tmp/fastq_screen_reference/phix

+ 1
- 0
inputSamplesFileExamples.tsv View File

@@ -0,0 +1 @@
#read1 #read2 #bam #bai

+ 25
- 0
inputs View File

@@ -0,0 +1,25 @@
{
"{{ project_name }}.qualimap.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"{{ project_name }}.qualimap.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.fastqc.disk_size": "150",
"{{ project_name }}.mergeSentieon.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01",
"{{ project_name }}.sentieon.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/gatk:v2019.01",
"{{ project_name }}.fastqscreen.cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqc.cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"{{ project_name }}.fastqc.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
"{{ project_name }}.sentieon.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.fastqscreen.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"{{ project_name }}.screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"{{ project_name }}.fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"{{ project_name }}.multiqc.cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"{{ project_name }}.qualimap.disk_size": "500",
"{{ project_name }}.multiqc.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"{{ project_name }}.mergeSentieon.disk_size": "100",
"{{ project_name }}.sentieon.disk_size": "100",
"{{ project_name }}.mergeSentieon.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
"{{ project_name }}.fastqscreen.disk_size": "100",
"{{ project_name }}.multiqc.disk_size": "100",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}

BIN
pictures/.DS_Store View File


+ 28
- 0
tasks/fastqc.wdl View File

@@ -0,0 +1,28 @@
task fastqc {
File read1
File read2
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
fastqc -t $nt -o ./ ${read1}
fastqc -t $nt -o ./ ${read2}
>>>

runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File read1_html = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
File read1_zip = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
File read2_html = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
File read2_zip = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
}
}

+ 36
- 0
tasks/fastqscreen.wdl View File

@@ -0,0 +1,36 @@
task fastq_screen {
File read1
File read2
File screen_ref_dir
File fastq_screen_conf
String read1name = basename(read1,".fastq.gz")
String read2name = basename(read2,".fastq.gz")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
mkdir -p /cromwell_root/tmp
cp -r ${screen_ref_dir} /cromwell_root/tmp/
fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read1}
fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${read2}
>>>

runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File png1 = "${read1name}_screen.png"
File txt1 = "${read1name}_screen.txt"
File html1 = "${read1name}_screen.html"
File png2 = "${read2name}_screen.png"
File txt2 = "${read2name}_screen.txt"
File html2 = "${read2name}_screen.html"
}
}

+ 44
- 0
tasks/multiqc.wdl View File

@@ -0,0 +1,44 @@
task multiqc {

Array[File] read1_zip
Array[File] read2_zip

Array[File] txt1
Array[File] txt2

Array[File] zip

String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
mkdir -p /cromwell_root/tmp/fastqc
mkdir -p /cromwell_root/tmp/fastqscreen
mkdir -p /cromwell_root/tmp/bamqc

cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen
for i in ${sep=" " zip}
do
tar -zxvf $i -C /cromwell_root/tmp/bamqc
done

multiqc /cromwell_root/tmp/
>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File multiqc_html = "multiqc_report.html"
Array[File] multiqc_txt = glob("multiqc_data/*")
}
}

+ 27
- 0
tasks/qualimap.wdl View File

@@ -0,0 +1,27 @@
task qualimap {
File bam
File bai
String bamname = basename(bam,".bam")
String docker
String cluster_config
String disk_size

command <<<
set -o pipefail
set -e
nt=$(nproc)
/opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $nt -outdir ${bamname} --java-mem-size=32G
tar -zcvf ${bamname}_qualimap.zip ${bamname}
>>>

runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
}

output {
File zip = "${bamname}_qualimap.zip"
}
}

+ 48
- 0
workflow.wdl View File

@@ -0,0 +1,48 @@
import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/qualimap.wdl" as qualimap
import "./tasks/multiqc.wdl" as multiqc

workflow {{ project_name }} {

File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
File screen_ref_dir
File fastq_screen_conf
File ref_dir
String fasta

scatter (sample in inputSamples) {
call fastqc.fastqc as fastqc {
input:
read1=sample[0],
read2=sample[1]
}

call fastqscreen.fastq_screen as fastqscreen {
input:
read1=sample[0],
read2=sample[1],
screen_ref_dir=screen_ref_dir,
fastq_screen_conf=fastq_screen_conf
}

call qualimap.qualimap as qualimap {
input:
bam=sample[2],
bai=sample[3]
}

}
call multiqc.multiqc as multiqc {
input:
read1_zip=fastqc.read1_zip,
read2_zip=fastqc.read2_zip,
txt1=fastqscreen.txt1,
txt2=fastqscreen.txt2,
zip=qualimap.zip
}

}


Loading…
Cancel
Save