瀏覽代碼

se

master
Zhihui 4 年之前
父節點
當前提交
8f0b2c2fde
共有 19 個檔案被更改,包括 1002 行新增0 行删除
  1. 二進制
      .DS_Store
  2. +94
    -0
      conf/fastq_screen.conf
  3. +65
    -0
      defaults
  4. +67
    -0
      inputs
  5. +21
    -0
      manifest.json
  6. +167
    -0
      schema.json
  7. 二進制
      tasks/.DS_Store
  8. +27
    -0
      tasks/ballgown.wdl
  9. +28
    -0
      tasks/count.wdl
  10. +63
    -0
      tasks/fastp.wdl
  11. +24
    -0
      tasks/fastqc.wdl
  12. +32
    -0
      tasks/fastqscreen.wdl
  13. +33
    -0
      tasks/hisat2.wdl
  14. +61
    -0
      tasks/multiqc.wdl
  15. +28
    -0
      tasks/qualimapBAMqc.wdl
  16. +29
    -0
      tasks/qualimapRNAseq.wdl
  17. +39
    -0
      tasks/samtools.wdl
  18. +32
    -0
      tasks/stringtie.wdl
  19. +192
    -0
      workflow.wdl

二進制
.DS_Store 查看文件


+ 94
- 0
conf/fastq_screen.conf 查看文件

# This is an example configuration file for FastQ Screen

############################
## Bowtie, Bowtie 2 or BWA #
############################
## If the Bowtie, Bowtie 2 or BWA binary is not in your PATH, you can set
## this value to tell the program where to find your chosen aligner. Uncomment
## the relevant line below and set the appropriate location. Please note,
## this path should INCLUDE the executable filename.

#BOWTIE /usr/local/bin/bowtie/bowtie
#BOWTIE2 /usr/local/bowtie2/bowtie2
#BWA /usr/local/bwa/bwa



############################################
## Bismark (for bisulfite sequencing only) #
############################################
## If the Bismark binary is not in your PATH then you can set this value to
## tell the program where to find it. Uncomment the line below and set the
## appropriate location. Please note, this path should INCLUDE the executable
## filename.

#BISMARK /usr/local/bin/bismark/bismark



############
## Threads #
############
## Genome aligners can be made to run across multiple CPU cores to speed up
## searches. Set this value to the number of cores you want for mapping reads.

THREADS 32



##############
## DATABASES #
##############
## This section enables you to configure multiple genomes databases (aligner index
## files) to search against in your screen. For each genome you need to provide a
## database name (which can't contain spaces) and the location of the aligner index
## files.
##
## The path to the index files SHOULD INCLUDE THE BASENAME of the index, e.g:
## /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
## Thus, the index files (Homo_sapiens.GRCh37.1.bt2, Homo_sapiens.GRCh37.2.bt2, etc.)
## are found in a folder named 'GRCh37'.
##
## If, for example, the Bowtie, Bowtie2 and BWA indices of a given genome reside in
## the SAME FOLDER, a SINGLE path may be provided to ALL of the indices. The index
## used will be the one compatible with the chosen aligner (as specified using the
## --aligner flag).
##
## The entries shown below are only suggested examples, you can add as many DATABASE
## sections as required, and you can comment out or remove as many of the existing
## entries as desired. We suggest including genomes and sequences that may be sources
## of contamination either because they were run on your sequencer previously, or may
## have contaminated your sample during the library preparation step.
##
## Human - sequences available from
## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/
#DATABASE Human /data/public/Genomes/Human_Bowtie/GRCh37/Homo_sapiens.GRCh37
##
## Mouse - sequence available from
## ftp://ftp.ensembl.org/pub/current/fasta/mus_musculus/dna/
#DATABASE Mouse /data/public/Genomes/Mouse/NCBIM37/Mus_musculus.NCBIM37
##
## Ecoli- sequence available from EMBL accession U00096.2
#DATABASE Ecoli /data/public/Genomes/Ecoli/Ecoli
##
## PhiX - sequence available from Refseq accession NC_001422.1
#DATABASE PhiX /data/public/Genomes/PhiX/phi_plus_SNPs
##
## Adapters - sequence derived from the FastQC contaminants file found at: www.bioinformatics.babraham.ac.uk/projects/fastqc
#DATABASE Adapters /data/public/Genomes/Contaminants/Contaminants
##
## Vector - Sequence taken from the UniVec database
## http://www.ncbi.nlm.nih.gov/VecScreen/UniVec.html
#DATABASE Vectors /data/public/Genomes/Vectors/Vectors

DATABASE Human /cromwell_root/tmp/fastq_screen_reference/genome
DATABASE Mouse /cromwell_root/tmp/fastq_screen_reference/mouse
DATABASE ERCC /cromwell_root/tmp/fastq_screen_reference/ERCC
DATABASE EColi /cromwell_root/tmp/fastq_screen_reference/ecoli
DATABASE Adapter /cromwell_root/tmp/fastq_screen_reference/adapters
DATABASE Vector /cromwell_root/tmp/fastq_screen_reference/vector
DATABASE rRNA /cromwell_root/tmp/fastq_screen_reference/rRNARef
DATABASE Virus /cromwell_root/tmp/fastq_screen_reference/viral
DATABASE Yeast /cromwell_root/tmp/fastq_screen_reference/GCF_000146045.2_R64_genomic_modify
DATABASE Mitoch /cromwell_root/tmp/fastq_screen_reference/Human_mitoch
DATABASE Phix /cromwell_root/tmp/fastq_screen_reference/phix

+ 65
- 0
defaults 查看文件

{
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
"fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"trim_front1": "0",
"trim_tail1": "0",
"max_len1": "0",
"trim_front2": "0",
"trim_tail2": "0",
"max_len2": "0",
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"disable_adapter_trimming": "0",
"length_required": "50",
"length_required1": "20",
"UMI": "0",
"umi_len": "0",
"umi_loc": "umi_loc",
"qualified_quality_phred": "20",
"disable_quality_filtering": "1",
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"idx_prefix": "genome_snp_tran",
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
"fasta": "GRCh38.d1.vd1.fa",
"pen_cansplice":"0",
"pen_noncansplice":"3",
"pen_intronlen":"G,-8,1",
"min_intronlen":"30",
"max_intronlen":"500000",
"maxins":"500",
"minins":"0",
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
"samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"insert_size":"8000",
"gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
"stringtie_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
"stringtie_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"minimum_length_allowed_for_the_predicted_transcripts":"200",
"minimum_isoform_abundance":"0.01",
"Junctions_no_spliced_reads":"10",
"maximum_fraction_of_muliplelocationmapped_reads":"0.95",
"fastqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:0.11.8",
"fastqc_disk_size": "150",
"qualimapBAMqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"qualimapBAMqc_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"qualimapBAMqc_disk_size": "500",
"qualimapRNAseq_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"qualimapRNAseq_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"qualimapRNAseq_disk_size": "500",
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"fastqscreen_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"fastqscreen_disk_size": "200",
"multiqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"multiqc_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"multiqc_disk_size": "100",
"ballgown_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pgx-ballgown:0.0.1",
"ballgown_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"count_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/count:v1.0",
"count_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"count_length": "150",
"disk_size": "200"
}

+ 67
- 0
inputs 查看文件

{
"{{ project_name }}.read": "{{ read }}",
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
"{{ project_name }}.idx": "{{ idx }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
"{{ project_name }}.maxins": "{{ maxins }}",
"{{ project_name }}.minins": "{{ minins }}",
"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
"{{ project_name }}.insert_size": "{{ insert_size }}",
"{{ project_name }}.gtf": "{{ gtf }}",
"{{ project_name }}.stringtie_docker": "{{ stringtie_docker }}",
"{{ project_name }}.stringtie_cluster": "{{ stringtie_cluster }}",
"{{ project_name }}.minimum_length_allowed_for_the_predicted_transcripts": "{{ minimum_length_allowed_for_the_predicted_transcripts }}",
"{{ project_name }}.minimum_isoform_abundance": "{{ minimum_isoform_abundance }}",
"{{ project_name }}.Junctions_no_spliced_reads": "{{ Junctions_no_spliced_reads }}",
"{{ project_name }}.maximum_fraction_of_muliplelocationmapped_reads": "{{ maximum_fraction_of_muliplelocationmapped_reads }}",
"{{ project_name }}.fastqc_cluster_config": "{{ fastqc_cluster_config }}",
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
"{{ project_name }}.fastqc_disk_size": "{{ fastqc_disk_size }}",
"{{ project_name }}.qualimapBAMqc_docker": "{{ qualimapBAMqc_docker }}",
"{{ project_name }}.qualimapBAMqc_cluster_config": "{{ qualimapBAMqc_cluster_config }}",
"{{ project_name }}.qualimapBAMqc_disk_size": "{{ qualimapBAMqc_disk_size }}",
"{{ project_name }}.qualimapRNAseq_docker": "{{ qualimapRNAseq_docker }}",
"{{ project_name }}.qualimapRNAseq_cluster_config": "{{ qualimapRNAseq_cluster_config }}",
"{{ project_name }}.qualimapRNAseq_disk_size": "{{ qualimapRNAseq_disk_size }}",
"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}",
"{{ project_name }}.fastqscreen_cluster_config": "{{ fastqscreen_cluster_config }}",
"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}",
"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}",
"{{ project_name }}.fastqscreen_disk_size": "{{ fastqscreen_disk_size }}",
"{{ project_name }}.multiqc_cluster_config": "{{ multiqc_cluster_config }}",
"{{ project_name }}.multiqc_docker": "{{ multiqc_docker }}",
"{{ project_name }}.multiqc_disk_size": "{{ multiqc_disk_size }}",
"{{ project_name }}.ballgown_docker": "{{ ballgown_docker }}",
"{{ project_name }}.ballgown_cluster": "{{ ballgown_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size if disk_size != '' else 200}}",
"{{ project_name }}.count_docker": "{{ count_docker }}",
"{{ project_name }}.count_cluster": "{{ count_cluster }}",
"{{ project_name }}.count_length": "{{ count_length }}"
}

+ 21
- 0
manifest.json 查看文件

{
"name": "RNA-Seq QC for Quartet",
"short_name": "quartet-rnaseq-qc",
"description": "RNA Sequencing Quality Control Pipeline for Quartet.",
"home": "http://choppy.3steps.cn/lizhihui/quartet-rnaseq-qc",
"hidden": false,
"icons": [
{
"src": "",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "",
"type": "image/png",
"sizes": "192x192"
}
],
"category": "Pipeline",
"source": "PGx"
}

+ 167
- 0
schema.json 查看文件

{
"formMode":"batch",
"fields":[
{
"tmplType":"sample_id",
"type":"number",
"label":"Sample ID",
"name":"sample_id",
"question":"How many samples? For auto generating sample id.",
"placeholder":"How many samples? For auto generating sample id.",
"model":"sample_id",
"disabled":false,
"min":1,
"max":100,
"config":{
"rules":[{"type":"number","required":true,"message":"Please input a number."}]
}
},
{
"tmplType":"file",
"label":"Read1 Files",
"model":"read1",
"readOnly":false,
"multiple":true,
"filterType":"fastq|fastq.gz|fq|fq.gz",
"placeholder":"Select files for read1.",
"config":{
"rules":[
{
"type":"array",
"required":true,
"message":"Please select files for read1."
}
]
}
},
{
"tmplType":"file",
"label":"Read2 Files",
"model":"read2",
"readOnly":false,
"multiple":true,
"filterType":"fastq|fastq.gz|fq|fq.gz",
"placeholder":"Select files for read2.",
"config":{
"rules":[
{
"type":"array",
"required":true,
"message":"Please select files for read2."
}
]
}
},
{
"tmplType":"file",
"label":"FastqScreen Conf File",
"model":"fastq_screen_conf",
"readOnly":false,
"multiple":false,
"filterType":"conf",
"placeholder":"Select file for fastq screen.",
"config":{
"initialValue": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"rules":[{"required":true,"message":"Please select file for fastq screen."}]
}
},
{
"tmplType":"file",
"label":"References of Fastq Screen",
"model":"screen_ref_dir",
"readOnly":false,
"filterType":"",
"multiple":false,
"placeholder":"Select directory for Fastq Screen References.",
"config":{
"initialValue": "oss://pgx-reference-data/fastq_screen_reference/",
"rules":[
{
"required":true,
"message":"Select directory for Fastq Screen References."
}
]
}
},
{
"tmplType":"file",
"label":"Index Directory",
"model":"idx",
"readOnly":false,
"filterType":"",
"multiple":false,
"placeholder":"Select directory for idx.",
"config":{
"initialValue": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
"rules":[{"required":true,"message":"Please select directory for idx."}]
}
},
{
"tmplType":"file",
"label":"GTF File",
"model":"gtf",
"readOnly":false,
"multiple":false,
"filterType":"gtf",
"placeholder":"Select file for gtf.",
"config":{
"initialValue": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
"rules":[{"required":true,"message":"Please select file for gtf."}]
}
},
{
"tmplType":"input",
"type":"string",
"label":"Adapter Sequence",
"name":"adapter_sequence",
"placeholder":"Please enter adapter sequence.",
"model":"adapter_sequence",
"disabled":false,
"min":1,
"max":100,
"config":{
"initialValue":"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"rules":[
{
"type":"string",
"required":false,
"message":"Please input an adapter sequence."
}
]
}
},
{
"tmplType":"input",
"type":"string",
"label":"Adapter Sequence R2",
"name":"adapter_sequence_r2",
"placeholder":"Please enter adapter sequence.",
"model":"adapter_sequence_r2",
"disabled":false,
"min":1,
"max":100,
"config":{
"initialValue":"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"rules":[
{
"type":"string",
"required":false,
"message":"Please input an adapter sequence."
}
]
}
},
{
"tmplType":"actions",
"buttons":[
{"type":"cancel","buttonType":"default","buttonLabel":"Previous Step"},
{
"type":"submit",
"buttonType":"primary",
"buttonLabel":"Next Step",
"validate":true
}
]
}
]
}

二進制
tasks/.DS_Store 查看文件


+ 27
- 0
tasks/ballgown.wdl 查看文件

# Builds a per-sample expression table from the Ballgown files of one sample.
#
# Inputs:
#   gene_abundance  Used only to derive sample_id: its basename with the
#                   ".gene.abundance.txt" suffix removed.
#   ballgown        Files copied into /cromwell_root/tmp/<sample_id>, the directory
#                   handed to the image's `ballgown` command.
#   docker          Container image to run in.
#   cluster         Value for the `cluster` runtime attribute.
#   disk_size       Size used in the `dataDisk` runtime attribute.
# Output:
#   mat_expression  "<sample_id>.txt", after the two sed clean-ups below.
task ballgown {
    File gene_abundance
    Array[File] ballgown
    String sample_id=basename(gene_abundance, ".gene.abundance.txt")
    String docker
    String cluster
    String disk_size

    command <<<
        # Stop at the first failing command. Without this the script's exit status is
        # that of the last sed, so a failed copy could go unnoticed.
        set -o pipefail
        set -e
        mkdir -p /cromwell_root/tmp/${sample_id}
        cp -r ${sep=" " ballgown} /cromwell_root/tmp/${sample_id}
        ballgown /cromwell_root/tmp/${sample_id} ${sample_id}.txt
        # Strip every double quote from the table.
        sed -i 's/"//g' ${sample_id}.txt
        # Rewrite the header line only: each "FPKM" plus the following character
        # is replaced by "GENE_ID\t".
        sed -i '1s/FPKM./GENE_ID\t/g' ${sample_id}.txt
    >>>
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
        File mat_expression = "${sample_id}.txt"
    }
}

+ 28
- 0
tasks/count.wdl 查看文件

# Produces gene- and transcript-level count matrices from the Ballgown files of one sample.
#
# Inputs:
#   gene_abundance  Used only to derive sample_id: its basename with the
#                   ".gene.abundance.txt" suffix removed.
#   ballgown        Files copied into /cromwell_root/tmp/ballgown/<sample_id>.
#   count_length    Passed to the image's `count` command as `-l`.
#   docker          Container image to run in.
#   cluster         Value for the `cluster` runtime attribute.
#   disk_size       Size used in the `dataDisk` runtime attribute.
# Outputs:
#   mat_expression_genecount        "<sample_id>_gene_count_matrix.csv"
#   mat_expression_transcriptcount  "<sample_id>_transcript_count_matrix.csv"
task count {
    File gene_abundance
    Array[File] ballgown
    String sample_id=basename(gene_abundance, ".gene.abundance.txt")
    String docker
    String cluster
    String disk_size
    Int count_length

    command <<<
        # Stop at the first failing command. Without this the script's exit status is
        # that of the final sed, so a failed copy could go unnoticed.
        set -o pipefail
        set -e
        mkdir -p /cromwell_root/tmp/ballgown/${sample_id}
        cp -r ${sep=" " ballgown} /cromwell_root/tmp/ballgown/${sample_id}
        # The parent directory (not the per-sample one) is given to `count`.
        count -i /cromwell_root/tmp/ballgown -l ${count_length} -g ${sample_id}_gene_count_matrix.csv -t ${sample_id}_transcript_count_matrix.csv
        # Upper-case the gene id column name in the header line of the gene matrix.
        sed -i '1s/gene_id/GENE_ID/g' ${sample_id}_gene_count_matrix.csv
    >>>
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
        File mat_expression_genecount = "${sample_id}_gene_count_matrix.csv"
        File mat_expression_transcriptcount = "${sample_id}_transcript_count_matrix.csv"
    }
}

+ 63
- 0
tasks/fastp.wdl 查看文件

# Pre-processes one FASTQ file with fastp in three sequential stages. Each stage
# either runs fastp or just copies its input forward, depending on a 0/non-0 switch:
#   1. disable_quality_filtering: copy when 0, otherwise fastp with the trim/max_len options.
#   2. UMI:                       copy when 0, otherwise fastp with -U/--umi_loc/--umi_len.
#   3. disable_adapter_trimming:  fastp with length/quality/adapter options when 0,
#                                 otherwise copy.
# Every fastp stage writes the same <sample_id>.json / <sample_id>.html, so the
# reports that survive are those of the last stage that ran fastp.
#
# NOTE(review): stage 1 runs fastp when disable_quality_filtering is NOT 0, which
# reads inverted relative to the switch name — confirm the intent before changing it.
# NOTE(review): if all three stages take their copy branch no json/html is written
# and the declared outputs will be missing.
#
# Inputs not referenced by the command (kept for interface compatibility):
#   adapter_sequence_r2, trim_front2, trim_tail2, max_len2.
#
# Outputs:
#   json    "<sample_id>.json"
#   report  "<sample_id>.html"
#   Trim    "<sample_id>.fastq.gz"
task fastp {
    File read
    # Output prefix: basename of `read` with a trailing ".fastq.gz" or ".fq.gz" removed.
    # (Previously referenced the undeclared name `read1`.)
    String sample_id=sub(basename(read), "\\.(fastq|fq)\\.gz$", "")
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering
    command <<<
        # Fail fast: a broken stage must not be hidden by a later cp/fastp succeeding.
        set -o pipefail
        set -e
        mkdir -p /cromwell_root/tmp/fastp/
        ##1.Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # The destination must use the sample_id placeholder so that stage 2
            # finds the file (it used to be the literal text "{sample_id}").
            cp ${read} /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz
        else
            fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} -i ${read} -o /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##2.UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz
        else
            fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} -i /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
        fi

        ##3.Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            # length_required1 is passed as fastp's -u option.
            fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} -i /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz -o ${sample_id}.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}.fastq.tmp2.gz ${sample_id}.fastq.gz
        fi
    >>>
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim = "${sample_id}.fastq.gz"
    }
}

+ 24
- 0
tasks/fastqc.wdl 查看文件

# Runs FastQC on one FASTQ file and exposes the HTML report and zip archive.
#
# Inputs:
#   read            FASTQ file to analyse.
#   docker          Container image to run in.
#   cluster_config  Value for the `cluster` runtime attribute.
#   disk_size       Size used in the `dataDisk` runtime attribute.
# Outputs (names derived from the basename of `read`, with a trailing
# ".fastq.gz"/".fq.gz" replaced):
#   read_html  "<name>_fastqc.html"
#   read_zip   "<name>_fastqc.zip"
task fastqc {
    File read
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Abort on any failing command, including failures inside a pipeline.
        set -eo pipefail
        # Use as many FastQC threads as nproc reports.
        threads=$(nproc)
        fastqc -t $threads -o ./ ${read}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File read_html = sub(basename(read), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
        File read_zip = sub(basename(read), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    }
}

+ 32
- 0
tasks/fastqscreen.wdl 查看文件

# Screens one FASTQ file against a set of reference indices with FastQ Screen
# (bowtie2 aligner).
#
# Inputs:
#   read               FASTQ file to screen.
#   screen_ref_dir     Reference directory; copied under /cromwell_root/tmp/ before the run.
#   fastq_screen_conf  FastQ Screen configuration file passed via --conf.
#   docker             Container image to run in.
#   cluster_config     Value for the `cluster` runtime attribute.
#   disk_size          Size used in the `dataDisk` runtime attribute.
# Outputs: "<readname>_screen.png", "<readname>_screen.txt", "<readname>_screen.html",
#   where readname is the basename of `read` without ".fastq.gz".
task fastq_screen {
    File read
    File screen_ref_dir
    File fastq_screen_conf
    String docker
    String cluster_config
    String disk_size
    String readname = basename(read,".fastq.gz")

    command <<<
        # Abort on any failing command, including failures inside a pipeline.
        set -eo pipefail
        threads=$(nproc)
        # Stage the references under /cromwell_root/tmp/.
        mkdir -p /cromwell_root/tmp
        cp -r ${screen_ref_dir} /cromwell_root/tmp/
        #sed -i "s#/cromwell_root/fastq_screen_reference#${screen_ref_dir}#g" ${fastq_screen_conf}
        fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $threads ${read}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File png = "${readname}_screen.png"
        File txt = "${readname}_screen.txt"
        File html = "${readname}_screen.html"
    }
}

+ 33
- 0
tasks/hisat2.wdl 查看文件

# Aligns one single-read FASTQ file with HISAT2.
#
# The task has a single read input (`Trim`), so the reads are passed as unpaired
# (-U) and unaligned reads are captured with --un-gz. The previous form used
# `-1` without a matching `-2`, which HISAT2 rejects, and `--un-conc-gz`, which
# only applies to read pairs.
#
# Inputs:
#   idx, idx_prefix   Index directory and basename; combined as "<idx>/<idx_prefix>" for -x.
#   Trim              FASTQ file to align; sample_id is its basename without ".fastq.gz".
#   pen_cansplice, pen_noncansplice, pen_intronlen, min_intronlen, max_intronlen,
#   maxins, minins    Forwarded unchanged to the HISAT2 options of the same name.
#   docker            Container image to run in.
#   cluster           Value for the `cluster` runtime attribute.
#   disk_size         Size used in the `dataDisk` runtime attribute.
# Outputs:
#   sam         "<sample_id>.sam"
#   unmapreads  "<sample_id>_un.fq.gz"
task hisat2 {
    File idx
    File Trim
    String idx_prefix
    String sample_id=basename(Trim, ".fastq.gz")
    String docker
    String cluster
    String disk_size
    String pen_intronlen
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins
    command <<<
        nt=$(nproc)
        hisat2 -t -p $nt -x ${idx}/${idx_prefix} \
            --pen-cansplice ${pen_cansplice} \
            --pen-noncansplice ${pen_noncansplice} \
            --pen-intronlen ${pen_intronlen} \
            --min-intronlen ${min_intronlen} \
            --max-intronlen ${max_intronlen} \
            --maxins ${maxins} \
            --minins ${minins} \
            --un-gz ${sample_id}_un.fq.gz \
            -U ${Trim} \
            -S ${sample_id}.sam
    >>>
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File sam = "${sample_id}.sam"
        File unmapreads = "${sample_id}_un.fq.gz"
    }
}

+ 61
- 0
tasks/multiqc.wdl 查看文件

# Aggregates FastQC, FastQ Screen and Qualimap results into one MultiQC report.
#
# Inputs:
#   read1_zip, read2_zip   FastQC archives, copied to /cromwell_root/tmp/fastqc.
#   txt1, txt2             FastQ Screen text files, copied to /cromwell_root/tmp/fastqscreen.
#   bamqc_zip, rnaseq_zip  Gzipped tar archives, extracted to /cromwell_root/tmp/bamqc
#                          and /cromwell_root/tmp/rnaseq respectively.
#   docker                 Container image to run in.
#   cluster_config         Value for the `cluster` runtime attribute.
#   disk_size              Size used in the `dataDisk` runtime attribute.
# Outputs: the HTML report, everything under multiqc_data/, and four of its
#   tables copied into the working directory.
task multiqc {

    Array[File] read1_zip
    Array[File] read2_zip

    Array[File] txt1
    Array[File] txt2

    Array[File] bamqc_zip
    Array[File] rnaseq_zip

    String docker
    String cluster_config
    String disk_size

    command <<<
        # Abort on any failing command, including failures inside a pipeline.
        set -eo pipefail

        # One staging directory per tool.
        mkdir -p /cromwell_root/tmp/fastqc
        mkdir -p /cromwell_root/tmp/fastqscreen
        mkdir -p /cromwell_root/tmp/bamqc
        mkdir -p /cromwell_root/tmp/rnaseq

        cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
        cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen

        # The Qualimap results arrive as tarballs and are unpacked in place.
        for archive in ${sep=" " bamqc_zip}
        do
            tar -zxvf $archive -C /cromwell_root/tmp/bamqc
        done
        for archive in ${sep=" " rnaseq_zip}
        do
            tar -zxvf $archive -C /cromwell_root/tmp/rnaseq
        done

        multiqc /cromwell_root/tmp/

        # Copy the tables declared as individual outputs next to the report.
        for table in multiqc_fastq_screen multiqc_fastqc multiqc_general_stats multiqc_qualimap_bamqc_genome_results
        do
            cat multiqc_data/$table.txt > $table.txt
        done

    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File multiqc_html = "multiqc_report.html"
        Array[File] multiqc_txt = glob("multiqc_data/*")
        File multiqc_fastq_screen = "multiqc_fastq_screen.txt"
        File multiqc_fastqc = "multiqc_fastqc.txt"
        File multiqc_general_stats = "multiqc_general_stats.txt"
        File bamqc_genome_results = "multiqc_qualimap_bamqc_genome_results.txt"
    }
}

+ 28
- 0
tasks/qualimapBAMqc.wdl 查看文件

# Runs Qualimap `bamqc` on a BAM file and packs the result directory into a tarball.
#
# Inputs:
#   bam             BAM file to analyse; bamname is its basename without ".bam".
#   docker          Container image to run in.
#   cluster_config  Value for the `cluster` runtime attribute.
#   disk_size       Size used in the `dataDisk` runtime attribute.
# Output:
#   bamqc_gz  "<bamname>_bamqc_qualimap.tar.gz", containing the <bamname> output directory.
task qualimapBAMqc {
    File bam
    String docker
    String cluster_config
    String disk_size
    String bamname = basename(bam,".bam")

    command <<<
        # Abort on any failing command, including failures inside a pipeline.
        set -eo pipefail
        threads=$(nproc)
        # Reports are written in both PDF and HTML into a directory named after the BAM.
        /opt/qualimap/qualimap bamqc -bam ${bam} -outformat PDF:HTML -nt $threads -outdir ${bamname} --java-mem-size=32G
        tar -zcvf ${bamname}_bamqc_qualimap.tar.gz ${bamname}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File bamqc_gz = "${bamname}_bamqc_qualimap.tar.gz"
    }
}

+ 29
- 0
tasks/qualimapRNAseq.wdl 查看文件

# Runs Qualimap `rnaseq` on a BAM file with a GTF annotation and packs the
# result directory into a tarball.
#
# Inputs:
#   bam             BAM file to analyse; bamname is its basename without ".bam".
#   gtf             Annotation passed via -gtf.
#   docker          Container image to run in.
#   cluster_config  Value for the `cluster` runtime attribute.
#   disk_size       Size used in the `dataDisk` runtime attribute.
# Output:
#   rnaseq_gz  "<bamname>_rnaseq_qualimap.tar.gz", containing the <bamname> output directory.
task qualimapRNAseq {
    File bam
    File gtf
    String bamname = basename(bam,".bam")
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        # The unused `nt=$(nproc)` assignment was dropped: no option of this
        # command line consumed it.
        # NOTE(review): -pe requests paired-end counting; confirm it is still wanted
        # when this task is fed single-end alignments.
        /opt/qualimap/qualimap rnaseq -bam ${bam} -outformat HTML -outdir ${bamname} -gtf ${gtf} -pe --java-mem-size=10G
        tar -zcvf ${bamname}_rnaseq_qualimap.tar.gz ${bamname}
    >>>

    runtime {
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File rnaseq_gz = "${bamname}_rnaseq_qualimap.tar.gz"
    }
}

+ 39
- 0
tasks/samtools.wdl 查看文件

# Converts a SAM file to a sorted, indexed BAM, writes a subsampled BAM, and
# extracts the insert-size rows of `samtools stats`.
#
# Inputs:
#   sam          SAM file; sample_id is its basename without ".sam".
#   insert_size  Passed to `samtools stats -i`.
#   docker       Container image to run in.
#   cluster      Value for the `cluster` runtime attribute.
#   disk_size    Size used in the `dataDisk` runtime attribute.
# Outputs:
#   out_bam        "<sample_id>.sorted.bam"
#   out_percent    "<sample_id>.percent.bam"
#   out_bam_index  "<sample_id>.sorted.bam.bai"
#   out_ins_size   "<sample_id>.ins_size"
task samtools {
    File sam
    String docker
    String cluster
    String disk_size
    Int insert_size
    String sample_id=basename(sam, ".sam")
    # File names derived from sample_id.
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String percent_bam = sample_id + ".percent.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"

    command <<<
        # Abort on any failing command, including failures inside a pipeline.
        set -eo pipefail
        samtools=/opt/conda/bin/samtools

        # SAM -> BAM -> sorted BAM -> index.
        $samtools view -bS ${sam} > ${bam}
        $samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
        $samtools index ${sorted_bam}

        # Subsample the sorted BAM with `-s 42.1`.
        $samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}

        # Keep only the "IS" rows of the stats report, minus their first column.
        $samtools stats -i ${insert_size} ${sorted_bam} |grep ^IS|cut -f 2- > ${ins_size}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_percent = percent_bam
        File out_bam_index = sorted_bam_index
        File out_ins_size = ins_size
    }

}


+ 32
- 0
tasks/stringtie.wdl 查看文件

# Runs StringTie on a sorted BAM against a reference annotation and collects
# the gene abundance table, the covered-reference GTF and the Ballgown files.
#
# Inputs:
#   bam   Sorted BAM; sample_id is its basename without ".sorted.bam".
#   gtf   Annotation passed via -G.
#   minimum_isoform_abundance                              -> -f
#   minimum_length_allowed_for_the_predicted_transcripts   -> -m
#   Junctions_no_spliced_reads                             -> -a
#   maximum_fraction_of_muliplelocationmapped_reads        -> -M
#   docker     Container image to run in.
#   cluster    Value for the `cluster` runtime attribute.
#   disk_size  Size used in the `dataDisk` runtime attribute.
# Outputs:
#   covered_transcripts  "<sample_id>.cov.ref.gtf"
#   gene_abundance       "<sample_id>.gene.abundance.txt"
#   ballgown             the GTF and the five .ctab files under ballgown/<sample_id>/
task stringtie {
    File bam
    File gtf
    String sample_id=basename(bam, ".sorted.bam")
    String docker
    String cluster
    String disk_size
    Float minimum_isoform_abundance
    Int minimum_length_allowed_for_the_predicted_transcripts
    Int Junctions_no_spliced_reads
    Float maximum_fraction_of_muliplelocationmapped_reads

    command <<<
        threads=$(nproc)
        mkdir ballgown
        /opt/conda/bin/stringtie -e -B -p $threads \
            -f ${minimum_isoform_abundance} \
            -m ${minimum_length_allowed_for_the_predicted_transcripts} \
            -a ${Junctions_no_spliced_reads} \
            -M ${maximum_fraction_of_muliplelocationmapped_reads} \
            -G ${gtf} \
            -o ballgown/${sample_id}/${sample_id}.gtf \
            -C ${sample_id}.cov.ref.gtf \
            -A ${sample_id}.gene.abundance.txt \
            ${bam}
    >>>
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
        File covered_transcripts = "${sample_id}.cov.ref.gtf"
        File gene_abundance = "${sample_id}.gene.abundance.txt"
        Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
    }
}

+ 192
- 0
workflow.wdl 查看文件

import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
import "./tasks/stringtie.wdl" as stringtie
import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/qualimapBAMqc.wdl" as qualimapBAMqc
import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq
import "./tasks/ballgown.wdl" as ballgown
import "./tasks/count.wdl" as count

# Single-read RNA-seq QC workflow (the workflow name is filled in by the template engine).
#
# Call graph:
#   fastp -> fastqc
#         -> fastq_screen
#         -> hisat2 -> samtools -> qualimapBAMqc   (on samtools.out_percent)
#                               -> qualimapRNAseq  (on samtools.out_percent)
#                               -> stringtie       (on samtools.out_bam)
#                                    -> ballgown
#                                    -> count
#
# The multiqc_* declarations are not consumed by any call in this workflow; they
# are kept so that the inputs file, which supplies them, keeps matching.
workflow {{ project_name }} {
    File read
    File idx
    File screen_ref_dir
    File fastq_screen_conf
    File gtf
    String fastp_docker
    String adapter_sequence
    # Passed to the fastp call below and supplied by the inputs file, so it has to
    # be declared here; it was previously missing.
    String adapter_sequence_r2
    String fastp_cluster
    String umi_loc
    String idx_prefix
    String pen_intronlen
    String fastqc_cluster_config
    String fastqc_docker
    String fastqscreen_docker
    String fastqscreen_cluster_config
    String hisat2_docker
    String hisat2_cluster
    String qualimapBAMqc_docker
    String qualimapBAMqc_cluster_config
    String qualimapRNAseq_docker
    String qualimapRNAseq_cluster_config
    String samtools_docker
    String samtools_cluster
    String stringtie_docker
    String stringtie_cluster
    String multiqc_cluster_config
    String multiqc_docker
    Int multiqc_disk_size
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering
    Int pen_cansplice
    Int pen_noncansplice
    Int min_intronlen
    Int max_intronlen
    Int maxins
    Int minins
    Int fastqc_disk_size
    Int fastqscreen_disk_size
    Int qualimapBAMqc_disk_size
    Int qualimapRNAseq_disk_size
    Int insert_size
    Int minimum_length_allowed_for_the_predicted_transcripts
    Int Junctions_no_spliced_reads
    Int count_length
    Float minimum_isoform_abundance
    Float maximum_fraction_of_muliplelocationmapped_reads
    String ballgown_docker
    String ballgown_cluster
    String disk_size
    String count_docker
    String count_cluster

    # Read pre-processing.
    call fastp.fastp as fastp {
        input:
        read = read,
        docker = fastp_docker,
        cluster = fastp_cluster,
        disk_size = disk_size,
        adapter_sequence = adapter_sequence,
        adapter_sequence_r2 = adapter_sequence_r2,
        umi_loc = umi_loc,
        trim_front1 = trim_front1,
        trim_tail1 = trim_tail1,
        max_len1 = max_len1,
        trim_front2 = trim_front2,
        trim_tail2 = trim_tail2,
        max_len2 = max_len2,
        disable_adapter_trimming = disable_adapter_trimming,
        length_required = length_required,
        umi_len = umi_len,
        UMI = UMI,
        qualified_quality_phred = qualified_quality_phred,
        length_required1 = length_required1,
        disable_quality_filtering = disable_quality_filtering
    }

    # Read-level QC on the fastp output.
    call fastqc.fastqc as fastqc {
        input:
        read = fastp.Trim,
        docker = fastqc_docker,
        cluster_config = fastqc_cluster_config,
        disk_size = fastqc_disk_size
    }

    call fastqscreen.fastq_screen as fastqscreen {
        input:
        read = fastp.Trim,
        screen_ref_dir = screen_ref_dir,
        fastq_screen_conf = fastq_screen_conf,
        docker = fastqscreen_docker,
        cluster_config = fastqscreen_cluster_config,
        disk_size = fastqscreen_disk_size
    }

    # Alignment and BAM preparation.
    call hisat2.hisat2 as hisat2 {
        input:
        idx = idx,
        idx_prefix = idx_prefix,
        Trim = fastp.Trim,
        docker = hisat2_docker,
        cluster = hisat2_cluster,
        disk_size = disk_size,
        pen_intronlen = pen_intronlen,
        pen_cansplice = pen_cansplice,
        pen_noncansplice = pen_noncansplice,
        min_intronlen = min_intronlen,
        max_intronlen = max_intronlen,
        maxins = maxins,
        minins = minins
    }

    call samtools.samtools as samtools {
        input:
        sam = hisat2.sam,
        docker = samtools_docker,
        cluster = samtools_cluster,
        disk_size = disk_size,
        insert_size = insert_size
    }

    # Alignment-level QC; both Qualimap calls read the subsampled BAM.
    call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
        input:
        bam = samtools.out_percent,
        docker = qualimapBAMqc_docker,
        cluster_config = qualimapBAMqc_cluster_config,
        disk_size = qualimapBAMqc_disk_size
    }

    call qualimapRNAseq.qualimapRNAseq as qualimapRNAseq {
        input:
        bam = samtools.out_percent,
        docker = qualimapRNAseq_docker,
        cluster_config = qualimapRNAseq_cluster_config,
        disk_size = qualimapRNAseq_disk_size,
        gtf = gtf
    }

    # Quantification on the full sorted BAM.
    call stringtie.stringtie as stringtie {
        input:
        gtf = gtf,
        bam = samtools.out_bam,
        docker = stringtie_docker,
        cluster = stringtie_cluster,
        disk_size = disk_size,
        minimum_length_allowed_for_the_predicted_transcripts = minimum_length_allowed_for_the_predicted_transcripts,
        Junctions_no_spliced_reads = Junctions_no_spliced_reads,
        minimum_isoform_abundance = minimum_isoform_abundance,
        maximum_fraction_of_muliplelocationmapped_reads = maximum_fraction_of_muliplelocationmapped_reads
    }

    call ballgown.ballgown as ballgown {
        input:
        docker = ballgown_docker,
        cluster = ballgown_cluster,
        ballgown = stringtie.ballgown,
        gene_abundance = stringtie.gene_abundance,
        disk_size = disk_size
    }

    call count.count as count {
        input:
        docker = count_docker,
        cluster = count_cluster,
        ballgown = stringtie.ballgown,
        disk_size = disk_size,
        gene_abundance = stringtie.gene_abundance,
        count_length = count_length
    }
}

Loading…
取消
儲存