Explorar el Código

first commit

tags/v0.1.0
Zhihui hace 4 años
padre
commit
cffafd77cd
Se han modificado 15 ficheros con 684 adiciones y 0 borrados
  1. BIN
      .DS_Store
  2. +62
    -0
      defaults
  3. +65
    -0
      inputs
  4. BIN
      tasks/.DS_Store
  5. +25
    -0
      tasks/ballgown.wdl
  6. +67
    -0
      tasks/fastp.wdl
  7. +34
    -0
      tasks/fastqc.wdl
  8. +36
    -0
      tasks/fastqscreen.wdl
  9. +34
    -0
      tasks/hisat2.wdl
  10. +61
    -0
      tasks/multiqc.wdl
  11. +27
    -0
      tasks/qualimapBAMqc.wdl
  12. +28
    -0
      tasks/qualimapRNAseq.wdl
  13. +38
    -0
      tasks/samtools.wdl
  14. +32
    -0
      tasks/stringtie.wdl
  15. +175
    -0
      workflow.wdl

BIN
.DS_Store Ver fichero


+ 62
- 0
defaults Ver fichero

@@ -0,0 +1,62 @@
{
"fastp_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
"fastp_cluster": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"trim_front1": "0",
"trim_tail1": "0",
"max_len1": "0",
"trim_front2": "0",
"trim_tail2": "0",
"max_len2": "0",
"adapter_sequence": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"disable_adapter_trimming": "0",
"length_required": "50",
"length_required1": "20",
"UMI": "0",
"umi_len": "0",
"umi_loc": "umi_loc",
"qualified_quality_phred": "20",
"disable_quality_filtering": "1",
"hisat2_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.1.0-2",
"hisat2_cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
"idx_prefix": "genome_snp_tran",
"idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
"fasta": "GRCh38.d1.vd1.fa",
"pen_cansplice":"0",
"pen_noncansplice":"3",
"pen_intronlen":"G,-8,1",
"min_intronlen":"30",
"max_intronlen":"500000",
"maxins":"500",
"minins":"0",
"samtools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
"samtools_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"insert_size":"8000",
"gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
"stringtie_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
"stringtie_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
"minimum_length_allowed_for_the_predicted_transcripts":"200",
"minimum_isoform_abundance":"0.01",
"Junctions_no_spliced_reads":"10",
"maximum_fraction_of_muliplelocationmapped_reads":"0.95",
"fastqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqc:v0.11.5",
"fastqc_disk_size": "150",
"qualimapBAMqc_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"qualimapBAMqc_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"qualimapBAMqc_disk_size": "500",
"qualimapRNAseq_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/qualimap:2.0.0",
"qualimapRNAseq_cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"qualimapRNAseq_disk_size": "500",
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastqscreen:0.12.0",
"fastqscreen_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"screen_ref_dir": "oss://pgx-reference-data/fastq_screen_reference/",
"fastq_screen_conf": "oss://pgx-reference-data/fastq_screen_reference/fastq_screen.conf",
"fastqscreen_disk_size": "200",
"ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/",
"multiqc_cluster_config": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"multiqc_docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/multiqc:v1.8",
"multiqc_disk_size": "100",
"ballgown_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/pgx-ballgown:0.0.1",
"ballgown_cluster": "OnDemand bcs.a2.large img-ubuntu-vpc"
}

+ 65
- 0
inputs Ver fichero

@@ -0,0 +1,65 @@
{
"{{ project_name }}.read1": "{{ read1 }}",
"{{ project_name }}.read2": "{{ read2 }}",
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
"{{ project_name }}.hisat2_docker": "{{ hisat2_docker }}",
"{{ project_name }}.hisat2_cluster": "{{ hisat2_cluster }}",
"{{ project_name }}.idx_prefix": "{{ idx_prefix }}",
"{{ project_name }}.idx": "{{ idx }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.pen_cansplice": "{{ pen_cansplice }}",
"{{ project_name }}.pen_noncansplice": "{{ pen_noncansplice }}",
"{{ project_name }}.pen_intronlen": "{{ pen_intronlen }}",
"{{ project_name }}.min_intronlen": "{{ min_intronlen }}",
"{{ project_name }}.max_intronlen": "{{ max_intronlen }}",
"{{ project_name }}.maxins": "{{ maxins }}",
"{{ project_name }}.minins": "{{ minins }}",
"{{ project_name }}.samtools_docker": "{{ samtools_docker }}",
"{{ project_name }}.samtools_cluster": "{{ samtools_cluster }}",
"{{ project_name }}.insert_size": "{{ insert_size }}",
"{{ project_name }}.gtf": "{{ gtf }}",
"{{ project_name }}.stringtie_docker": "{{ stringtie_docker }}",
"{{ project_name }}.stringtie_cluster": "{{ stringtie_cluster }}",
"{{ project_name }}.minimum_length_allowed_for_the_predicted_transcripts": "{{ minimum_length_allowed_for_the_predicted_transcripts }}",
"{{ project_name }}.minimum_isoform_abundance": "{{ minimum_isoform_abundance }}",
"{{ project_name }}.Junctions_no_spliced_reads": "{{ Junctions_no_spliced_reads }}",
"{{ project_name }}.maximum_fraction_of_muliplelocationmapped_reads": "{{ maximum_fraction_of_muliplelocationmapped_reads }}",
"{{ project_name }}.fastqc_cluster_config": "{{ fastqc_cluster_config }}",
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
"{{ project_name }}.fastqc_disk_size": "{{ fastqc_disk_size }}",
"{{ project_name }}.qualimapBAMqc_docker": "{{ qualimapBAMqc_docker }}",
"{{ project_name }}.qualimapBAMqc_cluster_config": "{{ qualimapBAMqc_cluster_config }}",
"{{ project_name }}.qualimapBAMqc_disk_size": "{{ qualimapBAMqc_disk_size }}",
"{{ project_name }}.qualimapRNAseq_docker": "{{ qualimapRNAseq_docker }}",
"{{ project_name }}.qualimapRNAseq_cluster_config": "{{ qualimapRNAseq_cluster_config }}",
"{{ project_name }}.qualimapRNAseq_disk_size": "{{ qualimapRNAseq_disk_size }}",
"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}",
"{{ project_name }}.fastqscreen_cluster_config": "{{ fastqscreen_cluster_config }}",
"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}",
"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}",
"{{ project_name }}.fastqscreen_disk_size": "{{ fastqscreen_disk_size }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}",
"{{ project_name }}.multiqc_cluster_config": "{{ multiqc_cluster_config }}",
"{{ project_name }}.multiqc_docker": "{{ multiqc_docker }}",
"{{ project_name }}.multiqc_disk_size": "{{ multiqc_disk_size }}",
"{{ project_name }}.ballgown_docker": "{{ ballgown_docker }}",
"{{ project_name }}.ballgown_cluster": "{{ ballgown_cluster }}"
}

BIN
tasks/.DS_Store Ver fichero


+ 25
- 0
tasks/ballgown.wdl Ver fichero

@@ -0,0 +1,25 @@
# Task: ballgown
# Copies the supplied ballgown input files into a per-sample scratch directory
# under /cromwell_root/tmp and runs the container's `ballgown` executable on
# that directory, writing "<base>.txt" in the working directory.
#
# Inputs
#   gene_abundance  only its file name is used: stripping the
#                   ".gene.abundance.txt" suffix yields the prefix `base`
#   ballgown        files copied (cp -r) into the scratch directory
#   docker          container image
#   cluster         instance specification passed to the backend
#   disk_size       size spliced into the dataDisk runtime string
# Output
#   mat_expression  "<base>.txt"
task ballgown {
  Array[File] ballgown
  File gene_abundance
  String base = basename(gene_abundance, ".gene.abundance.txt")

  String docker
  String cluster
  String disk_size

  command <<<
    mkdir -p /cromwell_root/tmp/${base}
    cp -r ${sep=" " ballgown} /cromwell_root/tmp/${base}
    ballgown /cromwell_root/tmp/${base} ${base}.txt
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
  }

  output {
    File mat_expression = base + ".txt"
  }
}

+ 67
- 0
tasks/fastp.wdl Ver fichero

@@ -0,0 +1,67 @@
# Task: fastp
# Runs up to three chained fastp passes on a read pair. Intermediate files live
# in /cromwell_root/tmp/fastp/ and are named <sample_id>_R{1,2}.fastq.tmp{1,2}.gz;
# the final pair is written to the working directory.
#
#   Stage 1  disable_quality_filtering == 0 -> the raw reads are copied to tmp1;
#            any other value                -> fastp (front/tail/max_len options) writes tmp1.
#   Stage 2  UMI == 0 -> tmp1 is copied to tmp2;
#            otherwise fastp -U with umi_loc / umi_len writes tmp2.
#   Stage 3  disable_adapter_trimming == 0 -> fastp with adapter, length and
#            quality options writes the final pair;
#            otherwise tmp2 is copied to the final pair.
#
# NOTE(review): in stage 1 the value 0 selects the plain copy, which reads as the
# opposite of the flag's name - confirm the intended polarity with the author.
# NOTE(review): <sample_id>.json / <sample_id>.html are only written by the fastp
# invocations; if all three stages take their copy branch the `json` and
# `report` outputs will not exist.
#
# Outputs
#   json, report      <sample_id>.json / <sample_id>.html (from the last fastp pass that ran)
#   Trim_R1, Trim_R2  <sample_id>_R1.fastq.gz / <sample_id>_R2.fastq.gz
task fastp {
  String sample_id
  File read1
  File read2
  String adapter_sequence
  String adapter_sequence_r2
  String docker
  String cluster
  String umi_loc
  Int trim_front1
  Int trim_tail1
  Int max_len1
  Int trim_front2
  Int trim_tail2
  Int max_len2
  Int disable_adapter_trimming
  Int length_required
  Int umi_len
  Int UMI
  Int qualified_quality_phred
  Int length_required1
  Int disable_quality_filtering

  command <<<
    mkdir -p /cromwell_root/tmp/fastp/
    ##1.Disable_quality_filtering
    if [ "${disable_quality_filtering}" == 0 ]
    then
    cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
    cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
    else
    fastp --thread 4 --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i ${read1} -I ${read2} -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -j ${sample_id}.json -h ${sample_id}.html
    fi

    ##2.UMI
    if [ "${UMI}" == 0 ]
    then
    cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
    cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
    else
    fastp --thread 4 -U --umi_loc=${umi_loc} --umi_len=${umi_len} --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -j ${sample_id}.json -h ${sample_id}.html
    fi

    ##3.Trim
    if [ "${disable_adapter_trimming}" == 0 ]
    then
    fastp --thread 4 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} --detect_adapter_for_pe --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz -j ${sample_id}.json -h ${sample_id}.html
    else
    cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
    cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
    fi
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 200 /cromwell_root/"
  }

  output {
    File json = "${sample_id}.json"
    File report = "${sample_id}.html"
    File Trim_R1 = "${sample_id}_R1.fastq.gz"
    File Trim_R2 = "${sample_id}_R2.fastq.gz"
  }
}

+ 34
- 0
tasks/fastqc.wdl Ver fichero

@@ -0,0 +1,34 @@
# Task: fastqc
# Runs FastQC on both mates of a read pair, using every core reported by nproc,
# and exposes the per-read HTML report, zip archive and unpacked report files.
#
# Fixes relative to the previous revision:
#   * bamname1/bamname2 are derived with sub(): basename() removes only a
#     literal suffix, so handing it a regular expression left the
#     ".fastq.gz"/".fq.gz" extension in place and the names matched nothing.
#   * FastQC is run with --extract so the "<name>_fastqc" directories read by
#     the glob outputs are created. The former `tar` calls are gone: they
#     archived that directory into "<name>_fastqc.zip", the same path as the
#     `read*_zip` outputs, replacing the zip with a gzip-compressed tarball.
# NOTE(review): assumes the fastqc binary in the image names its outputs
# "<name>_fastqc.{html,zip}" and keeps the zip when --extract is given - confirm
# against the pinned FastQC version.
#
# Inputs
#   read1, read2    gzipped FASTQ files (".fastq.gz" or ".fq.gz")
#   docker          container image
#   cluster_config  instance specification passed to the backend
#   disk_size       size spliced into the dataDisk runtime string
# Outputs
#   fastqc1/2       files inside each unpacked "<name>_fastqc" directory
#   read*_html      "<name>_fastqc.html"
#   read*_zip       "<name>_fastqc.zip"
task fastqc {
  File read1
  File read2
  String bamname1 = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "")
  String bamname2 = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "")
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    fastqc -t $nt --extract -o ./ ${read1}
    fastqc -t $nt --extract -o ./ ${read2}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    Array[File] fastqc1 = glob("${bamname1}_fastqc/*")
    Array[File] fastqc2 = glob("${bamname2}_fastqc/*")
    File read1_html = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File read1_zip = sub(basename(read1), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
    File read2_html = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.html")
    File read2_zip = sub(basename(read2), "\\.(fastq|fq)\\.gz$", "_fastqc.zip")
  }
}

+ 36
- 0
tasks/fastqscreen.wdl Ver fichero

@@ -0,0 +1,36 @@
# Task: fastq_screen
# Stages the screening reference directory under /cromwell_root/tmp/ and runs
# fastq_screen (bowtie2 aligner, --top 100000) once per mate, with as many
# threads as nproc reports.
#
# Inputs
#   read1, read2       FASTQ files; output names are derived by stripping ".fastq.gz"
#   screen_ref_dir     reference directory copied to /cromwell_root/tmp/
#                      (presumably the paths inside fastq_screen_conf point there - verify)
#   fastq_screen_conf  configuration file given to --conf
#   docker             container image
#   cluster_config     instance specification passed to the backend
#   disk_size          size spliced into the dataDisk runtime string
# Outputs
#   png1/txt1/html1, png2/txt2/html2   "<readname>_screen.{png,txt,html}" per mate
task fastq_screen {
  File read1
  File read2
  String read1name = basename(read1, ".fastq.gz")
  String read2name = basename(read2, ".fastq.gz")

  File screen_ref_dir
  File fastq_screen_conf

  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    mkdir -p /cromwell_root/tmp
    cp -r ${screen_ref_dir} /cromwell_root/tmp/
    for fq in ${read1} ${read2}
    do
      fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt $fq
    done
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
  }

  output {
    File png1 = read1name + "_screen.png"
    File txt1 = read1name + "_screen.txt"
    File html1 = read1name + "_screen.html"
    File png2 = read2name + "_screen.png"
    File txt2 = read2name + "_screen.txt"
    File html2 = read2name + "_screen.html"
  }
}

+ 34
- 0
tasks/hisat2.wdl Ver fichero

@@ -0,0 +1,34 @@
# Task: hisat2
# Aligns a trimmed read pair with HISAT2 against the index "<idx>/<idx_prefix>",
# using every core reported by nproc, and writes a SAM file plus the read
# pairs passed to --un-conc-gz.
#
# Inputs
#   idx, idx_prefix     index location and index base name (joined with "/")
#   Trim_R1, Trim_R2    mate 1 / mate 2 FASTQ files
#   sample_id           prefix of every output file
#   pen_cansplice, pen_noncansplice, pen_intronlen,
#   min_intronlen, max_intronlen, maxins, minins
#                       forwarded unchanged to the same-named hisat2 options
#   docker, cluster     container image / instance specification
# Outputs
#   sam                 "<sample_id>.sam"
#   unmapread_1p/_2p    "<sample_id>_un.fq.1.gz" / "<sample_id>_un.fq.2.gz"
task hisat2 {
  String sample_id
  File Trim_R1
  File Trim_R2

  File idx
  String idx_prefix

  String pen_intronlen
  Int pen_cansplice
  Int pen_noncansplice
  Int min_intronlen
  Int max_intronlen
  Int maxins
  Int minins

  String docker
  String cluster

  command <<<
    nt=$(nproc)
    hisat2 -t -p $nt -x ${idx}/${idx_prefix} \
      --pen-cansplice ${pen_cansplice} \
      --pen-noncansplice ${pen_noncansplice} \
      --pen-intronlen ${pen_intronlen} \
      --min-intronlen ${min_intronlen} \
      --max-intronlen ${max_intronlen} \
      --maxins ${maxins} \
      --minins ${minins} \
      --un-conc-gz ${sample_id}_un.fq.gz \
      -1 ${Trim_R1} -2 ${Trim_R2} \
      -S ${sample_id}.sam
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 200 /cromwell_root/"
  }

  output {
    File sam = sample_id + ".sam"
    File unmapread_1p = sample_id + "_un.fq.1.gz"
    File unmapread_2p = sample_id + "_un.fq.2.gz"
  }
}

+ 61
- 0
tasks/multiqc.wdl Ver fichero

@@ -0,0 +1,61 @@
# Task: multiqc
# Collects per-sample QC artefacts into /cromwell_root/tmp/{fastqc,fastqscreen,
# bamqc,rnaseq}, runs MultiQC over /cromwell_root/tmp/ and copies four of the
# tables from multiqc_data/ into the working directory.
#
# Inputs
#   read1_zip, read2_zip  files copied as-is into tmp/fastqc
#   txt1, txt2            files copied as-is into tmp/fastqscreen
#   bamqc_zip             archives unpacked with `tar -zxvf` into tmp/bamqc
#   rnaseq_zip            archives unpacked with `tar -zxvf` into tmp/rnaseq
#   docker                container image
#   cluster_config        instance specification passed to the backend
#   disk_size             size spliced into the dataDisk runtime string
# Outputs
#   multiqc_html          "multiqc_report.html"
#   multiqc_txt           everything matched by multiqc_data/*
#   multiqc_fastq_screen, multiqc_fastqc, multiqc_general_stats,
#   bamqc_genome_results  the four copied tables
task multiqc {
  Array[File] read1_zip
  Array[File] read2_zip
  Array[File] txt1
  Array[File] txt2
  Array[File] bamqc_zip
  Array[File] rnaseq_zip

  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    mkdir -p /cromwell_root/tmp/fastqc
    mkdir -p /cromwell_root/tmp/fastqscreen
    mkdir -p /cromwell_root/tmp/bamqc
    mkdir -p /cromwell_root/tmp/rnaseq

    cp ${sep=" " read1_zip} ${sep=" " read2_zip} /cromwell_root/tmp/fastqc
    cp ${sep=" " txt1} ${sep=" " txt2} /cromwell_root/tmp/fastqscreen
    for archive in ${sep=" " bamqc_zip}
    do
      tar -zxvf $archive -C /cromwell_root/tmp/bamqc
    done
    for archive in ${sep=" " rnaseq_zip}
    do
      tar -zxvf $archive -C /cromwell_root/tmp/rnaseq
    done

    multiqc /cromwell_root/tmp/
    for table in multiqc_fastq_screen multiqc_fastqc multiqc_general_stats multiqc_qualimap_bamqc_genome_results
    do
      cat multiqc_data/$table.txt > $table.txt
    done
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
  }

  output {
    File multiqc_html = "multiqc_report.html"
    Array[File] multiqc_txt = glob("multiqc_data/*")
    File multiqc_fastq_screen = "multiqc_fastq_screen.txt"
    File multiqc_fastqc = "multiqc_fastqc.txt"
    File multiqc_general_stats = "multiqc_general_stats.txt"
    File bamqc_genome_results = "multiqc_qualimap_bamqc_genome_results.txt"
  }
}

+ 27
- 0
tasks/qualimapBAMqc.wdl Ver fichero

@@ -0,0 +1,27 @@
# Task: qualimapBAMqc
# Runs `qualimap bamqc` on one BAM file (PDF and HTML reports, nproc threads,
# 32G Java heap) into "<bamname>_bamqc", then packs that directory with
# `tar -zcvf` into "<bamname>_bamqc_qualimap.zip".
#
# Inputs
#   bam             alignment file; `bamname` is its file name without ".bam"
#   docker          container image
#   cluster_config  instance specification passed to the backend
#   disk_size       size spliced into the dataDisk runtime string
# Outputs
#   bamqc_zip       the packed report directory (a gzip-compressed tar despite the .zip name)
#   bamqc           everything matched by "<bamname>_bamqc/*"
task qualimapBAMqc {
  File bam
  String bamname = basename(bam, ".bam")

  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    /opt/qualimap/qualimap bamqc \
      -bam ${bam} \
      -outformat PDF:HTML \
      -nt $nt \
      -outdir ${bamname}_bamqc \
      --java-mem-size=32G
    tar -zcvf ${bamname}_bamqc_qualimap.zip ${bamname}_bamqc
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
  }

  output {
    File bamqc_zip = bamname + "_bamqc_qualimap.zip"
    Array[File] bamqc = glob("${bamname}_bamqc/*")
  }
}

+ 28
- 0
tasks/qualimapRNAseq.wdl Ver fichero

@@ -0,0 +1,28 @@
# Task: qualimapRNAseq
# Runs `qualimap rnaseq` (paired-end mode, HTML report, 10G Java heap) on one
# BAM file against a GTF annotation into "<bamname>_RNAseq", then packs that
# directory with `tar -zcvf` into "<bamname>_RNAseq_qualimap.zip".
#
# Inputs
#   bam             alignment file; `bamname` is its file name without ".bam"
#   gtf             annotation passed to -gtf
#   docker          container image
#   cluster_config  instance specification passed to the backend
#   disk_size       size spliced into the dataDisk runtime string
# Outputs
#   rnaseq_zip      the packed report directory (a gzip-compressed tar despite the .zip name)
#   RNAseqqc        everything matched by "<bamname>_RNAseq/*"
#
# `nt` is computed in the command but not passed to qualimap.
task qualimapRNAseq {
  File bam
  String bamname = basename(bam, ".bam")
  File gtf

  String docker
  String cluster_config
  String disk_size

  command <<<
    set -o pipefail
    set -e
    nt=$(nproc)
    /opt/qualimap/qualimap rnaseq \
      -bam ${bam} \
      -outformat HTML \
      -outdir ${bamname}_RNAseq \
      -gtf ${gtf} \
      -pe \
      --java-mem-size=10G
    tar -zcvf ${bamname}_RNAseq_qualimap.zip ${bamname}_RNAseq
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
  }

  output {
    File rnaseq_zip = bamname + "_RNAseq_qualimap.zip"
    Array[File] RNAseqqc = glob("${bamname}_RNAseq/*")
  }
}

+ 38
- 0
tasks/samtools.wdl Ver fichero

@@ -0,0 +1,38 @@
# Task: samtools
# Post-processes one SAM file with /opt/conda/bin/samtools:
#   1. view -bS        SAM -> "<sample_id>.bam"
#   2. sort            -> "<sample_id>.sorted.bam"
#   3. index           -> "<sample_id>.sorted.bam.bai"
#   4. view -bs 42.1   subsample of the sorted BAM -> "<sample_id>.percent.bam"
#                      (presumably seed 42 / fraction 0.1 - confirm against samtools docs)
#   5. stats -i <insert_size>, keeping the lines that start with "IS" minus
#      their first column -> "<sample_id>.ins_size"
#
# Inputs
#   sam              alignment in SAM format
#   sample_id        prefix of every file produced
#   insert_size      value handed to `samtools stats -i`
#   docker, cluster  container image / instance specification
# Outputs
#   out_bam, out_bam_index  sorted BAM and its index
#   out_percent             subsampled BAM
#   out_ins_size            insert-size table
task samtools {
  File sam
  String sample_id
  Int insert_size

  String bam = "${sample_id}.bam"
  String sorted_bam = "${sample_id}.sorted.bam"
  String percent_bam = "${sample_id}.percent.bam"
  String sorted_bam_index = "${sample_id}.sorted.bam.bai"
  String ins_size = "${sample_id}.ins_size"

  String docker
  String cluster

  command <<<
    set -o pipefail
    set -e
    /opt/conda/bin/samtools view -bS ${sam} > ${bam}
    /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
    /opt/conda/bin/samtools index ${sorted_bam}
    /opt/conda/bin/samtools view -bs 42.1 ${sorted_bam} > ${percent_bam}
    /opt/conda/bin/samtools stats -i ${insert_size} ${sorted_bam} \
      | grep ^IS \
      | cut -f 2- > ${sample_id}.ins_size
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 200 /cromwell_root/"
  }

  output {
    File out_bam = sorted_bam
    File out_percent = percent_bam
    File out_bam_index = sorted_bam_index
    File out_ins_size = ins_size
  }
}


+ 32
- 0
tasks/stringtie.wdl Ver fichero

@@ -0,0 +1,32 @@
# Task: stringtie
# Runs /opt/conda/bin/stringtie in -e -B mode on one BAM file against a
# reference GTF, using every core reported by nproc. Per-sample tables are
# written under ballgown/<sample_id>/.
#
# Inputs
#   bam                                                    alignment file
#   gtf                                                    reference annotation (-G)
#   sample_id                                              prefix / directory name of the outputs
#   minimum_isoform_abundance                              passed to -f
#   minimum_length_allowed_for_the_predicted_transcripts   passed to -m
#   Junctions_no_spliced_reads                             passed to -a
#   maximum_fraction_of_muliplelocationmapped_reads        passed to -M
#   docker, cluster                                        container image / instance specification
# Outputs
#   covered_transcripts  "<sample_id>.cov.ref.gtf" (-C)
#   gene_abundance       "<sample_id>.gene.abundance.txt" (-A)
#   ballgown             the GTF and the five *.ctab files under ballgown/<sample_id>/
#   genecount            "<sample_id>_genecount.csv"
#
# Fix: the `genecount` path previously read "{sample_id}_genecount.csv" without
# the "$", so it named a file literally called "{sample_id}_genecount.csv".
# NOTE(review): no command in this task visibly writes <sample_id>_genecount.csv;
# unless the image produces it as a side effect this output will not be found -
# confirm which step is meant to generate it.
task stringtie {
  File bam
  File gtf
  String docker
  String sample_id
  String cluster
  Int minimum_length_allowed_for_the_predicted_transcripts
  Int Junctions_no_spliced_reads
  Float minimum_isoform_abundance
  Float maximum_fraction_of_muliplelocationmapped_reads

  command <<<
    nt=$(nproc)
    mkdir ballgown
    /opt/conda/bin/stringtie -e -B -p $nt -f ${minimum_isoform_abundance} -m ${minimum_length_allowed_for_the_predicted_transcripts} -a ${Junctions_no_spliced_reads} -M ${maximum_fraction_of_muliplelocationmapped_reads} -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam}
  >>>

  runtime {
    docker: docker
    cluster: cluster
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd 150 /cromwell_root/"
  }

  output {
    File covered_transcripts = "${sample_id}.cov.ref.gtf"
    File gene_abundance = "${sample_id}.gene.abundance.txt"
    Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
    File genecount = "${sample_id}_genecount.csv"
  }
}

+ 175
- 0
workflow.wdl Ver fichero

@@ -0,0 +1,175 @@
import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
import "./tasks/stringtie.wdl" as stringtie
import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/qualimapBAMqc.wdl" as qualimapBAMqc
import "./tasks/qualimapRNAseq.wdl" as qualimapRNAseq
import "./tasks/ballgown.wdl" as ballgown

# Workflow: {{ project_name }}
# Per-sample pipeline. Data flow between the calls below:
#   fastp -> fastqc
#         -> fastq_screen
#         -> hisat2 -> samtools -> qualimapBAMqc   (subsampled BAM)
#                               -> qualimapRNAseq  (subsampled BAM)
#                               -> stringtie       (sorted BAM)
# The multiqc_* and ballgown_* inputs are declared but no call in this
# workflow consumes them.
workflow {{ project_name }} {
  # --- sample ---------------------------------------------------------------
  File read1
  File read2
  String sample_id

  # --- fastp ----------------------------------------------------------------
  String fastp_docker
  String fastp_cluster
  String adapter_sequence
  String adapter_sequence_r2
  String umi_loc
  Int trim_front1
  Int trim_tail1
  Int max_len1
  Int trim_front2
  Int trim_tail2
  Int max_len2
  Int disable_adapter_trimming
  Int length_required
  Int umi_len
  Int UMI
  Int qualified_quality_phred
  Int length_required1
  Int disable_quality_filtering

  # --- fastqc ---------------------------------------------------------------
  String fastqc_docker
  String fastqc_cluster_config
  Int fastqc_disk_size

  # --- fastq_screen ---------------------------------------------------------
  File screen_ref_dir
  File fastq_screen_conf
  String fastqscreen_docker
  String fastqscreen_cluster_config
  Int fastqscreen_disk_size

  # --- hisat2 ---------------------------------------------------------------
  File idx
  String idx_prefix
  String hisat2_docker
  String hisat2_cluster
  String pen_intronlen
  Int pen_cansplice
  Int pen_noncansplice
  Int min_intronlen
  Int max_intronlen
  Int maxins
  Int minins

  # --- samtools -------------------------------------------------------------
  String samtools_docker
  String samtools_cluster
  Int insert_size

  # --- qualimap -------------------------------------------------------------
  String qualimapBAMqc_docker
  String qualimapBAMqc_cluster_config
  Int qualimapBAMqc_disk_size
  String qualimapRNAseq_docker
  String qualimapRNAseq_cluster_config
  Int qualimapRNAseq_disk_size

  # --- stringtie (gtf is also given to qualimapRNAseq) ----------------------
  File gtf
  String stringtie_docker
  String stringtie_cluster
  Int minimum_length_allowed_for_the_predicted_transcripts
  Int Junctions_no_spliced_reads
  Float minimum_isoform_abundance
  Float maximum_fraction_of_muliplelocationmapped_reads

  # --- declared but not consumed by any call below --------------------------
  String multiqc_cluster_config
  String multiqc_docker
  Int multiqc_disk_size
  String ballgown_docker
  String ballgown_cluster

  # Read trimming / filtering.
  call fastp.fastp as fastp {
    input:
      sample_id = sample_id,
      read1 = read1,
      read2 = read2,
      docker = fastp_docker,
      cluster = fastp_cluster,
      adapter_sequence = adapter_sequence,
      adapter_sequence_r2 = adapter_sequence_r2,
      umi_loc = umi_loc,
      trim_front1 = trim_front1,
      trim_tail1 = trim_tail1,
      max_len1 = max_len1,
      trim_front2 = trim_front2,
      trim_tail2 = trim_tail2,
      max_len2 = max_len2,
      disable_adapter_trimming = disable_adapter_trimming,
      length_required = length_required,
      umi_len = umi_len,
      UMI = UMI,
      qualified_quality_phred = qualified_quality_phred,
      length_required1 = length_required1,
      disable_quality_filtering = disable_quality_filtering
  }

  # QC of the trimmed reads.
  call fastqc.fastqc as fastqc {
    input:
      read1 = fastp.Trim_R1,
      read2 = fastp.Trim_R2,
      docker = fastqc_docker,
      cluster_config = fastqc_cluster_config,
      disk_size = fastqc_disk_size
  }

  # Screening of the trimmed reads.
  call fastqscreen.fastq_screen as fastqscreen {
    input:
      read1 = fastp.Trim_R1,
      read2 = fastp.Trim_R2,
      screen_ref_dir = screen_ref_dir,
      fastq_screen_conf = fastq_screen_conf,
      docker = fastqscreen_docker,
      cluster_config = fastqscreen_cluster_config,
      disk_size = fastqscreen_disk_size
  }

  # Alignment of the trimmed reads.
  call hisat2.hisat2 as hisat2 {
    input:
      sample_id = sample_id,
      idx = idx,
      idx_prefix = idx_prefix,
      Trim_R1 = fastp.Trim_R1,
      Trim_R2 = fastp.Trim_R2,
      docker = hisat2_docker,
      cluster = hisat2_cluster,
      pen_intronlen = pen_intronlen,
      pen_cansplice = pen_cansplice,
      pen_noncansplice = pen_noncansplice,
      min_intronlen = min_intronlen,
      max_intronlen = max_intronlen,
      maxins = maxins,
      minins = minins
  }

  # SAM -> sorted / indexed / subsampled BAM plus insert-size table.
  call samtools.samtools as samtools {
    input:
      sample_id = sample_id,
      sam = hisat2.sam,
      docker = samtools_docker,
      cluster = samtools_cluster,
      insert_size = insert_size
  }

  # Both qualimap reports run on the subsampled BAM (out_percent).
  call qualimapBAMqc.qualimapBAMqc as qualimapBAMqc {
    input:
      bam = samtools.out_percent,
      docker = qualimapBAMqc_docker,
      cluster_config = qualimapBAMqc_cluster_config,
      disk_size = qualimapBAMqc_disk_size
  }

  call qualimapRNAseq.qualimapRNAseq as qualimapRNAseq {
    input:
      bam = samtools.out_percent,
      gtf = gtf,
      docker = qualimapRNAseq_docker,
      cluster_config = qualimapRNAseq_cluster_config,
      disk_size = qualimapRNAseq_disk_size
  }

  # Quantification runs on the full sorted BAM (out_bam).
  call stringtie.stringtie as stringtie {
    input:
      sample_id = sample_id,
      gtf = gtf,
      bam = samtools.out_bam,
      docker = stringtie_docker,
      cluster = stringtie_cluster,
      minimum_length_allowed_for_the_predicted_transcripts = minimum_length_allowed_for_the_predicted_transcripts,
      Junctions_no_spliced_reads = Junctions_no_spliced_reads,
      minimum_isoform_abundance = minimum_isoform_abundance,
      maximum_fraction_of_muliplelocationmapped_reads = maximum_fraction_of_muliplelocationmapped_reads
  }
}

Cargando…
Cancelar
Guardar