5 jaren geleden · 9093b6c179
--- a/README.md
+++ b/README.md
--- a/inputs
+++ b/inputs
@@ -0,0 +1,18 @@
 {
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.read1": "{{ read1 }}",
    "{{ project_name }}.read2": "{{ read2 }}",
    "{{ project_name }}.idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
    "{{ project_name }}.gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
    "{{ project_name }}.idx_prefix": "genome_snp_tran",
    "{{ project_name }}.hisat2.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.0.5-1-deb-cv1",
    "{{ project_name }}.hisat2.cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
    "{{ project_name }}.samtools.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
    "{{ project_name }}.samtools.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "{{ project_name }}.stringtie.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
    "{{ project_name }}.stringtie.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "{{ project_name }}.fastp.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
    "{{ project_name }}.fastp.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
    "{{ project_name }}.adapter_sequence": "{{ adapter_sequence if adapter_sequence != '' else 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCA' }}",
    "{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 if adapter_sequence_r2 != '' else 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT' }}"
 }
--- a/tasks/fastp.wdl
+++ b/tasks/fastp.wdl
@@ -0,0 +1,30 @@
 # Trim one paired-end sample with fastp.
 #
 # Inputs
 #   sample_id            prefix of every file written by this task
 #   read1, read2         the two FASTQ mates of the sample
 #   adapter_sequence     value handed to --adapter_sequence (read 1)
 #   adapter_sequence_r2  value handed to --adapter_sequence_r2 (read 2)
 #   docker, cluster      copied unchanged into the runtime section
 #
 # Outputs
 #   Trim_R1, Trim_R2     trimmed FASTQ mates
 #   json, report         the JSON and HTML reports requested via -j / -h
 #
 # The thread count (4) and the -l/-q/-u filter thresholds are fixed here;
 # see the fastp usage text for their exact meaning.
 task fastp {
    String sample_id
    File read1
    File read2
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster

    command <<<
      fastp \
        --thread 4 \
        -l 50 -q 20 -u 20 \
        --adapter_sequence ${adapter_sequence} \
        --adapter_sequence_r2 ${adapter_sequence_r2} \
        --detect_adapter_for_pe \
        -i ${read1} -I ${read2} \
        -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz \
        -j ${sample_id}.json \
        -h ${sample_id}.html
    >>>

    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
      File json = sample_id + ".json"
      File report = sample_id + ".html"
      File Trim_R1 = sample_id + "_R1.fastq.gz"
      File Trim_R2 = sample_id + "_R2.fastq.gz"
    }
 }



--- a/tasks/hisat2.wdl
+++ b/tasks/hisat2.wdl
@@ -0,0 +1,27 @@
 # Align trimmed paired-end reads with HISAT2.
 #
 # Inputs
 #   idx              location holding the HISAT2 index files
 #   idx_prefix       index basename; the aligner is given idx/idx_prefix
 #   Trim_R1, Trim_R2 trimmed FASTQ mates
 #   sample_id        prefix of every file written by this task
 #   docker, cluster  copied unchanged into the runtime section
 #
 # Outputs
 #   sam                         the alignment written via -S
 #   unmapread_1p, unmapread_2p  files named <sample_id>_un.fq.1.gz / .2.gz
 #
 # NOTE(review): the output section relies on HISAT2 expanding the
 # --un-conc-gz name into ".1"/".2" files - confirm against the HISAT2 manual
 # for the image version in use.
 task hisat2 {
    File idx
    File Trim_R1
    File Trim_R2
    String idx_prefix
    String sample_id
    String docker
    String cluster

    command <<<
      # Use every core the instance reports.
      threads=$(nproc)
      hisat2 -t -p $threads \
        -x ${idx}/${idx_prefix} \
        -1 ${Trim_R1} \
        -2 ${Trim_R2} \
        -S ${sample_id}.sam \
        --un-conc-gz ${sample_id}_un.fq.gz
    >>>

    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
      File sam = sample_id + ".sam"
      File unmapread_1p = sample_id + "_un.fq.1.gz"
      File unmapread_2p = sample_id + "_un.fq.2.gz"
    }
 }
--- a/tasks/samtools.wdl
+++ b/tasks/samtools.wdl
@@ -0,0 +1,34 @@
 # Convert a SAM file to a coordinate-sorted, indexed BAM and extract the
 # insert-size table from `samtools stats`.
 #
 # Inputs
 #   sam               alignment in SAM format
 #   sample_id         prefix used to derive the default file names below
 #   bam               name of the intermediate unsorted BAM
 #   sorted_bam        name of the sorted BAM
 #   sorted_bam_index  name under which the index is collected; samtools index
 #                     is called without an explicit output name, so this
 #                     should stay equal to sorted_bam + ".bai"
 #   ins_size          name of the insert-size table
 #   docker, cluster   copied unchanged into the runtime section
 #
 # Outputs
 #   out_bam, out_bam_index, out_ins_size
 task samtools {
    File sam
    String sample_id
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    String docker
    String cluster

    command <<<
       set -o pipefail
       set -e
       /opt/conda/bin/samtools view -bS ${sam} > ${bam}
       /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
       /opt/conda/bin/samtools index ${sorted_bam}
       # grep exits 1 when there are no IS rows (e.g. no paired alignments).
       # Under pipefail + set -e that would fail the whole task after the BAM
       # was built, so a "no match" status is accepted while real grep errors
       # (status 2 and above) still abort. The table is written to the declared
       # ins_size name so it always matches what the output section collects.
       /opt/conda/bin/samtools stats -i 8000 ${sorted_bam} | { grep '^IS' || [ $? -eq 1 ]; } | cut -f 2- > ${ins_size}
    >>>

    runtime {
       docker: docker
       cluster: cluster
       systemDisk: "cloud_ssd 40"
       dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
      File out_bam = sorted_bam
      File out_bam_index = sorted_bam_index
      File out_ins_size = ins_size
    }

 }

--- a/tasks/stringtie.wdl
+++ b/tasks/stringtie.wdl
@@ -0,0 +1,26 @@
 # Quantify a sorted BAM against a reference annotation with StringTie.
 #
 # Inputs
 #   bam              alignments to quantify
 #   gtf              reference annotation passed via -G
 #   sample_id        prefix / sub-directory name for everything written
 #   docker, cluster  copied unchanged into the runtime section
 #
 # Outputs
 #   covered_transcripts  file requested via -C
 #   gene_abundance       file requested via -A
 #   ballgown             the GTF written via -o plus the five .ctab tables
 #                        expected next to it under ballgown/<sample_id>/
 task stringtie {
    File bam
    File gtf
    String docker
    String sample_id
    String cluster

    command <<<
      # Use every core the instance reports.
      threads=$(nproc)
      mkdir ballgown
      /opt/conda/bin/stringtie -e -B -p $threads \
        -G ${gtf} \
        -o ballgown/${sample_id}/${sample_id}.gtf \
        -C ${sample_id}.cov.ref.gtf \
        -A ${sample_id}.gene.abundance.txt \
        ${bam}
    >>>

    runtime {
      docker: docker
      cluster: cluster
      systemDisk: "cloud_ssd 40"
      dataDisk: "cloud_ssd 150 /cromwell_root/"
    }

    output {
      File covered_transcripts = "${sample_id}.cov.ref.gtf"
      File gene_abundance = "${sample_id}.gene.abundance.txt"
      Array[File] ballgown = [
        "ballgown/${sample_id}/${sample_id}.gtf",
        "ballgown/${sample_id}/e2t.ctab",
        "ballgown/${sample_id}/e_data.ctab",
        "ballgown/${sample_id}/i2t.ctab",
        "ballgown/${sample_id}/i_data.ctab",
        "ballgown/${sample_id}/t_data.ctab"
      ]
    }
 }
--- a/workflow.wdl
+++ b/workflow.wdl
@@ -0,0 +1,49 @@
 import "./tasks/fastp.wdl" as fastp
 import "./tasks/hisat2.wdl" as hisat2
 import "./tasks/samtools.wdl" as samtools
 import "./tasks/stringtie.wdl" as stringtie

 # Per-sample pipeline: fastp trimming -> HISAT2 alignment -> samtools
 # sort/index/stats -> StringTie quantification.
 #
 # The docker and cluster settings of each task are not wired through the
 # calls below; they have to be supplied per call in the inputs file.
 workflow {{ project_name }} {

   # Sample identity and raw reads
   String sample_id
   File read1
   File read2

   # Adapters handed to fastp
   String adapter_sequence
   String adapter_sequence_r2

   # Reference data
   File idx
   String idx_prefix
   File gtf

   # 1. Adapter / quality trimming
   call fastp.fastp as fastp {
     input:
       sample_id = sample_id,
       read1 = read1,
       read2 = read2,
       adapter_sequence = adapter_sequence,
       adapter_sequence_r2 = adapter_sequence_r2
   }

   # 2. Alignment of the trimmed mates
   call hisat2.hisat2 as hisat2 {
     input:
       sample_id = sample_id,
       idx = idx,
       idx_prefix = idx_prefix,
       Trim_R1 = fastp.Trim_R1,
       Trim_R2 = fastp.Trim_R2
   }

   # 3. SAM -> sorted, indexed BAM plus insert-size table
   call samtools.samtools as samtools {
     input:
       sample_id = sample_id,
       sam = hisat2.sam
   }

   # 4. Quantification against the reference annotation
   call stringtie.stringtie as stringtie {
     input:
       gtf = gtf,
       bam = samtools.out_bam,
       sample_id = sample_id
   }
 }