Bladeren bron

first commit

master
lizhihui 5 jaren geleden
commit
9093b6c179
7 gewijzigde bestanden met toevoegingen van 184 en 0 verwijderingen
  1. +0
    -0
      README.md
  2. +18
    -0
      inputs
  3. +30
    -0
      tasks/fastp.wdl
  4. +27
    -0
      tasks/hisat2.wdl
  5. +34
    -0
      tasks/samtools.wdl
  6. +26
    -0
      tasks/stringtie.wdl
  7. +49
    -0
      workflow.wdl

+ 0
- 0
README.md Bestand weergeven


+ 18
- 0
inputs Bestand weergeven

@@ -0,0 +1,18 @@
{
  "{{ project_name }}.sample_id": "{{ sample_id }}",
  "{{ project_name }}.read1": "{{ read1 }}",
  "{{ project_name }}.read2": "{{ read2 }}",
  "{{ project_name }}.idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
  "{{ project_name }}.gtf": "oss://pgx-reference-data/reference/annotation/Homo_sapiens.GRCh38.93.gtf",
  "{{ project_name }}.idx_prefix": "genome_snp_tran",
  "{{ project_name }}.hisat2.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/hisat2:v2.0.5-1-deb-cv1",
  "{{ project_name }}.hisat2.cluster": "OnDemand bcs.a2.3xlarge img-ubuntu-vpc",
  "{{ project_name }}.samtools.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/samtools:v1.3.1",
  "{{ project_name }}.samtools.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.stringtie.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/stringtie:v1.3.4",
  "{{ project_name }}.stringtie.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  "{{ project_name }}.fastp.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
  "{{ project_name }}.fastp.cluster": "OnDemand bcs.a2.large img-ubuntu-vpc",
  {#- default(..., true) falls back when the variable is undefined, null or empty, not only when it equals '' -#}
  "{{ project_name }}.adapter_sequence": "{{ adapter_sequence | default('AGATCGGAAGAGCACACGTCTGAACTCCAGTCA', true) }}",
  "{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 | default('AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT', true) }}"
}

+ 30
- 0
tasks/fastp.wdl Bestand weergeven

@@ -0,0 +1,30 @@
task fastp {
    # Runs fastp on one paired-end sample and exposes the trimmed reads
    # together with the JSON and HTML reports.

    # Prefix used for every file this task writes.
    String sample_id
    # Raw paired-end reads.
    File read1
    File read2
    # Adapter sequences forwarded to --adapter_sequence / --adapter_sequence_r2.
    String adapter_sequence
    String adapter_sequence_r2
    # Container image and compute instance spec consumed by the runtime block.
    String docker
    String cluster

    command <<<
        fastp \
            --thread 4 \
            -l 50 \
            -q 20 \
            -u 20 \
            --adapter_sequence ${adapter_sequence} \
            --adapter_sequence_r2 ${adapter_sequence_r2} \
            --detect_adapter_for_pe \
            -i ${read1} \
            -I ${read2} \
            -o ${sample_id}_R1.fastq.gz \
            -O ${sample_id}_R2.fastq.gz \
            -j ${sample_id}.json \
            -h ${sample_id}.html
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        # Names mirror the -j / -h / -o / -O arguments in the command above.
        File json = sample_id + ".json"
        File report = sample_id + ".html"
        File Trim_R1 = sample_id + "_R1.fastq.gz"
        File Trim_R2 = sample_id + "_R2.fastq.gz"
    }
}




+ 27
- 0
tasks/hisat2.wdl Bestand weergeven

@@ -0,0 +1,27 @@
task hisat2 {
    # Aligns the trimmed read pair with HISAT2 and emits the SAM alignment
    # plus the gzipped read pairs written by --un-conc-gz.

    # Location holding the HISAT2 index; combined with idx_prefix to form -x.
    File idx
    # Trimmed mates 1 and 2.
    File Trim_R1
    File Trim_R2
    # Basename of the index files inside idx.
    String idx_prefix
    # Prefix used for every file this task writes.
    String sample_id
    # Container image and compute instance spec consumed by the runtime block.
    String docker
    String cluster

    command <<<
        # Use every core the instance reports.
        threads=$(nproc)
        hisat2 -t \
            -p $threads \
            -x ${idx}/${idx_prefix} \
            -1 ${Trim_R1} \
            -2 ${Trim_R2} \
            -S ${sample_id}.sam \
            --un-conc-gz ${sample_id}_un.fq.gz
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File sam = sample_id + ".sam"
        # --un-conc-gz inserts the mate number before the final extension.
        File unmapread_1p = sample_id + "_un.fq.1.gz"
        File unmapread_2p = sample_id + "_un.fq.2.gz"
    }
}

+ 34
- 0
tasks/samtools.wdl Bestand weergeven

@@ -0,0 +1,34 @@
task samtools {
    # Converts a SAM file to a coordinate-sorted, indexed BAM and extracts
    # the IS (insert size) rows of `samtools stats` into a separate file.

    # Alignment to convert.
    File sam
    # Prefix from which the default file names below are derived.
    String sample_id
    # File names used by the command and the output section.
    String bam = sample_id + ".bam"
    String sorted_bam = sample_id + ".sorted.bam"
    String sorted_bam_index = sample_id + ".sorted.bam.bai"
    String ins_size = sample_id + ".ins_size"
    # Container image and compute instance spec consumed by the runtime block.
    String docker
    String cluster

    command <<<
        set -o pipefail
        set -e
        /opt/conda/bin/samtools view -bS ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 1000000000 ${bam} -o ${sorted_bam}
        /opt/conda/bin/samtools index ${sorted_bam}
        # grep exits 1 when no line matches; with pipefail + set -e that would
        # abort the task even though an empty result is valid. Accept exit 1
        # only, so real grep errors (exit >= 2) still fail the task.
        # Write to ins_size (not a re-derived name) so the file always matches
        # the out_ins_size output, even when ins_size is overridden.
        /opt/conda/bin/samtools stats -i 8000 ${sorted_bam} \
            | { grep '^IS' || test $? -eq 1; } \
            | cut -f 2- > ${ins_size}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_bam_index = sorted_bam_index
        File out_ins_size = ins_size
    }

}


+ 26
- 0
tasks/stringtie.wdl Bestand weergeven

@@ -0,0 +1,26 @@
task stringtie {
    # Runs StringTie against a reference annotation and collects the
    # per-sample GTF, coverage GTF, gene abundance table and the *.ctab
    # tables written next to the output GTF.

    # Alignment to process.
    File bam
    # Reference annotation passed to -G.
    File gtf
    # Container image consumed by the runtime block.
    String docker
    # Prefix used for every file and directory this task writes.
    String sample_id
    # Compute instance spec consumed by the runtime block.
    String cluster

    command <<<
        # Use every core the instance reports.
        threads=$(nproc)
        mkdir ballgown
        /opt/conda/bin/stringtie -e -B \
            -p $threads \
            -G ${gtf} \
            -o ballgown/${sample_id}/${sample_id}.gtf \
            -C ${sample_id}.cov.ref.gtf \
            -A ${sample_id}.gene.abundance.txt \
            ${bam}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 150 /cromwell_root/"
    }

    output {
        File covered_transcripts = sample_id + ".cov.ref.gtf"
        File gene_abundance = sample_id + ".gene.abundance.txt"
        # Everything under ballgown/<sample_id>/: the GTF plus five ctab tables.
        Array[File] ballgown = [
            "ballgown/${sample_id}/${sample_id}.gtf",
            "ballgown/${sample_id}/e2t.ctab",
            "ballgown/${sample_id}/e_data.ctab",
            "ballgown/${sample_id}/i2t.ctab",
            "ballgown/${sample_id}/i_data.ctab",
            "ballgown/${sample_id}/t_data.ctab"
        ]
    }
}

+ 49
- 0
workflow.wdl Bestand weergeven

@@ -0,0 +1,49 @@
import "./tasks/fastp.wdl" as fastp
import "./tasks/hisat2.wdl" as hisat2
import "./tasks/samtools.wdl" as samtools
import "./tasks/stringtie.wdl" as stringtie

workflow {{ project_name }} {
    # Per-sample pipeline: fastp -> hisat2 -> samtools -> stringtie.
    # The workflow name is a template placeholder filled in at render time.
    # Each task's docker and cluster inputs are not wired here; they are
    # supplied per task through the inputs file.

    # Sample identity and raw paired-end reads.
    String sample_id
    File read1
    File read2

    # Adapter sequences forwarded to fastp.
    String adapter_sequence
    String adapter_sequence_r2

    # HISAT2 index location and basename, and the reference annotation.
    File idx
    String idx_prefix
    File gtf

    # Step 1: trim the raw reads.
    call fastp.fastp as fastp {
        input:
            sample_id = sample_id,
            read1 = read1,
            read2 = read2,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2
    }

    # Step 2: align the trimmed reads.
    call hisat2.hisat2 as hisat2 {
        input:
            sample_id = sample_id,
            idx = idx,
            idx_prefix = idx_prefix,
            Trim_R1 = fastp.Trim_R1,
            Trim_R2 = fastp.Trim_R2
    }

    # Step 3: convert the SAM to a sorted, indexed BAM.
    call samtools.samtools as samtools {
        input:
            sample_id = sample_id,
            sam = hisat2.sam
    }

    # Step 4: run StringTie on the sorted BAM.
    call stringtie.stringtie as stringtie {
        input:
            gtf = gtf,
            bam = samtools.out_bam,
            sample_id = sample_id
    }
}



Laden…
Annuleren
Opslaan