# Align one paired-end sample with HISAT2.
#
# Inputs
#   idx          directory that contains the HISAT2 index files
#   idx_prefix   index basename inside `idx` (passed to -x as idx/idx_prefix)
#   read1/read2  mate-1 / mate-2 read files
#   sample_id    prefix used for every file this task writes
#   docker       container image the command runs in
#   cluster      forwarded verbatim to the `cluster` runtime attribute
#
# Outputs
#   sam                          alignments written by -S
#   unmapread_1p / unmapread_2p  per-mate files produced by --un-conc-gz
task hisat2 {
    File idx
    File read1
    File read2
    String idx_prefix
    String sample_id
    String docker
    String cluster

    command {
        threads=$(nproc)
        hisat2 -t \
            -p $threads \
            -x ${idx}/${idx_prefix} \
            -1 ${read1} \
            -2 ${read2} \
            -S ${sample_id}.sam \
            --un-conc-gz ${sample_id}_un.fq.gz
    }

    # NOTE(review): cluster/systemDisk/dataDisk are backend-specific runtime
    # attributes; their exact meaning depends on the Cromwell backend in use.
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File sam = sample_id + ".sam"
        # The command passes "<sample_id>_un.fq.gz" to --un-conc-gz; the names
        # collected here carry a ".1" / ".2" mate tag before the final ".gz".
        File unmapread_1p = sample_id + "_un.fq.1.gz"
        File unmapread_2p = sample_id + "_un.fq.2.gz"
    }
}
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.read1": "{{ read1 }}",
"{{ project_name }}.read2": "{{ read2 }}",
"{{ project_name }}.idx": "oss://pgx-reference-data/reference/hisat2/grch38_snp_tran/",
# Convert a SAM file to a coordinate-sorted, indexed BAM and extract the
# insert-size section of `samtools stats`.
#
# Inputs
#   sam        alignment file to convert
#   sample_id  prefix from which every output filename is derived
#   docker     container image the command runs in
#   cluster    forwarded verbatim to the `cluster` runtime attribute
#
# Outputs
#   out_bam        <sample_id>.sorted.bam
#   out_bam_index  <sample_id>.sorted.bam.bai
#   out_ins_size   <sample_id>.ins_size (the "IS" rows of samtools stats,
#                  with the leading tag column removed)
task samtools {
    File sam
    String sample_id
    String bam = "${sample_id}.bam"
    String sorted_bam = "${sample_id}.sorted.bam"
    String sorted_bam_index = "${sample_id}.sorted.bam.bai"
    String ins_size = "${sample_id}.ins_size"
    String docker
    String cluster

    command <<<
        set -eo pipefail
        /opt/conda/bin/samtools view -b -S ${sam} > ${bam}
        /opt/conda/bin/samtools sort -m 1000000000 -o ${sorted_bam} ${bam}
        /opt/conda/bin/samtools index ${sorted_bam}
        /opt/conda/bin/samtools stats -i 8000 ${sorted_bam} |
            grep '^IS' |
            cut -f 2- > ${ins_size}
    >>>

    # NOTE(review): cluster/systemDisk/dataDisk are backend-specific runtime
    # attributes; their exact meaning depends on the Cromwell backend in use.
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 200 /cromwell_root/"
    }

    output {
        File out_bam = sorted_bam
        File out_bam_index = sorted_bam_index
        File out_ins_size = ins_size
    }
}
# Quantify a sorted BAM against a reference annotation with StringTie and
# collect the Ballgown table files.
#
# Inputs
#   bam        alignments to quantify
#   gtf        reference annotation passed to -G
#   sample_id  prefix / sub-directory name for every file this task writes
#   docker     container image the command runs in
#   cluster    forwarded verbatim to the `cluster` runtime attribute
#
# Outputs
#   covered_transcripts  file written by -C
#   gene_abundance       file written by -A
#   ballgown             the -o GTF plus the five *.ctab tables expected next
#                        to it under ballgown/<sample_id>/
#   genecount            <sample_id>_genecount.csv
task stringtie {
    File bam
    File gtf
    String docker
    String sample_id
    String cluster

    command <<<
        nt=$(nproc)
        # -p makes the step idempotent if the directory already exists.
        mkdir -p ballgown
        # NOTE(review): upstream StringTie documents -g as an integer
        # (bundle gap), not a filename. Presumably the image ships a build
        # where -g writes the gene-count CSV that `genecount` collects below;
        # confirm against the container.
        /opt/conda/bin/stringtie -e -B -p $nt -G ${gtf} -o ballgown/${sample_id}/${sample_id}.gtf -C ${sample_id}.cov.ref.gtf -A ${sample_id}.gene.abundance.txt ${bam} -g ${sample_id}_genecount.csv
    >>>

    # NOTE(review): cluster/systemDisk/dataDisk are backend-specific runtime
    # attributes; their exact meaning depends on the Cromwell backend in use.
    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd 150 /cromwell_root/"
    }

    output {
        File covered_transcripts = "${sample_id}.cov.ref.gtf"
        File gene_abundance = "${sample_id}.gene.abundance.txt"
        Array[File] ballgown = ["ballgown/${sample_id}/${sample_id}.gtf", "ballgown/${sample_id}/e2t.ctab", "ballgown/${sample_id}/e_data.ctab", "ballgown/${sample_id}/i2t.ctab", "ballgown/${sample_id}/i_data.ctab", "ballgown/${sample_id}/t_data.ctab"]
        # Must be "${sample_id}", not "{sample_id}": without the "$" the
        # placeholder is not interpolated and the engine would look for a
        # file literally named "{sample_id}_genecount.csv".
        File genecount = "${sample_id}_genecount.csv"
    }
}