Quellcode durchsuchen

first commit

master
biolcl vor 3 Jahren
Commit
8fe665b073
8 geänderte Dateien mit 397 neuen und 0 gelöschten Zeilen
  1. +1
    -0
      README.md
  2. +31
    -0
      defaults
  3. +34
    -0
      inputs
  4. +70
    -0
      tasks/arriba.wdl
  5. +68
    -0
      tasks/fastp.wdl
  6. +36
    -0
      tasks/mixcr.wdl
  7. +50
    -0
      tasks/starfusion.wdl
  8. +107
    -0
      workflow.wdl

+ 1
- 0
README.md Datei anzeigen

@@ -0,0 +1 @@
word.docx

+ 31
- 0
defaults Datei anzeigen

@@ -0,0 +1,31 @@
{
"starfusion_database_dir":"oss://pgx-reference-data/reference/starfusion/ctat_genome_lib_build_dir/",
"starfusion_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/starfusion:1.10.0",
"starfusion_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc",
"disk_size":"200",
"fastp_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/fastp:0.19.6",
"fastp_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc",
"trim_front1":"0",
"trim_tail1":"0",
"max_len1":"0",
"trim_front2":"0",
"trim_tail2":"0",
"max_len2":"0",
"adapter_sequence":"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
"adapter_sequence_r2":"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
"disable_adapter_trimming":"0",
"length_required":"50",
"length_required1":"20",
"UMI":"0",
"umi_len":"0",
"umi_loc":"umi_loc",
"qualified_quality_phred":"20",
"disable_quality_filtering":"1",
"STAR_INDEX_DIR":"oss://pgx-reference-data/reference/arriba_hg38_v37/STAR_index_hg38_GENCODE37/",
"ASSEMBLY_FA":"oss://pgx-result/chenqingwang/arriba/hg38.fa",
"ANNOTATION_GTF":"oss://pgx-result/chenqingwang/arriba/GENCODE37.gtf",
"arriba_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/arriba:2.1.0",
"arriba_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc",
"mixcr_docker":"registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/mixcr:3",
"mixcr_cluster":"OnDemand bcs.b4.3xlarge img-ubuntu-vpc"
}

+ 34
- 0
inputs Datei anzeigen

@@ -0,0 +1,34 @@
{
"{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.fastq1": "{{ fastq1 }}",
"{{ project_name }}.fastq2": "{{ fastq2 }}",
"{{ project_name }}.starfusion_database_dir": "{{ starfusion_database_dir }}",
"{{ project_name }}.starfusion_docker": "{{ starfusion_docker }}",
"{{ project_name }}.starfusion_cluster": "{{ starfusion_cluster }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.fastp_docker": "{{ fastp_docker }}",
"{{ project_name }}.fastp_cluster": "{{ fastp_cluster }}",
"{{ project_name }}.trim_front1": "{{ trim_front1 }}",
"{{ project_name }}.trim_tail1": "{{ trim_tail1 }}",
"{{ project_name }}.max_len1": "{{ max_len1 }}",
"{{ project_name }}.trim_front2": "{{ trim_front2 }}",
"{{ project_name }}.trim_tail2": "{{ trim_tail2 }}",
"{{ project_name }}.max_len2": "{{ max_len2 }}",
"{{ project_name }}.adapter_sequence": "{{ adapter_sequence }}",
"{{ project_name }}.adapter_sequence_r2": "{{ adapter_sequence_r2 }}",
"{{ project_name }}.disable_adapter_trimming": "{{ disable_adapter_trimming }}",
"{{ project_name }}.length_required1": "{{ length_required1 }}",
"{{ project_name }}.UMI": "{{ UMI }}",
"{{ project_name }}.umi_loc": "{{ umi_loc }}",
"{{ project_name }}.umi_len": "{{ umi_len }}",
"{{ project_name }}.length_required": "{{ length_required }}",
"{{ project_name }}.qualified_quality_phred": "{{ qualified_quality_phred }}",
"{{ project_name }}.disable_quality_filtering": "{{ disable_quality_filtering }}",
"{{ project_name }}.STAR_INDEX_DIR": "{{ STAR_INDEX_DIR }}",
"{{ project_name }}.ASSEMBLY_FA": "{{ ASSEMBLY_FA }}",
"{{ project_name }}.ANNOTATION_GTF": "{{ ANNOTATION_GTF }}",
"{{ project_name }}.arriba_docker": "{{ arriba_docker }}",
"{{ project_name }}.arriba_cluster": "{{ arriba_cluster }}",
"{{ project_name }}.mixcr_docker": "{{ mixcr_docker }}",
"{{ project_name }}.mixcr_cluster": "{{ mixcr_cluster }}"
}

+ 70
- 0
tasks/arriba.wdl Datei anzeigen

@@ -0,0 +1,70 @@
# Align a paired-end FASTQ pair with STAR (chimeric alignments kept inside the
# BAM) and then call gene fusions on that BAM with arriba.
#
# Inputs:
#   sample_id       prefix used for every file written to ./output/
#   fastq1/fastq2   gzip-compressed mates (they are read through zcat)
#   STAR_INDEX_DIR  value handed to STAR --genomeDir
#   ASSEMBLY_FA     value handed to arriba -a
#   ANNOTATION_GTF  value handed to arriba -g
#   disk_size       size inserted into the dataDisk runtime string
#   docker/cluster  runtime image and cluster specification
#
# Outputs:
#   arriba_result   every *.tsv found in ./output/
#   arriba_bam      every *.bam found in ./output/
task arriba {
    # Sample and reads
    String sample_id
    File fastq1
    File fastq2

    # Reference data
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF

    # Execution settings
    String disk_size
    String docker
    String cluster

    command <<<
        set -o pipefail
        set -e

        mkdir ./output/

        # Step 1: STAR alignment; unsorted, uncompressed BAM with chimeric
        # reads reported within the main BAM.
        STAR \
            --runThreadN 16 \
            --genomeDir ${STAR_INDEX_DIR} \
            --genomeLoad NoSharedMemory \
            --readFilesIn ${fastq1} ${fastq2} \
            --readFilesCommand zcat \
            --outSAMtype BAM Unsorted \
            --outSAMunmapped Within \
            --outBAMcompression 0 \
            --outFilterMultimapNmax 50 \
            --peOverlapNbasesMin 10 \
            --alignSplicedMateMapLminOverLmate 0.5 \
            --alignSJstitchMismatchNmax 5 -1 5 5 \
            --chimSegmentMin 10 \
            --chimOutType WithinBAM HardClip \
            --chimJunctionOverhangMin 10 \
            --chimScoreDropMax 30 \
            --chimScoreJunctionNonGTAG 0 \
            --chimScoreSeparation 1 \
            --chimSegmentReadGapMax 3 \
            --chimMultimapNmax 50 \
            --outFileNamePrefix ./output/${sample_id}.

        # Step 2: fusion calling on the STAR BAM. The blacklist, known-fusion
        # and protein-domain files are referenced by absolute path and are
        # therefore expected to be present in the docker image.
        /arriba_v2.1.0/arriba \
            -x ./output/${sample_id}.Aligned.out.bam \
            -o ./output/${sample_id}_fusions.tsv \
            -O ./output/${sample_id}_fusions.discarded.tsv \
            -a ${ASSEMBLY_FA} \
            -g ${ANNOTATION_GTF} \
            -b /arriba_v2.1.0/database/blacklist_hg38_GRCh38_v2.1.0.tsv.gz \
            -k /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -t /arriba_v2.1.0/database/known_fusions_hg38_GRCh38_v2.1.0.tsv.gz \
            -p /arriba_v2.1.0/database/protein_domains_hg38_GRCh38_v2.1.0.gff3
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] arriba_result = glob("./output/*.tsv")
        Array[File] arriba_bam = glob("./output/*.bam")
    }
}



+ 68
- 0
tasks/fastp.wdl Datei anzeigen

@@ -0,0 +1,68 @@
# Pre-process a paired-end FASTQ pair with fastp in up to three chained passes.
# Each pass is either executed or replaced by a plain copy, depending on an
# integer switch:
#
#   pass 1  disable_quality_filtering == 0 -> copy, otherwise fastp (trim options only)
#   pass 2  UMI == 0                       -> copy, otherwise fastp -U with umi_loc/umi_len
#   pass 3  disable_adapter_trimming == 0  -> fastp with adapter/length/quality
#                                             options, otherwise copy
#
# Intermediate files live under /cromwell_root/tmp/fastp/; the final reads and
# the fastp JSON/HTML report are written to the task's working directory.
#
# Inputs:
#   read1/read2                       input mates
#   sample_id                         prefix of every produced file
#   adapter_sequence(_r2)             adapters handed to pass 3
#   umi_loc, umi_len                  UMI settings handed to pass 2
#   trim_front*/trim_tail*/max_len*   per-mate trimming options
#   length_required                   value of fastp -l in pass 3
#   qualified_quality_phred           value of fastp -q in pass 3
#   length_required1                  value of fastp -u in pass 3
#   docker, cluster, disk_size        runtime settings
#
# Outputs: json, report, Trim_R1, Trim_R2.
#
# NOTE(review): pass 1 runs fastp when disable_quality_filtering is non-zero,
#   which reads like inverted polarity for a "disable" switch - confirm intent.
# NOTE(review): the trim_front/trim_tail/max_len options are passed to every
#   executed pass, so non-zero values are applied once per pass - confirm.
# NOTE(review): if all three passes fall through to "copy", no fastp run
#   happens and the json/report outputs will not exist.
task fastp {
    File read1
    File read2
    String sample_id
    String adapter_sequence
    String adapter_sequence_r2
    String docker
    String cluster
    String disk_size
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    command <<<
        # Fail fast, like the other tasks of this pipeline; previously a failed
        # pass was only noticed indirectly by a later step.
        set -o pipefail
        set -e

        mkdir -p /cromwell_root/tmp/fastp/

        ## 1. Disable_quality_filtering
        if [ "${disable_quality_filtering}" == 0 ]
        then
            # Fixed: the target names lacked the dollar sign, so the copies were
            # written to a literal "{sample_id}_R?..." file that pass 2 never found.
            cp ${read1} /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz
            cp ${read2} /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz
        else
            fastp --thread 16 \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i ${read1} -I ${read2} \
                -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz \
                -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 2. UMI
        if [ "${UMI}" == 0 ]
        then
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz
        else
            fastp --thread 16 -U --umi_loc=${umi_loc} --umi_len=${umi_len} \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp1.gz \
                -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp1.gz \
                -o /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz \
                -O /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        fi

        ## 3. Trim
        if [ "${disable_adapter_trimming}" == 0 ]
        then
            fastp --thread 16 -l ${length_required} -q ${qualified_quality_phred} -u ${length_required1} \
                --adapter_sequence ${adapter_sequence} --adapter_sequence_r2 ${adapter_sequence_r2} \
                --detect_adapter_for_pe \
                --trim_front1 ${trim_front1} --trim_tail1 ${trim_tail1} --max_len1 ${max_len1} \
                --trim_front2 ${trim_front2} --trim_tail2 ${trim_tail2} --max_len2 ${max_len2} \
                -i /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz \
                -I /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz \
                -o ${sample_id}_R1.fastq.gz -O ${sample_id}_R2.fastq.gz \
                -j ${sample_id}.json -h ${sample_id}.html
        else
            cp /cromwell_root/tmp/fastp/${sample_id}_R1.fastq.tmp2.gz ${sample_id}_R1.fastq.gz
            cp /cromwell_root/tmp/fastp/${sample_id}_R2.fastq.tmp2.gz ${sample_id}_R2.fastq.gz
        fi
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Every executed pass writes to the same report names, so these hold
        # the report of the last fastp invocation that actually ran.
        File json = "${sample_id}.json"
        File report = "${sample_id}.html"
        File Trim_R1 = "${sample_id}_R1.fastq.gz"
        File Trim_R2 = "${sample_id}_R2.fastq.gz"
    }
}

+ 36
- 0
tasks/mixcr.wdl Datei anzeigen

@@ -0,0 +1,36 @@
# Run the MiXCR "analyze shotgun" pipeline on a paired-end FASTQ pair with
# species hs and RNA starting material, keeping only productive clonotypes.
#
# Inputs:
#   sample_id       names the output directory and the output file prefix
#   fastq1/fastq2   input mates
#   docker/cluster  runtime image and cluster specification
#   disk_size       size inserted into the dataDisk runtime string
#
# Output:
#   mixcr_result    every file in <sample_id>_mixcr_output/ whose name starts
#                   with <sample_id>
task mixcr {
    # Sample and reads
    String sample_id
    File fastq1
    File fastq2

    # Execution settings
    String docker
    String cluster
    String disk_size

    command <<<
        set -o pipefail
        set -e

        mkdir ${sample_id}_mixcr_output

        # The last positional argument is the output prefix.
        mixcr analyze shotgun --species hs \
            -t 16 \
            --starting-material rna \
            --only-productive ${fastq1} ${fastq2} ${sample_id}_mixcr_output/${sample_id}
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] mixcr_result = glob("${sample_id}_mixcr_output/${sample_id}*")
    }
}



+ 50
- 0
tasks/starfusion.wdl Datei anzeigen

@@ -0,0 +1,50 @@
# Detect fusion transcripts with STAR-Fusion, including the FusionInspector
# "validate" step and coding-effect examination, then pack the
# FusionInspector-validate directory into a .tgz archive.
#
# Inputs:
#   sample_id       names the output directory <sample_id>/output
#   fastq1/fastq2   left and right mates
#   database_dir    value handed to --genome_lib_dir
#   docker/cluster  runtime image and cluster specification
#   disk_size       size inserted into the dataDisk runtime string
#
# Outputs: globs over <sample_id>/output for *.tsv, *.bam, *.tab, *.junction,
# *.out and *.tgz, plus *.tsv inside FusionInspector-validate/.
task starfusion {
    # Sample and reads
    String sample_id
    File fastq1
    File fastq2

    # Reference data
    File database_dir

    # Execution settings
    String docker
    String cluster
    String disk_size

    command <<<
        set -o pipefail
        set -e

        mkdir -p ${sample_id}/output

        STAR-Fusion --genome_lib_dir ${database_dir} \
            --left_fq ${fastq1} \
            --right_fq ${fastq2} \
            --CPU 16 \
            --output_dir ${sample_id}/output \
            --FusionInspector validate \
            --examine_coding_effect

        # Archive the whole validation directory so it is delivered as a
        # single file next to the individually globbed tables.
        tar -zcvf ${sample_id}/output/${sample_id}_FusionInspector-validate.tgz ${sample_id}/output/FusionInspector-validate
    >>>

    runtime {
        docker: docker
        cluster: cluster
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        Array[File] starfusion_result = glob("${sample_id}/output/*.tsv")
        Array[File] FusionInspector_result = glob("${sample_id}/output/FusionInspector-validate/*.tsv")
        Array[File] starfusion_bam = glob("${sample_id}/output/*.bam")
        Array[File] starfusion_tab = glob("${sample_id}/output/*.tab")
        Array[File] starfusion_junction = glob("${sample_id}/output/*.junction")
        Array[File] starfusion_out = glob("${sample_id}/output/*.out")
        Array[File] starfusion_tgz = glob("${sample_id}/output/*.tgz")
    }
}



+ 107
- 0
workflow.wdl Datei anzeigen

@@ -0,0 +1,107 @@
import "./tasks/starfusion.wdl" as starfusion
import "./tasks/fastp.wdl" as fastp
import "./tasks/arriba.wdl" as arriba
import "./tasks/mixcr.wdl" as mixcr

# Top-level workflow (its name is filled in from the project_name template
# variable). The raw reads are first processed by fastp; the resulting
# Trim_R1/Trim_R2 files are then fed to three independent consumers:
# STAR-Fusion, arriba and MiXCR.
workflow {{ project_name }} {
    # ---- sample ----
    String sample_id
    File fastq1
    File fastq2

    # ---- shared runtime setting ----
    String disk_size

    # ---- STAR-Fusion ----
    File starfusion_database_dir
    String starfusion_docker
    String starfusion_cluster

    # ---- fastp ----
    String fastp_docker
    String fastp_cluster
    String adapter_sequence
    String adapter_sequence_r2
    String umi_loc
    Int trim_front1
    Int trim_tail1
    Int max_len1
    Int trim_front2
    Int trim_tail2
    Int max_len2
    Int disable_adapter_trimming
    Int length_required
    Int umi_len
    Int UMI
    Int qualified_quality_phred
    Int length_required1
    Int disable_quality_filtering

    # ---- arriba ----
    File STAR_INDEX_DIR
    File ASSEMBLY_FA
    File ANNOTATION_GTF
    String arriba_docker
    String arriba_cluster

    # ---- MiXCR ----
    String mixcr_docker
    String mixcr_cluster

    # Read pre-processing; every downstream call consumes its outputs.
    call fastp.fastp as fastp {
        input:
            read1 = fastq1,
            read2 = fastq2,
            sample_id = sample_id,
            docker = fastp_docker,
            cluster = fastp_cluster,
            disk_size = disk_size,
            adapter_sequence = adapter_sequence,
            adapter_sequence_r2 = adapter_sequence_r2,
            umi_loc = umi_loc,
            trim_front1 = trim_front1,
            trim_tail1 = trim_tail1,
            max_len1 = max_len1,
            trim_front2 = trim_front2,
            trim_tail2 = trim_tail2,
            max_len2 = max_len2,
            disable_adapter_trimming = disable_adapter_trimming,
            length_required = length_required,
            umi_len = umi_len,
            UMI = UMI,
            qualified_quality_phred = qualified_quality_phred,
            length_required1 = length_required1,
            disable_quality_filtering = disable_quality_filtering
    }

    # Fusion detection with STAR-Fusion on the fastp output.
    call starfusion.starfusion as starfusion {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            database_dir = starfusion_database_dir,
            docker = starfusion_docker,
            cluster = starfusion_cluster,
            disk_size = disk_size
    }

    # Fusion detection with arriba on the fastp output.
    call arriba.arriba as arriba {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            STAR_INDEX_DIR = STAR_INDEX_DIR,
            ASSEMBLY_FA = ASSEMBLY_FA,
            ANNOTATION_GTF = ANNOTATION_GTF,
            disk_size = disk_size,
            docker = arriba_docker,
            cluster = arriba_cluster
    }

    # MiXCR analysis on the fastp output.
    call mixcr.mixcr as mixcr {
        input:
            sample_id = sample_id,
            fastq1 = fastp.Trim_R1,
            fastq2 = fastp.Trim_R2,
            docker = mixcr_docker,
            cluster = mixcr_cluster,
            disk_size = disk_size
    }
}

Laden…
Abbrechen
Speichern