{
    "adapter_seq": "not_available",
    "randomBase_in_adapter": "0",
    "QFILTER_MIN_QUAL": "20",
    "QFILTER_MIN_READ_FRAC": "80",
    "MIN_READ_LENGTH": "18",
    "KEEP_RANDOM_BARCODE_STATS": "false",
    "MAIN_ORGANISM_GENOME_ID": "hg38",
    "STAR_alignEndsType": "Local",
    "outFilterMatchNmin": "18",
    "outFilterMatchNminOverLread": "0.9",
    "STAR_outFilterMismatchNmax": "1",
    "ENDOGENOUS_LIB_PRIORITY": "miRNA,tRNA,gencode,piRNA,circRNA",
    "JAVA_RAM": "10G",
    "cluster_config": "cls-grhjsd45els3f5kg62u002",
    "disk_size": "200"
}
{
    "{{ project_name }}.sample_id": "{{ sample_id }}",
    "{{ project_name }}.INPUT_FILE_PATH": "{{ raw_fastq }}",
    "{{ project_name }}.ADAPTER_SEQ": "{{ adapter_seq }}",
    "{{ project_name }}.RANDOM_BARCODE_LENGTH": "{{ randomBase_in_adapter }}",
    "{{ project_name }}.QFILTER_MIN_QUAL": "{{ QFILTER_MIN_QUAL }}",
    "{{ project_name }}.QFILTER_MIN_READ_FRAC": "{{ QFILTER_MIN_READ_FRAC }}",
    "{{ project_name }}.MIN_READ_LENGTH": "{{ MIN_READ_LENGTH }}",
    "{{ project_name }}.KEEP_RANDOM_BARCODE_STATS": "{{ KEEP_RANDOM_BARCODE_STATS }}",
    "{{ project_name }}.MAIN_ORGANISM_GENOME_ID": "{{ MAIN_ORGANISM_GENOME_ID }}",
    "{{ project_name }}.STAR_alignEndsType": "{{ STAR_alignEndsType }}",
    "{{ project_name }}.STAR_outFilterMatchNmin": "{{ outFilterMatchNmin }}",
    "{{ project_name }}.STAR_outFilterMatchNminOverLread": "{{ outFilterMatchNminOverLread }}",
    "{{ project_name }}.STAR_outFilterMismatchNmax": "{{ STAR_outFilterMismatchNmax }}",
    "{{ project_name }}.ENDOGENOUS_LIB_PRIORITY": "{{ ENDOGENOUS_LIB_PRIORITY }}",
    "{{ project_name }}.JAVA_RAM": "{{ JAVA_RAM }}",
    "{{ project_name }}.DATABASE_PATH": "oss://pgx-reference-data/exceRpt_reference/",
    "{{ project_name }}.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/excerpt:latest",
    "{{ project_name }}.cluster_config": "{{ cluster_config }}",
    "{{ project_name }}.disk_size": "{{ disk_size }}"
}
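For illustration only, a rendered inputs file might look like the following. The project name "exceRpt", sample ID "test_sample", and the OSS FASTQ path are hypothetical placeholders; every other value is taken from the defaults above or fixed in the template.

{
    "exceRpt.sample_id": "test_sample",
    "exceRpt.INPUT_FILE_PATH": "oss://my-bucket/raw/test_sample.fastq.gz",
    "exceRpt.ADAPTER_SEQ": "not_available",
    "exceRpt.RANDOM_BARCODE_LENGTH": "0",
    "exceRpt.QFILTER_MIN_QUAL": "20",
    "exceRpt.QFILTER_MIN_READ_FRAC": "80",
    "exceRpt.MIN_READ_LENGTH": "18",
    "exceRpt.KEEP_RANDOM_BARCODE_STATS": "false",
    "exceRpt.MAIN_ORGANISM_GENOME_ID": "hg38",
    "exceRpt.STAR_alignEndsType": "Local",
    "exceRpt.STAR_outFilterMatchNmin": "18",
    "exceRpt.STAR_outFilterMatchNminOverLread": "0.9",
    "exceRpt.STAR_outFilterMismatchNmax": "1",
    "exceRpt.ENDOGENOUS_LIB_PRIORITY": "miRNA,tRNA,gencode,piRNA,circRNA",
    "exceRpt.JAVA_RAM": "10G",
    "exceRpt.DATABASE_PATH": "oss://pgx-reference-data/exceRpt_reference/",
    "exceRpt.docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/excerpt:latest",
    "exceRpt.cluster_config": "cls-grhjsd45els3f5kg62u002",
    "exceRpt.disk_size": "200"
}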
task Core {
    String sample_id
    File INPUT_FILE_PATH
    File DATABASE_PATH
    String ADAPTER_SEQ
    String RANDOM_BARCODE_LENGTH
    Int QFILTER_MIN_QUAL
    Int QFILTER_MIN_READ_FRAC
    Int MIN_READ_LENGTH
    String KEEP_RANDOM_BARCODE_STATS
    String MAIN_ORGANISM_GENOME_ID
    String STAR_alignEndsType
    Int STAR_outFilterMatchNmin
    Float STAR_outFilterMatchNminOverLread
    Int STAR_outFilterMismatchNmax
    String ENDOGENOUS_LIB_PRIORITY
    String JAVA_RAM
    String docker
    String cluster_config
    String disk_size
    String INPUT_FILE_ID = if basename("${INPUT_FILE_PATH}", ".gz") != basename("${INPUT_FILE_PATH}") then basename("${INPUT_FILE_PATH}", ".gz") else basename(basename("${INPUT_FILE_PATH}", ".fastq"), ".sra")

    command <<<
        #set -o pipefail
        #set -e
        nt=$(nproc)
        mkdir ${sample_id}
        if [ "${ADAPTER_SEQ}" = "not_available" ]
        then
            PARAMETERS_ADAPTER=""
        else
            PARAMETERS_ADAPTER="-A ${ADAPTER_SEQ}"
        fi
        exceRpt \
            -d ${DATABASE_PATH} \
            -N $nt \
            $PARAMETERS_ADAPTER -l ${RANDOM_BARCODE_LENGTH} -m ${MIN_READ_LENGTH} \
            -Q ${QFILTER_MIN_QUAL} -F ${QFILTER_MIN_READ_FRAC} \
            -M ${MAIN_ORGANISM_GENOME_ID} \
            -S ${STAR_alignEndsType} -O ${STAR_outFilterMatchNmin} -f ${STAR_outFilterMatchNminOverLread} -T ${STAR_outFilterMismatchNmax} \
            -E ${ENDOGENOUS_LIB_PRIORITY} \
            -K ${KEEP_RANDOM_BARCODE_STATS} \
            -J ${JAVA_RAM} \
            -i ${INPUT_FILE_PATH} \
            -o ${sample_id} \
            -s ${sample_id}
        find . -depth > fileList.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File fileList = "fileList.txt"
        Array[File] files_stat = glob("${sample_id}/*")
        Array[File] files_core = glob("${sample_id}/${INPUT_FILE_ID}_${sample_id}/*")
    }
}
import "./tasks/Core.wdl" as Core | |||||
workflow {{ project_name }} { | |||||
String sample_id | |||||
File INPUT_FILE_PATH | |||||
File DATABASE_PATH | |||||
String ADAPTER_SEQ | |||||
Int RANDOM_BARCODE_LENGTH | |||||
Int QFILTER_MIN_QUAL | |||||
Int QFILTER_MIN_READ_FRAC | |||||
Int MIN_READ_LENGTH | |||||
String KEEP_RANDOM_BARCODE_STATS | |||||
String MAIN_ORGANISM_GENOME_ID | |||||
String STAR_alignEndsType | |||||
Int STAR_outFilterMatchNmin | |||||
Float STAR_outFilterMatchNminOverLread | |||||
Int STAR_outFilterMismatchNmax | |||||
String ENDOGENOUS_LIB_PRIORITY | |||||
String JAVA_RAM | |||||
String docker | |||||
String cluster_config | |||||
String disk_size | |||||
call Core.Core as Core { | |||||
input: | |||||
sample_id=sample_id, | |||||
INPUT_FILE_PATH=INPUT_FILE_PATH, DATABASE_PATH=DATABASE_PATH, | |||||
ADAPTER_SEQ=ADAPTER_SEQ, RANDOM_BARCODE_LENGTH=RANDOM_BARCODE_LENGTH, QFILTER_MIN_QUAL=QFILTER_MIN_QUAL, QFILTER_MIN_READ_FRAC=QFILTER_MIN_READ_FRAC, MIN_READ_LENGTH=MIN_READ_LENGTH, | |||||
KEEP_RANDOM_BARCODE_STATS=KEEP_RANDOM_BARCODE_STATS, MAIN_ORGANISM_GENOME_ID=MAIN_ORGANISM_GENOME_ID, | |||||
STAR_alignEndsType=STAR_alignEndsType, STAR_outFilterMatchNmin=STAR_outFilterMatchNmin, STAR_outFilterMatchNminOverLread=STAR_outFilterMatchNminOverLread, STAR_outFilterMismatchNmax=STAR_outFilterMismatchNmax, | |||||
ENDOGENOUS_LIB_PRIORITY=ENDOGENOUS_LIB_PRIORITY, | |||||
JAVA_RAM=JAVA_RAM, | |||||
docker=docker, cluster_config=cluster_config, disk_size=disk_size | |||||
} | |||||
} |
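A minimal sketch of how the rendered workflow and inputs might be launched with Cromwell. The JAR, config, and file names below are placeholders, and a backend able to reach the OSS paths and the BCS cluster is assumed to already be configured in cromwell.conf.

# placeholder file names; requires a Cromwell backend configured for OSS/BCS in cromwell.conf
java -Dconfig.file=cromwell.conf -jar cromwell.jar run workflow.wdl -i inputs.json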