task fastp {
    # Runs /opt/conda/bin/fastp on a pair of FASTQ files and exposes the two
    # output FASTQ files plus the JSON and HTML reports as task outputs.

    # I/O options
    File in1
    File in2
    String sample_id
    Boolean? phred64 = false
    Boolean? fix_mgi_id = false
    String? adapter_sequence
    String? adapter_sequence_r2
    # Specify how many reads/pairs are to be processed. Default 0 means process all reads.
    Int? reads_to_process

    # Reporting options
    String json = sample_id+"fastp.json"
    String html = sample_id+"fastp.html"
    # The single quotes are part of the value so the title reaches the shell as one word.
    String report_title = "\'fastp report\'"

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    # Names of the output FASTQ files written by fastp
    String out1_name = sample_id+'_clean_1.fastq'
    String out2_name = sample_id+'_clean_2.fastq'

    command <<<
        set -exo pipefail

        # Basic command followed by the optional flags.
        # IMPORTANT: do not place comment lines between the continued lines
        # below. A "#" line inside a backslash continuation ends the command,
        # and every option after it would be run as a separate shell command
        # instead of being passed to fastp.
        # Optional flags expand to an empty string when the corresponding
        # input is unset (or false for the Boolean switches).
        /opt/conda/bin/fastp \
            --in1 ${in1} \
            --in2 ${in2} \
            --out1 ${out1_name} \
            --out2 ${out2_name} \
            --json ${json} \
            --html ${html} \
            --report_title ${report_title} \
            ${ true="--phred64 " false="" phred64 } \
            ${ "--reads_to_process " + reads_to_process } \
            ${ true="--fix_mgi_id " false="" fix_mgi_id } \
            ${ "--adapter_sequence " + adapter_sequence } \
            ${ "--adapter_sequence_r2 " + adapter_sequence_r2 }
    >>>

    # cluster / systemDisk / dataDisk are backend-specific runtime attributes.
    runtime {
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out1 = out1_name
        File out2 = out2_name
        File json_report = json
        File html_report = html
    }
}
task SentieonFastqToBam {
    # Aligns a FASTQ pair with "sentieon bwa mem", sorts the alignments,
    # collects QC metrics, and runs LocusCollector + Dedup to produce
    # <sample_id>.dedup.bam and its index.

    # Tool input files and parameters
    File fastq1
    File fastq2
    String sample_id
    String Seq_platform
    # File name of the reference FASTA inside ref_fasta_dir
    String ref_fasta
    File ref_fasta_dir
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    ## Extra driver parameters
    String qc_driver_args = ""
    String lc_driver_args = "--traverse_param=200000/10000"
    String dedup_driver_args = "--traverse_param=200000/10000"

    ## Extra algo parameters
    String bwa_args = "-Y -M"
    String bwa_chunk_size = "100000000"
    String lc_args = ""
    String bam_option = "--bam_compression 1"

    # Names of the final deduplicated BAM and its index
    String out_bam = sample_id + ".dedup.bam"
    String out_bai = sample_id + ".dedup.bam.bai"

    # Tool command
    command <<<
        set -exo pipefail
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        sentieon bwa mem \
            -R "@RG\tID:${sample_id}\tSM:${sample_id}\tPL:${Seq_platform}" \
            ${bwa_args} \
            -K ${bwa_chunk_size} \
            -t $nt \
            ${ref_fasta_dir}/${ref_fasta} \
            ${fastq1} ${fastq2} \
            | sentieon util sort \
                ${bam_option} \
                -i - \
                -r ${ref_fasta_dir}/${ref_fasta} \
                -t $nt \
                -o ${sample_id}.sorted.bam \
                --sam2bam
        ls ./

        sentieon driver \
            -r ${ref_fasta_dir}/${ref_fasta} \
            -t $nt \
            -i ${sample_id}.sorted.bam \
            ${qc_driver_args} \
            --algo MeanQualityByCycle ${sample_id}.mq_metrics.txt \
            --algo QualDistribution ${sample_id}.qd_metrics.txt \
            --algo GCBias --summary ${sample_id}.gc_summary_metrics.txt ${sample_id}.gc_metrics.txt \
            --algo AlignmentStat ${sample_id}.aln_metrics.txt \
            --algo InsertSizeMetricAlgo ${sample_id}.is_metrics.txt
        ls ./

        sentieon driver \
            -r ${ref_fasta_dir}/${ref_fasta} \
            -t $nt \
            -i ${sample_id}.sorted.bam \
            ${lc_driver_args} \
            --algo LocusCollector \
            ${lc_args} \
            ${sample_id}.score.txt.gz
        ls ./

        sentieon driver \
            -r ${ref_fasta_dir}/${ref_fasta} \
            -t $nt \
            -i ${sample_id}.sorted.bam \
            ${dedup_driver_args} \
            --algo Dedup \
            --score_info ${sample_id}.score.txt.gz \
            --metrics ${sample_id}.dedup_metrics.txt \
            ${bam_option} ${out_bam}
        ls ./
    >>>

    # cluster / systemDisk / dataDisk are backend-specific runtime attributes.
    runtime {
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    # Tool outputs
    output {
        File deduped_bam = out_bam
        File deduped_bam_bai = out_bai
        # Every *_metrics.txt file written in the working directory
        Array[File] qc_metrics = glob("*_metrics.txt")
    }
}
task manta_calling {
    # Configures and runs Manta 1.6.0 on a tumor/normal BAM pair, then packs
    # the whole run directory into a tar archive.

    # Input alignments; the index files are declared as inputs but are not
    # referenced in the command.
    File tumor_bam
    File tumor_bam_bai
    File normal_bam
    File normal_bam_bai

    # File name of the reference FASTA inside ref_dir
    String ref_fasta
    File ref_dir
    String sample_id

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    # Manta run directory; also the base name of the output archive
    String out_dir = sample_id + "_result"

    command <<<
        set -exo pipefail
        nt=$(nproc)

        # Step 1: generate runWorkflow.py inside the run directory
        /home/biosoft/manta-1.6.0.centos6_x86_64/bin/configManta.py \
            --normalBam ${normal_bam} \
            --tumorBam ${tumor_bam} \
            --referenceFasta ${ref_dir}/${ref_fasta} \
            --runDir ${out_dir}
        ls ${out_dir}

        # Step 2: execute the workflow locally with one job per available core
        python2.7 ${out_dir}/runWorkflow.py -m local -j $nt
        ls ${out_dir}

        # Step 3: archive the full run directory
        tar cvf ${out_dir}.tar ${out_dir}
    >>>

    # cluster / systemDisk / dataDisk are backend-specific runtime attributes.
    runtime {
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_file = out_dir + ".tar"
        File manta_indel_vcf = out_dir + "/results/variants/candidateSmallIndels.vcf.gz"
        File manta_indel_vcf_index = out_dir + "/results/variants/candidateSmallIndels.vcf.gz.tbi"
    }
}
task qualimap {
    # Runs "qualimap bamqc" on a BAM file with a GFF annotation and archives
    # the resulting report directory.

    String sample_id
    File bam_file
    # Declared as an input but not referenced in the command.
    File bam_bai
    File annot_gff

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    # Value passed to qualimap's --java-mem-size option. Defaults to the
    # previously hard-coded 32G so existing callers are unaffected.
    String java_mem_size = "32G"

    # Report directory; also the base name of the output archive
    String out_dir = sample_id+'_BamQC'

    command <<<
        # Single "set" call: a trailing "-o" without an option name makes bash
        # print its option table to stdout instead of setting anything.
        set -exo pipefail
        nt=$(nproc)

        /opt/qualimap/qualimap bamqc \
            -bam ${bam_file} \
            -gff ${annot_gff} \
            -outformat PDF:HTML \
            -nt $nt \
            -outdir ${out_dir} \
            --java-mem-size=${java_mem_size}

        # NOTE(review): the archive is gzip-compressed (-z) although it is named
        # ".tar"; the name is kept so the declared output path does not change.
        tar -zcvf ${out_dir}.tar ${out_dir}
    >>>

    # cluster / systemDisk / dataDisk are backend-specific runtime attributes.
    runtime {
        docker:docker
        cluster:cluster_config
        systemDisk:"cloud_ssd 40"
        dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_file = "${out_dir}.tar"
    }
}