# Split multiallelic VCF records into separate records with `bcftools norm`.
#
# Inputs:
#   ref_dir, fasta : reference directory and FASTA file name; only referenced by
#                    the commented-out reference-based command kept below.
#   vcf            : input VCF; its basename without ".vcf" names the output.
#   docker, cluster_config, disk_size : execution environment settings.
# Output:
#   norm_vcf       : "<basename>.norm.vcf"
task bcftools {
  File ref_dir
  String fasta
  File vcf
  String basename = basename(vcf,".vcf")
  String docker
  String cluster_config
  String disk_size

  command <<<
    set -eo pipefail
    # bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
    # Split multiallelic sites
    bcftools norm -m -both ${vcf} -o ${basename}.norm.vcf
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File norm_vcf = "${basename}.norm.vcf"
  }
}

# Base quality score recalibration with Sentieon QualCal.
#
# Runs three driver passes (pre-recalibration table, post-recalibration table
# plus recalibrated BAM, before/after CSV) and then renders the PDF report.
#
# Inputs:
#   ref_dir, ref_fasta       : reference directory and FASTA file name.
#   dbsnp_dir, dbsnp         : known-sites directory and file name (first -k).
#   dbmills_dir, db_mills    : known-sites directory and file name (second -k).
#   deduped_bam(+_index)     : input BAM and its index.
#   sample_id                : prefix for every output file.
#   SENTIEON_LICENSE         : exported into the environment for sentieon.
# Outputs: recalibration tables, recalibrated BAM + index, CSV and PDF report.
task Sentieon_BQSR {
  File ref_dir
  File dbsnp_dir
  File dbmills_dir
  String sample_id
  String ref_fasta
  String dbsnp
  String db_mills
  File deduped_bam
  File deduped_bam_index

  # execute env
  String docker
  String cluster_config
  String disk_size
  String SENTIEON_LICENSE

  command <<<
    set -exo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # Pass 1: build the recalibration table from the deduplicated BAM.
    sentieon driver -t $nt \
      -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
      --algo QualCal \
      -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
      ${sample_id}_recal_data.table

    # Pass 2: apply the table (-q), compute the post table, write the BAM.
    sentieon driver -t $nt \
      -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
      -q ${sample_id}_recal_data.table \
      --algo QualCal \
      -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
      ${sample_id}_recal_data.table.post \
      --algo ReadWriter ${sample_id}.sorted.deduped.recaled.bam

    # Pass 3: before/after comparison CSV, then the PDF plot.
    sentieon driver -t $nt --algo QualCal \
      --plot --before ${sample_id}_recal_data.table --after ${sample_id}_recal_data.table.post ${sample_id}_recal_data.csv

    sentieon plot bqsr -o ${sample_id}_bqsrreport.pdf ${sample_id}_recal_data.csv
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 250"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File recal_table = "${sample_id}_recal_data.table"
    File recal_post = "${sample_id}_recal_data.table.post"
    File recaled_bam = "${sample_id}.sorted.deduped.recaled.bam"
    File recaled_bam_index = "${sample_id}.sorted.deduped.recaled.bam.bai"
    File recal_csv = "${sample_id}_recal_data.csv"
    File bqsrreport_pdf = "${sample_id}_bqsrreport.pdf"
  }
}

# Paired-end read processing with fastp.
#
# Inputs:
#   in1, in2            : paired FASTQ files.
#   sample_id           : prefix for the cleaned FASTQ and report file names.
#   phred64, fix_mgi_id : optional switches, default false.
#   adapter_sequence, adapter_sequence_r2, reads_to_process : optional values;
#                         the corresponding flag is emitted only when set.
# Outputs: cleaned FASTQ pair plus the JSON and HTML reports.
task fastp {
  # I/O options
  File in1
  File in2
  String sample_id

  Boolean? phred64 = false
  Boolean? fix_mgi_id = false

  String? adapter_sequence
  String? adapter_sequence_r2

  Int? reads_to_process # specify how many reads/pairs to be processed. Default 0 means process all reads.

  # reporting options
  String json = sample_id+"fastp.json"
  String html = sample_id+"fastp.html"
  String report_title = "\'fastp report\'"

  # execute env
  String docker
  String cluster_config
  String disk_size

  String out1_name = sample_id+'_clean_1.fastq'
  String out2_name = sample_id+'_clean_2.fastq'

  command <<<
    set -eo pipefail
    # Basic command followed by the optional flags. No comment may appear
    # between the continued lines: a comment there would end the fastp command
    # early and the optional flags would never reach it.
    /opt/conda/bin/fastp \
      --in1 ${in1} \
      --in2 ${in2} \
      --out1 ${out1_name} \
      --out2 ${out2_name} \
      --json ${json} \
      --html ${html} \
      --report_title ${report_title} \
      ${ true="--phred64 " false="" phred64 } \
      ${ "--reads_to_process " + reads_to_process } \
      ${ true="--fix_mgi_id " false="" fix_mgi_id } \
      ${ "--adapter_sequence " + adapter_sequence } \
      ${ "--adapter_sequence_r2 " + adapter_sequence_r2 }
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out1 = out1_name
    File out2 = out2_name
    File json_report = json
    File html_report = html
  }
}

# Align a FASTQ pair with Sentieon BWA-MEM, collect QC metrics and remove
# duplicates.
#
# Steps: bwa mem piped into util sort, a metrics driver pass, LocusCollector,
# then Dedup. `ls ./` after each step lists the working directory in the log.
#
# Outputs: deduplicated BAM + index and every "*_metrics.txt" file produced.
task SentieonFastqToBam {
  # Tool input files and parameters
  File fastq1
  File fastq2
  String sample_id
  String Seq_platform
  String ref_fasta
  File ref_fasta_dir
  String SENTIEON_LICENSE
  String docker
  String cluster_config
  String disk_size

  ## Extra driver parameters
  String qc_driver_args = ""
  String lc_driver_args = "--traverse_param=200000/10000"
  String dedup_driver_args = "--traverse_param=200000/10000"

  ## Extra algo parameters
  String bwa_args = "-Y -M"
  String bwa_chunk_size = "100000000"
  String lc_args = ""
  String bam_option = "--bam_compression 1"

  String out_bam = sample_id + ".dedup.bam"
  String out_bai = sample_id + ".dedup.bam.bai"

  # Tool command
  command <<<
    set -exo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # Alignment streamed straight into the coordinate sort.
    sentieon bwa mem -R "@RG\tID:${sample_id}\tSM:${sample_id}\tPL:${Seq_platform}" ${bwa_args} -K ${bwa_chunk_size} -t $nt ${ref_fasta_dir}/${ref_fasta} ${fastq1} ${fastq2} \
      | sentieon util sort ${bam_option} -i - -r ${ref_fasta_dir}/${ref_fasta} -t $nt -o ${sample_id}.sorted.bam --sam2bam
    ls ./

    # QC metrics on the sorted BAM.
    sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${qc_driver_args} \
      --algo MeanQualityByCycle ${sample_id}.mq_metrics.txt \
      --algo QualDistribution ${sample_id}.qd_metrics.txt \
      --algo GCBias --summary ${sample_id}.gc_summary_metrics.txt ${sample_id}.gc_metrics.txt \
      --algo AlignmentStat ${sample_id}.aln_metrics.txt \
      --algo InsertSizeMetricAlgo ${sample_id}.is_metrics.txt
    ls ./

    # Score file consumed by the Dedup step below.
    sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${lc_driver_args} \
      --algo LocusCollector \
      ${lc_args} \
      ${sample_id}.score.txt.gz
    ls ./

    sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${dedup_driver_args} \
      --algo Dedup \
      --score_info ${sample_id}.score.txt.gz \
      --metrics ${sample_id}.dedup_metrics.txt \
      ${bam_option} ${out_bam}
    ls ./
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  # Tool outputs
  output {
    File deduped_bam = out_bam
    File deduped_bam_bai = out_bai
    Array[File] qc_metrics = glob("*_metrics.txt")
  }
}

# Configure and run Manta on a tumor/normal BAM pair.
#
# The run directory "<sample_id>_result" is archived as a tar file, and the
# candidate small-indel VCF with its index is exposed as separate outputs.
# The *_bai inputs are not named in the command; they are declared as inputs
# alongside their BAM files.
task manta_calling {
  File tumor_bam
  File tumor_bam_bai
  File normal_bam
  File normal_bam_bai
  String ref_fasta
  File ref_dir
  String sample_id
  String docker
  String cluster_config
  String disk_size

  String out_dir = "${sample_id}_result"

  command <<<
    set -exo pipefail
    nt=$(nproc)

    /home/biosoft/manta-1.6.0.centos6_x86_64/bin/configManta.py \
      --normalBam ${normal_bam} \
      --tumorBam ${tumor_bam} \
      --referenceFasta ${ref_dir}/${ref_fasta} \
      --runDir ${out_dir}
    ls ${out_dir}

    python2.7 ${out_dir}/runWorkflow.py -m local -j $nt
    ls ${out_dir}

    tar cvf ${out_dir}.tar ${out_dir}
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_file = "${out_dir}.tar"
    File manta_indel_vcf = "${out_dir}/results/variants/candidateSmallIndels.vcf.gz"
    File manta_indel_vcf_index = "${out_dir}/results/variants/candidateSmallIndels.vcf.gz.tbi"
  }
}

# Run Qualimap bamqc on a BAM restricted to the regions in `annot_gff`, then
# archive the report directory "<sample_id>_BamQC".
task qualimap {
  String sample_id
  File bam_file
  File bam_bai
  File annot_gff
  String docker
  String cluster_config
  String disk_size

  String out_dir = sample_id+'_BamQC'

  command <<<
    set -exo pipefail
    nt=$(nproc)

    /opt/qualimap/qualimap bamqc -bam ${bam_file} -gff ${annot_gff} -outformat PDF:HTML -nt $nt -outdir ${out_dir} --java-mem-size=32G

    # NOTE: -z gzip-compresses the archive even though it is named ".tar";
    # the name is kept because the task output is declared under that name.
    tar -zcvf ${out_dir}.tar ${out_dir}
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_file = "${out_dir}.tar"
  }
}

# Configure and run the Strelka somatic workflow on a tumor/normal BAM pair,
# passing `manta_indel_vcf` as the indel candidates. The run directory
# "<sample_id>_result" is archived as a tar file.
task strelka_calling {
  File tumor_bam
  File tumor_bam_bai
  File normal_bam
  File normal_bam_bai
  String ref_fasta
  File ref_dir
  String sample_id
  File manta_indel_vcf
  File manta_indel_vcf_index
  String docker
  String cluster_config
  String disk_size

  String out_dir = "${sample_id}_result"

  command <<<
    set -exo pipefail
    nt=$(nproc)

    /home/biosoft/strelka-2.9.10.centos6_x86_64/bin/configureStrelkaSomaticWorkflow.py \
      --normalBam ${normal_bam} \
      --tumorBam ${tumor_bam} \
      --referenceFasta ${ref_dir}/${ref_fasta} \
      --indelCandidates ${manta_indel_vcf} \
      --runDir ${out_dir}
    ls ${out_dir}

    python2.7 ${out_dir}/runWorkflow.py -m local -j $nt
    ls ${out_dir}

    tar cvf ${out_dir}.tar ${out_dir}
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_file = "${out_dir}.tar"
  }
}

# Somatic variant calling with Sentieon TNscope.
#
# `normal_bam` is optional. When it is supplied, the normal BAM, its
# recalibration table and --normal_sample are added to the command; when it is
# absent, only the tumor arguments are passed. `normal_recall_data` and
# `normal_name` are only used when `normal_bam` is supplied.
#
# Outputs: "<sample_id>.TNscope.vcf" and its ".idx" index.
task sentieon_TNscope {
  String sample_id
  File tumor_bam
  File tumor_bam_bai
  File? normal_bam
  File? normal_bam_bai
  String tumor_name
  String normal_name
  File tumor_recall_data
  File normal_recall_data

  File ref_dir
  String ref_fasta
  File dbsnp_dir
  String dbsnp

  # execute env
  String docker
  String cluster_config
  String disk_size
  String SENTIEON_LICENSE

  command <<<
    set -exo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # Add the normal-sample arguments only when a normal BAM was provided; an
    # unset optional renders as an empty string, which would otherwise leave a
    # dangling "-i" on the command line.
    NORMAL_INPUT=""
    SAMPLE="--tumor_sample ${tumor_name}"
    if [ -n "${normal_bam}" ]; then
      NORMAL_INPUT="-i ${normal_bam} -q ${normal_recall_data}"
      SAMPLE="$SAMPLE --normal_sample ${normal_name}"
    fi

    sentieon driver -t $nt -r ${ref_dir}/${ref_fasta} \
      -i ${tumor_bam} -q ${tumor_recall_data} \
      $NORMAL_INPUT \
      --algo TNscope $SAMPLE \
      --disable_detector sv --trim_soft_clip \
      --dbsnp ${dbsnp_dir}/${dbsnp} ${sample_id}.TNscope.vcf || { echo "TNscope failed"; exit 1; }

    ls ./
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File vcf = "${sample_id}.TNscope.vcf"
    File vcf_index = "${sample_id}.TNscope.vcf.idx"
  }
}

# Somatic variant calling with Sentieon TNseq (TNhaplotyper2 + TNfilter).
#
# `normal_bam` is optional: with it the run is tumor/normal, without it the
# run is tumor-only. The first driver call produces the raw VCF, orientation
# data and contamination model; the second applies TNfilter.
#
# Outputs: raw and filtered VCFs with ".idx" indexes, plus the contamination,
# contamination-segments and orientation files.
task sentieon_TNseq {
  String sample_id
  File tumor_bam
  File tumor_bam_bai
  File? normal_bam
  File? normal_bam_bai
  String tumor_name
  String normal_name

  File ref_dir
  String ref_fasta
  File germline_resource
  File germline_resource_tbi

  # execute env
  String docker
  String cluster_config
  String disk_size
  String SENTIEON_LICENSE

  command <<<
    set -exo pipefail
    export SENTIEON_LICENSE=${SENTIEON_LICENSE}
    nt=$(nproc)

    # The spaces inside the brackets are required by the shell; -n is true only
    # when a normal BAM path was rendered into the script.
    if [ -n "${normal_bam}" ]; then
      INPUT="-i ${tumor_bam} -i ${normal_bam}"
      SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}"
    else
      INPUT="-i ${tumor_bam}"
      SAMPLE="--tumor_sample ${tumor_name}"
    fi

    sentieon driver -t $nt -r ${ref_dir}/${ref_fasta} \
      $INPUT \
      --algo TNhaplotyper2 $SAMPLE \
      --germline_vcf ${germline_resource} \
      ${sample_id}.TNseq.raw.vcf \
      --algo OrientationBias --tumor_sample ${tumor_name} \
      ${sample_id}.orientation \
      --algo ContaminationModel $SAMPLE \
      --vcf ${germline_resource} \
      --tumor_segments ${sample_id}.contamination.segments \
      ${sample_id}.contamination

    sentieon driver -t $nt \
      -r ${ref_dir}/${ref_fasta} \
      --algo TNfilter $SAMPLE \
      -v ${sample_id}.TNseq.raw.vcf \
      --contamination ${sample_id}.contamination \
      --tumor_segments ${sample_id}.contamination.segments \
      --orientation_priors ${sample_id}.orientation \
      ${sample_id}.bwa_TNseq.vcf
  >>>

  runtime {
    docker:docker
    cluster:cluster_config
    systemDisk:"cloud_ssd 40"
    dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File raw_vcf = "${sample_id}.TNseq.raw.vcf"
    File raw_vcf_index = "${sample_id}.TNseq.raw.vcf.idx"
    File vcf = "${sample_id}.bwa_TNseq.vcf"
    File vcf_index = "${sample_id}.bwa_TNseq.vcf.idx"
    File contamination = "${sample_id}.contamination"
    File contamination_segments = "${sample_id}.contamination.segments"
    File orientation = "${sample_id}.orientation"
  }
}