- task bcftools {
-
- File ref_dir
- String fasta
- File vcf
- String basename = basename(vcf,".vcf")
- String docker
- String cluster_config
- String disk_size
-
- command <<<
- set -o pipefail
- set -e
- nt=$(nproc)
-
- # Full normalization would also left-align indels against the reference:
- # bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
- # Currently only multiallelic sites are split:
- bcftools norm -m -both ${vcf} -o ${basename}.norm.vcf
- >>>
-
- runtime {
- docker: docker
- cluster: cluster_config
- systemDisk: "cloud_ssd 40"
- dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output {
- File norm_vcf = "${basename}.norm.vcf"
- }
- }
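-
- # Example (hypothetical input): for vcf = "NA12878.vcf", the command renders as
- #   bcftools norm -m -both NA12878.vcf -o NA12878.norm.vcf
- # and the norm_vcf output resolves to "NA12878.norm.vcf".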
- task Sentieon_BQSR{
- File ref_dir
- File dbsnp_dir
- File dbmills_dir
-
- String sample_id
- String ref_fasta
- String dbsnp
- String db_mills
-
- File deduped_bam
- File deduped_bam_index
-
- # execution environment
- String docker
- String cluster_config
- String disk_size
-
- String SENTIEON_LICENSE
-
- command <<<
- set -o pipefail
- set -ex
- export SENTIEON_LICENSE=${SENTIEON_LICENSE}
-
- nt=$(nproc)
-
-
- # 1. Compute the recalibration table from known sites
- sentieon driver -t $nt \
- -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
- --algo QualCal \
- -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
- ${sample_id}_recal_data.table
-
- # 2. Apply the table, compute the post-recalibration table, and write the recalibrated BAM
- sentieon driver -t $nt \
- -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
- -q ${sample_id}_recal_data.table \
- --algo QualCal \
- -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
- ${sample_id}_recal_data.table.post \
- --algo ReadWriter ${sample_id}.sorted.deduped.recaled.bam
-
- # 3. Compare before/after tables and plot the BQSR report
- sentieon driver -t $nt --algo QualCal \
- --plot --before ${sample_id}_recal_data.table --after ${sample_id}_recal_data.table.post ${sample_id}_recal_data.csv
-
- sentieon plot bqsr -o ${sample_id}_bqsrreport.pdf ${sample_id}_recal_data.csv
- >>>
-
- runtime{
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 250"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output{
- File recal_table = "${sample_id}_recal_data.table"
- File recal_post = "${sample_id}_recal_data.table.post"
- File recaled_bam = "${sample_id}.sorted.deduped.recaled.bam"
- File recaled_bam_index = "${sample_id}.sorted.deduped.recaled.bam.bai"
- File recal_csv = "${sample_id}_recal_data.csv"
- File bqsrreport_pdf = "${sample_id}_bqsrreport.pdf"
- }
- }
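-
- # Typical wiring (hypothetical workflow context): deduped_bam/deduped_bam_index
- # come from SentieonFastqToBam below, e.g.
- #   deduped_bam = SentieonFastqToBam.deduped_bam,
- #   deduped_bam_index = SentieonFastqToBam.deduped_bam_bai
- # A complete call appears in the workflow sketch at the end of this file.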
-
-
- task fastp {
-
- # I/O options
- File in1
- File in2
- String sample_id
-
- Boolean? phred64 = false
- Boolean? fix_mgi_id = false
-
- String? adapter_sequence
- String? adapter_sequence_r2
-
- Int? reads_to_process # how many reads/pairs to process; fastp's default (0) processes all reads
-
- # reporting options
- String json = sample_id + ".fastp.json"
- String html = sample_id + ".fastp.html"
- String report_title = "'fastp report'"
-
- # execution environment
- String docker
- String cluster_config
- String disk_size
-
- String out1_name = sample_id+'_clean_1.fastq'
- String out2_name = sample_id+'_clean_2.fastq'
-
- command <<<
- # Optional flags below expand to nothing when the corresponding input is unset,
- # so they can stay inline in a single command.
- /opt/conda/bin/fastp \
- --in1 ${in1} \
- --in2 ${in2} \
- --out1 ${out1_name} \
- --out2 ${out2_name} \
- --json ${json} \
- --html ${html} \
- --report_title ${report_title} \
- ${ true="--phred64 " false="" phred64 } \
- ${ "--reads_to_process " + reads_to_process } \
- ${ true="--fix_mgi_id " false="" fix_mgi_id } \
- ${ "--adapter_sequence " + adapter_sequence } \
- ${ "--adapter_sequence_r2 " + adapter_sequence_r2 }
- >>>
-
- runtime {
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output {
- File out1 = out1_name
- File out2 = out2_name
- File json_report = json
- File html_report = html
- }
-
- }
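-
- # Example rendering (hypothetical values): with in1 = "R1.fq", in2 = "R2.fq",
- # sample_id = "S1", phred64 = true and reads_to_process = 1000000, the command
- # above expands to:
- #   /opt/conda/bin/fastp --in1 R1.fq --in2 R2.fq \
- #     --out1 S1_clean_1.fastq --out2 S1_clean_2.fastq \
- #     --json S1.fastp.json --html S1.fastp.html --report_title 'fastp report' \
- #     --phred64 --reads_to_process 1000000
- # Optionals left unset contribute nothing to the command line.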
-
- task SentieonFastqToBam {
- # Tool inputs and parameters
- File fastq1
- File fastq2
- String sample_id
- String Seq_platform
- String ref_fasta
- File ref_fasta_dir
- String SENTIEON_LICENSE
-
- String docker
- String cluster_config
- String disk_size
-
- ## Extra driver parameters
- String qc_driver_args = ""
- String lc_driver_args = "--traverse_param=200000/10000"
- String dedup_driver_args = "--traverse_param=200000/10000"
- ## Extra algo parameters
- String bwa_args = "-Y -M"
- String bwa_chunk_size = "100000000"
- String lc_args = ""
- String bam_option = "--bam_compression 1"
-
- String out_bam = sample_id + ".dedup.bam"
- String out_bai = sample_id + ".dedup.bam.bai"
-
- # Tool command
- command <<<
- set -exo pipefail
- export SENTIEON_LICENSE=${SENTIEON_LICENSE}
- nt=$(nproc)
-
- # 1. Align with BWA-MEM and coordinate-sort in a single pipe
- sentieon bwa mem -R "@RG\tID:${sample_id}\tSM:${sample_id}\tPL:${Seq_platform}" ${bwa_args} -K ${bwa_chunk_size} -t $nt ${ref_fasta_dir}/${ref_fasta} ${fastq1} ${fastq2} \
- | sentieon util sort ${bam_option} -i - -r ${ref_fasta_dir}/${ref_fasta} -t $nt -o ${sample_id}.sorted.bam --sam2bam
-
- ls ./
-
- # 2. Collect alignment QC metrics
- sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${qc_driver_args} \
- --algo MeanQualityByCycle ${sample_id}.mq_metrics.txt \
- --algo QualDistribution ${sample_id}.qd_metrics.txt \
- --algo GCBias --summary ${sample_id}.gc_summary_metrics.txt ${sample_id}.gc_metrics.txt \
- --algo AlignmentStat ${sample_id}.aln_metrics.txt \
- --algo InsertSizeMetricAlgo ${sample_id}.is_metrics.txt
-
- ls ./
-
- # 3. Score duplicate reads
- sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${lc_driver_args} \
- --algo LocusCollector \
- ${lc_args} \
- ${sample_id}.score.txt.gz
-
- ls ./
-
- # 4. Mark duplicates and write the deduplicated BAM
- sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${dedup_driver_args} \
- --algo Dedup \
- --score_info ${sample_id}.score.txt.gz \
- --metrics ${sample_id}.dedup_metrics.txt \
- ${bam_option} ${out_bam}
- ls ./
-
- >>>
-
-
- runtime {
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- # Tool outputs
- output {
- File deduped_bam = out_bam
- File deduped_bam_bai = out_bai
- Array[File] qc_metrics = glob("*_metrics.txt")
- }
-
- }
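-
- # Example (hypothetical values): with sample_id = "S1" and Seq_platform = "ILLUMINA",
- # reads carry the read group
- #   @RG  ID:S1  SM:S1  PL:ILLUMINA
- # The SM field is what the somatic callers' tumor_sample/normal_sample names must match.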
- task manta_calling{
- File tumor_bam
- File tumor_bam_bai
- File normal_bam
- File normal_bam_bai
- String ref_fasta
- File ref_dir
- String sample_id
-
- String docker
- String cluster_config
- String disk_size
-
-
- String out_dir = "${sample_id}_result"
- command <<<
- set -exo pipefail
- nt=$(nproc)
- /home/biosoft/manta-1.6.0.centos6_x86_64/bin/configManta.py \
- --normalBam ${normal_bam} \
- --tumorBam ${tumor_bam} \
- --referenceFasta ${ref_dir}/${ref_fasta} \
- --runDir ${out_dir}
-
- ls ${out_dir}
-
- python2.7 ${out_dir}/runWorkflow.py -m local -j $nt
-
- ls ${out_dir}
-
- tar cvf ${out_dir}.tar ${out_dir}
- >>>
-
- runtime{
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output{
- File out_file = "${out_dir}.tar"
- File manta_indel_vcf = "${out_dir}/results/variants/candidateSmallIndels.vcf.gz"
- File manta_indel_vcf_index = "${out_dir}/results/variants/candidateSmallIndels.vcf.gz.tbi"
- }
- }
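-
- # The candidateSmallIndels VCF produced here seeds Strelka; see the chained call
- # sketch after strelka_calling below.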
-
- task qualimap{
- String sample_id
- File bam_file
- File bam_bai
- File annot_gff
-
- String docker
- String cluster_config
- String disk_size
-
- String out_dir = sample_id+'_BamQC'
-
- command <<<
- set -o pipefail
- set -ex
- nt=$(nproc)
- /opt/qualimap/qualimap bamqc -bam ${bam_file} -gff ${annot_gff} -outformat PDF:HTML -nt $nt -outdir ${out_dir} --java-mem-size=32G
- tar -cvf ${out_dir}.tar ${out_dir}
- >>>
-
- runtime{
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output{
- File out_file = "${out_dir}.tar"
- }
- }
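-
- # The report lands in a plain tar archive; e.g. for sample_id = "S1" (hypothetical),
- #   tar -xf S1_BamQC.tar
- # unpacks the PDF/HTML output under S1_BamQC/.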
-
- task strelka_calling{
- File tumor_bam
- File tumor_bam_bai
- File normal_bam
- File normal_bam_bai
- String ref_fasta
- File ref_dir
- String sample_id
- File manta_indel_vcf
- File manta_indel_vcf_index
-
- String docker
- String cluster_config
- String disk_size
-
-
- String out_dir = "${sample_id}_result"
- command <<<
- set -exo pipefail
- nt=$(nproc)
- /home/biosoft/strelka-2.9.10.centos6_x86_64/bin/configureStrelkaSomaticWorkflow.py \
- --normalBam ${normal_bam} \
- --tumorBam ${tumor_bam} \
- --referenceFasta ${ref_dir}/${ref_fasta} \
- --indelCandidates ${manta_indel_vcf} \
- --runDir ${out_dir}
-
- ls ${out_dir}
-
- python2.7 ${out_dir}/runWorkflow.py -m local -j $nt
-
- ls ${out_dir}
-
- tar cvf ${out_dir}.tar ${out_dir}
- >>>
-
- runtime{
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output{
- File out_file = "${out_dir}.tar"
- }
- }
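-
- # Chained call sketch (hypothetical workflow context; t_bam/t_bai/n_bam/n_bai and
- # the docker names are assumed workflow-level declarations):
- #   call manta_calling { input:
- #     tumor_bam = t_bam, tumor_bam_bai = t_bai,
- #     normal_bam = n_bam, normal_bam_bai = n_bai,
- #     ref_fasta = ref_fasta, ref_dir = ref_dir, sample_id = sample_id,
- #     docker = manta_docker, cluster_config = cluster_config, disk_size = disk_size }
- #   call strelka_calling { input:
- #     tumor_bam = t_bam, tumor_bam_bai = t_bai,
- #     normal_bam = n_bam, normal_bam_bai = n_bai,
- #     ref_fasta = ref_fasta, ref_dir = ref_dir, sample_id = sample_id,
- #     manta_indel_vcf = manta_calling.manta_indel_vcf,
- #     manta_indel_vcf_index = manta_calling.manta_indel_vcf_index,
- #     docker = strelka_docker, cluster_config = cluster_config, disk_size = disk_size }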
- task sentieon_TNscope{
- String sample_id
- File tumor_bam
- File tumor_bam_bai
- # both BAMs are required: the command below always supplies tumor and normal
- File normal_bam
- File normal_bam_bai
- String tumor_name
- String normal_name
- File tumor_recall_data
- File normal_recall_data
-
- File ref_dir
- String ref_fasta
- File dbsnp_dir
- String dbsnp
-
- # execution environment
- String docker
- String cluster_config
- String disk_size
- String SENTIEON_LICENSE
-
-
- command <<<
- set -o pipefail
- set -ex
- export SENTIEON_LICENSE=${SENTIEON_LICENSE}
- nt=$(nproc)
-
- # Paired somatic calling; -q applies each sample's recalibration table on the fly
- sentieon driver -t $nt -r ${ref_dir}/${ref_fasta} \
- -i ${tumor_bam} -q ${tumor_recall_data} \
- -i ${normal_bam} -q ${normal_recall_data} \
- --algo TNscope --tumor_sample ${tumor_name} --normal_sample ${normal_name} \
- --disable_detector sv --trim_soft_clip \
- --dbsnp ${dbsnp_dir}/${dbsnp} ${sample_id}.TNscope.vcf || { echo "TNscope failed"; exit 1; }
-
- ls ./
-
- >>>
-
- runtime{
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output{
- File vcf = "${sample_id}.TNscope.vcf"
- File vcf_index = "${sample_id}.TNscope.vcf.idx"
- }
- }
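-
- # Example (hypothetical values): with tumor_name = "S1_T" and normal_name = "S1_N",
- # the driver receives
- #   --algo TNscope --tumor_sample S1_T --normal_sample S1_N
- # where both names must match the SM read-group fields set at alignment time.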
- task sentieon_TNseq{
- String sample_id
- File tumor_bam
- File tumor_bam_bai
- File? normal_bam
- File? normal_bam_bai
- String tumor_name
- String normal_name
-
- File ref_dir
- String ref_fasta
- File germline_resource
- File germline_resource_tbi
-
- # execution environment
- String docker
- String cluster_config
- String disk_size
- String SENTIEON_LICENSE
-
-
- command <<<
- set -o pipefail
- set -ex
- export SENTIEON_LICENSE=${SENTIEON_LICENSE}
- nt=$(nproc)
-
-
- # Run paired if a normal BAM was provided, otherwise tumor-only
- if [ -n "${normal_bam}" ]; then
- INPUT="-i ${tumor_bam} -i ${normal_bam}"
- SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}"
- else
- INPUT="-i ${tumor_bam}"
- SAMPLE="--tumor_sample ${tumor_name}"
- fi
-
- # Call somatic variants and collect orientation-bias and contamination models in one pass
- sentieon driver -t $nt -r ${ref_dir}/${ref_fasta} \
- $INPUT \
- --algo TNhaplotyper2 $SAMPLE \
- --germline_vcf ${germline_resource} \
- ${sample_id}.TNseq.raw.vcf \
- --algo OrientationBias --tumor_sample ${tumor_name} \
- ${sample_id}.orientation \
- --algo ContaminationModel $SAMPLE \
- --vcf ${germline_resource} \
- --tumor_segments ${sample_id}.contamination.segments \
- ${sample_id}.contamination
-
- # Filter raw calls using the contamination and orientation-bias estimates
- sentieon driver -t $nt \
- -r ${ref_dir}/${ref_fasta} \
- --algo TNfilter $SAMPLE \
- -v ${sample_id}.TNseq.raw.vcf \
- --contamination ${sample_id}.contamination \
- --tumor_segments ${sample_id}.contamination.segments \
- --orientation_priors ${sample_id}.orientation \
- ${sample_id}.bwa_TNseq.vcf
-
- >>>
-
- runtime{
- docker:docker
- cluster:cluster_config
- systemDisk:"cloud_ssd 40"
- dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
- }
-
- output{
- File raw_vcf = "${sample_id}.TNseq.raw.vcf"
- File raw_vcf_index = "${sample_id}.TNseq.raw.vcf.idx"
- File vcf = "${sample_id}.bwa_TNseq.vcf"
- File vcf_index = "${sample_id}.bwa_TNseq.vcf.idx"
- File contamination = "${sample_id}.contamination"
- File contamination_segments = "${sample_id}.contamination.segments"
- File orientation = "${sample_id}.orientation"
- }
- }
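-
- # A minimal end-to-end sketch for a single sample, from raw reads to a
- # recalibrated BAM. The workflow name and wiring are illustrative only, not part
- # of the original pipeline; a single docker string is reused for brevity,
- # although in practice each task would get its own image. The recalibrated BAM
- # and tables then feed the somatic callers above (sentieon_TNscope/sentieon_TNseq).
- workflow single_sample_sketch {
- File fastq1
- File fastq2
- String sample_id
- File ref_dir
- String ref_fasta
- File dbsnp_dir
- String dbsnp
- File dbmills_dir
- String db_mills
- String SENTIEON_LICENSE
- String docker
- String cluster_config
- String disk_size
-
- call fastp { input:
- in1 = fastq1, in2 = fastq2, sample_id = sample_id,
- docker = docker, cluster_config = cluster_config, disk_size = disk_size }
-
- call SentieonFastqToBam { input:
- fastq1 = fastp.out1, fastq2 = fastp.out2, sample_id = sample_id,
- Seq_platform = "ILLUMINA", ref_fasta = ref_fasta, ref_fasta_dir = ref_dir,
- SENTIEON_LICENSE = SENTIEON_LICENSE,
- docker = docker, cluster_config = cluster_config, disk_size = disk_size }
-
- call Sentieon_BQSR { input:
- ref_dir = ref_dir, ref_fasta = ref_fasta, sample_id = sample_id,
- dbsnp_dir = dbsnp_dir, dbsnp = dbsnp, dbmills_dir = dbmills_dir, db_mills = db_mills,
- deduped_bam = SentieonFastqToBam.deduped_bam,
- deduped_bam_index = SentieonFastqToBam.deduped_bam_bai,
- SENTIEON_LICENSE = SENTIEON_LICENSE,
- docker = docker, cluster_config = cluster_config, disk_size = disk_size }
- }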