meng 2 лет назад
Родитель
Сommit
63debca463
1 измененных файлов: 508 добавлений и 0 удалений
  1. +508
    -0
      tasks

+ 508
- 0
tasks Просмотреть файл

@@ -0,0 +1,508 @@
# Splits multiallelic VCF records into separate records with
# `bcftools norm -m -both` and writes the result as <basename>.norm.vcf.
#
# Inputs:
#   ref_dir, fasta   reference directory / FASTA file name; only referenced by
#                    the commented-out variant of the command below
#   vcf              input VCF; its file name minus ".vcf" is the output prefix
#   docker, cluster_config, disk_size
#                    execution environment settings passed to the runtime block
# Output:
#   norm_vcf         the split VCF
task bcftools {
    File ref_dir
    String fasta
    File vcf
    String basename = basename(vcf, ".vcf")

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -eo pipefail
        nt=$(nproc)
        # bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
        # Split multiallelic sites
        bcftools norm -m -both ${vcf} -o ${basename}.norm.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        File norm_vcf = basename + ".norm.vcf"
    }
}
# Base quality score recalibration with Sentieon.
#
# Steps performed by the command:
#   1. QualCal on the deduplicated BAM           -> <sample_id>_recal_data.table
#   2. QualCal with the table applied (-q) plus ReadWriter
#                                                -> <sample_id>_recal_data.table.post
#                                                   <sample_id>.sorted.deduped.recaled.bam
#   3. QualCal --plot comparing before/after     -> <sample_id>_recal_data.csv
#   4. sentieon plot bqsr                        -> <sample_id>_bqsrreport.pdf
#
# Inputs:
#   ref_dir / ref_fasta        reference directory and FASTA file name inside it
#   dbsnp_dir / dbsnp          known-sites directory and file name (passed via -k)
#   dbmills_dir / db_mills     second known-sites directory and file name (passed via -k)
#   deduped_bam(_index)        input BAM; the index is not named in the command and is
#                              presumably declared so it is localized next to the BAM
#   SENTIEON_LICENSE           exported into the environment before running sentieon
task Sentieon_BQSR {
    File ref_dir
    File dbsnp_dir
    File dbmills_dir

    String sample_id
    String ref_fasta
    String dbsnp
    String db_mills

    File deduped_bam
    File deduped_bam_index

    # Execution environment
    String docker
    String cluster_config
    String disk_size
    String SENTIEON_LICENSE

    command <<<
        # `set -exo` without an option name made bash dump its option table
        # into the log; the intended flags are -e, -x and -o pipefail.
        set -exo pipefail
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        # 1. Compute the recalibration table.
        sentieon driver -t $nt \
            -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
            --algo QualCal \
            -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
            ${sample_id}_recal_data.table

        # 2. Apply the table, compute the post-recalibration table and write the BAM.
        sentieon driver -t $nt \
            -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
            -q ${sample_id}_recal_data.table \
            --algo QualCal \
            -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
            ${sample_id}_recal_data.table.post \
            --algo ReadWriter ${sample_id}.sorted.deduped.recaled.bam

        # 3. Produce the before/after comparison data.
        sentieon driver -t $nt --algo QualCal \
            --plot --before ${sample_id}_recal_data.table --after ${sample_id}_recal_data.table.post ${sample_id}_recal_data.csv

        # 4. Render the report.
        sentieon plot bqsr -o ${sample_id}_bqsrreport.pdf ${sample_id}_recal_data.csv
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 250"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File recal_table = "${sample_id}_recal_data.table"
        File recal_post = "${sample_id}_recal_data.table.post"
        File recaled_bam = "${sample_id}.sorted.deduped.recaled.bam"
        # Not named in the command; presumably written by ReadWriter next to the BAM.
        File recaled_bam_index = "${sample_id}.sorted.deduped.recaled.bam.bai"
        File recal_csv = "${sample_id}_recal_data.csv"
        File bqsrreport_pdf = "${sample_id}_bqsrreport.pdf"
    }
}


# Runs fastp on a pair of FASTQ files and emits the cleaned reads plus the
# JSON and HTML reports.
#
# Inputs:
#   in1, in2               paired input FASTQ files
#   sample_id              prefix for every output file name
#   phred64, fix_mgi_id    optional switches, off by default
#   adapter_sequence(_r2)  optional adapter sequences for read 1 / read 2
#   reads_to_process       optional limit on reads/pairs to process
# Outputs:
#   out1, out2             <sample_id>_clean_1.fastq / <sample_id>_clean_2.fastq
#   json_report, html_report
task fastp {
    # I/O options
    File in1
    File in2
    String sample_id

    Boolean? phred64 = false
    Boolean? fix_mgi_id = false

    String? adapter_sequence
    String? adapter_sequence_r2

    Int? reads_to_process # specify how many reads/pairs to be processed. Default 0 means process all reads.

    # reporting options
    # Note: there is no separator between sample_id and the suffix.
    String json = sample_id+"fastp.json"
    String html = sample_id+"fastp.html"
    String report_title = "\'fastp report\'"

    # execution environment
    String docker
    String cluster_config
    String disk_size

    String out1_name = sample_id+'_clean_1.fastq'
    String out2_name = sample_id+'_clean_2.fastq'

    command <<<
        set -eo pipefail

        # The optional flags must stay part of the same continued command.
        # A comment line placed between the continued lines ends the command
        # there, so the flags after it would never reach fastp.
        /opt/conda/bin/fastp \
            --in1 ${in1} \
            --in2 ${in2} \
            --out1 ${out1_name} \
            --out2 ${out2_name} \
            --json ${json} \
            --html ${html} \
            --report_title ${report_title} \
            ${ true="--phred64 " false="" phred64 } \
            ${ "--reads_to_process " + reads_to_process } \
            ${ true="--fix_mgi_id " false="" fix_mgi_id } \
            ${ "--adapter_sequence " + adapter_sequence } \
            ${ "--adapter_sequence_r2 " + adapter_sequence_r2 }
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out1 = out1_name
        File out2 = out2_name
        File json_report = json
        File html_report = html
    }
}

# Aligns a FASTQ pair with `sentieon bwa mem`, sorts the alignments, collects
# QC metrics and removes duplicates.
#
# Command outline:
#   1. bwa mem piped into `sentieon util sort`   -> <sample_id>.sorted.bam
#   2. driver with five metrics algorithms       -> <sample_id>.*_metrics.txt
#   3. driver --algo LocusCollector              -> <sample_id>.score.txt.gz
#   4. driver --algo Dedup                       -> <sample_id>.dedup.bam
#                                                   <sample_id>.dedup_metrics.txt
# `ls ./` is run between steps, which lists the working directory in the log.
task SentieonFastqToBam {
    # Tool input files and parameters
    File fastq1
    File fastq2
    String sample_id
    String Seq_platform
    String ref_fasta
    File ref_fasta_dir
    String SENTIEON_LICENSE

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    ## Extra driver parameters
    String qc_driver_args = ""
    String lc_driver_args = "--traverse_param=200000/10000"
    String dedup_driver_args = "--traverse_param=200000/10000"

    ## Extra algo parameters
    String bwa_args = "-Y -M"
    String bwa_chunk_size = "100000000"
    String lc_args = ""
    String bam_option = "--bam_compression 1"

    # Names of the final BAM and its index
    String out_bam = "${sample_id}.dedup.bam"
    String out_bai = "${sample_id}.dedup.bam.bai"

    # Tool command
    command <<<
        set -exo pipefail
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)
        sentieon bwa mem -R "@RG\tID:${sample_id}\tSM:${sample_id}\tPL:${Seq_platform}" ${bwa_args} -K ${bwa_chunk_size} -t $nt ${ref_fasta_dir}/${ref_fasta} ${fastq1} ${fastq2} \
            | sentieon util sort ${bam_option} -i - -r ${ref_fasta_dir}/${ref_fasta} -t $nt -o ${sample_id}.sorted.bam --sam2bam

        ls ./
        sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${qc_driver_args} \
            --algo MeanQualityByCycle ${sample_id}.mq_metrics.txt \
            --algo QualDistribution ${sample_id}.qd_metrics.txt \
            --algo GCBias --summary ${sample_id}.gc_summary_metrics.txt ${sample_id}.gc_metrics.txt \
            --algo AlignmentStat ${sample_id}.aln_metrics.txt \
            --algo InsertSizeMetricAlgo ${sample_id}.is_metrics.txt
        ls ./

        sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${lc_driver_args} \
            --algo LocusCollector \
            ${lc_args} \
            ${sample_id}.score.txt.gz
        ls ./

        sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${dedup_driver_args} \
            --algo Dedup \
            --score_info ${sample_id}.score.txt.gz \
            --metrics ${sample_id}.dedup_metrics.txt \
            ${bam_option} ${out_bam}
        ls ./
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    # Tool outputs
    output {
        File deduped_bam = out_bam
        File deduped_bam_bai = out_bai
        # Collects every file in the working directory ending in _metrics.txt.
        Array[File] qc_metrics = glob("*_metrics.txt")
    }
}
# Configures and runs Manta on a tumor/normal BAM pair.
#
# configManta.py creates the run directory <sample_id>_result, the generated
# runWorkflow.py is then executed locally with one job per available core,
# and the whole run directory is archived as <sample_id>_result.tar.
#
# Outputs:
#   out_file                 tar archive of the run directory
#   manta_indel_vcf(_index)  results/variants/candidateSmallIndels.vcf.gz and
#                            its .tbi taken from the run directory
task manta_calling {
    File tumor_bam
    File tumor_bam_bai
    File normal_bam
    File normal_bam_bai
    String ref_fasta
    File ref_dir
    String sample_id

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    # Run directory created by configManta.py
    String out_dir = sample_id + "_result"

    command <<<
        set -exo pipefail
        nt=$(nproc)
        /home/biosoft/manta-1.6.0.centos6_x86_64/bin/configManta.py \
            --normalBam ${normal_bam} \
            --tumorBam ${tumor_bam} \
            --referenceFasta ${ref_dir}/${ref_fasta} \
            --runDir ${out_dir}
        ls ${out_dir}

        python2.7 ${out_dir}/runWorkflow.py -m local -j $nt

        ls ${out_dir}

        tar cvf ${out_dir}.tar ${out_dir}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        File out_file = out_dir + ".tar"
        File manta_indel_vcf = out_dir + "/results/variants/candidateSmallIndels.vcf.gz"
        File manta_indel_vcf_index = out_dir + "/results/variants/candidateSmallIndels.vcf.gz.tbi"
    }
}

# Runs `qualimap bamqc` on a BAM file restricted to the regions in annot_gff
# and archives the report directory.
#
# Inputs:
#   sample_id         prefix of the report directory (<sample_id>_BamQC)
#   bam_file, bam_bai BAM to analyse; the index is not named in the command and
#                     is presumably declared so it is localized with the BAM
#   annot_gff         feature file passed to -gff
# Output:
#   out_file          <sample_id>_BamQC.tar
task qualimap {
    String sample_id
    File bam_file
    File bam_bai
    File annot_gff

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    String out_dir = sample_id+'_BamQC'

    command <<<
        # `set -exo` without an option name made bash dump its option table
        # into the log; the intended flags are -e, -x and -o pipefail.
        set -exo pipefail
        nt=$(nproc)
        /opt/qualimap/qualimap bamqc -bam ${bam_file} -gff ${annot_gff} -outformat PDF:HTML -nt $nt -outdir ${out_dir} --java-mem-size=32G
        # NOTE(review): -z gzip-compresses the archive although it is named
        # .tar; left unchanged so existing consumers keep working.
        tar -zcvf ${out_dir}.tar ${out_dir}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File out_file = "${out_dir}.tar"
    }
}

# Configures and runs the Strelka somatic workflow on a tumor/normal BAM pair,
# passing the given candidate indel VCF via --indelCandidates.
#
# configureStrelkaSomaticWorkflow.py creates the run directory
# <sample_id>_result, the generated runWorkflow.py is executed locally with
# one job per available core, and the run directory is archived as a tar file.
#
# Output:
#   out_file   <sample_id>_result.tar
task strelka_calling {
    File tumor_bam
    File tumor_bam_bai
    File normal_bam
    File normal_bam_bai
    String ref_fasta
    File ref_dir
    String sample_id

    # Candidate indels; the index is not named in the command and is
    # presumably declared so it is localized next to the VCF.
    File manta_indel_vcf
    File manta_indel_vcf_index

    # Execution environment
    String docker
    String cluster_config
    String disk_size

    # Run directory created by the configure script
    String out_dir = sample_id + "_result"

    command <<<
        set -exo pipefail
        nt=$(nproc)
        /home/biosoft/strelka-2.9.10.centos6_x86_64/bin/configureStrelkaSomaticWorkflow.py \
            --normalBam ${normal_bam} \
            --tumorBam ${tumor_bam} \
            --referenceFasta ${ref_dir}/${ref_fasta} \
            --indelCandidates ${manta_indel_vcf} \
            --runDir ${out_dir}
        ls ${out_dir}

        python2.7 ${out_dir}/runWorkflow.py -m local -j $nt

        ls ${out_dir}

        tar cvf ${out_dir}.tar ${out_dir}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd ${disk_size} /cromwell_root/"
    }

    output {
        File out_file = out_dir + ".tar"
    }
}
# Somatic variant calling with Sentieon TNscope.
#
# The tumor BAM and its recalibration table are always passed to the driver.
# The normal BAM, its recalibration table and --normal_sample are added only
# when normal_bam is supplied, so the task can also run tumor-only.
#
# Inputs:
#   sample_id                  prefix of the output VCF
#   tumor_bam(_bai)            tumor BAM and index
#   normal_bam(_bai)           optional matched-normal BAM and index
#   tumor_name, normal_name    sample names passed to --tumor_sample / --normal_sample
#   tumor_recall_data          recalibration table passed via -q for the tumor BAM
#   normal_recall_data         recalibration table for the normal BAM; optional,
#                              only used together with normal_bam
#   ref_dir / ref_fasta        reference directory and FASTA file name
#   dbsnp_dir / dbsnp          dbSNP directory and file name (passed to --dbsnp)
#   SENTIEON_LICENSE           exported into the environment before running sentieon
# Outputs:
#   vcf, vcf_index             <sample_id>.TNscope.vcf and its .idx
task sentieon_TNscope {
    String sample_id
    File tumor_bam
    File tumor_bam_bai
    File? normal_bam
    File? normal_bam_bai
    String tumor_name
    String normal_name
    File tumor_recall_data
    # Relaxed from File to File? so tumor-only calls do not need a placeholder.
    File? normal_recall_data

    File ref_dir
    String ref_fasta
    File dbsnp_dir
    String dbsnp

    # Execution environment
    String docker
    String cluster_config
    String disk_size
    String SENTIEON_LICENSE

    command <<<
        # `set -exo` without an option name made bash dump its option table
        # into the log; the intended flags are -e, -x and -o pipefail.
        set -exo pipefail
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        # An undefined optional renders as an empty string, so -n detects
        # whether a normal BAM was supplied. Plain $VAR expansion is used on
        # purpose: a braced shell expansion would be read as a WDL placeholder.
        if [ -n "${normal_bam}" ]; then
            NORMAL_INPUT="-i ${normal_bam} ${"-q " + normal_recall_data}"
            NORMAL_SAMPLE="--normal_sample ${normal_name}"
        else
            NORMAL_INPUT=""
            NORMAL_SAMPLE=""
        fi

        sentieon driver -t $nt -r ${ref_dir}/${ref_fasta} \
            -i ${tumor_bam} -q ${tumor_recall_data} \
            $NORMAL_INPUT \
            --algo TNscope --tumor_sample ${tumor_name} $NORMAL_SAMPLE \
            --disable_detector sv --trim_soft_clip \
            --dbsnp ${dbsnp_dir}/${dbsnp} ${sample_id}.TNscope.vcf || { echo "TNscope failed"; exit 1; }
        ls ./
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File vcf = "${sample_id}.TNscope.vcf"
        # Not named in the command; presumably written by sentieon next to the VCF.
        File vcf_index = "${sample_id}.TNscope.vcf.idx"
    }
}
# Somatic variant calling with Sentieon TNhaplotyper2 followed by TNfilter.
#
# First driver call (one pass over the BAMs):
#   TNhaplotyper2      -> <sample_id>.TNseq.raw.vcf
#   OrientationBias    -> <sample_id>.orientation
#   ContaminationModel -> <sample_id>.contamination
#                         <sample_id>.contamination.segments
# Second driver call:
#   TNfilter           -> <sample_id>.bwa_TNseq.vcf
#
# When normal_bam is supplied the normal BAM and --normal_sample are added to
# the calls; otherwise the task runs tumor-only.
#
# Inputs:
#   sample_id                  prefix of every output file
#   tumor_bam(_bai)            tumor BAM and index
#   normal_bam(_bai)           optional matched-normal BAM and index
#   tumor_name, normal_name    sample names for --tumor_sample / --normal_sample
#   ref_dir / ref_fasta        reference directory and FASTA file name
#   germline_resource(_tbi)    VCF passed to --germline_vcf and --vcf; the index is
#                              not named in the command and is presumably declared
#                              so it is localized next to the VCF
#   SENTIEON_LICENSE           exported into the environment before running sentieon
task sentieon_TNseq {
    String sample_id
    File tumor_bam
    File tumor_bam_bai
    File? normal_bam
    File? normal_bam_bai
    String tumor_name
    String normal_name

    File ref_dir
    String ref_fasta
    File germline_resource
    File germline_resource_tbi

    # Execution environment
    String docker
    String cluster_config
    String disk_size
    String SENTIEON_LICENSE

    command <<<
        # `set -exo` without an option name made bash dump its option table
        # into the log; the intended flags are -e, -x and -o pipefail.
        set -exo pipefail
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        # The previous `[${normal_bam}]` had no spaces around the brackets, so
        # the shell looked for a command literally named "[<path>]", the test
        # always failed and the normal sample was silently dropped.
        # An undefined optional renders as an empty string, so -n detects
        # whether a normal BAM was supplied.
        if [ -n "${normal_bam}" ]; then
            INPUT="-i ${tumor_bam} -i ${normal_bam}"
            SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}"
        else
            INPUT="-i ${tumor_bam}"
            SAMPLE="--tumor_sample ${tumor_name}"
        fi

        sentieon driver -t $nt -r ${ref_dir}/${ref_fasta} \
            $INPUT \
            --algo TNhaplotyper2 $SAMPLE \
            --germline_vcf ${germline_resource} \
            ${sample_id}.TNseq.raw.vcf \
            --algo OrientationBias --tumor_sample ${tumor_name} \
            ${sample_id}.orientation \
            --algo ContaminationModel $SAMPLE \
            --vcf ${germline_resource} \
            --tumor_segments ${sample_id}.contamination.segments \
            ${sample_id}.contamination

        sentieon driver -t $nt \
            -r ${ref_dir}/${ref_fasta} \
            --algo TNfilter $SAMPLE \
            -v ${sample_id}.TNseq.raw.vcf \
            --contamination ${sample_id}.contamination \
            --tumor_segments ${sample_id}.contamination.segments \
            --orientation_priors ${sample_id}.orientation \
            ${sample_id}.bwa_TNseq.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File raw_vcf = "${sample_id}.TNseq.raw.vcf"
        # The .idx files are not named in the command; presumably written by
        # sentieon next to each VCF.
        File raw_vcf_index = "${sample_id}.TNseq.raw.vcf.idx"
        File vcf = "${sample_id}.bwa_TNseq.vcf"
        File vcf_index = "${sample_id}.bwa_TNseq.vcf.idx"
        File contamination = "${sample_id}.contamination"
        File contamination_segments = "${sample_id}.contamination.segments"
        File orientation = "${sample_id}.orientation"
    }
}

Загрузка…
Отмена
Сохранить