Pārlūkot izejas kodu

上传文件至 'tasks'

master
meng pirms 2 gadiem
vecāks
revīzija
eb40529258
5 mainīti faili ar 294 papildinājumiem un 0 dzēšanām
  1. +67
    -0
      tasks/BQSR.wdl
  2. +31
    -0
      tasks/bcftools.wdl
  3. +65
    -0
      tasks/fastp.wdl
  4. +84
    -0
      tasks/fastqTobam.wdl
  5. +47
    -0
      tasks/manta.wdl

+ 67
- 0
tasks/BQSR.wdl Parādīt failu

@@ -0,0 +1,67 @@
task Sentieon_BQSR {
  # Base Quality Score Recalibration (BQSR) with Sentieon's QualCal algorithm:
  # pass 1 builds a recalibration table from known variant sites, pass 2
  # applies it (-q) while also computing the post-recalibration table and
  # writing the recalibrated BAM, then before/after plots are rendered.

  # Reference and known-sites resources (directory + file name within it)
  File ref_dir
  File dbsnp_dir
  File dbmills_dir

  String sample_id
  String ref_fasta
  String dbsnp
  String db_mills

  # Duplicate-marked input alignment and its index
  File deduped_bam
  File deduped_bam_index

  # Execution environment
  String docker
  String cluster_config
  String disk_size
  String SENTIEON_LICENSE

  command<<<
  set -o pipefail
  # The original used `set -exo`, which leaves `-o` without an option name
  # (bash then merely prints the current option settings). `pipefail` is
  # already enabled above, so only -e and -x are needed here.
  set -ex
  export SENTIEON_LICENSE=${SENTIEON_LICENSE}
  nt=$(nproc)

  # Pass 1: compute the recalibration table from the known sites.
  sentieon driver -t $nt \
  -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
  --algo QualCal \
  -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
  ${sample_id}_recal_data.table

  # Pass 2: apply the table (-q), compute the post-recalibration table, and
  # write the recalibrated BAM in the same driver invocation.
  sentieon driver -t $nt \
  -r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
  -q ${sample_id}_recal_data.table \
  --algo QualCal \
  -k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
  ${sample_id}_recal_data.table.post \
  --algo ReadWriter ${sample_id}.sorted.deduped.recaled.bam

  # Summarise before/after tables into a CSV, then render the report PDF.
  sentieon driver -t $nt --algo QualCal \
  --plot --before ${sample_id}_recal_data.table --after ${sample_id}_recal_data.table.post ${sample_id}_recal_data.csv

  sentieon plot bqsr -o ${sample_id}_bqsrreport.pdf ${sample_id}_recal_data.csv
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 250"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File recal_table = "${sample_id}_recal_data.table"
    File recal_post = "${sample_id}_recal_data.table.post"
    File recaled_bam = "${sample_id}.sorted.deduped.recaled.bam"
    # NOTE(review): assumes ReadWriter writes the index as <bam>.bai next to
    # the BAM — confirm for the Sentieon version in the image.
    File recaled_bam_index = "${sample_id}.sorted.deduped.recaled.bam.bai"
    File recal_csv = "${sample_id}_recal_data.csv"
    File bqsrreport_pdf = "${sample_id}_bqsrreport.pdf"
  }
}

+ 31
- 0
tasks/bcftools.wdl Parādīt failu

@@ -0,0 +1,31 @@
task bcftools {
  # Normalise a VCF with bcftools: split multiallelic records into biallelic
  # ones (`norm -m -both` splits both SNPs and indels).

  # Reference (only used by the disabled left-alignment variant below)
  File ref_dir
  String fasta
  File vcf
  String basename = basename(vcf,".vcf")

  # Execution environment
  String docker
  String cluster_config
  String disk_size

  command <<<
  set -o pipefail
  set -e
  # Left-alignment against the reference was considered but is disabled:
  # bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
  # Split multiallelic sites into separate biallelic records.
  # (The original also ran `nt=$(nproc)`, but the value was never used.)
  bcftools norm -m -both ${vcf} -o ${basename}.norm.vcf
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File norm_vcf = "${basename}.norm.vcf"
  }
}

+ 65
- 0
tasks/fastp.wdl Parādīt failu

@@ -0,0 +1,65 @@

task fastp {
  # Adapter/quality trimming of a paired-end FASTQ sample with fastp,
  # producing cleaned FASTQs plus JSON and HTML QC reports.

  # I/O options
  File in1
  File in2
  String sample_id

  Boolean? phred64 = false
  Boolean? fix_mgi_id = false

  String? adapter_sequence
  String? adapter_sequence_r2

  Int? reads_to_process # specify how many reads/pairs to be processed. Default 0 means process all reads.

  # Reporting options. An underscore now separates the sample id from the
  # suffix (the original concatenated them as "<id>fastp.json").
  String json = sample_id + "_fastp.json"
  String html = sample_id + "_fastp.html"
  String report_title = "\'fastp report\'"

  # Execution environment
  String docker
  String cluster_config
  String disk_size

  String out1_name = sample_id + '_clean_1.fastq'
  String out2_name = sample_id + '_clean_2.fastq'

  command <<<

  # Required arguments first, then the optional flags. The original placed
  # a "# options" comment in the middle of the backslash-continued command;
  # the shell joined the lines, so every flag after the comment marker was
  # silently discarded.
  /opt/conda/bin/fastp \
  --in1 ${in1} \
  --in2 ${in2} \
  --out1 ${out1_name} \
  --out2 ${out2_name} \
  --json ${json} \
  --html ${html} \
  --report_title ${report_title} \
  ${ true="--phred64 " false="" phred64 } \
  ${ "--reads_to_process " + reads_to_process } \
  ${ true="--fix_mgi_id " false="" fix_mgi_id } \
  ${ "--adapter_sequence " + adapter_sequence } \
  ${ "--adapter_sequence_r2 " + adapter_sequence_r2 }

  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out1 = out1_name
    File out2 = out2_name
    File json_report = json
    File html_report = html
  }

}

+ 84
- 0
tasks/fastqTobam.wdl Parādīt failu

@@ -0,0 +1,84 @@
task SentieonFastqToBam {
# Align a paired-end FASTQ sample with Sentieon bwa mem, coordinate-sort,
# collect alignment QC metrics, and mark duplicates (LocusCollector +
# Dedup), emitting the duplicate-marked BAM, its index, and metric files.

# Tool input files and parameters
File fastq1
File fastq2
String sample_id
# Sequencing platform, used for the PL field of the @RG read group.
String Seq_platform
# Reference FASTA file name, resolved inside ref_fasta_dir.
String ref_fasta
File ref_fasta_dir
String SENTIEON_LICENSE

# Execution environment
String docker
String cluster_config
String disk_size

## Extra driver parameters
String qc_driver_args = ""
String lc_driver_args = "--traverse_param=200000/10000"
String dedup_driver_args = "--traverse_param=200000/10000"
## Extra algo parameters
String bwa_args = "-Y -M"
String bwa_chunk_size = "100000000"
String lc_args = ""
# --bam_compression 1 favours write speed over BAM file size.
String bam_option = "--bam_compression 1"



String out_bam = sample_id + ".dedup.bam"
# NOTE(review): assumes Sentieon Dedup writes the index as <bam>.bai next
# to the output BAM — confirm for the Sentieon version in the image.
String out_bai = sample_id + ".dedup.bam.bai"

# Task command
command <<<
set -exo pipefail
export SENTIEON_LICENSE=${SENTIEON_LICENSE}
nt=$(nproc)
# 1) Align with bwa mem (read group attached) and pipe directly into a
#    coordinate sort; --sam2bam converts the SAM stream to BAM.
sentieon bwa mem -R "@RG\tID:${sample_id}\tSM:${sample_id}\tPL:${Seq_platform}" ${bwa_args} -K ${bwa_chunk_size} -t $nt ${ref_fasta_dir}/${ref_fasta} ${fastq1} ${fastq2} \
| sentieon util sort ${bam_option} -i - -r ${ref_fasta_dir}/${ref_fasta} -t $nt -o ${sample_id}.sorted.bam --sam2bam

# The `ls ./` calls are debugging aids: they record the working-directory
# contents in the task log after each stage.
ls ./
# 2) Single driver pass computing five QC metric reports on the sorted BAM.
sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${qc_driver_args} \
--algo MeanQualityByCycle ${sample_id}.mq_metrics.txt \
--algo QualDistribution ${sample_id}.qd_metrics.txt \
--algo GCBias --summary ${sample_id}.gc_summary_metrics.txt ${sample_id}.gc_metrics.txt \
--algo AlignmentStat ${sample_id}.aln_metrics.txt \
--algo InsertSizeMetricAlgo ${sample_id}.is_metrics.txt
ls ./

# 3) LocusCollector gathers duplicate score information...
sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${lc_driver_args} \
--algo LocusCollector \
${lc_args} \
${sample_id}.score.txt.gz
ls ./

# 4) ...which Dedup consumes to mark duplicates and write the final BAM.
sentieon driver -r ${ref_fasta_dir}/${ref_fasta} -t $nt -i ${sample_id}.sorted.bam ${dedup_driver_args} \
--algo Dedup \
--score_info ${sample_id}.score.txt.gz \
--metrics ${sample_id}.dedup_metrics.txt \
${bam_option} ${out_bam}
ls ./

>>>


runtime {
docker:docker
cluster:cluster_config
systemDisk:"cloud_ssd 40"
dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"

}

# Task outputs
output {
File deduped_bam = out_bam
File deduped_bam_bai = out_bai
# Collects the *_metrics.txt files written in stages 2 and 4.
Array[File] qc_metrics = glob("*_metrics.txt")
}

}

+ 47
- 0
tasks/manta.wdl Parādīt failu

@@ -0,0 +1,47 @@
task manta_calling {
  # Somatic structural-variant calling with Manta on a tumour/normal pair:
  # configure a run directory, execute the workflow locally, and archive
  # the whole result directory as a tar bundle.

  String sample_id
  String ref_fasta
  File ref_dir

  # Tumour/normal alignments with their indices
  File tumor_bam
  File tumor_bam_bai
  File normal_bam
  File normal_bam_bai

  # Execution environment
  String docker
  String cluster_config
  String disk_size

  # Manta run directory; also names the archived result bundle.
  String run_dir = "${sample_id}_result"

  command <<<
  set -exo pipefail
  nt=$(nproc)
  /home/biosoft/manta-1.6.0.centos6_x86_64/bin/configManta.py \
  --normalBam ${normal_bam} \
  --tumorBam ${tumor_bam} \
  --referenceFasta ${ref_dir}/${ref_fasta} \
  --runDir ${run_dir}
  ls ${run_dir}

  python2.7 ${run_dir}/runWorkflow.py -m local -j $nt

  ls ${run_dir}

  tar cvf ${run_dir}.tar ${run_dir}
  >>>

  runtime {
    docker: docker
    cluster: cluster_config
    systemDisk: "cloud_ssd 40"
    dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
  }

  output {
    File out_file = "${run_dir}.tar"
    File manta_indel_vcf = "${run_dir}/results/variants/candidateSmallIndels.vcf.gz"
    File manta_indel_vcf_index = "${run_dir}/results/variants/candidateSmallIndels.vcf.gz.tbi"
  }
}

Notiek ielāde…
Atcelt
Saglabāt