# ==== Sentieon_BQSR (originally its own file) ====
task Sentieon_BQSR {
	# Base Quality Score Recalibration with the Sentieon driver:
	# pass 1 builds a recalibration table from known variant sites,
	# pass 2 applies it (emitting the recalibrated BAM and a post table),
	# then the before/after tables are compared and plotted.

	# Reference and known-sites resources, given as directory + file-name pairs.
	File ref_dir
	File dbsnp_dir
	File dbmills_dir
	String sample_id
	String ref_fasta
	String dbsnp
	String db_mills
	# Deduplicated input alignments (index must accompany the BAM).
	File deduped_bam
	File deduped_bam_index
	# execution environment
	String docker
	String cluster_config
	String disk_size
	String SENTIEON_LICENSE
	command<<<
		set -o pipefail
		# BUG FIX: original was `set -exo`, leaving `-o` without its required
		# option name; `-e -x` is what was intended (pipefail is set above).
		set -ex
		export SENTIEON_LICENSE=${SENTIEON_LICENSE}
		nt=$(nproc)
		# Pass 1: compute the recalibration table against dbSNP and Mills indels.
		sentieon driver -t $nt \
		-r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
		--algo QualCal \
		-k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
		${sample_id}_recal_data.table
		# Pass 2: apply the table (-q), produce the post-recalibration table
		# and write the recalibrated BAM in the same pass.
		sentieon driver -t $nt \
		-r ${ref_dir}/${ref_fasta} -i ${deduped_bam} \
		-q ${sample_id}_recal_data.table \
		--algo QualCal \
		-k ${dbsnp_dir}/${dbsnp} -k ${dbmills_dir}/${db_mills} \
		${sample_id}_recal_data.table.post \
		--algo ReadWriter ${sample_id}.sorted.deduped.recaled.bam
		# Compare before/after recalibration tables and render the report.
		sentieon driver -t $nt --algo QualCal \
		--plot --before ${sample_id}_recal_data.table --after ${sample_id}_recal_data.table.post ${sample_id}_recal_data.csv
		sentieon plot bqsr -o ${sample_id}_bqsrreport.pdf ${sample_id}_recal_data.csv
	>>>
	runtime{
		docker:docker
		cluster:cluster_config
		systemDisk:"cloud_ssd 250"
		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
	}
	output{
		File recal_table = "${sample_id}_recal_data.table"
		File recal_post = "${sample_id}_recal_data.table.post"
		File recaled_bam = "${sample_id}.sorted.deduped.recaled.bam"
		File recaled_bam_index = "${sample_id}.sorted.deduped.recaled.bam.bai"
		File recal_csv = "${sample_id}_recal_data.csv"
		File bqsrreport_pdf = "${sample_id}_bqsrreport.pdf"
	}
}
# ==== ANNOVAR (originally its own file) ====
task ANNOVAR {
	File vcf
	# Output prefix: the input VCF's file name with ".vcf" stripped.
	String basename = basename(vcf,".vcf")
	File annovar_database
	# execution environment
	String docker
	String cluster_config
	String disk_size
	command <<<
		set -o pipefail
		set -e
		nt=$(nproc)
		# Annotate against hg38. The -operation list maps one entry
		# (g = gene, r = region, f = filter) to each -protocol database,
		# in the same order.
		/installations/annovar/table_annovar.pl ${vcf} ${annovar_database} \
		-buildver hg38 \
		-out ${basename} \
		-remove \
		-protocol refGene,cytoBand,genomicSuperDups,clinvar_20220320,intervar_20180118,cosmic95_coding,cosmic95_noncoding,gnomad211_exome,dbnsfp42c,avsnp150 \
		-operation g,r,r,f,f,f,f,f,f,f \
		-nastring . \
		-vcfinput \
		-polish \
		-thread $nt
	>>>
	runtime {
		docker: docker
		cluster: cluster_config
		systemDisk: "cloud_ssd 40"
		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
	}
	output {
		File avinput = "${basename}.avinput"
		File multianno_txt = "${basename}.hg38_multianno.txt"
		File multianno_vcf = "${basename}.hg38_multianno.vcf"
	}
}
# ==== bcftools norm (originally its own file) ====
task bcftools {
	# ref_dir/fasta are only consumed by the disabled reference-normalization
	# variant below; they are kept so existing callers continue to work.
	File ref_dir
	String fasta
	File vcf
	# Output prefix: the input VCF's file name with ".vcf" stripped.
	String basename = basename(vcf,".vcf")
	# execution environment
	String docker
	String cluster_config
	String disk_size
	command <<<
		set -o pipefail
		set -e
		nt=$(nproc)
		# Split multiallelic records into biallelic ones. Left-alignment
		# against the reference is intentionally disabled (see below).
		# bcftools norm -m -both ${vcf} | bcftools norm -f ${ref_dir}/${fasta} -Ov -o ${basename}.norm.vcf
		bcftools norm --multiallelics -both --output ${basename}.norm.vcf ${vcf}
	>>>
	runtime {
		docker: docker
		cluster: cluster_config
		systemDisk: "cloud_ssd 40"
		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
	}
	output {
		File norm_vcf = "${basename}.norm.vcf"
	}
}
# ==== bcftools concat (originally its own file) ====
task bcftools {
	# Concatenate a caller's per-type VCFs (indels + SNVs) into a single
	# bgzipped, tabix-indexed VCF.
	File ref_dir
	String fasta
	File vcf_indels
	File vcf_snvs
	# BUG FIX: `bcftools concat -a` requires a tabix index next to each
	# input VCF, but no index Files were declared, so the runner never
	# localizes them and the command fails. Declared optional so existing
	# callers keep working; pass them to enable -a on pre-indexed inputs.
	File? vcf_indels_index
	File? vcf_snvs_index
	String sample_id
	# execution environment
	String docker
	String cluster_config
	String disk_size
	command <<<
		set -o pipefail
		set -e
		nt=$(nproc)
		# NOTE(review): "stralka" looks like a typo for "strelka"; kept
		# unchanged so downstream consumers of these file names don't break.
		bcftools concat -a --threads $nt ${vcf_indels} ${vcf_snvs} -Oz -o ${sample_id}_stralka.vcf.gz
		bcftools index -t ${sample_id}_stralka.vcf.gz
	>>>
	runtime {
		docker: docker
		cluster: cluster_config
		systemDisk: "cloud_ssd 40"
		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
	}
	output {
		File concat_vcf = "${sample_id}_stralka.vcf.gz"
		File concat_vcf_index = "${sample_id}_stralka.vcf.gz.tbi"
	}
}
# ==== fastp (originally its own file) ====
task fastp {
	# Paired-end read QC / trimming with fastp, producing cleaned FASTQs
	# plus JSON and HTML reports.
	# I/O options
	File in1
	File in2
	String sample_id
	Boolean? phred64 = false
	Boolean? fix_mgi_id = false
	String? adapter_sequence
	String? adapter_sequence_r2
	Int? reads_to_process # specify how many reads/pairs to be processed. Default 0 means process all reads.
	# reporting options
	# NOTE(review): report names concatenate without a separator, producing
	# e.g. "sample1fastp.json" — confirm downstream tooling expects this
	# before adding an underscore.
	String json = sample_id+"fastp.json"
	String html = sample_id+"fastp.html"
	String report_title = "\'fastp report\'"
	# execution environment
	String docker
	String cluster_config
	String disk_size
	String out1_name = sample_id+'_clean_1.fastq'
	String out2_name = sample_id+'_clean_2.fastq'
	command <<<
		# BUG FIX: the original had a "# options" comment in the middle of
		# this backslash-continued invocation. The trailing `\` joined that
		# comment into the command line, terminating fastp's arguments there
		# and leaving every optional flag below it as separate, broken shell
		# commands. The comment must not sit inside the continuation.
		/opt/conda/bin/fastp \
		--in1 ${in1} \
		--in2 ${in2} \
		--out1 ${out1_name} \
		--out2 ${out2_name} \
		--json ${json} \
		--html ${html} \
		--report_title ${report_title} \
		${ true="--phred64 " false="" phred64 } \
		${ "--reads_to_process " + reads_to_process } \
		${ true="--fix_mgi_id " false="" fix_mgi_id } \
		${ "--adapter_sequence " + adapter_sequence } \
		${ "--adapter_sequence_r2 " + adapter_sequence_r2 }
	>>>
	runtime {
		docker:docker
		cluster:cluster_config
		systemDisk:"cloud_ssd 40"
		dataDisk:"cloud_ssd " + disk_size + " /cromwell_root/"
	}
	output {
		File out1 = out1_name
		File out2 = out2_name
		File json_report = json
		File html_report = html
	}
}