Browse Source

Add: mergePoN

master
YaqingLiu 4 years ago
parent
commit
f45237e4df
7 changed files with 119 additions and 16 deletions
  1. +19
    -0
      README.md
  2. +1
    -1
      defaults
  3. +1
    -2
      inputs
  4. +9
    -0
      samples.json
  5. +15
    -5
      tasks/PON.wdl
  6. +33
    -0
      tasks/mergePoN.wdl
  7. +41
    -8
      workflow.wdl

+ 19
- 0
README.md View File

# README.md

> Author: Qingwang Chen
>
> Email: [yaqing.liu@outlook.com](mailto:yaqing.liu@outlook.com)
>
> Last Updated: 07/04/2021

#### Requirements

- choppy
- Ali-Cloud
- Linux

#### Panel of Normals
The Panel of Normals approach uses a set of matched normal samples to determine the baseline level from which to call CNV events. These matched normal samples should be derived from the same library prep and sequencing workflow that was used for the case sample. This allows the algorithm to subtract out system-level biases that are not sample-specific.

#### Usage
```

+ 1
- 1
defaults View File

"SENTIEON_LICENSE": "192.168.0.55:8990", "SENTIEON_LICENSE": "192.168.0.55:8990",
"dbmills_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/", "dbmills_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/",
"db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf", "db_mills": "Mills_and_1000G_gold_standard.indels.hg38.vcf",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2019.11.28",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/sentieon-genomics:v2020.10.07",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2018.04", "annovar_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/annovar:v2018.04",
"maftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/r-base:4.0.2", "maftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/r-base:4.0.2",
"bcftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", "bcftools_docker": "registry.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",

+ 1
- 2
inputs View File

{ {
"{{ project_name }}.sample_id": "{{ sample_id }}", "{{ project_name }}.sample_id": "{{ sample_id }}",
"{{ project_name }}.normal_recaled_bam": "{{ normal_recaled_bam }}",
"{{ project_name }}.normal_recaled_bam_index": "{{ normal_recaled_bam_index }}",
"{{ project_name }}.normal_recaled_bam_bai": "{{ normal_recaled_bam_bai | tojson }}",
"{{ project_name }}.fasta": "{{ fasta }}", "{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}", "{{ project_name }}.ref_dir": "{{ ref_dir }}",
"{{ project_name }}.dbsnp": "{{ dbsnp }}", "{{ project_name }}.dbsnp": "{{ dbsnp }}",

+ 9
- 0
samples.json View File

{
"vcf": [
["oss://choppy-cromwell-result/test-choppy/..._pon.vcf"],
["oss://choppy-cromwell-result/test-choppy/..._pon.vcf"],
["oss://choppy-cromwell-result/test-choppy/..._pon.vcf"]
],
"panel_id": "PON_20210407",
"sample_id": "PON_20210407"
}

+ 15
- 5
tasks/PON.wdl View File

task PON {
task PoN {


String SENTIEON_INSTALL_DIR String SENTIEON_INSTALL_DIR
String SENTIEON_LICENSE String SENTIEON_LICENSE
String dbsnp String dbsnp
File regions File regions


File normal_recaled_bam
File normal_recaled_bam_index
File normal_bam
File normal_bam_index
String docker String docker
String cluster_config String cluster_config
String disk_size String disk_size
mkdir -p /cromwell_root/tmp/cosmic/ mkdir -p /cromwell_root/tmp/cosmic/
cp ${cosmic_dir}/${cosmic_vcf} /cromwell_root/tmp/cosmic/ cp ${cosmic_dir}/${cosmic_vcf} /cromwell_root/tmp/cosmic/
${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex /cromwell_root/tmp/cosmic/${cosmic_vcf} ${SENTIEON_INSTALL_DIR}/bin/sentieon util vcfindex /cromwell_root/tmp/cosmic/${cosmic_vcf}
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --interval ${regions} -r ${ref_dir}/${fasta} -i ${normal_recaled_bam} --algo TNhaplotyper --detect_pon --cosmic /cromwell_root/tmp/cosmic/${cosmic_vcf} --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}_pon.vcf

# Generating TNhaplotyper PON
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --interval ${regions} -r ${ref_dir}/${fasta} -i ${normal_bam} --algo TNhaplotyper --detect_pon --cosmic /cromwell_root/tmp/cosmic/${cosmic_vcf} --dbsnp ${dbsnp_dir}/${dbsnp} ${sample}_TNhaplotyper_pon.vcf

# Generating TNhaplotyper2 PON
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --interval ${regions} -r ${ref_dir}/${fasta} -i ${normal_bam} --algo TNhaplotyper2 --normal_sample ${sample} ${sample}_TNhaplotyper2_pon.vcf

# Generating TNscope PON
${SENTIEON_INSTALL_DIR}/bin/sentieon driver -t $nt --interval ${regions} -r ${ref_dir}/${fasta} -i ${normal_bam} --algo TNscope --normal_sample ${sample} ${sample}_TNscope_pon.vcf
>>> >>>
runtime { runtime {
} }


output { output {
File pon_vcf = "${sample}_pon.vcf"
File TNhaplotyper_pon_vcf = "${sample}_TNhaplotyper_pon.vcf"
File TNhaplotyper2_pon_vcf = "${sample}_TNhaplotyper2_pon.vcf"
File TNscope_pon_vcf = "${sample}_TNscope_pon.vcf"
} }
} }

+ 33
- 0
tasks/mergePoN.wdl View File

# Merge per-sample panel-of-normal VCFs into a single panel VCF with bcftools.
#
# Inputs:
#   pon_vcfs       - per-sample PoN VCF files to merge
#   panel_id       - prefix of the output file name
#   docker         - image the task runs in (must provide bcftools)
#   cluster_config - value passed to the `cluster` runtime attribute
#   disk_size      - size inserted into the `dataDisk` runtime attribute
#
# Output:
#   panel_of_normal_vcf - "<panel_id>_pon.vcf" written to the task working directory
task mergePoN {

    Array[File] pon_vcfs
    String panel_id
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        mkdir -p /cromwell_root/tmp/bcftools

        # Compress and index each input so that `bcftools merge` can read it.
        # Expansions are quoted so a path is never re-split or globbed by the shell,
        # and options precede the operands as in the documented bcftools synopsis.
        for i in ${sep=" " pon_vcfs}
        do
            out="/cromwell_root/tmp/bcftools/$(basename "$i").gz"
            bcftools view -Oz -o "$out" "$i"
            bcftools index -f "$out"
        done

        # Merge all compressed inputs, recompute AN/AC on the merged records and
        # keep only records whose summed allele count is greater than 1.
        # NOTE(review): the glob only matches inputs whose names end in ".vcf".
        bcftools merge -m all -f PASS,. --force-samples /cromwell_root/tmp/bcftools/*.vcf.gz | bcftools plugin fill-AN-AC | bcftools filter -i 'SUM(AC)>1' > "${panel_id}_pon.vcf"
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }
    output {
        File panel_of_normal_vcf = "${panel_id}_pon.vcf"
    }
}

+ 41
- 8
workflow.wdl View File

import "./tasks/PON.wdl" as PON
import "./tasks/PoN.wdl" as PoN
import "./tasks/mergePoN.wdl" as mergePoN
workflow {{ project_name }} { workflow {{ project_name }} {
String SENTIEON_INSTALL_DIR String SENTIEON_INSTALL_DIR
String dbsnp String dbsnp
File regions File regions


File normal_recaled_bam
File normal_recaled_bam_index
Array[Array[File]] normal_recaled_bam_bai
String sentieon_docker String sentieon_docker
String cluster_config String cluster_config
String disk_size String disk_size

call PON.PON as PON {
scatter (item in normal_recaled_bam_bai){
call PoN.PoN as PoN {
input: input:
SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR, SENTIEON_INSTALL_DIR=SENTIEON_INSTALL_DIR,
SENTIEON_LICENSE=SENTIEON_LICENSE, SENTIEON_LICENSE=SENTIEON_LICENSE,
sample=sample_id,
fasta=fasta, fasta=fasta,
ref_dir=ref_dir, ref_dir=ref_dir,
regions=regions, regions=regions,
normal_recaled_bam=normal_recaled_bam,
normal_recaled_bam_index=normal_recaled_bam_index,
sample=item[0],
normal_bam=item[1],
normal_bam_index=item[2],
cosmic_vcf=cosmic_vcf, cosmic_vcf=cosmic_vcf,
cosmic_dir=cosmic_dir, cosmic_dir=cosmic_dir,
dbsnp=dbsnp, dbsnp=dbsnp,
docker=sentieon_docker, docker=sentieon_docker,
disk_size=disk_size, disk_size=disk_size,
cluster_config=cluster_config cluster_config=cluster_config
}
}

# Gather the per-sample outputs of the scattered call, one array per caller.
# The call is aliased `PoN`, so its outputs must be referenced through that alias.
Array[File] TNhaplotyper_pon_vcfs = PoN.TNhaplotyper_pon_vcf
Array[File] TNhaplotyper2_pon_vcfs = PoN.TNhaplotyper2_pon_vcf
Array[File] TNscope_pon_vcfs = PoN.TNscope_pon_vcf

call mergePoN.mergePoN as mergePoN_TNhaplotyper {
input:
pon_vcfs=TNhaplotyper_pon_vcfs,
panel_id=panel_id,
docker=bcftools_docker,
disk_size=disk_size,
cluster_config=cluster_config
}

call mergePoN.mergePoN as mergePoN_TNhaplotyper2 {
input:
pon_vcfs=TNhaplotyper2_pon_vcfs,
panel_id=panel_id,
docker=bcftools_docker,
disk_size=disk_size,
cluster_config=cluster_config
}
call mergePoN.mergePoN as mergePoN_TNscope {
input:
pon_vcfs=TNscope_pon_vcfs,
panel_id=panel_id,
docker=bcftools_docker,
disk_size=disk_size,
cluster_config=cluster_config
} }
} }

Loading…
Cancel
Save