Browse Source

feat: qc and multi callers

master
YaqingLiu 2 years ago
parent
commit
a3fc61a4e6
10 changed files with 499 additions and 3 deletions
  1. +9
    -1
      defaults
  2. +9
    -1
      inputs
  3. +73
    -0
      tasks/TNscope.wdl
  4. +33
    -0
      tasks/fastqc.wdl
  5. +40
    -0
      tasks/fastqscreen.wdl
  6. +40
    -0
      tasks/processSomatic.wdl
  7. +33
    -0
      tasks/qualimap.wdl
  8. +31
    -0
      tasks/somatic.wdl
  9. +47
    -0
      tasks/somaticFilter.wdl
  10. +184
    -1
      workflow.wdl

+ 9
- 1
defaults View File

@@ -21,7 +21,10 @@
"germline_resource_tbi": "oss://genomics-platform-reference-data/gnomAD/af-only-gnomad.v3.1.1.vcf.gz.tbi",
"gc": "oss://ivd-product/reference/Sequenza/GRCh38.gc50Base.wig.gz",
"baseline": "oss://ivd-product/reference/MSIsensor/hg38_reference.list_baseline",
"fastq_screen_conf": "oss://genomics-platform-reference-data/fastq_screen_reference/fastq_screen.conf",
"screen_ref_dir": "oss://genomics-platform-reference-data/fastq_screen_reference/",
"sentieon_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/sentieon-genomics:v202112.05",
"varscan_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/varscan2:v2.4.3",
"manta_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/manta:1.6.0",
"bcftools_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/bcftools:v1.9",
"annovar_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/annovar:v20191024",
@@ -29,8 +32,13 @@
"cnvkit_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/cnvkit:0.9.9",
"sequenza_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/sequenza:3.0.0",
"msisensor_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/msisensor-pro:1.2.0",
"fastqscreen_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/fastqscreen:0.12.0",
"fastqc_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/fastqc:0.11.8",
"qualimap_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/qualimap:2.0.0",
"tmb_docker": "registry.cn-shanghai.aliyuncs.com/choppy-pipe/tmb:1.0.0",
"cluster_16cpu_32gb": "OnDemand bcs.b2.3xlarge img-ubuntu-vpc",
"cluster_2cpu_4gb": "OnDemand bcs.es.c.large img-ubuntu-vpc",
"disk_size": "500"
"disk_size": "500",
"qc": false,
"multi_caller": false
}

+ 9
- 1
inputs View File

@@ -8,6 +8,7 @@
"{{ project_name }}.duplex_umi": "{{ duplex_umi }}",
"{{ project_name }}.SENTIEON_LICENSE": "{{ SENTIEON_LICENSE }}",
"{{ project_name }}.sentieon_docker": "{{ sentieon_docker }}",
"{{ project_name }}.varscan_docker": "{{ varscan_docker }}",
"{{ project_name }}.manta_docker": "{{ manta_docker }}",
"{{ project_name }}.bcftools_docker": "{{ bcftools_docker }}",
"{{ project_name }}.annovar_docker": "{{ annovar_docker }}",
@@ -16,6 +17,11 @@
"{{ project_name }}.sequenza_docker": "{{ sequenza_docker }}",
"{{ project_name }}.msisensor_docker": "{{ msisensor_docker }}",
"{{ project_name }}.tmb_docker": "{{ tmb_docker }}",
"{{ project_name }}.fastqscreen_docker": "{{ fastqscreen_docker }}",
"{{ project_name }}.fastqc_docker": "{{ fastqc_docker }}",
"{{ project_name }}.qualimap_docker": "{{ qualimap_docker }}",
"{{ project_name }}.fastq_screen_conf": "{{ fastq_screen_conf }}",
"{{ project_name }}.screen_ref_dir": "{{ screen_ref_dir }}",
"{{ project_name }}.platform": "{{ platform }}",
"{{ project_name }}.fasta": "{{ fasta }}",
"{{ project_name }}.ref_dir": "{{ ref_dir }}",
@@ -34,5 +40,7 @@
"{{ project_name }}.ref_flat": "{{ ref_flat }}",
"{{ project_name }}.disk_size": "{{ disk_size }}",
"{{ project_name }}.cluster_2cpu_4gb": "{{ cluster_2cpu_4gb }}",
"{{ project_name }}.cluster_16cpu_32gb": "{{ cluster_16cpu_32gb }}"
"{{ project_name }}.cluster_16cpu_32gb": "{{ cluster_16cpu_32gb }}",
"{{ project_name }}.qc": {{ qc | tojson }},
"{{ project_name }}.multi_caller": {{ multi_caller | tojson }}
}

+ 73
- 0
tasks/TNscope.wdl View File

@@ -0,0 +1,73 @@
# Somatic variant calling with Sentieon TNscope.
#
# Runs `sentieon driver --algo TNscope` on a recalibrated tumor BAM and, when
# one is supplied, a matched normal BAM. Without a normal BAM only the tumor
# sample is passed to the caller.
#
# Inputs:
#   sample                         prefix of the output VCF
#   SENTIEON_LICENSE               exported into the environment for the driver
#   tumor_recaled_bam(_index)      tumor BAM and its index
#   normal_recaled_bam(_index)     optional matched-normal BAM and its index
#   tumor_name / normal_name       sample names passed to --tumor_sample / --normal_sample
#   ref_dir, fasta                 reference directory and FASTA file name inside it
#   dbsnp_dir, dbsnp               dbSNP directory and VCF file name inside it
#   regions, interval_padding      optional interval file and padding
#   pon_vcf                        accepted but currently unused (panel-of-normals step is disabled)
# Outputs:
#   TNscope_vcf, TNscope_vcf_index
task TNscope {
    String sample
    String SENTIEON_LICENSE
    File tumor_recaled_bam
    File tumor_recaled_bam_index
    File? normal_recaled_bam
    File? normal_recaled_bam_index
    String tumor_name
    String normal_name
    File ref_dir
    String fasta
    File dbsnp_dir
    String dbsnp
    File? regions
    Int? interval_padding
    File? pon_vcf
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        export SENTIEON_LICENSE=${SENTIEON_LICENSE}
        nt=$(nproc)

        # The tests below rely on an unset optional input rendering as an empty
        # string, exactly as the original unquoted tests did. Quoting plus -n
        # keeps the test well-formed whatever the value contains.
        if [ -n "${regions}" ]; then
            INTERVAL="--interval ${regions}"
            # Only add the padding flag when a value was supplied; otherwise
            # the driver would receive --interval_padding with no argument.
            if [ -n "${interval_padding}" ]; then
                INTERVAL="$INTERVAL --interval_padding ${interval_padding}"
            fi
        else
            INTERVAL=""
        fi

        # Panel-of-normals support is intentionally disabled for now:
        #   PON="--pon ${pon_vcf}"; sentieon util vcfindex ${pon_vcf}

        if [ -n "${normal_recaled_bam}" ]; then
            INPUT="-i ${tumor_recaled_bam} -i ${normal_recaled_bam}"
            SAMPLE="--tumor_sample ${tumor_name} --normal_sample ${normal_name}"
        else
            INPUT="-i ${tumor_recaled_bam}"
            SAMPLE="--tumor_sample ${tumor_name}"
        fi

        sentieon driver -t $nt -r ${ref_dir}/${fasta} \
            $INPUT \
            $INTERVAL \
            --algo TNscope \
            $SAMPLE \
            --dbsnp ${dbsnp_dir}/${dbsnp} \
            ${sample}.TNscope.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File TNscope_vcf = "${sample}.TNscope.vcf"
        File TNscope_vcf_index = "${sample}.TNscope.vcf.idx"
    }
}

+ 33
- 0
tasks/fastqc.wdl View File

@@ -0,0 +1,33 @@
# Per-read quality report for a paired-end FASTQ pair using FastQC.
#
# Inputs:
#   sample        prefix used to name the symlinked FASTQs and hence the reports
#   read1, read2  gzipped FASTQ files for mate 1 and mate 2
# Outputs:
#   read{1,2}_html, read{1,2}_zip  FastQC HTML report and zip archive per mate
task fastqc {
    String sample
    File read1
    File read2
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -o pipefail
        set -e
        nt=$(nproc)
        # Symlink under predictable names so the report file names are derived
        # from the sample prefix instead of the original FASTQ names.
        ln -s ${read1} ${sample}_R1.fastq.gz
        ln -s ${read2} ${sample}_R2.fastq.gz
        # FastQC's -t option is the number of files processed simultaneously,
        # so both mates are handed to a single invocation; running one file
        # per invocation can never use more than one worker.
        fastqc -t $nt -o ./ ${sample}_R1.fastq.gz ${sample}_R2.fastq.gz
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File read1_html = "${sample}_R1_fastqc.html"
        File read1_zip = "${sample}_R1_fastqc.zip"
        File read2_html = "${sample}_R2_fastqc.html"
        File read2_zip = "${sample}_R2_fastqc.zip"
    }
}

+ 40
- 0
tasks/fastqscreen.wdl View File

@@ -0,0 +1,40 @@
# Contamination screen of both mates of a FASTQ pair with FastQ Screen (bowtie2).
#
# Inputs:
#   sample             prefix used for the symlinked FASTQs and the report names
#   read1, read2       gzipped FASTQ files for mate 1 and mate 2
#   screen_ref_dir     reference directory, copied into /cromwell_root/tmp/
#   fastq_screen_conf  FastQ Screen configuration file
# Outputs:
#   png/txt/html report for each mate
task fastq_screen {
    String sample
    File read1
    File read2
    File screen_ref_dir
    File fastq_screen_conf

    String docker
    String cluster_config
    String disk_size

    command <<<
        set -e
        set -o pipefail
        nt=$(nproc)

        # Stage the screening references under /cromwell_root/tmp/.
        # NOTE(review): presumably the conf file points at this location - confirm.
        mkdir -p /cromwell_root/tmp
        cp -r ${screen_ref_dir} /cromwell_root/tmp/

        # Predictable input names give predictable report names.
        ln -s ${read1} ${sample}_R1.fastq.gz
        ln -s ${read2} ${sample}_R2.fastq.gz

        # Same invocation for each mate, R1 first.
        for mate in R1 R2; do
            fastq_screen --aligner bowtie2 --conf ${fastq_screen_conf} --top 100000 --threads $nt ${sample}_$mate.fastq.gz
        done
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File png1 = "${sample}_R1_screen.png"
        File txt1 = "${sample}_R1_screen.txt"
        File html1 = "${sample}_R1_screen.html"
        File png2 = "${sample}_R2_screen.png"
        File txt2 = "${sample}_R2_screen.txt"
        File html2 = "${sample}_R2_screen.html"
    }
}

+ 40
- 0
tasks/processSomatic.wdl View File

@@ -0,0 +1,40 @@
# Split raw VarScan2 somatic calls with `VarScan processSomatic`.
#
# Inputs:
#   sample         prefix of the working copies and of every output file
#   varscan_snp    raw SNP VCF produced by the VarScan somatic step
#   varscan_indel  raw INDEL VCF produced by the VarScan somatic step
# Outputs:
#   Somatic / Germline / LOH VCFs, plus their ".hc" counterparts, for SNPs and
#   for INDELs (twelve files in total).
task processSomatic {
    String sample
    File varscan_snp
    File varscan_indel
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Fail at the first broken step, matching the other tasks in this pipeline.
        set -o pipefail
        set -e

        # Copy to explicit names: the declared outputs are all derived from
        # these two file names, so do not depend on the basename the inputs
        # happen to be localized with.
        cp ${varscan_snp} ${sample}.VarScan.SNP.vcf
        cp ${varscan_indel} ${sample}.VarScan.INDEL.vcf

        java -Xmx32g -jar /opt/VarScan.v2.4.3.jar processSomatic ${sample}.VarScan.SNP.vcf
        java -Xmx32g -jar /opt/VarScan.v2.4.3.jar processSomatic ${sample}.VarScan.INDEL.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File varscan_snp_somatic = "${sample}.VarScan.SNP.Somatic.vcf"
        File varscan_snp_germline = "${sample}.VarScan.SNP.Germline.vcf"
        File varscan_snp_loh = "${sample}.VarScan.SNP.LOH.vcf"
        File varscan_snp_somatic_hc = "${sample}.VarScan.SNP.Somatic.hc.vcf"
        File varscan_snp_germline_hc = "${sample}.VarScan.SNP.Germline.hc.vcf"
        File varscan_snp_loh_hc = "${sample}.VarScan.SNP.LOH.hc.vcf"
        File varscan_indel_somatic = "${sample}.VarScan.INDEL.Somatic.vcf"
        File varscan_indel_germline = "${sample}.VarScan.INDEL.Germline.vcf"
        File varscan_indel_loh = "${sample}.VarScan.INDEL.LOH.vcf"
        File varscan_indel_somatic_hc = "${sample}.VarScan.INDEL.Somatic.hc.vcf"
        File varscan_indel_germline_hc = "${sample}.VarScan.INDEL.Germline.hc.vcf"
        File varscan_indel_loh_hc = "${sample}.VarScan.INDEL.LOH.hc.vcf"
    }
}

+ 33
- 0
tasks/qualimap.wdl View File

@@ -0,0 +1,33 @@
# Alignment QC over the covered regions with Qualimap bamqc.
#
# Inputs:
#   sample       output directory name and prefix of the result files
#   bam, bai     alignment file and its index
#   covered_bed  BED file of target regions
# Outputs:
#   tar          archive of the Qualimap output directory
#   duplication  one line: sample name, tab, value parsed from the
#                "duplication" line of genome_results.txt
task qualimap {
    String sample
    File bam
    File bai
    File covered_bed
    String docker
    String cluster_config
    String disk_size

    command <<<
        set -e
        set -o pipefail
        nt=$(nproc)

        # Rewrite the BED as six tab-separated columns (chrom, start, end,
        # empty name, 0, "."), removing a carriage return from column 3.
        awk 'BEGIN{OFS="\t"}{sub("\r","",$3);print $1,$2,$3,"",0,"."}' ${covered_bed} > new.bed

        /opt/qualimap/qualimap bamqc \
            -bam ${bam} \
            -gff new.bed \
            -outformat PDF:HTML \
            -nt $nt \
            -outdir ${sample} \
            --java-mem-size=32G

        # Pull the value between "= " and "%" on the duplication line.
        grep duplication ${sample}/genome_results.txt | awk -F "= |%" '{print "'"${sample}"'""\t"$2}' > ${sample}_qualimap_duplication.txt

        # NOTE(review): -z makes this a gzip-compressed archive even though the
        # name ends in .tar; the name is kept because it is the declared output.
        tar -zcvf ${sample}_qualimap.tar ${sample}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File tar = "${sample}_qualimap.tar"
        File duplication = "${sample}_qualimap_duplication.txt"
    }
}

+ 31
- 0
tasks/somatic.wdl View File

@@ -0,0 +1,31 @@
# Paired tumor/normal somatic calling with VarScan2 `somatic`.
#
# Streams a joint `samtools mpileup` of the normal and tumor BAMs (normal
# first) into VarScan and writes separate SNP and INDEL VCFs.
#
# Inputs:
#   sample                   prefix of the output VCFs
#   normal_bam(_index)       matched-normal BAM and its index (required)
#   tumor_bam(_index)        tumor BAM and its index
#   ref_dir, fasta           reference directory and FASTA file name inside it
# Outputs:
#   varscan_snp, varscan_indel
task somatic {
    String sample
    File normal_bam
    File normal_bam_index
    File tumor_bam
    File tumor_bam_index

    File ref_dir
    String fasta
    String docker
    String cluster_config
    String disk_size

    command <<<
        # pipefail is essential here: without it only VarScan's exit status is
        # seen, so a samtools failure would leave truncated or empty calls
        # behind a successful exit code.
        set -o pipefail
        set -e

        samtools mpileup -f ${ref_dir}/${fasta} -B ${normal_bam} ${tumor_bam} \
            | java -Xmx32g -jar /opt/VarScan.v2.4.3.jar somatic \
                --mpileup 1 \
                --min-coverage 3 \
                --min-coverage-normal 3 \
                --min-coverage-tumor 3 \
                --min-var-freq 0.08 \
                --p-value 0.10 \
                --somatic-p-value 0.05 \
                --output-vcf 1 \
                --output-snp ${sample}.VarScan.SNP \
                --output-indel ${sample}.VarScan.INDEL \
                --strand-filter 1
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File varscan_snp = "${sample}.VarScan.SNP.vcf"
        File varscan_indel = "${sample}.VarScan.INDEL.vcf"
    }
}

+ 47
- 0
tasks/somaticFilter.wdl View File

@@ -0,0 +1,47 @@
# Filter high-confidence VarScan2 calls with `VarScan somaticFilter` and merge
# SNPs and INDELs per class.
#
# Inputs:
#   sample                               prefix of every output file
#   varscan_snp_{somatic,germline,loh}_hc    high-confidence SNP VCFs
#   varscan_indel_{somatic,germline,loh}_hc  high-confidence INDEL VCFs
#   varscan_indel                        raw INDEL VCF, passed as --indel-file
#                                        when filtering SNPs
# Outputs:
#   varscan_{somatic,germline,loh}_filter  per-class VCF: the filtered INDEL
#   file (header included) followed by the non-header lines of the filtered
#   SNP file. The records are concatenated, not re-sorted.
task somaticFilter {
    String sample
    File varscan_snp_somatic_hc
    File varscan_snp_germline_hc
    File varscan_snp_loh_hc
    File varscan_indel_somatic_hc
    File varscan_indel_germline_hc
    File varscan_indel_loh_hc
    File varscan_indel

    String docker
    String cluster_config
    String disk_size

    command <<<
        # Fail fast. Without this, a failed SNP filter step still produced a
        # "merged" VCF holding only INDELs, because cat succeeds even when the
        # awk feeding it cannot open its input.
        set -o pipefail
        set -e

        VARSCAN="java -Xmx32g -jar /opt/VarScan.v2.4.3.jar"
        # Thresholds shared by all six filter runs.
        FILTER_OPTS="--min-coverage 10 --min-reads2 2 --min-strands2 1 --min-avg-qual 20 --p-value 0.1"

        # SNPs: additionally given the raw INDEL calls via --indel-file.
        $VARSCAN somaticFilter ${varscan_snp_somatic_hc} $FILTER_OPTS --indel-file ${varscan_indel} --output-file ${sample}.VarScan.SNP.Somatic.filter.vcf
        $VARSCAN somaticFilter ${varscan_snp_germline_hc} $FILTER_OPTS --indel-file ${varscan_indel} --output-file ${sample}.VarScan.SNP.Germline.filter.vcf
        $VARSCAN somaticFilter ${varscan_snp_loh_hc} $FILTER_OPTS --indel-file ${varscan_indel} --output-file ${sample}.VarScan.SNP.LOH.filter.vcf

        # INDELs.
        $VARSCAN somaticFilter ${varscan_indel_somatic_hc} $FILTER_OPTS --output-file ${sample}.VarScan.INDEL.Somatic.filter.vcf
        $VARSCAN somaticFilter ${varscan_indel_germline_hc} $FILTER_OPTS --output-file ${sample}.VarScan.INDEL.Germline.filter.vcf
        $VARSCAN somaticFilter ${varscan_indel_loh_hc} $FILTER_OPTS --output-file ${sample}.VarScan.INDEL.LOH.filter.vcf

        # Merge SNP and INDEL: INDEL file first (keeps its header), then the
        # SNP records with header lines stripped.
        awk '{if ($1!~/^#/) print}' ${sample}.VarScan.SNP.Somatic.filter.vcf | cat ${sample}.VarScan.INDEL.Somatic.filter.vcf - > ${sample}.VarScan.Somatic.filter.vcf
        awk '{if ($1!~/^#/) print}' ${sample}.VarScan.SNP.Germline.filter.vcf | cat ${sample}.VarScan.INDEL.Germline.filter.vcf - > ${sample}.VarScan.Germline.filter.vcf
        awk '{if ($1!~/^#/) print}' ${sample}.VarScan.SNP.LOH.filter.vcf | cat ${sample}.VarScan.INDEL.LOH.filter.vcf - > ${sample}.VarScan.LOH.filter.vcf
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File varscan_somatic_filter = "${sample}.VarScan.Somatic.filter.vcf"
        File varscan_germline_filter = "${sample}.VarScan.Germline.filter.vcf"
        File varscan_loh_filter = "${sample}.VarScan.LOH.filter.vcf"
    }
}

+ 184
- 1
workflow.wdl View File

@@ -14,6 +14,10 @@ import "./tasks/MSIsensor.wdl" as MSIsensor
import "./tasks/HRD.wdl" as HRD
import "./tasks/TMB.wdl" as TMB

import "./tasks/fastqc.wdl" as fastqc
import "./tasks/fastqscreen.wdl" as fastqscreen
import "./tasks/qualimap.wdl" as qualimap

import "./tasks/somatic.wdl" as somatic
import "./tasks/processSomatic.wdl" as processSomatic
import "./tasks/somaticFilter.wdl" as somaticFilter
import "./tasks/TNscope.wdl" as TNscope

workflow {{ project_name }} {
String sample_id
@@ -57,6 +61,16 @@ workflow {{ project_name }} {
String cluster_2cpu_4gb
String cluster_16cpu_32gb
Boolean qc
String fastqc_docker
String fastqscreen_docker
String qualimap_docker
File screen_ref_dir
File fastq_screen_conf

Boolean multi_caller
String varscan_docker

if (tumor_fastq_1!= "") {
call mapping.mapping as tumor_mapping {
input:
@@ -128,6 +142,41 @@ workflow {{ project_name }} {
disk_size=disk_size,
cluster_config=cluster_16cpu_32gb
}

if (qc){
call fastqc.fastqc as tumor_fastqc {
input:
sample=sample_id + '.T',
read1=tumor_fastq_1,
read2=tumor_fastq_2,
docker=fastqc_docker,
disk_size=disk_size,
cluster_config=cluster_2cpu_4gb
}
call fastqscreen.fastq_screen as tumor_fastqscreen {
input:
sample=sample_id + '.T',
read1=tumor_fastq_1,
read2=tumor_fastq_2,
screen_ref_dir=screen_ref_dir,
fastq_screen_conf=fastq_screen_conf,
docker=fastqscreen_docker,
disk_size=disk_size,
cluster_config=cluster_2cpu_4gb
}
call qualimap.qualimap as tumor_qualimap {
input:
sample=sample_id + '.T',
bam=tumor_Dedup.deduped_bam,
bai=tumor_Dedup.deduped_bam_index,
covered_bed=regions,
docker=qualimap_docker,
disk_size=disk_size,
cluster_config=cluster_16cpu_32gb
}
}
}
if (normal_fastq_1!= "") {
@@ -229,8 +278,43 @@ workflow {{ project_name }} {
cluster_config=cluster_2cpu_4gb,
disk_size=disk_size
}

if (qc){
call fastqc.fastqc as normal_fastqc {
input:
sample=sample_id + '.N',
read1=normal_fastq_1,
read2=normal_fastq_2,
docker=fastqc_docker,
disk_size=disk_size,
cluster_config=cluster_2cpu_4gb
}
call fastqscreen.fastq_screen as normal_fastqscreen {
input:
sample=sample_id + '.N',
read1=normal_fastq_1,
read2=normal_fastq_2,
screen_ref_dir=screen_ref_dir,
fastq_screen_conf=fastq_screen_conf,
docker=fastqscreen_docker,
disk_size=disk_size,
cluster_config=cluster_2cpu_4gb
}
call qualimap.qualimap as normal_qualimap {
input:
sample=sample_id + '.N',
bam=normal_Dedup.deduped_bam,
bai=normal_Dedup.deduped_bam_index,
covered_bed=regions,
docker=qualimap_docker,
disk_size=disk_size,
cluster_config=cluster_16cpu_32gb
}
}
}
call Manta.Manta as Manta {
input:
sample=sample_id,
@@ -353,4 +437,103 @@ workflow {{ project_name }} {
cluster_config=cluster_2cpu_4gb,
disk_size=disk_size
}
# Additional somatic callers (VarScan2 and Sentieon TNscope), each followed by
# bcftools normalization and ANNOVAR annotation. They run only when the
# multi_caller workflow input is true; previously the flag was declared but
# never consulted, so these calls always ran.
# NOTE(review): the VarScan `somatic` task declares normal_bam as a required
# File - confirm how tumor-only runs should behave when multi_caller is true.
if (multi_caller) {
    call somatic.somatic as somatic {
        input:
            sample=sample_id,
            normal_bam=normal_BQSR.recaled_bam,
            normal_bam_index=normal_BQSR.recaled_bam_index,
            tumor_bam=tumor_BQSR.recaled_bam,
            tumor_bam_index=tumor_BQSR.recaled_bam_index,
            ref_dir=ref_dir,
            fasta=fasta,
            docker=varscan_docker,
            disk_size=disk_size,
            cluster_config=cluster_16cpu_32gb
    }

    call processSomatic.processSomatic as processSomatic {
        input:
            sample=sample_id,
            varscan_snp=somatic.varscan_snp,
            varscan_indel=somatic.varscan_indel,
            docker=varscan_docker,
            disk_size=disk_size,
            cluster_config=cluster_16cpu_32gb
    }

    call somaticFilter.somaticFilter as somaticFilter {
        input:
            sample=sample_id,
            varscan_snp_somatic_hc=processSomatic.varscan_snp_somatic_hc,
            varscan_snp_germline_hc=processSomatic.varscan_snp_germline_hc,
            varscan_snp_loh_hc=processSomatic.varscan_snp_loh_hc,
            varscan_indel_somatic_hc=processSomatic.varscan_indel_somatic_hc,
            varscan_indel_germline_hc=processSomatic.varscan_indel_germline_hc,
            varscan_indel_loh_hc=processSomatic.varscan_indel_loh_hc,
            varscan_indel=somatic.varscan_indel,
            docker=varscan_docker,
            disk_size=disk_size,
            cluster_config=cluster_16cpu_32gb
    }

    call bcftools.bcftools as VarScan_bcftools {
        input:
            vcf=somaticFilter.varscan_somatic_filter,
            fasta=fasta,
            ref_dir=ref_dir,
            docker=bcftools_docker,
            cluster_config=cluster_2cpu_4gb,
            disk_size=disk_size
    }

    call ANNOVAR.ANNOVAR as VarScan_ANNOVAR {
        input:
            vcf=VarScan_bcftools.norm_vcf,
            annovar_database=annovar_database,
            docker=annovar_docker,
            cluster_config=cluster_16cpu_32gb,
            disk_size=disk_size
    }

    call TNscope.TNscope as TNscope {
        input:
            SENTIEON_LICENSE=SENTIEON_LICENSE,
            sample=sample_id,
            normal_recaled_bam=normal_BQSR.recaled_bam,
            normal_recaled_bam_index=normal_BQSR.recaled_bam_index,
            tumor_recaled_bam=tumor_BQSR.recaled_bam,
            tumor_recaled_bam_index=tumor_BQSR.recaled_bam_index,
            normal_name=sample_id + ".N",
            tumor_name=sample_id + ".T",
            fasta=fasta,
            ref_dir=ref_dir,
            regions=regions,
            interval_padding=interval_padding,
            dbsnp=dbsnp,
            dbsnp_dir=dbsnp_dir,
            docker=sentieon_docker,
            cluster_config=cluster_16cpu_32gb,
            disk_size=disk_size
    }

    call bcftools.bcftools as TNscope_bcftools {
        input:
            vcf=TNscope.TNscope_vcf,
            fasta=fasta,
            ref_dir=ref_dir,
            docker=bcftools_docker,
            cluster_config=cluster_2cpu_4gb,
            disk_size=disk_size
    }

    call ANNOVAR.ANNOVAR as TNscope_ANNOVAR {
        input:
            vcf=TNscope_bcftools.norm_vcf,
            annovar_database=annovar_database,
            docker=annovar_docker,
            cluster_config=cluster_16cpu_32gb,
            disk_size=disk_size
    }
}
}

Loading…
Cancel
Save