Browse Source

validation

master
LUYAO REN 4 years ago
parent
commit
8e2e25eda6
3 changed files with 127 additions and 3 deletions
  1. +3
    -0
      inputs
  2. +111
    -0
      tasks/validation.wdl
  3. +13
    -3
      workflow.wdl

+ 3
- 0
inputs View File

@@ -1,5 +1,6 @@
{
"{{ project_name }}.satellite": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/Satellite.3.bed",
"{{ project_name }}.D5_10X": "oss://pgx-result/renluyao/manuscript_v3.0/10X/LCL5_10X.vcf.gz",
"{{ project_name }}.mhc": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/mhc.hg38.bed",
"{{ project_name }}.disk_size": "100",
"{{ project_name }}.vcf_idx": "{{ vcf_idx }}",
@@ -8,10 +9,12 @@
"{{ project_name }}.sample_name": "{{ sample_name }}",
"{{ project_name }}.del_breakpoint": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/Tier1.del.breakpoint",
"{{ project_name }}.ins_breakpoint": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/Tier1.ins.breakpoint",
"{{ project_name }}.sdf": "oss://pgx-reference-data/GRCh38.d1.vd1/GRCh38.d1.vd1.sdf/"
"{{ project_name }}.LINE": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/LINE.3.bed",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.large img-ubuntu-vpc",
"{{ project_name }}.LTR": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/LTR.3.bed",
"{{ project_name }}.simple_repeat": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/simple_repeat.3.bed",
"{{ project_name }}.D5_10X_idx": "oss://pgx-result/renluyao/manuscript_v3.0/10X/LCL5_10X.vcf.gz.tbi",
"{{ project_name }}.vcf": "{{ vcf }}",
"{{ project_name }}.confidence_bed": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v202011/Quartet.callable.voted.collapse.bed",
"{{ project_name }}.low_complexity": "oss://pgx-result/renluyao/manuscript_v3.0/difficult_region/Low_complexity.3.bed",

+ 111
- 0
tasks/validation.wdl View File

@@ -0,0 +1,111 @@
# Task: validation
#
# For each of ten region BED files (satellite, simple_repeat, SINE, LINE, LTR,
# low_complexity, SD, ins_breakpoint, del_breakpoint, mhc) this task:
#   1. restricts sub_vcf to the region with `rtg vcffilter --include-bed`,
#   2. compares the restricted calls against the D5_10X baseline VCF with
#      `rtg vcfeval`, using sdf as the reference template,
#   3. copies the summary.txt from each vcfeval output directory to a
#      flat, uniquely named file so it can be declared as a task output.
#
# Inputs:
#   sub_vcf / sub_vcf_idx   call set to evaluate, and its index
#   satellite ... mhc       region BED files passed to --include-bed
#   sdf                     reference template passed to `rtg vcfeval -t`
#   D5_10X / D5_10X_idx     baseline VCF passed to `rtg vcfeval -b`, and its index
#   sample_name, tag        used as the "sample_name.tag." prefix of every output
#   docker, cluster_config, disk_size
#                           runtime settings (see the runtime section)
#
# Outputs:
#   <region>_vcf            region-restricted VCF, one per region
#   <region>_summary        copy of the vcfeval summary.txt, one per region
task validation {
    File sub_vcf
    File sub_vcf_idx

    File satellite
    File simple_repeat
    File SINE
    File LINE
    File LTR
    File low_complexity
    File SD
    File ins_breakpoint
    File del_breakpoint
    File mhc

    File sdf
    File D5_10X
    File D5_10X_idx

    String sample_name
    String tag
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Step 1: restrict the input VCF to each region of interest.
        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.satellite.vcf.gz --include-bed=${satellite}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.simple_repeat.vcf.gz --include-bed=${simple_repeat}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.SINE.vcf.gz --include-bed=${SINE}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.LINE.vcf.gz --include-bed=${LINE}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.LTR.vcf.gz --include-bed=${LTR}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.low_complexity.vcf.gz --include-bed=${low_complexity}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.SD.vcf.gz --include-bed=${SD}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.ins_breakpoint.vcf.gz --include-bed=${ins_breakpoint}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.del_breakpoint.vcf.gz --include-bed=${del_breakpoint}

        rtg vcffilter -i ${sub_vcf} -o ${sample_name}.${tag}.mhc.vcf.gz --include-bed=${mhc}


        # Step 2: evaluate each region-restricted VCF against the baseline.
        # Each run writes its results into a directory named after the region.
        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.satellite.vcf.gz -o ${sample_name}.${tag}.satellite -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.simple_repeat.vcf.gz -o ${sample_name}.${tag}.simple_repeat -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.SINE.vcf.gz -o ${sample_name}.${tag}.SINE -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.LINE.vcf.gz -o ${sample_name}.${tag}.LINE -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.LTR.vcf.gz -o ${sample_name}.${tag}.LTR -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.low_complexity.vcf.gz -o ${sample_name}.${tag}.low_complexity -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.SD.vcf.gz -o ${sample_name}.${tag}.SD -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.ins_breakpoint.vcf.gz -o ${sample_name}.${tag}.ins_breakpoint -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.del_breakpoint.vcf.gz -o ${sample_name}.${tag}.del_breakpoint -t ${sdf}

        rtg vcfeval -b ${D5_10X} -c ${sample_name}.${tag}.mhc.vcf.gz -o ${sample_name}.${tag}.mhc -t ${sdf}


        # Step 3: copy each summary.txt to a uniquely named file.
        # cp takes the destination as its second operand. The previous form
        # redirected stdout instead, so cp failed with a missing-operand error
        # and left an empty summary file behind.
        cp ${sample_name}.${tag}.satellite/summary.txt ${sample_name}.${tag}.satellite.summary.txt
        cp ${sample_name}.${tag}.simple_repeat/summary.txt ${sample_name}.${tag}.simple_repeat.summary.txt
        cp ${sample_name}.${tag}.SINE/summary.txt ${sample_name}.${tag}.SINE.summary.txt
        cp ${sample_name}.${tag}.LINE/summary.txt ${sample_name}.${tag}.LINE.summary.txt
        cp ${sample_name}.${tag}.LTR/summary.txt ${sample_name}.${tag}.LTR.summary.txt
        cp ${sample_name}.${tag}.low_complexity/summary.txt ${sample_name}.${tag}.low_complexity.summary.txt
        cp ${sample_name}.${tag}.SD/summary.txt ${sample_name}.${tag}.SD.summary.txt
        cp ${sample_name}.${tag}.ins_breakpoint/summary.txt ${sample_name}.${tag}.ins_breakpoint.summary.txt
        cp ${sample_name}.${tag}.del_breakpoint/summary.txt ${sample_name}.${tag}.del_breakpoint.summary.txt
        cp ${sample_name}.${tag}.mhc/summary.txt ${sample_name}.${tag}.mhc.summary.txt
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        # Data disk size comes from the disk_size input and is mounted at /cromwell_root/.
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        # Region-restricted VCFs written by rtg vcffilter.
        File satellite_vcf = "${sample_name}.${tag}.satellite.vcf.gz"
        File simple_repeat_vcf = "${sample_name}.${tag}.simple_repeat.vcf.gz"
        File SINE_vcf = "${sample_name}.${tag}.SINE.vcf.gz"
        File LINE_vcf = "${sample_name}.${tag}.LINE.vcf.gz"
        File LTR_vcf = "${sample_name}.${tag}.LTR.vcf.gz"
        File low_complexity_vcf = "${sample_name}.${tag}.low_complexity.vcf.gz"
        File SD_vcf = "${sample_name}.${tag}.SD.vcf.gz"
        File ins_breakpoint_vcf = "${sample_name}.${tag}.ins_breakpoint.vcf.gz"
        File del_breakpoint_vcf = "${sample_name}.${tag}.del_breakpoint.vcf.gz"
        File mhc_vcf = "${sample_name}.${tag}.mhc.vcf.gz"
        # Copies of the summary.txt from each vcfeval output directory.
        File satellite_summary = "${sample_name}.${tag}.satellite.summary.txt"
        File simple_repeat_summary = "${sample_name}.${tag}.simple_repeat.summary.txt"
        File SINE_summary = "${sample_name}.${tag}.SINE.summary.txt"
        File LINE_summary = "${sample_name}.${tag}.LINE.summary.txt"
        File LTR_summary = "${sample_name}.${tag}.LTR.summary.txt"
        File low_complexity_summary = "${sample_name}.${tag}.low_complexity.summary.txt"
        File SD_summary = "${sample_name}.${tag}.SD.summary.txt"
        File ins_breakpoint_summary = "${sample_name}.${tag}.ins_breakpoint.summary.txt"
        File del_breakpoint_summary = "${sample_name}.${tag}.del_breakpoint.summary.txt"
        File mhc_summary = "${sample_name}.${tag}.mhc.summary.txt"
    }
}

+ 13
- 3
workflow.wdl View File

@@ -1,5 +1,5 @@
import "./tasks/in_out.wdl" as in_out
import "./tasks/region.wdl" as region
import "./tasks/validation.wdl" as validation


workflow {{ project_name }} {
@@ -20,6 +20,10 @@ workflow {{ project_name }} {
File del_breakpoint
File mhc

File sdf
File D5_10X
File D5_10X_idx

String sample_name

String docker
@@ -37,7 +41,7 @@ workflow {{ project_name }} {
disk_size=disk_size
}

call region.region as in_region {
call validation.validation as in_validation {
input:
sub_vcf=in_out.in_vcf,
sub_vcf_idx=in_out.in_vcf_idx,
@@ -51,6 +55,9 @@ workflow {{ project_name }} {
ins_breakpoint=ins_breakpoint,
del_breakpoint=del_breakpoint,
mhc=mhc,
sdf=sdf,
D5_10X=D5_10X,
D5_10X_idx=D5_10X_idx,
sample_name=sample_name,
tag="inside",
docker=docker,
@@ -58,7 +65,7 @@ workflow {{ project_name }} {
disk_size=disk_size
}

call region.region as out_region {
call validation.validation as out_validation {
input:
sub_vcf=in_out.out_vcf,
sub_vcf_idx=in_out.out_vcf_idx,
@@ -72,6 +79,9 @@ workflow {{ project_name }} {
ins_breakpoint=ins_breakpoint,
del_breakpoint=del_breakpoint,
mhc=mhc,
sdf=sdf,
D5_10X=D5_10X,
D5_10X_idx=D5_10X_idx,
sample_name=sample_name,
tag="outside",
docker=docker,

Loading…
Cancel
Save