浏览代码

vcf

master
LUYAO REN 4 年前
父节点
当前提交
5b9cf8702a
共有 3 个文件被更改,包括 25 次插入和 12 次删除
  1. +2
    -3
      inputs
  2. +22
    -5
      tasks/benchmark.wdl
  3. +1
    -4
      workflow.wdl

+ 2
- 3
inputs 查看文件

@@ -1,12 +1,11 @@
{
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_dataset_v202011/",
"{{ project_name }}.benchmarking_dir": "oss://pgx-result/renluyao/manuscript_v3.0/reference_datasets_v202103/",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.disk_size": "500",
"{{ project_name }}.vcf_idx": "{{ vcf_idx }}",
"{{ project_name }}.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-hap:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.7xlarge img-ubuntu-vpc",
"{{ project_name }}.vcf": "{{ vcf }}",
"{{ project_name }}.sample": "{{ sample }}",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
"{{ project_name }}.ref_dir": "oss://pgx-reference-data/GRCh38.d1.vd1/"
}


+ 22
- 5
tasks/benchmark.wdl 查看文件

@@ -1,6 +1,5 @@
task benchmark {
File vcf
File vcf_idx
File benchmarking_dir
File ref_dir
String sample
@@ -16,17 +15,33 @@ task benchmark {
nt=$(nproc)
mkdir -p /cromwell_root/tmp
cp -r ${ref_dir} /cromwell_root/tmp/
cp -r ${benchmarking_dir} /cromwell_root/tmp/

export HGREF=/cromwell_root/tmp/reference_data/GRCh38.d1.vd1.fa

cat ${vcf} | grep '#' > header
cat ${vcf} | grep -v '#' | grep -v '0/0' | grep -v '\./\.'| awk '
BEGIN { OFS = "\t" }
{
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[1];
}
}
{ print }
' > body
cat header body > filtered.vcf

/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg bgzip filtered.vcf -c > ${sample}.rtg.vcf.gz
/opt/rtg-tools/dist/rtg-tools-3.10.1-4d58ead/rtg index -f vcf ${sample}.rtg.vcf.gz

if [[ ${sample} =~ "LCL5" ]];then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL5.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL5.high.confidence.calls.vcf ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL6" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL6.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL6.high.confidence.calls.vcf.gz ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL7" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL7.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL7.high.confidence.calls.vcf.gz ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
elif [[ ${sample} =~ "LCL8" ]]; then
/opt/hap.py/bin/hap.py ${benchmarking_dir}/LCL8.ref.v20201103.vcf.gz ${vcf} -f ${benchmarking_dir}/Quartet.callable.voted.collapse.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
/opt/hap.py/bin/hap.py /cromwell_root/tmp/reference_datasets_v202103/LCL8.high.confidence.calls.vcf.gz ${sample}.rtg.vcf.gz -f /cromwell_root/tmp/reference_datasets_v202103/Quartet.high.confidence.region.v202103.bed --threads $nt -o ${sample} -r ${ref_dir}/${fasta}
else
echo "only for quartet samples"
fi
@@ -40,6 +55,8 @@ task benchmark {
}

output {
File rtg_vcf = "${sample}.rtg.vcf.gz"
File rtg_vcf_index = "${sample}.rtg.vcf.gz.tbi"
File gzip_vcf = "${sample}.vcf.gz"
File gzip_vcf_index = "${sample}.vcf.gz.tbi"
File roc_all_csv = "${sample}.roc.all.csv.gz"

+ 1
- 4
workflow.wdl 查看文件

@@ -2,7 +2,6 @@ import "./tasks/benchmark.wdl" as benchmark

workflow {{ project_name }} {
File vcf
File vcf_idx
File benchmarking_dir
File ref_dir
String sample
@@ -14,7 +13,6 @@ workflow {{ project_name }} {
call benchmark.benchmark as benchmark {
input:
vcf=vcf,
vcf_idx=vcf_idx,
benchmarking_dir=benchmarking_dir,
ref_dir=ref_dir,
sample=sample,
@@ -23,5 +21,4 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
}

}

正在加载...
取消
保存