Explorar el Código

merge results

master
LUYAO REN hace 5 años
padre
commit
5615b504e6
Se han modificado 3 ficheros con 156 adiciones y 5 borrados
  1. +14
    -1
      inputs
  2. +36
    -4
      tasks/merge.wdl
  3. +106
    -0
      workflow.wdl

+ 14
- 1
inputs Ver fichero

@@ -1,13 +1,26 @@
{
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.test_name": "{{ test_name }}",
"{{ project_name }}.disk_size": "150",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
"{{ project_name }}.LCL8familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL5variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL8variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
}
}


+ 36
- 4
tasks/merge.wdl Ver fichero

@@ -1,6 +1,7 @@
task merge {
Array[File] family_vcf_gz
Array[File] family_vcf_idx
String test_name
String sample
String docker
String cluster_config
@@ -8,9 +9,34 @@ task merge {
command <<<

rtg vcfmerge --force-merge-all --no-gzip -o ${sample}.merged.vcf ${sep=" " family_vcf_gz}
rtg vcfmerge --force-merge-all -o ${sample}.merged.vcf.gz ${sep=" " family_vcf_gz}
rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.snv.merged.vcf.gz --snps-only --all-samples
rtg vcffilter -i ${sample}.merged.vcf.gz -o ${sample}.indel.merged.vcf.gz --non-snps-only --all-samples

zcat ${sample}.indel.merged.vcf.gz | grep '#CHROM' | cut -f10-12 > name

for i in {10..12}; do zcat ${sample}.snv.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.'| sort | uniq -c | awk '{print $1, substr($1,0,7)}' | sed 's/\s\+/\t/g' | cut -f1 > $i.snv.txt; done

paste *.snv.txt | cat name - > snv.txt

for i in {10..12}; do zcat ${sample}.indel.merged.vcf.gz | grep -v '#' | cut -f$i | cut -d ':' -f2-4 | grep -v '\.'| sort | uniq -c | awk '{print $1, substr($1,0,7)}' | sed 's/\s\+/\t/g' | cut -f1 > $i.indel.txt; done

paste *.indel.txt | cat name - > index.txt

echo 'type' > column
echo '0,0,0' >> column
echo '0,0,1' >> column
echo '0,1,0' >> column
echo '0,1,1' >> column
echo '1,0,0' >> column
echo '1,0,1' >> column
echo '1,1,0' >> column
echo '1,1,1' >> column

paste column snv.txt > ${test_name}.snv.txt

paste column indel.txt > ${test_name}.indel.txt

cat ${sample}.merged.vcf | grep -v '#' | cut -f1-2 | sed s'/\t/_/g' | sort | uniq -c | sed 's/\s\+/\t/g' | awk '{ if ($1 != 1) { print } }' | cut -f3 > ${sample}.vcf_dup.txt

>>>

@@ -21,7 +47,13 @@ task merge {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File merged_vcf = "${sample}.merged.vcf"
File vcf_dup = "${sample}.vcf_dup.txt"
File merged_vcf = "${sample}.merged.vcf.gz"
File merged_vcf_idx = "${sample}.merged.vcf.gz.tbi"
File merged_snv = "${sample}.snv.merged.vcf.gz"
File merged_snv_idx = "${sample}.snv.merged.vcf.gz.tbi"
File merged_indel = "${sample}.indel.merged.vcf.gz"
File merged_indel_idx = "${sample}.indel.merged.vcf.gz.tbi"
File snv = "${test_name}.snv.txt"
File indel = "${test_name}.indel.txt"
}
}

+ 106
- 0
workflow.wdl Ver fichero

@@ -1,11 +1,18 @@
import "./tasks/variantsNorm.wdl" as variantsNorm
import "./tasks/mendelian.wdl" as mendelian
import "./tasks/zipIndex.wdl" as zipIndex
import "./tasks/VCFrename.wdl" as VCFrename
import "./tasks/mergeSister.wdl" as mergeSister
import "./tasks/reformVCF.wdl" as reformVCF
import "./tasks/merge.wdl" as merge
import "./tasks/votes.wdl" as votes

workflow {{ project_name }} {
File inputSamplesFile
Array[Array[File]] inputSamples = read_tsv(inputSamplesFile)
File ref_dir
String fasta
String test_name
String cluster_config
String disk_size

@@ -72,5 +79,104 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL5zipIndex {
input:
vcf=LCL5mendelian.trio_vcf,
sample="LCL5",
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6zipIndex {
input:
vcf=LCL6mendelian.trio_vcf,
sample="LCL6",
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFrename.VCFrename as LCL5VCFrename {
input:
trio_vcf_gz=LCL5zipIndex.vcf_gz,
trio_vcf_idx=LCL5zipIndex.vcf_idx,
mother_name=quartet[7],
father_name=quartet[6],
child_name=quartet[4],
family_name=quartet[8],
child="LCL5",
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFrename.VCFrename as LCL6VCFrename {
input:
trio_vcf_gz=LCL6zipIndex.vcf_gz,
trio_vcf_idx=LCL6zipIndex.vcf_idx,
mother_name=quartet[7],
father_name=quartet[6],
child_name=quartet[5],
family_name=quartet[8],
child="LCL6",
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeSister.mergeSister as mergeSister {
input:
LCL5_trio_vcf_gz=LCL5VCFrename.rename_trio_vcf_gz,
LCL5_trio_vcf_idx=LCL5VCFrename.rename_trio_vcf_idx,
LCL6_trio_vcf_gz=LCL6VCFrename.rename_trio_vcf_gz,
LCL6_trio_vcf_idx=LCL6VCFrename.rename_trio_vcf_idx,
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call reformVCF.reformVCF as reformVCF {
input:
family_mendelian_info=mergeSister.family_mendelian_info,
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL5familyzipIndex {
input:
vcf=reformVCF.LCL5_family_info,
sample='LCL5',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6familyzipIndex {
input:
vcf=reformVCF.LCL6_family_info,
sample='LCL6',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL7familyzipIndex {
input:
vcf=reformVCF.LCL7_family_info,
sample='LCL7',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL8familyzipIndex {
input:
vcf=reformVCF.LCL8_family_info,
sample='LCL8',
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
}
call merge.merge as LCL5merge {
input:
family_vcf_gz=LCL5familyzipIndex.vcf_gz,
family_vcf_idx=LCL5familyzipIndex.vcf_idx,
sample="LCL5",
test_name=test_name,
cluster_config=cluster_config,
disk_size=disk_size
}

}

Cargando…
Cancelar
Guardar