@@ -0,0 +1,49 @@ | |||
from __future__ import division | |||
import pandas as pd | |||
import sys, argparse, os | |||
import fileinput | |||
import re | |||
# Extract per-site mendelian concordance information for two trios (LCL5 and
# LCL6) of one family and write it to "<family>.txt".


def parse_args(argv=None):
    """Parse the command-line arguments.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace with attributes LCL5, LCL6 and family.
    """
    parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")
    parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
    parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
    parser.add_argument('-family', '--family', type=str, help='family name', required=True)
    return parser.parse_args(argv)


def info_value(info):
    """Return the second '='-separated field of an INFO entry, or '.' if missing.

    A missing entry (NaN/None) occurs when the site is present in only one of
    the two tables after the outer merge.
    """
    if pd.isnull(info):
        return '.'
    return info.split('=')[1]


def mendelian_tag(child_x, child_y, info_x, info_y):
    """Build the '<same>:<info_x>:<info_y>' tag for one merged site.

    <same> is '1' when both CHILD columns hold the same value and '0'
    otherwise (a value missing on either side therefore yields '0').
    """
    same = '1' if child_x == child_y else '0'
    return ':'.join([same, info_value(info_x), info_value(info_y)])


def merge_two_family(lcl5_path, lcl6_path, out_path):
    """Outer-merge both tables on (#CHROM, POS) and write one line per site.

    Each output line is '<chrom>\\t<pos>\\t<tag>\\n' where <tag> comes from
    mendelian_tag(). Both inputs must provide the columns '#CHROM', 'POS',
    'INFO' and 'CHILD'.
    """
    lcl5_dat = pd.read_table(lcl5_path)
    lcl6_dat = pd.read_table(lcl6_path)
    merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer', on=['#CHROM', 'POS'])
    columns = [merged_df[name] for name in
               ('#CHROM', 'POS', 'CHILD_x', 'CHILD_y', 'INFO_x', 'INFO_y')]
    # The context manager guarantees the output is flushed and closed, even
    # when a malformed row raises part-way through.
    with open(out_path, 'w') as family_file:
        for chrom, pos, child_x, child_y, info_x, info_y in zip(*columns):
            tag = mendelian_tag(child_x, child_y, info_x, info_y)
            # str(chrom): the chromosome column may be parsed as a number when
            # contig names are purely numeric.
            family_file.write(str(chrom) + '\t' + str(pos) + '\t' + tag + '\n')


def main(argv=None):
    """Command-line entry point: parse arguments and write '<family>.txt'."""
    args = parse_args(argv)
    merge_two_family(args.LCL5, args.LCL6, args.family + '.txt')


if __name__ == '__main__':
    main()
@@ -83,14 +83,14 @@ def process(oneLine): | |||
line = oneLine.rstrip() | |||
strings = line.strip().split('\t') | |||
# replace . | |||
# LCL5 uniq | |||
# LCL6 uniq | |||
if strings[11] == '.': | |||
strings[11] = '0/0' | |||
strings[9] = strings[12] | |||
strings[10] = strings[13] | |||
else: | |||
pass | |||
# LCL6 uniq | |||
# LCL5 uniq | |||
if strings[14] == '.': | |||
strings[14] = '0/0' | |||
strings[12] = strings[9] |
@@ -1,15 +1,10 @@ | |||
{ | |||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | |||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.two_family_merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.2", | |||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | |||
"{{ project_name }}.disk_size": "150", | |||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | |||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc", | |||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | |||
} |
@@ -9,10 +9,10 @@ task reformVCF { | |||
python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name} | |||
cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL5.txt | |||
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL6.txt | |||
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL7.txt | |||
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL8.txt | |||
cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL5.txt | |||
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL6.txt | |||
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL7.txt | |||
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL8.txt | |||
>>> | |||
@@ -0,0 +1,26 @@ | |||
# Merge the mendelian results of the LCL5 and LCL6 trios of one family.
#
# Inputs:
#   LCL5_trio_vcf / LCL6_trio_vcf  trio VCF files to be merged
#   family_name                    prefix used for intermediate and output files
#   docker, cluster_config, disk_size  runtime settings supplied by the workflow
# Output:
#   family_mendelian_info          "<family_name>.txt"
task two_family_merge {
    File LCL5_trio_vcf
    File LCL6_trio_vcf
    String family_name
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Strip only the '##' meta-information lines; the pattern is anchored so
        # that records which merely contain '##' somewhere are not dropped. The
        # '#CHROM' column-header line is kept.
        grep -v '^##' ${LCL5_trio_vcf} > ${family_name}.LCL5.txt
        grep -v '^##' ${LCL6_trio_vcf} > ${family_name}.LCL6.txt
        # Absolute path: the command runs in the task's execution directory, so a
        # relative 'opt/...' would not resolve to the image's /opt.
        # NOTE(review): assumes the script is installed at /opt/ in the image,
        # like /opt/reformVCF.py used by the reformVCF task - confirm.
        python /opt/merge_two_family.py -LCL5 ${family_name}.LCL5.txt -LCL6 ${family_name}.LCL6.txt -family ${family_name}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File family_mendelian_info = "${family_name}.txt"
    }
}
@@ -1,8 +1,5 @@ | |||
import "./tasks/mendelian.wdl" as mendelian | |||
import "./tasks/zipIndex.wdl" as zipIndex | |||
import "./tasks/VCFrename.wdl" as VCFrename | |||
import "./tasks/mergeSister.wdl" as mergeSister | |||
import "./tasks/reformVCF.wdl" as reformVCF | |||
import "./tasks/two_family_merge.wdl" as two_family_merge | |||
workflow {{ project_name }} { | |||
@@ -40,58 +37,13 @@ workflow {{ project_name }} { | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL5zipIndex { | |||
call two_family_merge.two_family_merge as two_family_merge { | |||
input: | |||
vcf=LCL5mendelian.trio_vcf, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call zipIndex.zipIndex as LCL6zipIndex { | |||
input: | |||
vcf=LCL6mendelian.trio_vcf, | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call VCFrename.VCFrename as LCL5VCFrename { | |||
input: | |||
trio_vcf_gz=LCL5zipIndex.vcf_gz, | |||
trio_vcf_idx=LCL5zipIndex.vcf_idx, | |||
mother_name=quartet[7], | |||
father_name=quartet[6], | |||
child_name=quartet[4], | |||
LCL5_trio_vcf=LCL5mendelian.trio_vcf, | |||
LCL6_trio_vcf=LCL6mendelian.trio_vcf, | |||
family_name=quartet[8], | |||
child="LCL5", | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call VCFrename.VCFrename as LCL6VCFrename { | |||
input: | |||
trio_vcf_gz=LCL6zipIndex.vcf_gz, | |||
trio_vcf_idx=LCL6zipIndex.vcf_idx, | |||
mother_name=quartet[7], | |||
father_name=quartet[6], | |||
child_name=quartet[5], | |||
family_name=quartet[8], | |||
child="LCL6", | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call mergeSister.mergeSister as mergeSister { | |||
input: | |||
LCL5_trio_vcf_gz=LCL5VCFrename.rename_trio_vcf_gz, | |||
LCL5_trio_vcf_idx=LCL5VCFrename.rename_trio_vcf_idx, | |||
LCL6_trio_vcf_gz=LCL6VCFrename.rename_trio_vcf_gz, | |||
LCL6_trio_vcf_idx=LCL6VCFrename.rename_trio_vcf_idx, | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
call reformVCF.reformVCF as reformVCF { | |||
input: | |||
family_mendelian_info=mergeSister.family_mendelian_info, | |||
family_name=quartet[8], | |||
cluster_config=cluster_config, | |||
disk_size=disk_size | |||
} | |||
} | |||
} |