from __future__ import division | |||||
import pandas as pd | |||||
import sys, argparse, os | |||||
import fileinput | |||||
import re | |||||
# input arguments
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings; ``None`` makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed namespace with the ``LCL5``, ``LCL6`` and ``family``
        attributes (all required strings).
    """
    parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")
    parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
    parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
    parser.add_argument('-family', '--family', type=str, help='family name', required=True)
    return parser.parse_args(argv)


def mendelian_tag(child_x, child_y, info_x, info_y):
    """Build the colon-separated tag written for one merged site.

    Args:
        child_x: CHILD value of the site in the LCL5 table (NaN if absent).
        child_y: CHILD value of the site in the LCL6 table (NaN if absent).
        info_x: INFO value in the LCL5 table, of the form ``key=value``
            (NaN if the site is absent from that table).
        info_y: INFO value in the LCL6 table, same form as ``info_x``.

    Returns:
        ``"<same>:<info5>:<info6>"`` where ``<same>`` is ``'1'`` when both
        CHILD values compare equal and ``'0'`` otherwise, and each info
        field is the text after the first ``=`` of the INFO value, or
        ``'.'`` when that INFO value is missing.
    """
    fields = ['1' if child_x == child_y else '0']
    for info in (info_x, info_y):
        # A site present in only one table has a null INFO on the other side.
        fields.append('.' if pd.isnull(info) else info.split('=')[1])
    return ':'.join(fields)


def main(argv=None):
    """Outer-merge the two input tables and write one line per site.

    Both inputs are read with ``pd.read_table`` and joined on ``#CHROM`` and
    ``POS``. The output file ``<family>.txt`` receives
    ``chrom<TAB>pos<TAB>tag`` lines, where ``tag`` comes from
    ``mendelian_tag``.
    """
    args = parse_args(argv)

    # input files
    lcl5_dat = pd.read_table(args.LCL5)
    lcl6_dat = pd.read_table(args.LCL6)
    merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer', on=['#CHROM', 'POS'])

    # Read the columns by name: '#CHROM' is not a valid attribute name, so
    # itertuples() would only expose it under a positional alias.
    columns = [merged_df[name] for name in
               ('#CHROM', 'POS', 'CHILD_x', 'CHILD_y', 'INFO_x', 'INFO_y')]

    # output file; the context manager guarantees it is flushed and closed
    with open(args.family + '.txt', 'w') as family_file:
        for chrom, pos, child_x, child_y, info_x, info_y in zip(*columns):
            tag = mendelian_tag(child_x, child_y, info_x, info_y)
            # str(chrom) keeps purely numeric chromosome names from raising.
            family_file.write(str(chrom) + '\t' + str(pos) + '\t' + tag + '\n')


if __name__ == '__main__':
    main()
line = oneLine.rstrip() | line = oneLine.rstrip() | ||||
strings = line.strip().split('\t') | strings = line.strip().split('\t') | ||||
# replace . | # replace . | ||||
# LCL5 uniq | |||||
# LCL6 uniq | |||||
if strings[11] == '.': | if strings[11] == '.': | ||||
strings[11] = '0/0' | strings[11] = '0/0' | ||||
strings[9] = strings[12] | strings[9] = strings[12] | ||||
strings[10] = strings[13] | strings[10] = strings[13] | ||||
else: | else: | ||||
pass | pass | ||||
# LCL6 uniq | |||||
# LCL5 uniq | |||||
if strings[14] == '.': | if strings[14] == '.': | ||||
strings[14] = '0/0' | strings[14] = '0/0' | ||||
strings[12] = strings[9] | strings[12] = strings[9] |
{ | { | ||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | ||||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.two_family_merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.2", | |||||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.disk_size": "150", | "{{ project_name }}.disk_size": "150", | ||||
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", | ||||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | |||||
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc", | "{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc", | ||||
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" | ||||
} | } |
python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name} | python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name} | ||||
cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL5.txt | |||||
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL6.txt | |||||
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL7.txt | |||||
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL8.txt | |||||
cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL5.txt | |||||
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL6.txt | |||||
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL7.txt | |||||
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL8.txt | |||||
>>> | >>> | ||||
# Merge the LCL5 and LCL6 trio results of one family into a single table.
#
# Inputs:
#   LCL5_trio_vcf   trio VCF for the LCL5 child
#   LCL6_trio_vcf   trio VCF for the LCL6 child
#   family_name     prefix used for the intermediate and output file names
#   docker          container image the command runs in
#   cluster_config  value passed to the `cluster` runtime attribute
#   disk_size       data disk size, inserted into the `dataDisk` attribute
#
# Output:
#   family_mendelian_info   "<family_name>.txt" written by merge_two_family.py
task two_family_merge {
	File LCL5_trio_vcf
	File LCL6_trio_vcf
	String family_name
	String docker
	String cluster_config
	String disk_size

	command <<<
		# Drop every line containing '##' (VCF meta-information); the
		# '#CHROM' column header line is kept for the merge script.
		grep -v '##' ${LCL5_trio_vcf} > ${family_name}.LCL5.txt
		grep -v '##' ${LCL6_trio_vcf} > ${family_name}.LCL6.txt
		# Absolute path: the command does not run from '/', so a relative
		# 'opt/...' path would not resolve inside the container.
		python /opt/merge_two_family.py -LCL5 ${family_name}.LCL5.txt -LCL6 ${family_name}.LCL6.txt -family ${family_name}
	>>>

	runtime {
		docker:docker
		cluster: cluster_config
		systemDisk: "cloud_ssd 40"
		dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
	}

	output {
		File family_mendelian_info = "${family_name}.txt"
	}
}
import "./tasks/mendelian.wdl" as mendelian | import "./tasks/mendelian.wdl" as mendelian | ||||
import "./tasks/zipIndex.wdl" as zipIndex | |||||
import "./tasks/VCFrename.wdl" as VCFrename | |||||
import "./tasks/mergeSister.wdl" as mergeSister | |||||
import "./tasks/reformVCF.wdl" as reformVCF | |||||
import "./tasks/two_family_merge.wdl" as two_family_merge | |||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call zipIndex.zipIndex as LCL5zipIndex { | |||||
call two_family_merge.two_family_merge as two_family_merge { | |||||
input: | input: | ||||
vcf=LCL5mendelian.trio_vcf, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call zipIndex.zipIndex as LCL6zipIndex { | |||||
input: | |||||
vcf=LCL6mendelian.trio_vcf, | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call VCFrename.VCFrename as LCL5VCFrename { | |||||
input: | |||||
trio_vcf_gz=LCL5zipIndex.vcf_gz, | |||||
trio_vcf_idx=LCL5zipIndex.vcf_idx, | |||||
mother_name=quartet[7], | |||||
father_name=quartet[6], | |||||
child_name=quartet[4], | |||||
LCL5_trio_vcf=LCL5mendelian.trio_vcf, | |||||
LCL6_trio_vcf=LCL6mendelian.trio_vcf, | |||||
family_name=quartet[8], | family_name=quartet[8], | ||||
child="LCL5", | |||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call VCFrename.VCFrename as LCL6VCFrename { | |||||
input: | |||||
trio_vcf_gz=LCL6zipIndex.vcf_gz, | |||||
trio_vcf_idx=LCL6zipIndex.vcf_idx, | |||||
mother_name=quartet[7], | |||||
father_name=quartet[6], | |||||
child_name=quartet[5], | |||||
family_name=quartet[8], | |||||
child="LCL6", | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call mergeSister.mergeSister as mergeSister { | |||||
input: | |||||
LCL5_trio_vcf_gz=LCL5VCFrename.rename_trio_vcf_gz, | |||||
LCL5_trio_vcf_idx=LCL5VCFrename.rename_trio_vcf_idx, | |||||
LCL6_trio_vcf_gz=LCL6VCFrename.rename_trio_vcf_gz, | |||||
LCL6_trio_vcf_idx=LCL6VCFrename.rename_trio_vcf_idx, | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call reformVCF.reformVCF as reformVCF { | |||||
input: | |||||
family_mendelian_info=mergeSister.family_mendelian_info, | |||||
family_name=quartet[8], | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | } | ||||
} | } |