LUYAO REN 5 lat temu
rodzic
commit
f5116e3ebc
6 zmienionych plików z 86 dodań i 64 usunięć
  1. +49
    -0
      codescripts/merge_two_family.py
  2. +2
    -2
      codescripts/reformVCF.py
  3. +1
    -6
      inputs
  4. +4
    -4
      tasks/reformVCF.wdl
  5. +26
    -0
      tasks/two_family_merge.wdl
  6. +4
    -52
      workflow.wdl

+ 49
- 0
codescripts/merge_two_family.py Wyświetl plik

from __future__ import division
import pandas as pd
import sys, argparse, os
import fileinput
import re

# input arguments
parser = argparse.ArgumentParser(description="this script is to extract mendelian concordance information")

parser.add_argument('-LCL5', '--LCL5', type=str, help='LCL5 family info', required=True)
parser.add_argument('-LCL6', '--LCL6', type=str, help='LCL6 family info', required=True)
parser.add_argument('-family', '--family', type=str, help='family name', required=True)


args = parser.parse_args()
lcl5 = args.LCL5
lcl6 = args.LCL6
family = args.family


# output file
family_name = family + '.txt'

family_file = open(family_name,'w')

# input files
lcl5_dat = pd.read_table(lcl5)
lcl6_dat = pd.read_table(lcl6)

merged_df = pd.merge(lcl5_dat, lcl6_dat, how='outer', left_on=['#CHROM','POS'], right_on = ['#CHROM','POS'])


for row in merged_df.itertuples():
if row.CHILD_x == row.CHILD_y:
mendelian = '1'
else:
mendelian = '0'
if pd.isnull(row.INFO_x) == True:
mendelian = mendelian + ':.'
else:
mendelian = mendelian + ':' + row.INFO_x.split('=')[1]
if pd.isnull(row.INFO_y) == True:
mendelian = mendelian + ':.'
else:
mendelian = mendelian + ':' + row.INFO_y.split('=')[1]


outline = row._1 + '\t' + str(row.POS) + '\t' + mendelian + '\n'
family_file.write(outline)

+ 2
- 2
codescripts/reformVCF.py Wyświetl plik

line = oneLine.rstrip() line = oneLine.rstrip()
strings = line.strip().split('\t') strings = line.strip().split('\t')
# replace . # replace .
# LCL5 uniq
# LCL6 uniq
if strings[11] == '.': if strings[11] == '.':
strings[11] = '0/0' strings[11] = '0/0'
strings[9] = strings[12] strings[9] = strings[12]
strings[10] = strings[13] strings[10] = strings[13]
else: else:
pass pass
# LCL6 uniq
# LCL5 uniq
if strings[14] == '.': if strings[14] == '.':
strings[14] = '0/0' strings[14] = '0/0'
strings[12] = strings[9] strings[12] = strings[9]

+ 1
- 6
inputs Wyświetl plik

{ {
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.two_family_merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.2",
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", "{{ project_name }}.LCL5mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.disk_size": "150", "{{ project_name }}.disk_size": "150",
"{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}", "{{ project_name }}.inputSamplesFile": "{{ inputSamplesFile }}",
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc", "{{ project_name }}.cluster_config": "OnDemand bcs.b4.xlarge img-ubuntu-vpc",
"{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/" "{{ project_name }}.ref_dir": "oss://chinese-quartet/quartet-storage-data/reference_data/"
} }

+ 4
- 4
tasks/reformVCF.wdl Wyświetl plik



python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name} python /opt/reformVCF.py -vcf ${family_mendelian_info} -name ${family_name}


cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL5.txt
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL6.txt
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL7.txt
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' > ${family_name}.LCL8.txt
cat ${family_name}.LCL5.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL5.txt
cat ${family_name}.LCL6.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL6.txt
cat ${family_name}.LCL7.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL7.txt
cat ${family_name}.LCL8.vcf | grep -v '##' | grep -v '0/0' | grep -v '\./\.' > ${family_name}.LCL8.txt


>>> >>>



+ 26
- 0
tasks/two_family_merge.wdl Wyświetl plik

# Strip the '##' lines from the LCL5 and LCL6 trio VCFs and hand the two
# resulting tables to merge_two_family.py, which writes ${family_name}.txt.
task two_family_merge {
    # Trio VCFs for the two children.
    File LCL5_trio_vcf
    File LCL6_trio_vcf
    # Prefix for the intermediate and output file names.
    String family_name
    # Runtime settings supplied by the caller / inputs file.
    String docker
    String cluster_config
    String disk_size

    command <<<
        # Drop every line containing '##' so that the '#CHROM' line is left
        # as the table header.
        grep -v '##' ${LCL5_trio_vcf} > ${family_name}.LCL5.txt
        grep -v '##' ${LCL6_trio_vcf} > ${family_name}.LCL6.txt
        # Absolute path: a relative 'opt/...' would be resolved against the
        # task's working directory instead of the image's /opt.
        python /opt/merge_two_family.py -LCL5 ${family_name}.LCL5.txt -LCL6 ${family_name}.LCL6.txt -family ${family_name}
    >>>

    runtime {
        docker: docker
        cluster: cluster_config
        systemDisk: "cloud_ssd 40"
        dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
    }

    output {
        File family_mendelian_info = "${family_name}.txt"
    }

}

+ 4
- 52
workflow.wdl Wyświetl plik

import "./tasks/mendelian.wdl" as mendelian import "./tasks/mendelian.wdl" as mendelian
import "./tasks/zipIndex.wdl" as zipIndex
import "./tasks/VCFrename.wdl" as VCFrename
import "./tasks/mergeSister.wdl" as mergeSister
import "./tasks/reformVCF.wdl" as reformVCF
import "./tasks/two_family_merge.wdl" as two_family_merge




workflow {{ project_name }} { workflow {{ project_name }} {
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call zipIndex.zipIndex as LCL5zipIndex {
call two_family_merge.two_family_merge as two_family_merge {
input: input:
vcf=LCL5mendelian.trio_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call zipIndex.zipIndex as LCL6zipIndex {
input:
vcf=LCL6mendelian.trio_vcf,
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFrename.VCFrename as LCL5VCFrename {
input:
trio_vcf_gz=LCL5zipIndex.vcf_gz,
trio_vcf_idx=LCL5zipIndex.vcf_idx,
mother_name=quartet[7],
father_name=quartet[6],
child_name=quartet[4],
LCL5_trio_vcf=LCL5mendelian.trio_vcf,
LCL6_trio_vcf=LCL6mendelian.trio_vcf,
family_name=quartet[8], family_name=quartet[8],
child="LCL5",
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call VCFrename.VCFrename as LCL6VCFrename {
input:
trio_vcf_gz=LCL6zipIndex.vcf_gz,
trio_vcf_idx=LCL6zipIndex.vcf_idx,
mother_name=quartet[7],
father_name=quartet[6],
child_name=quartet[5],
family_name=quartet[8],
child="LCL6",
cluster_config=cluster_config,
disk_size=disk_size
}
call mergeSister.mergeSister as mergeSister {
input:
LCL5_trio_vcf_gz=LCL5VCFrename.rename_trio_vcf_gz,
LCL5_trio_vcf_idx=LCL5VCFrename.rename_trio_vcf_idx,
LCL6_trio_vcf_gz=LCL6VCFrename.rename_trio_vcf_gz,
LCL6_trio_vcf_idx=LCL6VCFrename.rename_trio_vcf_idx,
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
call reformVCF.reformVCF as reformVCF {
input:
family_mendelian_info=mergeSister.family_mendelian_info,
family_name=quartet[8],
cluster_config=cluster_config,
disk_size=disk_size
}
} }
} }

Ładowanie…
Anuluj
Zapisz