Kaynağa Gözat

vote for 27

master
LUYAO REN 5 yıl önce
ebeveyn
işleme
6d25fd4f8d
4 değiştirilmiş dosya ile 68 ekleme ve 23 silme
  1. +8
    -0
      README.md
  2. +18
    -19
      codescripts/high_confidence_call_vote.py
  3. +5
    -0
      inputs
  4. +37
    -4
      workflow.wdl

+ 8
- 0
README.md Dosyayı Görüntüle

@@ -77,6 +77,14 @@ t

## App输入变量与输入文件

inputSamplesFile的格式如下

```bash
#LCL5_VCF #LCL6_VCF #LCL7_VCF #LCL8_VCF #LCL5_sampleName #LCL6_sampleName #LCL7_sampleName #LCL8_sampleName #familyName
```

最终版的整合文件包括:



## App输出文件

+ 18
- 19
codescripts/high_confidence_call_vote.py Dosyayı Görüntüle

@@ -62,7 +62,7 @@ file_name = prefix + '_annotated.vcf'
outfile = open(file_name,'w')

# write VCF
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tQuartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_WGE_1_20190402_LCL5\tQuartet_DNA_BGI_SEQ2000_WGE_2_20190402_LCL5\tQuartet_DNA_BGI_SEQ500_BGI_1_20180328_LCL5 \tQuartet_DNA_BGI_SEQ500_BGI_2_20180328_LCL5\tQuartet_DNA_BGI_SEQ500_BGI_3_20180328_LCL5\tQuartet_DNA_ILM_Nova_ARD_1_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_2_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_3_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_4_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_5_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_6_20190111_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20171024_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_2_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_3_20180930_LCL5\tQuartet_DNA_ILM_Nova_GAC_1_20171025_LCL5\tQuartet_DNA_ILM_Nova_NVG_1_20171024_LCL5\tQuartet_DNA_ILM_Nova_WUX_1_20171024_LCL5\tQuartet_DNA_ILM_XTen_ARD_1_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_2_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_3_20170403_LCL5\tQuartet_DNA_ILM_XTen_NVG_1_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_2_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_3_20170329_LCL5\tQuartet_DNA_ILM_XTen_WUX_1_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_2_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_3_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_4_20180703_LCL5\tQuartet_DNA_ILM_XTen_WUX_5_20180703_LCL5\tQuartet_DNA_ILM_XTen_WUX_6_20180703_LCL5' +'\t'+ sample_name+'_pcr'+'\t' + sample_name+'_pcr-free'+ '\t'+ sample_name +'_consensus' + '\t' + sample_name + '_consensus_alt_seq' +'\n'
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tQuartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5\tQuartet_DNA_BGI_T7_WGE_1_20191105_LCL5\tQuartet_DNA_BGI_T7_WGE_2_20191105_LCL5\tQuartet_DNA_BGI_T7_WGE_3_20191105_LCL5\tQuartet_DNA_ILM_Nova_ARD_1_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_2_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_3_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_4_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_5_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_6_20190111_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_2_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_3_20180930_LCL5\tQuartet_DNA_ILM_Nova_WUX_1_20190917_LCL5\tQuartet_DNA_ILM_Nova_WUX_2_20190917_LCL5\tQuartet_DNA_ILM_Nova_WUX_3_20190917_LCL5\tQuartet_DNA_ILM_XTen_ARD_1_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_2_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_3_20170403_LCL5\tQuartet_DNA_ILM_XTen_NVG_1_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_2_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_3_20170329_LCL5\tQuartet_DNA_ILM_XTen_WUX_1_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_2_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_3_20170216_LCL5' +'\t'+ sample_name+'_pcr'+'\t' + sample_name+'_pcr-free'+ '\t'+ sample_name +'_consensus' + '\t' + sample_name + '_consensus_alt_seq' +'\n'
outfile.write(vcf_header)
outfile.write(outputcolumn)

@@ -71,14 +71,14 @@ outfile.write(outputcolumn)
# Return, as a string, the fraction of entries in `strings` whose leading
# ':'-separated field is something other than '.', measured against a fixed total.
# NOTE(review): this listing is a commit diff with its +/- markers stripped, so
# both the 33-based and the 27-based assignment appear below. Read top to
# bottom, the 27-based one is the value that is returned.
def detected_percentage(strings):
# Every occurrence of '0/0' in an entry becomes '.', so such entries are counted as '.' below.
strings = [x.replace('0/0','.') for x in strings]
# Keep only the text before the first ':' of each entry.
gt = [x.split(':')[0] for x in strings]
percentage = round((33 - gt.count('.'))/33,4)
# Overwrites the value above. The total of 27 is hard-coded rather than taken
# from len(strings) -- presumably the number of call columns; TODO confirm with the caller.
percentage = round((27 - gt.count('.'))/27,4)
return(str(percentage))

# Return, as a string, the share of entries in `strings` whose leading
# ':'-separated field (after gt_uniform) equals `consensus_call`, measured
# against a fixed total.
# NOTE(review): this listing is a commit diff with its +/- markers stripped, so
# both the 33-based and the 27-based assignment appear below. Read top to
# bottom, the 27-based one is the value that is returned.
def vote_percentage(strings,consensus_call):
# str.replace rewrites every '.' character in the entry to '0/0', not only a
# lone '.' in the first field -- NOTE(review): confirm no later field is relied on.
strings = [x.replace('.','0/0') for x in strings]
# Keep only the text before the first ':' of each entry.
gt = [x.split(':')[0] for x in strings]
# gt_uniform is defined outside this view; presumably it normalises the
# spelling of a genotype so equal calls compare equal -- TODO confirm.
gt = list(map(gt_uniform,[i for i in gt]))
percentage = round(gt.count(consensus_call)/33,4)
# Overwrites the value above. The total of 27 is hard-coded rather than taken from len(strings).
percentage = round(gt.count(consensus_call)/27,4)
return(str(percentage))

def family_vote(strings,consensus_call):
@@ -143,36 +143,35 @@ def main():
pcr_consensus = ''
pcr_free_consensus = ''
consensus_call = ''
consensus_alt_seq = '.'
consensus_alt_seq = ''
# pcr
pcr = itemgetter(*[9,10,11,12,14,15,16,23,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41])(strings)
pcr = itemgetter(*[9,10,11,27,28,29,30,31,32,33,34,35])(strings)
SEQ2000 = decide_by_rep(pcr[0:3])
SEQ500 = decide_by_rep(pcr[4:7])
Nova = decide_by_rep(pcr[7:11])
XTen_ARD = decide_by_rep(pcr[11:14])
XTen_NVG = decide_by_rep(pcr[14:17])
XTen_WUX_1 = decide_by_rep(pcr[17:20])
XTen_WUX_2 = decide_by_rep(pcr[20:23])
sequence_site = [SEQ2000,SEQ500,Nova,XTen_ARD,XTen_NVG,XTen_WUX_1,XTen_WUX_2]
XTen_ARD = decide_by_rep(pcr[3:6])
XTen_NVG = decide_by_rep(pcr[6:9])
XTen_WUX = decide_by_rep(pcr[9:12])
sequence_site = [SEQ2000,XTen_ARD,XTen_NVG,XTen_WUX]
sequence_dict = Counter(sequence_site)
highest_sequence = sequence_dict.most_common(1)
candidate_sequence = highest_sequence[0][0]
freq_sequence = highest_sequence[0][1]
if freq_sequence > 4:
if freq_sequence > 2:
pcr_consensus = candidate_sequence
else:
pcr_consensus = 'inconSequenceSite'
# pcr-free
pcr_free = itemgetter(*[13,17,18,19,20,21,22,24,25,26])(strings)
pcr_free = itemgetter(*[12,13,14,15,16,17,18,19,20,21,22,23,24,25,26])(strings)
#SEQ2000 = decide_by_rep(pcr_free[0])
Nova_ARD_1 = decide_by_rep(pcr_free[1:4])
Nova_ARD_2 = decide_by_rep(pcr_free[4:7])
Nova_BRG = decide_by_rep(pcr_free[7:10])
sequence_site = [SEQ2000,Nova_ARD_1,Nova_ARD_2,Nova_BRG]
T7_WGE = decide_by_rep(pcr_free[0:3])
Nova_ARD_1 = decide_by_rep(pcr_free[3:6])
Nova_ARD_2 = decide_by_rep(pcr_free[6:9])
Nova_BRG = decide_by_rep(pcr_free[9:12])
Nova_WUX = decide_by_rep(pcr_free[12:15])
sequence_site = [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX]
highest_sequence = sequence_dict.most_common(1)
candidate_sequence = highest_sequence[0][0]
freq_sequence = highest_sequence[0][1]
if freq_sequence > 2:
if freq_sequence > 3:
pcr_free_consensus = candidate_sequence
else:
pcr_free_consensus = 'inconSequenceSite'

+ 5
- 0
inputs Dosyayı Görüntüle

@@ -2,6 +2,8 @@
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL6votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
@@ -11,10 +13,12 @@
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
@@ -27,3 +31,4 @@
}




+ 37
- 4
workflow.wdl Dosyayı Görüntüle

@@ -5,6 +5,7 @@ import "./tasks/VCFrename.wdl" as VCFrename
import "./tasks/mergeSister.wdl" as mergeSister
import "./tasks/reformVCF.wdl" as reformVCF
import "./tasks/merge.wdl" as merge
import "./tasks/votes.wdl" as votes

workflow {{ project_name }} {
File inputSamplesFile
@@ -175,7 +176,15 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call votes.votes as LCL5votes{
input:
merged_vcf=LCL5merge.merged_vcf,
vcf_dup=LCL5merge.vcf_dup,
sample='LCL5',
prefix='LCL5_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
call merge.merge as LCL6merge {
input:
family_vcf_gz=LCL6familyzipIndex.vcf_gz,
@@ -184,7 +193,15 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call votes.votes as LCL6votes {
input:
merged_vcf=LCL6merge.merged_vcf,
vcf_dup=LCL6merge.vcf_dup,
sample='LCL6',
prefix='LCL6_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
call merge.merge as LCL7merge {
input:
family_vcf_gz=LCL7familyzipIndex.vcf_gz,
@@ -193,7 +210,15 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call votes.votes as LCL7votes {
input:
merged_vcf=LCL7merge.merged_vcf,
vcf_dup=LCL7merge.vcf_dup,
sample='LCL7',
prefix='LCL7_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
call merge.merge as LCL8merge {
input:
family_vcf_gz=LCL8familyzipIndex.vcf_gz,
@@ -202,6 +227,14 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call votes.votes as LCL8votes {
input:
merged_vcf=LCL8merge.merged_vcf,
vcf_dup=LCL8merge.vcf_dup,
sample='LCL8',
prefix='LCL8_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
}


Yükleniyor…
İptal
Kaydet