Procházet zdrojové kódy

vote for 27

master
LUYAO REN před 5 lety
rodič
revize
6d25fd4f8d
Změněny 4 soubory: 68 přidání a 23 odebrání
  1. +8
    -0
      README.md
  2. +18
    -19
      codescripts/high_confidence_call_vote.py
  3. +5
    -0
      inputs
  4. +37
    -4
      workflow.wdl

+ 8
- 0
README.md Zobrazit soubor



## App输入变量与输入文件


inputSamplesFile的格式如下

```bash
#LCL5_VCF #LCL6_VCF #LCL7_VCF #LCL8_VCF #LCL5_sampleName #LCL6_sampleName #LCL7_sampleName #LCL8_sampleName #familyName
```

最终版的整合文件包括:





## App输出文件

+ 18
- 19
codescripts/high_confidence_call_vote.py Zobrazit soubor

outfile = open(file_name,'w') outfile = open(file_name,'w')


# write VCF # write VCF
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tQuartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_WGE_1_20190402_LCL5\tQuartet_DNA_BGI_SEQ2000_WGE_2_20190402_LCL5\tQuartet_DNA_BGI_SEQ500_BGI_1_20180328_LCL5 \tQuartet_DNA_BGI_SEQ500_BGI_2_20180328_LCL5\tQuartet_DNA_BGI_SEQ500_BGI_3_20180328_LCL5\tQuartet_DNA_ILM_Nova_ARD_1_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_2_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_3_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_4_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_5_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_6_20190111_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20171024_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_2_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_3_20180930_LCL5\tQuartet_DNA_ILM_Nova_GAC_1_20171025_LCL5\tQuartet_DNA_ILM_Nova_NVG_1_20171024_LCL5\tQuartet_DNA_ILM_Nova_WUX_1_20171024_LCL5\tQuartet_DNA_ILM_XTen_ARD_1_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_2_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_3_20170403_LCL5\tQuartet_DNA_ILM_XTen_NVG_1_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_2_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_3_20170329_LCL5\tQuartet_DNA_ILM_XTen_WUX_1_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_2_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_3_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_4_20180703_LCL5\tQuartet_DNA_ILM_XTen_WUX_5_20180703_LCL5\tQuartet_DNA_ILM_XTen_WUX_6_20180703_LCL5' +'\t'+ sample_name+'_pcr'+'\t' + sample_name+'_pcr-free'+ '\t'+ sample_name +'_consensus' + '\t' + sample_name + '_consensus_alt_seq' +'\n'
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tQuartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5\tQuartet_DNA_BGI_T7_WGE_1_20191105_LCL5\tQuartet_DNA_BGI_T7_WGE_2_20191105_LCL5\tQuartet_DNA_BGI_T7_WGE_3_20191105_LCL5\tQuartet_DNA_ILM_Nova_ARD_1_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_2_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_3_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_4_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_5_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_6_20190111_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_2_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_3_20180930_LCL5\tQuartet_DNA_ILM_Nova_WUX_1_20190917_LCL5\tQuartet_DNA_ILM_Nova_WUX_2_20190917_LCL5\tQuartet_DNA_ILM_Nova_WUX_3_20190917_LCL5\tQuartet_DNA_ILM_XTen_ARD_1_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_2_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_3_20170403_LCL5\tQuartet_DNA_ILM_XTen_NVG_1_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_2_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_3_20170329_LCL5\tQuartet_DNA_ILM_XTen_WUX_1_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_2_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_3_20170216_LCL5' +'\t'+ sample_name+'_pcr'+'\t' + sample_name+'_pcr-free'+ '\t'+ sample_name +'_consensus' + '\t' + sample_name + '_consensus_alt_seq' +'\n'
outfile.write(vcf_header) outfile.write(vcf_header)
outfile.write(outputcolumn) outfile.write(outputcolumn)


def detected_percentage(strings): def detected_percentage(strings):
strings = [x.replace('0/0','.') for x in strings] strings = [x.replace('0/0','.') for x in strings]
gt = [x.split(':')[0] for x in strings] gt = [x.split(':')[0] for x in strings]
percentage = round((33 - gt.count('.'))/33,4)
percentage = round((27 - gt.count('.'))/27,4)
return(str(percentage)) return(str(percentage))


def vote_percentage(strings,consensus_call): def vote_percentage(strings,consensus_call):
strings = [x.replace('.','0/0') for x in strings] strings = [x.replace('.','0/0') for x in strings]
gt = [x.split(':')[0] for x in strings] gt = [x.split(':')[0] for x in strings]
gt = list(map(gt_uniform,[i for i in gt])) gt = list(map(gt_uniform,[i for i in gt]))
percentage = round(gt.count(consensus_call)/33,4)
percentage = round(gt.count(consensus_call)/27,4)
return(str(percentage)) return(str(percentage))


def family_vote(strings,consensus_call): def family_vote(strings,consensus_call):
pcr_consensus = '' pcr_consensus = ''
pcr_free_consensus = '' pcr_free_consensus = ''
consensus_call = '' consensus_call = ''
consensus_alt_seq = '.'
consensus_alt_seq = ''
# pcr # pcr
pcr = itemgetter(*[9,10,11,12,14,15,16,23,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41])(strings)
pcr = itemgetter(*[9,10,11,27,28,29,30,31,32,33,34,35])(strings)
SEQ2000 = decide_by_rep(pcr[0:3]) SEQ2000 = decide_by_rep(pcr[0:3])
SEQ500 = decide_by_rep(pcr[4:7])
Nova = decide_by_rep(pcr[7:11])
XTen_ARD = decide_by_rep(pcr[11:14])
XTen_NVG = decide_by_rep(pcr[14:17])
XTen_WUX_1 = decide_by_rep(pcr[17:20])
XTen_WUX_2 = decide_by_rep(pcr[20:23])
sequence_site = [SEQ2000,SEQ500,Nova,XTen_ARD,XTen_NVG,XTen_WUX_1,XTen_WUX_2]
XTen_ARD = decide_by_rep(pcr[3:6])
XTen_NVG = decide_by_rep(pcr[6:9])
XTen_WUX = decide_by_rep(pcr[9:12])
sequence_site = [SEQ2000,XTen_ARD,XTen_NVG,XTen_WUX]
sequence_dict = Counter(sequence_site) sequence_dict = Counter(sequence_site)
highest_sequence = sequence_dict.most_common(1) highest_sequence = sequence_dict.most_common(1)
candidate_sequence = highest_sequence[0][0] candidate_sequence = highest_sequence[0][0]
freq_sequence = highest_sequence[0][1] freq_sequence = highest_sequence[0][1]
if freq_sequence > 4:
if freq_sequence > 2:
pcr_consensus = candidate_sequence pcr_consensus = candidate_sequence
else: else:
pcr_consensus = 'inconSequenceSite' pcr_consensus = 'inconSequenceSite'
# pcr-free # pcr-free
pcr_free = itemgetter(*[13,17,18,19,20,21,22,24,25,26])(strings)
pcr_free = itemgetter(*[12,13,14,15,16,17,18,19,20,21,22,23,24,25,26])(strings)
#SEQ2000 = decide_by_rep(pcr_free[0]) #SEQ2000 = decide_by_rep(pcr_free[0])
Nova_ARD_1 = decide_by_rep(pcr_free[1:4])
Nova_ARD_2 = decide_by_rep(pcr_free[4:7])
Nova_BRG = decide_by_rep(pcr_free[7:10])
sequence_site = [SEQ2000,Nova_ARD_1,Nova_ARD_2,Nova_BRG]
T7_WGE = decide_by_rep(pcr_free[0:3])
Nova_ARD_1 = decide_by_rep(pcr_free[3:6])
Nova_ARD_2 = decide_by_rep(pcr_free[6:9])
Nova_BRG = decide_by_rep(pcr_free[9:12])
Nova_WUX = decide_by_rep(pcr_free[12:15])
sequence_site = [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX]
highest_sequence = sequence_dict.most_common(1) highest_sequence = sequence_dict.most_common(1)
candidate_sequence = highest_sequence[0][0] candidate_sequence = highest_sequence[0][0]
freq_sequence = highest_sequence[0][1] freq_sequence = highest_sequence[0][1]
if freq_sequence > 2:
if freq_sequence > 3:
pcr_free_consensus = candidate_sequence pcr_free_consensus = candidate_sequence
else: else:
pcr_free_consensus = 'inconSequenceSite' pcr_free_consensus = 'inconSequenceSite'

+ 5
- 0
inputs Zobrazit soubor

"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa",
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL7votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL6votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1",
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", "{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9",
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1",
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", "{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1",
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", "{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest",
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", "{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc",
} }






+ 37
- 4
workflow.wdl Zobrazit soubor

import "./tasks/mergeSister.wdl" as mergeSister import "./tasks/mergeSister.wdl" as mergeSister
import "./tasks/reformVCF.wdl" as reformVCF import "./tasks/reformVCF.wdl" as reformVCF
import "./tasks/merge.wdl" as merge import "./tasks/merge.wdl" as merge
import "./tasks/votes.wdl" as votes


workflow {{ project_name }} { workflow {{ project_name }} {
File inputSamplesFile File inputSamplesFile
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call votes.votes as LCL5votes{
input:
merged_vcf=LCL5merge.merged_vcf,
vcf_dup=LCL5merge.vcf_dup,
sample='LCL5',
prefix='LCL5_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
call merge.merge as LCL6merge { call merge.merge as LCL6merge {
input: input:
family_vcf_gz=LCL6familyzipIndex.vcf_gz, family_vcf_gz=LCL6familyzipIndex.vcf_gz,
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call votes.votes as LCL6votes {
input:
merged_vcf=LCL6merge.merged_vcf,
vcf_dup=LCL6merge.vcf_dup,
sample='LCL6',
prefix='LCL6_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
call merge.merge as LCL7merge { call merge.merge as LCL7merge {
input: input:
family_vcf_gz=LCL7familyzipIndex.vcf_gz, family_vcf_gz=LCL7familyzipIndex.vcf_gz,
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call votes.votes as LCL7votes {
input:
merged_vcf=LCL7merge.merged_vcf,
vcf_dup=LCL7merge.vcf_dup,
sample='LCL7',
prefix='LCL7_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
call merge.merge as LCL8merge { call merge.merge as LCL8merge {
input: input:
family_vcf_gz=LCL8familyzipIndex.vcf_gz, family_vcf_gz=LCL8familyzipIndex.vcf_gz,
cluster_config=cluster_config, cluster_config=cluster_config,
disk_size=disk_size disk_size=disk_size
} }
call votes.votes as LCL8votes {
input:
merged_vcf=LCL8merge.merged_vcf,
vcf_dup=LCL8merge.vcf_dup,
sample='LCL8',
prefix='LCL8_consensus',
cluster_config=cluster_config,
disk_size=disk_size
}
} }



Načítá se…
Zrušit
Uložit