## App输入变量与输入文件 | ## App输入变量与输入文件 | ||||
inputSamplesFile的格式如下: | |||||
```bash | |||||
#LCL5_VCF #LCL6_VCF #LCL7_VCF #LCL8_VCF #LCL5_sampleName #LCL6_sampleName #LCL7_sampleName #LCL8_sampleName #familyName | |||||
``` | |||||
最终版的整合文件包括: | |||||
## App输出文件 | ## App输出文件 |
outfile = open(file_name,'w')
# write VCF
# Build the '#CHROM ...' column-header line. An earlier 33-call-set header
# used to be assigned to `outputcolumn` first and was immediately overwritten,
# so only the 27-call-set layout below ever reached the output file.
fixed_columns = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
# One column per sequencing call set, in the order they appear in the merged VCF.
call_set_columns = [
    'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5',
    'Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5',
]
# Four trailing summary columns named after the sample being processed.
consensus_columns = [
    sample_name + '_pcr',
    sample_name + '_pcr-free',
    sample_name + '_consensus',
    sample_name + '_consensus_alt_seq',
]
outputcolumn = '\t'.join(fixed_columns + call_set_columns + consensus_columns) + '\n'
outfile.write(vcf_header)
outfile.write(outputcolumn)
def detected_percentage(strings, total=27):
    """Return the fraction of call sets that report a non-reference genotype.

    Each entry of ``strings`` is one per-sample VCF field; its genotype is
    the text before the first ':'. A genotype of '0/0' is treated the same
    as the missing marker '.', i.e. as "not detected".

    Args:
        strings: iterable of per-sample field strings.
        total: number of call sets used as the denominator. Defaults to 27,
            the value the previous hard-coded implementation ended up using
            (an earlier ``/33`` computation was overwritten and never took
            effect).

    Returns:
        The detected fraction rounded to 4 decimals, as a string.
    """
    # Normalising '0/0' -> '.' lets a single count('.') cover both
    # homozygous-reference and missing calls.
    genotypes = [entry.replace('0/0', '.').split(':')[0] for entry in strings]
    undetected = genotypes.count('.')
    return str(round((total - undetected) / total, 4))
def vote_percentage(strings, consensus_call, total=27):
    """Return the fraction of call sets whose genotype equals the consensus.

    Each entry of ``strings`` is one per-sample VCF field; its genotype is
    the text before the first ':'. Genotypes are passed through
    ``gt_uniform`` (defined elsewhere in this file) before being compared
    with ``consensus_call``.

    Args:
        strings: iterable of per-sample field strings.
        consensus_call: genotype to count, in the form ``gt_uniform`` returns.
        total: number of call sets used as the denominator. Defaults to 27,
            the value the previous hard-coded implementation ended up using
            (an earlier ``/33`` computation was overwritten and never took
            effect).

    Returns:
        The agreeing fraction rounded to 4 decimals, as a string.
    """
    # Every '.' character is replaced, so a missing call '.' becomes '0/0'
    # and './.' becomes '0/0/0/0'.
    # NOTE(review): confirm gt_uniform copes with the './.' expansion.
    filled = [entry.replace('.', '0/0') for entry in strings]
    genotypes = [gt_uniform(entry.split(':')[0]) for entry in filled]
    return str(round(genotypes.count(consensus_call) / total, 4))
def family_vote(strings,consensus_call): | def family_vote(strings,consensus_call): | ||||
pcr_consensus = '' | pcr_consensus = '' | ||||
pcr_free_consensus = '' | pcr_free_consensus = '' | ||||
consensus_call = '' | consensus_call = '' | ||||
consensus_alt_seq = '.' | |||||
consensus_alt_seq = '' | |||||
# pcr | # pcr | ||||
pcr = itemgetter(*[9,10,11,12,14,15,16,23,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41])(strings) | |||||
pcr = itemgetter(*[9,10,11,27,28,29,30,31,32,33,34,35])(strings) | |||||
SEQ2000 = decide_by_rep(pcr[0:3]) | SEQ2000 = decide_by_rep(pcr[0:3]) | ||||
SEQ500 = decide_by_rep(pcr[4:7]) | |||||
Nova = decide_by_rep(pcr[7:11]) | |||||
XTen_ARD = decide_by_rep(pcr[11:14]) | |||||
XTen_NVG = decide_by_rep(pcr[14:17]) | |||||
XTen_WUX_1 = decide_by_rep(pcr[17:20]) | |||||
XTen_WUX_2 = decide_by_rep(pcr[20:23]) | |||||
sequence_site = [SEQ2000,SEQ500,Nova,XTen_ARD,XTen_NVG,XTen_WUX_1,XTen_WUX_2] | |||||
XTen_ARD = decide_by_rep(pcr[3:6]) | |||||
XTen_NVG = decide_by_rep(pcr[6:9]) | |||||
XTen_WUX = decide_by_rep(pcr[9:12]) | |||||
sequence_site = [SEQ2000,XTen_ARD,XTen_NVG,XTen_WUX] | |||||
sequence_dict = Counter(sequence_site) | sequence_dict = Counter(sequence_site) | ||||
highest_sequence = sequence_dict.most_common(1) | highest_sequence = sequence_dict.most_common(1) | ||||
candidate_sequence = highest_sequence[0][0] | candidate_sequence = highest_sequence[0][0] | ||||
freq_sequence = highest_sequence[0][1] | freq_sequence = highest_sequence[0][1] | ||||
if freq_sequence > 4: | |||||
if freq_sequence > 2: | |||||
pcr_consensus = candidate_sequence | pcr_consensus = candidate_sequence | ||||
else: | else: | ||||
pcr_consensus = 'inconSequenceSite' | pcr_consensus = 'inconSequenceSite' | ||||
# pcr-free | # pcr-free | ||||
pcr_free = itemgetter(*[13,17,18,19,20,21,22,24,25,26])(strings) | |||||
pcr_free = itemgetter(*[12,13,14,15,16,17,18,19,20,21,22,23,24,25,26])(strings) | |||||
#SEQ2000 = decide_by_rep(pcr_free[0]) | #SEQ2000 = decide_by_rep(pcr_free[0]) | ||||
Nova_ARD_1 = decide_by_rep(pcr_free[1:4]) | |||||
Nova_ARD_2 = decide_by_rep(pcr_free[4:7]) | |||||
Nova_BRG = decide_by_rep(pcr_free[7:10]) | |||||
sequence_site = [SEQ2000,Nova_ARD_1,Nova_ARD_2,Nova_BRG] | |||||
T7_WGE = decide_by_rep(pcr_free[0:3]) | |||||
Nova_ARD_1 = decide_by_rep(pcr_free[3:6]) | |||||
Nova_ARD_2 = decide_by_rep(pcr_free[6:9]) | |||||
Nova_BRG = decide_by_rep(pcr_free[9:12]) | |||||
Nova_WUX = decide_by_rep(pcr_free[12:15]) | |||||
sequence_site = [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX] | |||||
highest_sequence = sequence_dict.most_common(1) | highest_sequence = sequence_dict.most_common(1) | ||||
candidate_sequence = highest_sequence[0][0] | candidate_sequence = highest_sequence[0][0] | ||||
freq_sequence = highest_sequence[0][1] | freq_sequence = highest_sequence[0][1] | ||||
if freq_sequence > 2: | |||||
if freq_sequence > 3: | |||||
pcr_free_consensus = candidate_sequence | pcr_free_consensus = candidate_sequence | ||||
else: | else: | ||||
pcr_free_consensus = 'inconSequenceSite' | pcr_free_consensus = 'inconSequenceSite' |
"{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL7merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | "{{ project_name }}.fasta": "GRCh38.d1.vd1.fa", | ||||
"{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL7votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL6votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | "{{ project_name }}.LCL6mendelian.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/vbt:v1.1", | ||||
"{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.mergeSister.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | "{{ project_name }}.LCL6variantsNorm.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/bcftools:v1.9", | ||||
"{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL5votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL7familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5familyzipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL6VCFrename.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5merge.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.LCL8votes.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call_manuscript:v1.1", | |||||
"{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | "{{ project_name }}.reformVCF.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/high_confidence_call:v1.1", | ||||
"{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | "{{ project_name }}.LCL5zipIndex.docker": "registry-vpc.cn-shanghai.aliyuncs.com/pgx-docker-registry/rtg-tools:latest", | ||||
"{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | "{{ project_name }}.cluster_config": "OnDemand bcs.a2.xlarge img-ubuntu-vpc", | ||||
} | } | ||||
import "./tasks/mergeSister.wdl" as mergeSister | import "./tasks/mergeSister.wdl" as mergeSister | ||||
import "./tasks/reformVCF.wdl" as reformVCF | import "./tasks/reformVCF.wdl" as reformVCF | ||||
import "./tasks/merge.wdl" as merge | import "./tasks/merge.wdl" as merge | ||||
import "./tasks/votes.wdl" as votes | |||||
workflow {{ project_name }} { | workflow {{ project_name }} { | ||||
File inputSamplesFile | File inputSamplesFile | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL5votes{ | |||||
input: | |||||
merged_vcf=LCL5merge.merged_vcf, | |||||
vcf_dup=LCL5merge.vcf_dup, | |||||
sample='LCL5', | |||||
prefix='LCL5_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge.merge as LCL6merge { | call merge.merge as LCL6merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL6familyzipIndex.vcf_gz, | family_vcf_gz=LCL6familyzipIndex.vcf_gz, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL6votes { | |||||
input: | |||||
merged_vcf=LCL6merge.merged_vcf, | |||||
vcf_dup=LCL6merge.vcf_dup, | |||||
sample='LCL6', | |||||
prefix='LCL6_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge.merge as LCL7merge { | call merge.merge as LCL7merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL7familyzipIndex.vcf_gz, | family_vcf_gz=LCL7familyzipIndex.vcf_gz, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL7votes { | |||||
input: | |||||
merged_vcf=LCL7merge.merged_vcf, | |||||
vcf_dup=LCL7merge.vcf_dup, | |||||
sample='LCL7', | |||||
prefix='LCL7_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
call merge.merge as LCL8merge { | call merge.merge as LCL8merge { | ||||
input: | input: | ||||
family_vcf_gz=LCL8familyzipIndex.vcf_gz, | family_vcf_gz=LCL8familyzipIndex.vcf_gz, | ||||
cluster_config=cluster_config, | cluster_config=cluster_config, | ||||
disk_size=disk_size | disk_size=disk_size | ||||
} | } | ||||
call votes.votes as LCL8votes { | |||||
input: | |||||
merged_vcf=LCL8merge.merged_vcf, | |||||
vcf_dup=LCL8merge.vcf_dup, | |||||
sample='LCL8', | |||||
prefix='LCL8_consensus', | |||||
cluster_config=cluster_config, | |||||
disk_size=disk_size | |||||
} | |||||
} | } | ||||