LUYAO REN преди 5 години
родител
ревизия
37783e9db9
променени са 11 файла, в които са добавени 420 реда и са изтрити 131 реда
  1. +285
    -0
      codescripts/FinalResult2VCF.py
  2. +0
    -68
      codescripts/bed_for_bamReadcount.py
  3. +15
    -5
      codescripts/high_confidence_call_vote.py
  4. +0
    -0
      codescripts/information_intergration.py
  5. +17
    -15
      codescripts/variants_quality_location_intergration.py
  6. +1
    -1
      codescripts/vcf_mq_af.py
  7. +4
    -4
      tasks/VCFinfo.wdl
  8. +3
    -38
      tasks/bed_annotation.wdl
  9. +26
    -0
      tasks/final_result.wdl
  10. +3
    -0
      tasks/votes.wdl
  11. +66
    -0
      workflow.wdl

+ 285
- 0
codescripts/FinalResult2VCF.py Целия файл

@@ -0,0 +1,285 @@
import pandas as pd
import sys, argparse, os
import fileinput
import re

# Command-line interface: four string options, all of them mandatory.
parser = argparse.ArgumentParser(
    description="this script is to get final high confidence calls and information of all replicates"
)
_REQUIRED_OPTIONS = (
    (('-vcfInfo', '--vcfInfo'),
     'The txt file of variants information, this file is named as prefix__variant_quality_location.txt'),
    (('-mendelianInfo', '--mendelianInfo'),
     'The merged mendelian information of all samples'),
    (('-prefix', '--prefix'),
     'The prefix of output filenames'),
    (('-sample', '--sample_name'),
     'which sample of quartet'),
)
for _flags, _help_text in _REQUIRED_OPTIONS:
    parser.add_argument(*_flags, type=str, help=_help_text, required=True)

# Expose the parsed values under the module-level names used by the rest of the script.
args = parser.parse_args()
vcfInfo, mendelianInfo = args.vcfInfo, args.mendelianInfo
prefix, sample_name = args.prefix, args.sample_name

# Meta-information header of the single-sample benchmarking VCF.
# The FORMAT declarations cover every key of the 'GT:AD:DP:AF:GQ:MQ' column
# written for reproducible calls, and use the type names defined by VCF
# (Integer, Float, Flag, Character, String).
vcf_header = '''##fileformat=VCFv4.2
##fileDate=20200331
##source=high_confidence_calls_intergration(choppy app)
##reference=GRCh38.d1.vd1
##INFO=<ID=location,Number=1,Type=String,Description="Repeat region">
##INFO=<ID=DPCT,Number=1,Type=Float,Description="Percentage of detected votes">
##INFO=<ID=VPCT,Number=1,Type=Float,Description="Percentage of consnesus votes">
##INFO=<ID=FPCT,Number=1,Type=Float,Description="Percentage of mendelian consisitent votes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth">
##FORMAT=<ID=AF,Number=1,Type=Float,Description="Allele frequency">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
##FORMAT=<ID=MQ,Number=1,Type=Float,Description="Mapping quality">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
##contig=<ID=chr3,length=198295559>
##contig=<ID=chr4,length=190214555>
##contig=<ID=chr5,length=181538259>
##contig=<ID=chr6,length=170805979>
##contig=<ID=chr7,length=159345973>
##contig=<ID=chr8,length=145138636>
##contig=<ID=chr9,length=138394717>
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=107043718>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
##contig=<ID=chr17,length=83257441>
##contig=<ID=chr18,length=80373285>
##contig=<ID=chr19,length=58617616>
##contig=<ID=chr20,length=64444167>
##contig=<ID=chr21,length=46709983>
##contig=<ID=chr22,length=50818468>
##contig=<ID=chrX,length=156040895>
'''

# Meta-information header of the all-replicate VCF, assembled from its parts:
# the fixed INFO/FORMAT declarations followed by one ##contig line per chromosome.
_ALL_SAMPLE_META_LINES = (
    '##fileformat=VCFv4.2',
    '##fileDate=20200331',
    '##reference=GRCh38.d1.vd1',
    '##INFO=<ID=location,Number=1,Type=String,Description="Repeat region">',
    '##INFO=<ID=DPCT,Number=1,Type=Float,Description="Percentage of detected votes">',
    '##INFO=<ID=VPCT,Number=1,Type=Float,Description="Percentage of consnesus votes">',
    '##INFO=<ID=FPCT,Number=1,Type=Float,Description="Percentage of mendelian consisitent votes">',
    '##INFO=<ID=ALL_ALT,Number=1,Type=Float,Description="Sum of alternative reads of all samples">',
    '##INFO=<ID=ALL_DP,Number=1,Type=Float,Description="Sum of depth of all samples">',
    '##INFO=<ID=ALL_AF,Number=1,Type=Float,Description="Allele frequency of net alternatice reads and net depth">',
    '##INFO=<ID=GQ_MEAN,Number=1,Type=Float,Description="Mean of genotype quality of all samples">',
    '##INFO=<ID=MQ_MEAN,Number=1,Type=Float,Description="Mean of mapping quality of all samples">',
    '##INFO=<ID=PCR,Number=1,Type=String,Description="Consensus of PCR votes">',
    '##INFO=<ID=PCR_FREE,Number=1,Type=String,Description="Consensus of PCR-free votes">',
    '##INFO=<ID=CONSENSUS,Number=1,Type=String,Description="Consensus calls">',
    '##INFO=<ID=CONSENSUS_SEQ,Number=1,Type=String,Description="Consensus sequence">',
    '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
    '##FORMAT=<ID=DP,Number=1,Type=String,Description="Depth">',
    '##FORMAT=<ID=AF,Number=1,Type=String,Description="Allele frequency">',
    '##FORMAT=<ID=GQ,Number=1,Type=String,Description="Genotype quality">',
    '##FORMAT=<ID=MQ,Number=1,Type=String,Description="Mapping quality">',
    '##FORMAT=<ID=TWINS,Number=1,Type=String,Description="1 is twins shared, 0 is twins discordant ">',
    '##FORMAT=<ID=TRIO5,Number=1,Type=String,Description="1 is LCL7, LCL8 and LCL5 mendelian consistent, 0 is mendelian vioaltion">',
    '##FORMAT=<ID=TRIO6,Number=1,Type=String,Description="1 is LCL7, LCL8 and LCL6 mendelian consistent, 0 is mendelian vioaltion">',
)
# (contig name, length) pairs in the order they are written to the header.
_ALL_SAMPLE_CONTIGS = (
    ('chr1', 248956422),
    ('chr2', 242193529),
    ('chr3', 198295559),
    ('chr4', 190214555),
    ('chr5', 181538259),
    ('chr6', 170805979),
    ('chr7', 159345973),
    ('chr8', 145138636),
    ('chr9', 138394717),
    ('chr10', 133797422),
    ('chr11', 135086622),
    ('chr12', 133275309),
    ('chr13', 114364328),
    ('chr14', 107043718),
    ('chr15', 101991189),
    ('chr16', 90338345),
    ('chr17', 83257441),
    ('chr18', 80373285),
    ('chr19', 58617616),
    ('chr20', 64444167),
    ('chr21', 46709983),
    ('chr22', 50818468),
    ('chrX', 156040895),
)
vcf_header_all_sample = '\n'.join(
    list(_ALL_SAMPLE_META_LINES)
    + ['##contig=<ID=%s,length=%d>' % contig for contig in _ALL_SAMPLE_CONTIGS]
) + '\n'

# Output files: the benchmarking calls of one sample and the per-replicate table.
file_name = prefix + '_benchmarking_calls.vcf'
outfile = open(file_name, 'w')

all_sample_file_name = prefix + '_all_sample_information.vcf'
all_sample_outfile = open(all_sample_file_name, 'w')

# The nine fixed VCF columns that precede the sample columns in both outputs.
_VCF_FIXED_COLUMNS = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']

# Replicate names used as sample columns of the all-sample VCF, in output order.
_ALL_SAMPLE_COLUMN_NAMES = [
    'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518',
    'Quartet_DNA_BGI_SEQ2000_BGI_2_20180530',
    'Quartet_DNA_BGI_SEQ2000_BGI_3_20180530',
    'Quartet_DNA_BGI_T7_WGE_1_20191105',
    'Quartet_DNA_BGI_T7_WGE_2_20191105',
    'Quartet_DNA_BGI_T7_WGE_3_20191105',
    'Quartet_DNA_ILM_Nova_ARD_1_20181108',
    'Quartet_DNA_ILM_Nova_ARD_2_20181108',
    'Quartet_DNA_ILM_Nova_ARD_3_20181108',
    'Quartet_DNA_ILM_Nova_ARD_4_20190111',
    'Quartet_DNA_ILM_Nova_ARD_5_20190111',
    'Quartet_DNA_ILM_Nova_ARD_6_20190111',
    'Quartet_DNA_ILM_Nova_BRG_1_20180930',
    'Quartet_DNA_ILM_Nova_BRG_2_20180930',
    'Quartet_DNA_ILM_Nova_BRG_3_20180930',
    'Quartet_DNA_ILM_Nova_WUX_1_20190917',
    'Quartet_DNA_ILM_Nova_WUX_2_20190917',
    'Quartet_DNA_ILM_Nova_WUX_3_20190917',
    'Quartet_DNA_ILM_XTen_ARD_1_20170403',
    'Quartet_DNA_ILM_XTen_ARD_2_20170403',
    'Quartet_DNA_ILM_XTen_ARD_3_20170403',
    'Quartet_DNA_ILM_XTen_NVG_1_20170329',
    'Quartet_DNA_ILM_XTen_NVG_2_20170329',
    'Quartet_DNA_ILM_XTen_NVG_3_20170329',
    'Quartet_DNA_ILM_XTen_WUX_1_20170216',
    'Quartet_DNA_ILM_XTen_WUX_2_20170216',
    'Quartet_DNA_ILM_XTen_WUX_3_20170216',
]

# Header of the benchmarking VCF: meta lines, then the column line with one sample.
outputcolumn = '\t'.join(_VCF_FIXED_COLUMNS + [sample_name + '_high_confidence_calls']) + '\n'
outfile.write(vcf_header + outputcolumn)

# Header of the all-sample VCF: meta lines, then one column per replicate.
outputcolumn_all_sample = '\t'.join(_VCF_FIXED_COLUMNS + _ALL_SAMPLE_COLUMN_NAMES) + '\n'
all_sample_outfile.write(vcf_header_all_sample + outputcolumn_all_sample)

# Load both tab-separated inputs and outer-join them on chromosome, position
# and reference allele; cells missing on either side become '.'.
vcf_info = pd.read_table(vcfInfo)
mendelian_info = pd.read_table(mendelianInfo)

_MERGE_KEYS = ['#CHROM', 'POS', 'REF']
merged_df = vcf_info.merge(
    mendelian_info, how='outer', left_on=_MERGE_KEYS, right_on=_MERGE_KEYS
).fillna('.')

# function
def single_sample_format(format_x, strings_x, strings_y):
    """Build one replicate's ``GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6`` sample string.

    Args:
        format_x: Colon-separated FORMAT keys describing ``strings_x``.
        strings_x: Colon-separated sample values laid out as ``format_x``,
            or ``'.'`` when the replicate has no quality record.
        strings_y: Colon-separated string whose fields at index 1, 2 and 3
            are used as TWINS, TRIO5 and TRIO6, or ``'.'`` when absent.

    Returns:
        The eight fields joined by ``':'``. Any value that is not available
        is written as ``'.'``. When only ``strings_y`` is present the
        genotype is reported as ``'0/0'``.
    """
    missing = '.'
    gt = dp = af = gq = mq = missing
    twins = trio5 = trio6 = missing

    if strings_x != missing:
        keys = str(format_x).split(':')
        values = str(strings_x).split(':')
        # zip() stops at the shorter sequence, so a sample that drops
        # trailing fields simply lacks those keys; .get() turns them into
        # the missing value instead of raising KeyError.
        fields = dict(zip(keys, values))
        gt, dp, af, gq, mq = [fields.get(key, missing) for key in ('GT', 'DP', 'AF', 'GQ', 'MQ')]
    elif strings_y != missing:
        # Only the Mendelian record exists for this replicate.
        gt = '0/0'

    if strings_y != missing:
        flags = str(strings_y).split(':')[1:4]
        # Pad a short record so unpacking never fails.
        flags += [missing] * (3 - len(flags))
        twins, trio5, trio6 = flags

    return ':'.join([gt, dp, af, gq, mq, twins, trio5, trio6])

#
# Replicate columns present in both input tables; after the merge pandas
# suffixes the quality table's copy with '_x' and the Mendelian copy with '_y'.
_REPLICATE_COLUMNS = (
    'Quartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5',
    'Quartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5',
    'Quartet_DNA_BGI_T7_WGE_1_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_2_20191105_LCL5',
    'Quartet_DNA_BGI_T7_WGE_3_20191105_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_1_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_2_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_3_20181108_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_4_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_5_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_ARD_6_20190111_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_1_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_2_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_BRG_3_20180930_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_1_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_2_20190917_LCL5',
    'Quartet_DNA_ILM_Nova_WUX_3_20190917_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_1_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_2_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_ARD_3_20170403_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_1_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_2_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_NVG_3_20170329_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_1_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_2_20170216_LCL5',
    'Quartet_DNA_ILM_XTen_WUX_3_20170216_LCL5',
)

# Write one line per merged variant. Several columns are addressed by
# position (row[5], row[75]..row[78]) or by the positional alias row._42;
# those indices are kept exactly as they were.
# NOTE(review): confirm the positional indices against the merged table layout.
try:
    for row in merged_df.itertuples():
        # 1) Benchmarking VCF: only calls flagged as reproducible.
        if row.FILTER_y == 'reproducible':
            info = 'location=' + str(row.location) + ';' + str(row.INFO_y)
            alt_reads = row._42
            ref_reads = row.DP - alt_reads
            FORMAT = ':'.join([
                row[77],
                str(int(ref_reads)) + ',' + str(int(alt_reads)),
                str(int(row.DP)),
                str(round(row.AF, 2)),
                str(round(row.GQ, 2)),
                str(round(row.MQ, 2)),
            ])
            outline1 = '\t'.join([
                str(row._1), str(row.POS), str(row.ID_x), str(row.REF), str(row[78]),
                '.', '.', str(info), 'GT:AD:DP:AF:GQ:MQ', str(FORMAT),
            ]) + '\n'
            outfile.write(outline1)

        # 2) All-sample VCF: every row is written; the INFO column is filled
        #    only when the quality table contributed to this row.
        if row.INFO_x != '.':
            info = ';'.join([
                'location=' + str(row.location),
                str(row.INFO_y),
                'ALL_ALT=' + str(int(row._42)),
                'ALL_DP=' + str(int(row.DP)),
                'ALL_AF=' + str(round(row.AF, 1)),
                'GQ_MEAN=' + str(round(row.GQ, 1)),
                'MQ_MEAN=' + str(round(row.MQ, 1)),
                'PCR=' + str(row[75]),
                'PCR_FREE=' + str(row[76]),
                'CONSENSUS=' + str(row[77]),
                'CONSENSUS_SEQ=' + str(row[78]),
            ])
        else:
            info = '.'

        sample_fields = [
            str(single_sample_format(row.FORMAT_x,
                                     getattr(row, column + '_x'),
                                     getattr(row, column + '_y')))
            for column in _REPLICATE_COLUMNS
        ]
        outline2 = '\t'.join([
            str(row._1), str(row.POS), str(row.ID_x), str(row.REF), str(row[5]),
            '.', '.', str(info), 'GT:DP:AF:GQ:MQ:TWINS:TRIO5:TRIO6',
        ] + sample_fields) + '\n'
        all_sample_outfile.write(outline2)
finally:
    # Flush and release both outputs even if a row fails to format.
    outfile.close()
    all_sample_outfile.close()






+ 0
- 68
codescripts/bed_for_bamReadcount.py Целия файл

@@ -1,68 +0,0 @@
import sys,getopt
import os
import re
import fileinput

def usage():
    """Print the command-line help text to standard output."""
    help_lines = [
        "",
        "Usage: python bed_for_bamReadcount.py -i input_vcf_file -o prefix",
        "",
        "This script selects SNPs and Indels supported by all callsets.",
        "",
        "Please notice that bam-readcount only takes in 1-based coordinates.",
        "",
        "Input:",
        "-i a vcf file",
        "",
        "Output:",
        "-o a indel bed file for bam-readcount",
        "",
    ]
    print("\n".join(help_lines))

# select supported small variants
def process(oneLine):
    """Route one VCF line to the indel BED output or to the MNP output.

    Lines starting with '#' are ignored. A record whose REF is longer than
    one base and whose ALT is a single base is written to ``outINDEL`` at
    POS + 1; a record with a single-base REF and one longer ALT (no comma)
    is written to ``outINDEL`` at POS. Every other record is copied
    unchanged to ``outMNP``.

    Args:
        oneLine: One raw, tab-separated line of the input VCF.
    """
    # A plain prefix test replaces the regex '^\#', whose backslash was an
    # invalid escape sequence in a non-raw string literal.
    if oneLine.startswith('#'):
        return

    strings = oneLine.strip().split('\t')
    chrom, position, ref, alt = strings[0], strings[1], strings[3], strings[4]

    if len(ref) > 1 and len(alt) == 1:
        # Deletion: shift to the base after the anchor for bam-readcount.
        pos = str(int(position) + 1)
        outINDEL.write('\t'.join([chrom, pos, pos, ref, alt]) + '\n')
    elif len(ref) == 1 and len(alt) > 1 and ',' not in alt:
        # Insertion with a single alternative allele: keep the position.
        outINDEL.write('\t'.join([chrom, position, position, ref, alt]) + '\n')
    else:
        outMNP.write(oneLine)

# Parse the command line: -i <input vcf>, -o <output prefix>, -h for help.
inputFile = None
prefix = None
opts, args = getopt.getopt(sys.argv[1:], "hi:o:")
for op, value in opts:
    if op == "-i":
        inputFile = value
    elif op == "-o":
        prefix = value
    elif op == "-h":
        usage()
        sys.exit()

# Both options are required. Checking the parsed values (rather than the
# argument count) avoids a NameError when one of them is missing.
if inputFile is None or prefix is None:
    usage()
    sys.exit()

INDELname = prefix + '.bed'
MNPname = prefix + '_MNP.txt'

# process() writes to the module-level names outINDEL and outMNP; the with
# statement binds them here and closes both files even if a line fails.
with open(INDELname, 'w') as outINDEL, open(MNPname, 'w') as outMNP:
    for line in fileinput.input(inputFile):
        process(line)


+ 15
- 5
codescripts/high_confidence_call_vote.py Целия файл

@@ -27,6 +27,7 @@ vcf_header = '''##fileformat=VCFv4.2
##reference=GRCh38.d1.vd1
##INFO=<ID=DPCT,Number=1,Type=Float,Description="Percentage of detected votes">
##INFO=<ID=VPCT,Number=1,Type=Float,Description="Percentage of consnesus votes">
##INFO=<ID=FPCT,Number=1,Type=Float,Description="Percentage of mendelian consisitent votes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
@@ -82,7 +83,14 @@ def vote_percentage(strings,consensus_call):
return(str(percentage))

def family_vote(strings,consensus_call):
    """Return the share of votes that match the consensus and are Mendelian consistent.

    Args:
        strings: Per-replicate sample strings; the genotype is the first
            colon-separated field and the last five characters are compared
            with '1:1:1'.
        consensus_call: Genotype to match, compared with the output of
            ``gt_uniform``.

    Returns:
        The fraction, over a fixed denominator of 27, of replicates whose
        uniformed genotype equals ``consensus_call`` and whose trailing flags
        are '1:1:1', rounded to four decimals and converted to ``str``.
    """
    # NOTE(review): 27 presumably is the number of replicate columns - confirm.
    strings = [x.replace('.', '0/0') for x in strings]
    gt = [gt_uniform(x.split(':')[0]) for x in strings]
    mendelian = [x[-5:] for x in strings]
    # A plain filter replaces itemgetter(*indices), which raises TypeError
    # when no replicate matches the consensus call.
    matched_mendelian = [flags for call, flags in zip(gt, mendelian) if call == consensus_call]
    percentage = round(matched_mendelian.count('1:1:1') / 27, 4)
    return str(percentage)

def gt_uniform(strings):
uniformed_gt = ''
@@ -140,10 +148,10 @@ def main():
outfile.write(outLine)
else:
# pre-define
pcr_consensus = ''
pcr_free_consensus = ''
consensus_call = ''
consensus_alt_seq = ''
pcr_consensus = '.'
pcr_free_consensus = '.'
consensus_call = '.'
consensus_alt_seq = '.'
# pcr
pcr = itemgetter(*[9,10,11,27,28,29,30,31,32,33,34,35])(strings)
SEQ2000 = decide_by_rep(pcr[0:3])
@@ -183,6 +191,8 @@ def main():
strings[7] = 'VPCT=' + VPCT
DPCT = detected_percentage(strings[9:])
strings[7] = strings[7] + ';DPCT=' + DPCT
FPCT = family_vote(strings[9:],consensus_call)
strings[7] = strings[7] + ';FPCT=' + FPCT
# Delete multiple alternative genotype to necessary expression
strings[6] = 'reproducible'
alt = strings[4]

+ 0
- 0
codescripts/information_intergration.py Целия файл


+ 17
- 15
codescripts/variants_quality_location_intergration.py Целия файл

@@ -27,33 +27,34 @@ def get_location(info):
return repeat


def extract_info_normal(strings):
AF = []
def extract_info_normal(FORMAT,strings):
GQ = []
MQ = []
DP = []
ALT = []
format_strings = FORMAT.split(':')
for element in strings:
if element == '.':
pass
else:
ad = element.split(':')[1]
ref = ad.split(',')[0]
alt = ad.split(',')[1]
af = float(int(alt)/(int(ref) + int(alt)))
gq = int(element.split(':')[3])
mq = float(element.split(':')[5])
dp = int(element.split(':')[2])
AF.append(af)
element_strings = element.split(':')
formatDict = dict(zip(format_strings, element_strings))
alt = int(formatDict['ALT'])
dp = int(formatDict['DP'])
gq = int(formatDict['GQ'])
mq = float(formatDict['MQ'])
GQ.append(gq)
MQ.append(mq)
DP.append(dp)
ALT.append(int(alt))
AF_m = statistics.mean(AF)
GQ_m = statistics.mean(GQ)
MQ_m = statistics.mean(MQ)
ALT.append(alt)
DP_a = sum(DP)
ALT_a = sum(ALT)
if DP_a == 0:
AF_m = 'NA'
else:
AF_m = float(ALT_a/DP_a)
GQ_m = statistics.mean(GQ)
MQ_m = statistics.mean(MQ)
return AF_m,GQ_m,MQ_m,DP_a,ALT_a


@@ -61,6 +62,7 @@ file_name = prefix + '_variant_quality_location.txt'
outfile = open(file_name,'w')
outputcolumn = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tQuartet_DNA_BGI_SEQ2000_BGI_1_20180518_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_2_20180530_LCL5\tQuartet_DNA_BGI_SEQ2000_BGI_3_20180530_LCL5\tQuartet_DNA_BGI_T7_WGE_1_20191105_LCL5\tQuartet_DNA_BGI_T7_WGE_2_20191105_LCL5\tQuartet_DNA_BGI_T7_WGE_3_20191105_LCL5\tQuartet_DNA_ILM_Nova_ARD_1_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_2_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_3_20181108_LCL5\tQuartet_DNA_ILM_Nova_ARD_4_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_5_20190111_LCL5\tQuartet_DNA_ILM_Nova_ARD_6_20190111_LCL5\tQuartet_DNA_ILM_Nova_BRG_1_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_2_20180930_LCL5\tQuartet_DNA_ILM_Nova_BRG_3_20180930_LCL5\tQuartet_DNA_ILM_Nova_WUX_1_20190917_LCL5\tQuartet_DNA_ILM_Nova_WUX_2_20190917_LCL5\tQuartet_DNA_ILM_Nova_WUX_3_20190917_LCL5\tQuartet_DNA_ILM_XTen_ARD_1_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_2_20170403_LCL5\tQuartet_DNA_ILM_XTen_ARD_3_20170403_LCL5\tQuartet_DNA_ILM_XTen_NVG_1_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_2_20170329_LCL5\tQuartet_DNA_ILM_XTen_NVG_3_20170329_LCL5\tQuartet_DNA_ILM_XTen_WUX_1_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_2_20170216_LCL5\tQuartet_DNA_ILM_XTen_WUX_3_20170216_LCL5' +'\t'+ 'location' + '\t' + 'AF' + '\t' + 'GQ' + '\t' + 'MQ' + '\t' + 'DP' + '\t' + 'ALT' +'\n'
outfile.write(outputcolumn)

for line in fileinput.input(multi_sample_vcf):
m = re.match('^\#',line)
if m is not None:
@@ -69,7 +71,7 @@ for line in fileinput.input(multi_sample_vcf):
line = line.strip()
strings = line.split('\t')
repeat = get_location(strings[7])
AF,GQ,MQ,DP,ALT = extract_info_normal(strings[9:])
AF,GQ,MQ,DP,ALT = extract_info_normal(strings[8],strings[9:])
outLine = '\t'.join(strings) + '\t' + repeat +'\t' + str(AF) + '\t' + str(GQ) + '\t' + str(MQ) + '\t' + str(DP) + '\t' + str(ALT) + '\n'
outfile.write(outLine)


+ 1
- 1
codescripts/vcf_mq_af.py Целия файл

@@ -15,7 +15,7 @@ normed_vcf = args.normed_vcf
prefix = args.prefix


file_name = prefix + '.variant_quality_location.vcf'
file_name = prefix + '_variant_quality_location.vcf'
outfile = open(file_name,'w')

for line in fileinput.input(normed_vcf):

+ 4
- 4
tasks/VCFinfo.wdl Целия файл

@@ -1,5 +1,5 @@
task VCFinfo {
File merged_info
File repeat_annotated_vcf
String sample
String docker
String cluster_config
@@ -7,8 +7,8 @@ task VCFinfo {
command <<<

python /opt/variants_quality_location_intergration.py -vcf ${repeat_annotated_vcf} -prefix ${sample}
>>>

runtime {
@@ -18,6 +18,6 @@ task VCFinfo {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File extracted_info = ""
File extracted_info = "${sample}_variant_quality_location.txt"
}
}

+ 3
- 38
tasks/bed_annotation.wdl Целия файл

@@ -9,43 +9,9 @@ task bed_annotation {
command <<<

rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf} -o ${sample}.normed.repeatAnno.vcf.gz
## DP
zcat ${sample}.normed.repeatAnno.vcf.gz | grep -v '##' | awk '
BEGIN { OFS = "\t" }
NF > 2 && FNR > 1 {
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[3];
}
}
{ print }
' > ${sample}.depth.txt
## GQ
zcat ${sample}.normed.repeatAnno.vcf.gz | grep -v '##' | awk '
BEGIN { OFS = "\t" }
NF > 2 && FNR > 1 {
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[4];
}
}
{ print }
' > ${sample}.genotypeQuality.txt

## MQ
zcat ${sample}.normed.repeatAnno.vcf.gz | grep -v '##' | awk '
BEGIN { OFS = "\t" }
NF > 2 && FNR > 1 {
for ( i=9; i<=NF; i++ ) {
split($i,a,":") ;$i = a[6];
}
}
{ print }
' > ${sample}.mappinyQuality.txt

## Allele frequency
rtg vcfannotate --bed-info=${repeat_bed} -i ${merged_vcf} -o ${sample}.normed.repeatAnno.vcf.gz

gunzip ${sample}.normed.repeatAnno.vcf.gz

>>>

@@ -56,7 +22,6 @@ task bed_annotation {
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File repeat_annotated_vcf = "${sample}.normed.repeatAnno.vcf.gz"
File repeat_annotated_vcf_idx = "${sample}.normed.repeatAnno.vcf.gz.tbi"
File repeat_annotated_vcf = "${sample}.normed.repeatAnno.vcf"
}
}

+ 26
- 0
tasks/final_result.wdl Целия файл

@@ -0,0 +1,26 @@
task FinalResult {
File extracted_info
File annotated_txt
String prefix
String sample
String docker
String cluster_config
String disk_size
command <<<

python /opt/FinalResult2VCF.py -vcfInfo ${extracted_info} -mendelianInfo ${annotated_txt} -prefix ${prefix} -sample ${sample}

>>>

runtime {
docker:docker
cluster: cluster_config
systemDisk: "cloud_ssd 40"
dataDisk: "cloud_ssd " + disk_size + " /cromwell_root/"
}
output {
File benchmarking_calls = "${prefix}_benchmarking_calls.vcf"
File all_info = "${prefix}_all_sample_information.vcf"
}
}

+ 3
- 0
tasks/votes.wdl Целия файл

@@ -9,6 +9,8 @@ task votes {
command <<<
python /opt/high_confidence_call_vote.py -vcf ${merged_vcf} -dup ${vcf_dup} -sample ${sample} -prefix ${prefix}

cat ${prefix}_annotated.vcf | grep -v '##' > ${prefix}.txt
>>>

runtime {
@@ -19,5 +21,6 @@ task votes {
}
output {
File annotated_vcf = "${prefix}_annotated.vcf"
File annotated_txt = "${prefix}.txt"
}
}

+ 66
- 0
workflow.wdl Целия файл

@@ -9,6 +9,8 @@ import "./tasks/merge.wdl" as merge
import "./tasks/votes.wdl" as votes
import "./tasks/bed_annotation.wdl" as bed_annotation
import "./tasks/mergeVCFInfo.wdl" as mergeVCFInfo
import "./tasks/VCFinfo.wdl" as VCFinfo
import "./tasks/final_result.wdl" as FinalResult

workflow {{ project_name }} {
File inputSamplesFile
@@ -299,6 +301,22 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFinfo.VCFinfo as LCL5allSampleReform {
input:
repeat_annotated_vcf=LCL5bedAnnotation.repeat_annotated_vcf,
sample='LCL5',
cluster_config=cluster_config,
disk_size=disk_size
}
call FinalResult.FinalResult as LCL5FinalResult {
input:
extracted_info=LCL5allSampleReform.extracted_info,
annotated_txt=LCL5votes.annotated_txt,
prefix='LCL5',
sample='LCL5',
cluster_config=cluster_config,
disk_size=disk_size,
}
call mergeVCFInfo.mergeVCFInfo as LCL6mergeVCF {
input:
vcf_gz=LCL6normZip.vcf_gz,
@@ -316,6 +334,22 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFinfo.VCFinfo as LCL6allSampleReform {
input:
repeat_annotated_vcf=LCL6bedAnnotation.repeat_annotated_vcf,
sample='LCL6',
cluster_config=cluster_config,
disk_size=disk_size
}
call FinalResult.FinalResult as LCL6FinalResult {
input:
extracted_info=LCL6allSampleReform.extracted_info,
annotated_txt=LCL6votes.annotated_txt,
prefix='LCL6',
sample='LCL6',
cluster_config=cluster_config,
disk_size=disk_size,
}
call mergeVCFInfo.mergeVCFInfo as LCL7mergeVCF {
input:
vcf_gz=LCL7normZip.vcf_gz,
@@ -333,6 +367,22 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFinfo.VCFinfo as LCL7allSampleReform {
input:
repeat_annotated_vcf=LCL7bedAnnotation.repeat_annotated_vcf,
sample='LCL7',
cluster_config=cluster_config,
disk_size=disk_size
}
call FinalResult.FinalResult as LCL7FinalResult {
input:
extracted_info=LCL7allSampleReform.extracted_info,
annotated_txt=LCL7votes.annotated_txt,
prefix='LCL7',
sample='LCL7',
cluster_config=cluster_config,
disk_size=disk_size,
}
call mergeVCFInfo.mergeVCFInfo as LCL8mergeVCF {
input:
vcf_gz=LCL8normZip.vcf_gz,
@@ -350,5 +400,21 @@ workflow {{ project_name }} {
cluster_config=cluster_config,
disk_size=disk_size
}
call VCFinfo.VCFinfo as LCL8allSampleReform {
input:
repeat_annotated_vcf=LCL8bedAnnotation.repeat_annotated_vcf,
sample='LCL8',
cluster_config=cluster_config,
disk_size=disk_size
}
call FinalResult.FinalResult as LCL8FinalResult {
input:
extracted_info=LCL8allSampleReform.extracted_info,
annotated_txt=LCL8votes.annotated_txt,
prefix='LCL8',
sample='LCL8',
cluster_config=cluster_config,
disk_size=disk_size,
}
}


Loading…
Отказ
Запис