"""Build a bam-readcount region file from the indels of a VCF file.

Simple deletions and single-allele insertions are written to
``<prefix>.bed``; every other non-header record is copied unchanged to
``<prefix>_MNP.txt``.
"""
import fileinput
import getopt
import os
import re
import sys

# Fallback output handles for process() when the caller passes none.
# Kept at module level so that the historical one-argument call
# ``process(line)`` keeps working for code that sets these names itself.
outINDEL = None
outMNP = None


def usage():
    """Print the command-line help text to standard output."""
    print(
        """
Usage: python bed_for_bamReadcount.py -i input_vcf_file -o prefix

This script selects SNPs and Indels supported by all callsets.

Please notice that bam-readcount only takes in 1-based coordinates.

Input:
    -i a vcf file

Output:
    -o a indel bed file for bam-readcount
""")


def process(oneLine, indel_out=None, mnp_out=None):
    """Classify one VCF line and write it to the matching output.

    Args:
        oneLine: A raw line from the VCF file, including its newline.
        indel_out: Writable text handle receiving the five-column indel
            records. Defaults to the module-level ``outINDEL``.
        mnp_out: Writable text handle receiving every record that is
            neither a simple deletion nor a single-allele insertion.
            Defaults to the module-level ``outMNP``.

    Raises:
        IndexError: If a non-blank, non-header line has fewer than five
            tab-separated columns.
        ValueError: If the position of a deletion record is not an integer.
    """
    if indel_out is None:
        indel_out = outINDEL
    if mnp_out is None:
        mnp_out = outMNP

    # Header / meta lines carry no variant.
    if oneLine.startswith('#'):
        return

    record = oneLine.strip()
    # Blank lines (e.g. a trailing newline at end of file) carry no
    # variant either; skip them instead of failing on missing columns.
    if not record:
        return

    fields = record.split('\t')
    chrom, pos, ref, alt = fields[0], fields[1], fields[3], fields[4]

    if len(ref) > 1 and len(alt) == 1:
        # Deletion: the reported coordinate is POS + 1 (presumably because
        # VCF POS names the anchor base before the deleted bases -- confirm
        # against the bam-readcount documentation).
        start = str(int(pos) + 1)
        indel_out.write('\t'.join((chrom, start, start, ref, alt)) + '\n')
    elif len(ref) == 1 and len(alt) > 1 and ',' not in alt:
        # Insertion with a single alternate allele: POS is used as-is.
        indel_out.write('\t'.join((chrom, pos, pos, ref, alt)) + '\n')
    else:
        # Everything else (SNVs, multi-allelic sites, complex variants) is
        # passed through verbatim.
        mnp_out.write(oneLine)


def main(argv=None):
    """Parse the command line and convert the input VCF.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``.

    Exits with status 0 after ``-h``, and with status 2 (after printing the
    usage text) when an option is invalid or ``-i`` / ``-o`` is missing.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _ = getopt.getopt(argv, "hi:o:")
    except getopt.GetoptError as err:
        sys.stderr.write(str(err) + '\n')
        usage()
        sys.exit(2)

    input_file = None
    prefix = None
    for op, value in opts:
        if op == "-i":
            input_file = value
        elif op == "-o":
            prefix = value
        elif op == "-h":
            usage()
            sys.exit()

    # Both options are required; checking the parsed values (rather than
    # the raw argument count) also catches stray positional arguments.
    if input_file is None or prefix is None:
        usage()
        sys.exit(2)

    indel_name = prefix + '.bed'
    mnp_name = prefix + '_MNP.txt'

    # Context managers guarantee the outputs are flushed and closed even
    # if a malformed record aborts processing.
    with open(indel_name, 'w') as indel_out, open(mnp_name, 'w') as mnp_out:
        lines = fileinput.input(input_file)
        try:
            for line in lines:
                process(line, indel_out, mnp_out)
        finally:
            lines.close()


if __name__ == "__main__":
    main()