|
- import sys,getopt
- import os
- import re
- import fileinput
-
def usage():
    """Print the command-line help text for this script to standard output."""
    # The help text previously described a different tool and called -o a
    # single bed file; the script actually treats -o as a prefix and writes
    # two files (prefix.bed and prefix_MNP.txt).
    print(
        """
Usage: python bed_for_bamReadcount.py -i input_vcf_file -o prefix

This script converts the indels of a VCF file into a position list for
bam-readcount and sets every other record aside in a separate file.

Please notice that bam-readcount only takes in 1-based coordinates.

Input:
-i a vcf file

Output:
-o a prefix for the output files:
   prefix.bed      indel positions for bam-readcount
   prefix_MNP.txt  every record not written to prefix.bed, unchanged

Other:
-h show this help message and exit
""")
-
def process(oneLine, indel_out=None, mnp_out=None):
    """Route one VCF line to the indel position file or the leftover file.

    Lines starting with '#' (VCF header/meta lines) and blank lines are
    ignored. For every other line the tab-separated REF (column 4) and ALT
    (column 5) fields decide where it goes:

    * deletion  (REF longer than one base, ALT exactly one base):
      writes ``CHROM, POS+1, POS+1, REF, ALT`` to the indel file.
    * insertion (REF exactly one base, ALT longer than one base and not a
      comma-separated list): writes ``CHROM, POS, POS, REF, ALT`` to the
      indel file.
    * anything else: the original line is written unchanged to the
      leftover file.

    Args:
        oneLine: one raw line of the VCF file, including its line ending.
        indel_out: writable text file object for indel positions. Defaults
            to the module-level ``outINDEL`` handle.
        mnp_out: writable text file object for all remaining records.
            Defaults to the module-level ``outMNP`` handle.

    Raises:
        IndexError: if a non-blank data line has fewer than five
            tab-separated columns.
        ValueError: if POS of a deletion record is not an integer.
    """
    # A plain prefix test replaces the former regex '^\#', whose '\#' is an
    # invalid escape sequence in a non-raw string literal.
    if oneLine.startswith('#'):
        return

    stripped = oneLine.strip()
    if not stripped:
        # Blank lines used to crash with IndexError on the column lookup.
        return

    # Fall back to the script-level handles so existing one-argument calls
    # keep working.
    if indel_out is None:
        indel_out = outINDEL
    if mnp_out is None:
        mnp_out = outMNP

    fields = stripped.split('\t')
    chrom, vcf_pos, ref, alt = fields[0], fields[1], fields[3], fields[4]

    if len(ref) > 1 and len(alt) == 1:
        # Deletion: shift by one, presumably so the reported position is the
        # first deleted base rather than the preceding anchor base.
        pos = str(int(vcf_pos) + 1)
        indel_out.write('\t'.join((chrom, pos, pos, ref, alt)) + '\n')
    elif len(ref) == 1 and len(alt) > 1 and ',' not in alt:
        # Insertion: the VCF position is used as-is; multi-allelic ALT
        # values (containing ',') are deliberately excluded here.
        indel_out.write('\t'.join((chrom, vcf_pos, vcf_pos, ref, alt)) + '\n')
    else:
        mnp_out.write(oneLine)
-
def main(argv=None):
    """Parse the command line and convert the input VCF.

    Reads the file given with ``-i`` line by line, passing each line to
    ``process``, and writes ``<prefix>.bed`` and ``<prefix>_MNP.txt`` where
    ``<prefix>`` is the value given with ``-o``.

    Args:
        argv: list of command-line arguments without the program name.
            Defaults to ``sys.argv[1:]``.

    Exits with status 0 after ``-h``; exits with status 2 (after printing
    the usage text) when an option is invalid or ``-i``/``-o`` is missing.
    """
    # process() looks these handles up as module-level names, so they must
    # be bound globally rather than as locals of this function.
    global outINDEL, outMNP

    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _args = getopt.getopt(argv, "hi:o:")
    except getopt.GetoptError as err:
        # Unknown option or missing option value: report it instead of
        # dying with a traceback.
        sys.stderr.write(str(err) + '\n')
        usage()
        sys.exit(2)

    inputFile = None
    prefix = None
    for op, value in opts:
        if op == "-i":
            inputFile = value
        elif op == "-o":
            prefix = value
        elif op == "-h":
            usage()
            sys.exit()

    # Check the parsed options themselves rather than counting raw
    # arguments: the count rejected the valid attached form "-iFILE -oPREFIX"
    # and let "-i FILE extra" through to a NameError on the prefix.
    if inputFile is None or prefix is None:
        usage()
        sys.exit(2)

    INDELname = prefix + '.bed'
    MNPname = prefix + '_MNP.txt'

    # The with-statement closes both outputs even if processing fails.
    with open(INDELname, 'w') as outINDEL, open(MNPname, 'w') as outMNP:
        try:
            for line in fileinput.input(inputFile):
                process(line)
        finally:
            fileinput.close()


if __name__ == "__main__":
    main()
|