Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

69 Zeilen
1.7KB

  1. import sys,getopt
  2. import os
  3. import re
  4. import fileinput
  5. def usage():
  6. print(
  7. """
  8. Usage: python bed_for_bamReadcount.py -i input_vcf_file -o prefix
  9. This script selects SNPs and Indels supported by all callsets.
  10. Please notice that bam-readcount only takes in 1-based coordinates.
  11. Input:
  12. -i a vcf file
  13. Output:
  14. -o a indel bed file for bam-readcount
  15. """)
  16. # select supported small variants
  17. def process(oneLine):
  18. m = re.match('^\#',oneLine)
  19. if m is not None:
  20. pass
  21. else:
  22. line = oneLine.rstrip()
  23. strings = line.strip().split('\t')
  24. # convert the position to bed file for bam-readcount
  25. # deletion
  26. if len(strings[3]) > 1 and len(strings[4]) == 1:
  27. pos = int(strings[1]) + 1
  28. outline = strings[0] + '\t' + str(pos) + '\t' + str(pos) + '\t' + strings[3] + '\t' + strings[4]+'\n'
  29. outINDEL.write(outline)
  30. # insertion
  31. elif len(strings[3]) == 1 and len(strings[4]) > 1 and (',' not in strings[4]):
  32. outline = strings[0] + '\t' + strings[1] + '\t' + strings[1] + '\t' + strings[3] + '\t' + strings[4] + '\n'
  33. outINDEL.write(outline)
  34. else:
  35. outMNP.write(oneLine)
  36. opts,args = getopt.getopt(sys.argv[1:],"hi:o:")
  37. for op,value in opts:
  38. if op == "-i":
  39. inputFile=value
  40. elif op == "-o":
  41. prefix=value
  42. elif op == "-h":
  43. usage()
  44. sys.exit()
  45. if len(sys.argv[1:]) < 3:
  46. usage()
  47. sys.exit()
  48. INDELname = prefix + '.bed'
  49. MNPname = prefix + '_MNP.txt'
  50. outINDEL = open(INDELname,'w')
  51. outMNP = open(MNPname,'w')
  52. for line in fileinput.input(inputFile):
  53. process(line)
  54. outINDEL.close()
  55. outMNP.close()