Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

57 lines
2.2KB

  1. # import modules
  2. ################################
  3. import sys, argparse, os
  4. import fileinput
  5. from operator import itemgetter
  6. parser = argparse.ArgumentParser(description="this script is to vote callable bed region")
  7. parser.add_argument('-bed', '--multiSampleBED', type=str, help='The bed file to get high confidence region', required=True)
  8. parser.add_argument('-prefix', '--prefix', type=str, help='The output file you want to name', required=True)
  9. args = parser.parse_args()
  10. # Rename input:
  11. input_file = args.multiSampleBED
  12. prefix = args.prefix
  13. consensus_filename = prefix + '.27consensus.bed'
  14. outCONSENSUS = open(consensus_filename,'w')
  15. filter_filename = prefix + '.filtered.bed'
  16. outFiltered = open(filter_filename,'w')
  17. #initial
  18. #sequence_tech = ['SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ2000','SEQ500','SEQ500','SEQ500','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','Nova','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen','XTen']
  19. #sequence_site = ['BGI','BGI','BGI','WGE','WGE','BGI','BGI','BGI','ARD','ARD','ARD','ARD','ARD','ARD','BRG','BRG','BRG','BRG','GAC','NVG','WUX','ARD','ARD','ARD','NVG','NVG','NVG','WUX','WUX','WUX','WUX','WUX','WUX']
  20. def consensus_bed(oneLine):
  21. line = oneLine.strip()
  22. strings = line.split('\t')
  23. # replicate
  24. SEQ2000_BGI = 1 if strings[5:8].count('1') > 1 else 0
  25. T7_WGE = 1 if strings[8:11].count('1') > 1 else 0
  26. Nova_ARD_1 = 1 if strings[11:14].count('1') > 1 else 0
  27. Nova_ARD_2 = 1 if strings[14:17].count('1') > 1 else 0
  28. Nova_BRG = 1 if strings[17:20].count('1') > 1 else 0
  29. Nova_WUX = 1 if strings[20:23].count('1') > 1 else 0
  30. XTen_ARD = 1 if strings[23:26].count('1') >1 else 0
  31. XTen_NVG = 1 if strings[26:29].count('1') > 1 else 0
  32. XTen_WUX = 1 if strings[29:32].count('1') > 1 else 0
  33. # library
  34. pcr = 1 if [SEQ2000_BGI,XTen_ARD,XTen_WUX,XTen_NVG].count(1) > 2 else 0
  35. pcr_free = 1 if [T7_WGE,Nova_ARD_1,Nova_ARD_2,Nova_BRG,Nova_WUX].count(1) > 3 else 0
  36. voted = 1 if [pcr,pcr_free].count(1) > 1 else 0
  37. # get consensus bed and tech specific bed
  38. if voted == 1:
  39. outCONSENSUS.write(oneLine)
  40. else:
  41. outFiltered.write(oneLine)
  42. for oneLine in fileinput.input(input_file):
  43. consensus_bed(oneLine)
  44. outCONSENSUS.close()
  45. outFiltered.close()