You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

53 lines
1.3KB

from __future__ import division

import argparse
import fileinput
import os
import re
import statistics
import sys
  6. # input arguments
  7. parser = argparse.ArgumentParser(description="this script is to get mapping quality, allele frequency and alternative depth")
  8. parser.add_argument('-vcf', '--normed_vcf', type=str, help='The VCF file you want to used', required=True)
  9. parser.add_argument('-prefix', '--prefix', type=str, help='Prefix of output file name', required=True)
  10. args = parser.parse_args()
  11. normed_vcf = args.normed_vcf
  12. prefix = args.prefix
  13. file_name = prefix + '_variant_quality_location.vcf'
  14. outfile = open(file_name,'w')
  15. for line in fileinput.input(normed_vcf):
  16. m = re.match('^\#',line)
  17. if m is not None:
  18. outfile.write(line)
  19. else:
  20. line = line.strip()
  21. strings = line.split('\t')
  22. strings[8] = strings[8] + ':MQ:ALT:AF'
  23. infos = strings[7].strip().split(';')
  24. ## MQ
  25. for element in infos:
  26. m = re.match('MQ=',element)
  27. if m is not None:
  28. MQ = element.split('=')[1]
  29. ## ALT
  30. ad = strings[9].split(':')[1]
  31. ad_single = ad.split(',')
  32. ad_single = [int(i) for i in ad_single]
  33. DP = sum(ad_single)
  34. if DP != 0:
  35. ad_single.pop(0)
  36. ALT = sum(ad_single)
  37. AF = ALT/DP
  38. else:
  39. ALT = 0
  40. AF = 'NA'
  41. outLine = '\t'.join(strings) + ':' + MQ + ':' + str(ALT) + ':' + str(AF) + '\n'
  42. outfile.write(outLine)