You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

38 lines
993B

  1. import pandas as pd
  2. import sys, argparse, os
  3. mut = pd.read_table('/mnt/pgx_src_data_pool_4/home/renluyao/manuscript/MIE/vcf/mutation_type',header=None)
  4. outIndel = open(sys.argv[1],'w')
  5. for row in mut.itertuples():
  6. if ',' in row._4:
  7. alt_seq = row._4.split(',')
  8. alt_len = [len(i) for i in alt_seq]
  9. alt = max(alt_len)
  10. else:
  11. alt = len(row._4)
  12. ref = row._3
  13. pos = row._2
  14. if len(ref) == 1 and alt == 1:
  15. pass
  16. elif len(ref) > alt:
  17. StartPos = int(pos) - 1
  18. EndPos = int(pos) + (len(ref) - 1)
  19. outline_indel = row._1 + '\t' + str(StartPos) + '\t' + str(EndPos) + '\n'
  20. outIndel.write(outline_indel)
  21. elif alt > len(ref):
  22. StartPos = int(pos) - 1
  23. EndPos = int(pos) + (alt - 1)
  24. outline_indel = row._1 + '\t' + str(StartPos) + '\t' + str(EndPos) + '\n'
  25. outIndel.write(outline_indel)
  26. elif len(ref) == alt:
  27. StartPos = int(pos) - 1
  28. EndPos = int(pos) + (alt - 1)
  29. outline_indel = row._1 + '\t' + str(StartPos) + '\t' + str(EndPos) + '\n'
  30. outIndel.write(outline_indel)