#!/usr/bin/env python3
import re
import sys,os,shutil
from argparse import ArgumentParser
import mmap
from bisect import bisect

# return charges for high model in electronic embedding
#
# Reads a Gaussian input file, splits it into header / atom / trailing
# sections, and prints "x y z charge" for every atom that is neither selected
# by patONIOM nor within two bonds of a selected atom.
#
# Usage: script.py [input.gjf]     (defaults to DEFAULT_INPUT)

DEFAULT_INPUT = "5awl.oniom.gjf"

# Header used when no input file name is given.
DUMMY_HEAD = ["# b3lyp/6-31g(d)",
              "",
              "no_input",
              "",
              "0 1"]

# Selects atom lines that end in "H" (optionally followed by spaces) or that
# contain "L H".
# NOTE(review): presumably these are high-layer atoms and low-layer atoms that
# carry a hydrogen link atom -- confirm against the ONIOM input convention.
patONIOM = re.compile(r"(H *$|L H)")

# Atom line of the form "<elem>-<type>-<charge>[(...)] [freeze] x y z ...".
# group(1) is the charge, group(2) the three coordinates with their leading
# separators.
patC = re.compile(r'^\s*[^()\s,-]+-[^()\s,-]+-([\d.-]+)(?:\([^()]+\))?(?:[\s,]+[\d-]{1,2}(?=[\s,]))?((?:[\s,]+-?\d+\.\d*){3})')


def split_sections(lines):
    """Split the lines of a Gaussian input into ``(head, cart, tail)``.

    ``head`` receives everything up to and including the first non-blank line
    after the second blank line, ``cart`` the following block of lines up to
    the next blank line, and ``tail`` the block after that.  Scanning stops at
    the blank line that ends ``tail`` or at a non-blank line containing
    ``--Link1--``.
    """
    head, cart, tail = [], [], []
    # cnt is a small state counter driven by blank lines:
    #   0-3 -> header, 4 -> atom block, 5 -> separator, 6 -> trailing block.
    cnt = 0
    for l in lines:
        if not l.split():
            cnt += 1
        elif '--Link1--' in l:
            break
        if cnt <= 3:
            head.append(l)
            if cnt >= 2:
                # Second blank line (2 -> 3), then the single line that
                # follows it (3 -> 4); after that the atom block starts.
                cnt += 1
        elif cnt == 4:
            cart.append(l)
        elif cnt == 5:
            # Blank line that ended the atom block.
            cnt += 1
        elif cnt == 6:
            tail.append(l)
        else:
            break
    return head, cart, tail


def read_sections(path):
    """Return ``(head, cart, tail)`` for the file at *path*.

    An empty *path* yields the dummy header and empty atom/trailing sections.
    Errors from ``open`` (e.g. a missing file) are propagated.
    """
    if not path:
        # template not found, dummy is used
        return list(DUMMY_HEAD), [], []
    with open(path) as f:
        return split_sections(f.read().splitlines())


def build_pairs(natoms, tail):
    """Build 0-based adjacency lists from connectivity lines.

    Each line is read as ``<atom> <partner> <x> <partner> <x> ...`` with
    1-based indices; every second token after the first is taken as a
    partner.  Lines without any token are skipped, so an absent connectivity
    section simply produces no bonds.
    """
    pairs = [[] for _ in range(natoms)]
    for t in tail:
        fields = t.split()
        if not fields:
            continue
        targ = int(fields[0]) - 1
        for p in fields[1::2]:
            partner = int(p) - 1
            pairs[targ].append(partner)
            pairs[partner].append(targ)
    return pairs


def point_charge(atom_line):
    """Return ``"<coordinates> <charge>"`` extracted from one atom line.

    Raises:
        ValueError: if *atom_line* does not match ``patC``.
    """
    m = patC.match(atom_line)
    if m is None:
        raise ValueError("cannot parse atom line: %r" % (atom_line,))
    return m.group(2) + " " + m.group(1)


def embedding_charges(cart, tail):
    """Return the point-charge strings to print, in atom order.

    Atoms selected by ``patONIOM``, their bonded neighbours, and the
    neighbours of those neighbours are left out; every remaining atom
    contributes ``point_charge(line)``.

    Raises:
        ValueError: if any line of *cart* does not match ``patC``.
    """
    pairs = build_pairs(len(cart), tail)
    chg = [point_charge(c) for c in cart]
    high = {i for i, c in enumerate(cart) if patONIOM.search(c)}
    neighbors = {p for i in high for p in pairs[i]} - high
    tneighbors = {p for i in neighbors for p in pairs[i]} - high - neighbors
    excluded = high | neighbors | tneighbors
    # Walk the indices in order so the output is deterministic.
    return [entry for i, entry in enumerate(chg) if i not in excluded]


if __name__ == "__main__":
    inpf = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INPUT
    head, cart, tail = read_sections(inpf)
    # Historical placeholder: keep `tail` non-empty; blank lines are ignored
    # when the bond table is built.
    if not tail:
        tail = [""]
    for entry in embedding_charges(cart, tail):
        print(entry)

#print(sorted(charges))
#print(len(charges))
