@@ -19,6 +19,16 @@ class AddEntityHook:
s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
return s if s else last_s
def _destroke_chr(self, c):
name = unicodedata.name(c, '')
if name.endswith(' WITH STROKE'):
return unicodedata.lookup(name[:-12])
else:
return c
def _destroke(self, s):
return ''.join(self._destroke_chr(c) for c in s)
def _entity_parts(self, s, trim_re):
for word in s.split():
word = unicodedata.normalize('NFKD', word)
@@ -36,6 +46,7 @@ class AddEntityHook:
def run(self, data):
if ('payee' in data) and ('entity' not in data):
payee = self._remove_parens(data['payee'])
payee = self._destroke(payee)
entity = self._str2entity(payee, self.NONASCII_RE)
if not entity:
entity = self._str2entity(payee, self.NONALNUM_RE)