Files @ 82e4bf5bd8d3
Branch filter:

Location: NPO-Accounting/import2ledger/import2ledger/hooks/add_entity.py

Brett Smith
hooks.add_entity: Add name prefixes: der, la
import re
import unicodedata

class AddEntityHook:
    """Hook that derives an ``entity`` identifier from a ``payee`` name.

    The entity is built from the whitespace-separated words of the payee,
    joined with hyphens, with the last word rotated to the front (so
    ``'Alex Smith'`` becomes ``'Smith-Alex'``).
    """

    # Lowercase words that stay attached to the word following them when the
    # trailing word(s) of a name are rotated to the front
    # ('Vincent van Gogh' -> 'van-Gogh-Vincent').
    NAME_PREFIXES = frozenset([
        'da',
        'de',
        'der',
        'la',
        'van',
    ])
    # Matches every character that is not an ASCII letter, digit, or hyphen.
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
    # Matches every character that is not a hyphen or a `\w` word character.
    NONALNUM_RE = re.compile(r'[^-\w]')
    # Regex-escaped opening brackets and their matching closing brackets,
    # paired up by position.
    OPEN_PARENS = ['\\(', '\\[', '\\{']
    CLOSE_PARENS = ['\\)', '\\]', '\\}']
    # Regex fragment matching a (possibly empty) run of non-bracket characters.
    NO_PARENS = '[^{}]*'.format(''.join(OPEN_PARENS + CLOSE_PARENS))

    def __init__(self, config):
        """Accept the hook configuration; this hook does not use it."""
        pass

    def _remove_parens(self, s):
        """Return ``s`` with bracketed asides removed.

        Innermost ``(...)``, ``[...]`` and ``{...}`` groups are deleted
        repeatedly until a pass changes nothing, which also removes nested
        groups.  If a pass would delete everything, the string as it stood
        before that pass is returned instead, so a name written entirely
        inside brackets is not reduced to the empty string.
        """
        previous = None
        while s != previous:
            previous = s
            for open_c, close_c in zip(self.OPEN_PARENS, self.CLOSE_PARENS):
                s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
            if not s:
                # This pass consumed the whole string; fall back to the value
                # it started from (an empty input is returned unchanged).
                return previous
        return s

    def _destroke_chr(self, c):
        """Return the un-stroked counterpart of the single character ``c``.

        A character whose Unicode name ends in ``' WITH STROKE'`` is replaced
        by the character named by the rest of that name.  Any other
        character, and any stroked character with no such named base, is
        returned unchanged.
        """
        suffix = ' WITH STROKE'
        name = unicodedata.name(c, '')
        if not name.endswith(suffix):
            return c
        try:
            return unicodedata.lookup(name[:-len(suffix)])
        except KeyError:
            # Not every stroked character has a named base (for example
            # U+01BB LATIN LETTER TWO WITH STROKE); leave those alone.
            return c

    def _destroke(self, s):
        """Return ``s`` with `_destroke_chr` applied to every character."""
        return ''.join(self._destroke_chr(c) for c in s)

    def _entity_parts(self, s, trim_re):
        """Yield the cleaned-up words of ``s``.

        Each whitespace-separated word is NFKD-normalized and then has every
        match of ``trim_re`` deleted.  Words left empty are skipped.
        """
        for word in s.split():
            word = unicodedata.normalize('NFKD', word)
            word = trim_re.sub('', word)
            if word:
                yield word

    def _str2entity(self, s, trim_re, name_shifts):
        """Build a hyphen-joined entity string from ``s``.

        ``trim_re`` selects the characters removed from each word.  When
        ``name_shifts`` is positive, that many trailing words are moved to
        the front, together with any `NAME_PREFIXES` words directly before
        them.  If there are too few words to do that, the order is kept.
        """
        parts = list(self._entity_parts(s, trim_re))
        if name_shifts > 0:
            # Start at the word just before the ones being moved and walk
            # left across any name prefixes.
            pivot = -name_shifts - 1
            try:
                while parts[pivot].lower() in self.NAME_PREFIXES:
                    pivot -= 1
            except IndexError:
                # Ran off the front of the list: nothing would be left in
                # front of the moved words, so keep the original order.
                pass
            else:
                # Step back onto the first word that moves to the front.
                pivot += 1
                parts = parts[pivot:] + parts[:pivot]
        return '-'.join(parts)

    def _name2entity(self, name, name_shifts):
        """Convert a payee ``name`` to an entity string.

        Bracketed asides are removed and stroked letters replaced first.
        The entity is built from ASCII letters, digits and hyphens only; if
        that leaves nothing, it is rebuilt keeping all word characters and
        without moving any words.
        """
        name = self._remove_parens(name)
        name = self._destroke(name)
        entity = self._str2entity(name, self.NONASCII_RE, name_shifts)
        if not entity:
            entity = self._str2entity(name, self.NONALNUM_RE, 0)
        return entity

    def run(self, data):
        """Set ``data['entity']`` from ``data['payee']`` when it is missing.

        ``data`` is modified in place.  Nothing happens if it has no
        ``'payee'`` key or already has an ``'entity'`` key.
        """
        if ('payee' in data) and ('entity' not in data):
            data['entity'] = self._name2entity(data['payee'], 1)