Changeset - e8bcbd5f99ec
[Not reviewed]
0 2 0
Brett Smith - 6 years ago 2017-12-30 22:29:54
brettcsmith@brettcsmith.org
hooks.add_entity: Remove common company suffixes from entity tags.
2 files changed with 51 insertions and 16 deletions:
0 comments (0 inline, 0 general)
import2ledger/hooks/add_entity.py
Show inline comments
...
 
@@ -9,6 +9,15 @@ class AddEntityHook:
 
        'la',
 
        'van',
 
    ])
 
    # Lower-case words treated as company-type suffixes.  _chop_corp_suffixes
    # compares each trailing word (lower-cased) against this set and drops
    # the ones that match.
    COMPANY_SUFFIXES = frozenset([
        'co',
        'company',
        'corp',
        'corporation',
        'inc',
        'incorporated',
        'llc',
    ])
 
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
 
    NONALNUM_RE = re.compile(r'[^-\w]')
 
    OPEN_PARENS = ['\\(', '\\[', '\\{']
...
 
@@ -43,30 +52,46 @@ class AddEntityHook:
 
            if word:
 
                yield word
 

	
 
    def _str2entity(self, s, trim_re, name_shifts):
 
    def _move_last_name(self, parts):
 
        pivot = -2
 
        try:
 
            while parts[pivot].lower() in self.NAME_PREFIXES:
 
                pivot -= 1
 
        except IndexError:
 
            pass
 
        else:
 
            pivot += 1
 
            parts = parts[pivot:] + parts[:pivot]
 
        return parts
 

	
 
    def _chop_corp_suffixes(self, parts):
 
        for index in range(-1, -len(parts), -1):
 
            if parts[index].lower() not in self.COMPANY_SUFFIXES:
 
                del_from = index + 1
 
                break
 
        else:
 
            del_from = 1
 
        if del_from != 0:
 
            del parts[del_from:]
 
        return parts
 

	
 
    def _str2entity(self, s, trim_re, words_rearrange_func):
 
        parts = list(self._entity_parts(s, trim_re))
 
        if name_shifts > 0:
 
            pivot = -name_shifts - 1
 
            try:
 
                while parts[pivot].lower() in self.NAME_PREFIXES:
 
                    pivot -= 1
 
            except IndexError:
 
                pass
 
            else:
 
                pivot += 1
 
                parts = parts[pivot:] + parts[:pivot]
 
        if words_rearrange_func is not None:
 
            parts = words_rearrange_func(parts)
 
        return '-'.join(parts)
 

	
 
    def _name2entity(self, name, name_shifts):
 
    def _name2entity(self, name, rearrange_func1, rearrange_func2):
 
        name = self._remove_parens(name)
 
        name = self._destroke(name)
 
        entity = self._str2entity(name, self.NONASCII_RE, name_shifts)
 
        entity = self._str2entity(name, self.NONASCII_RE, rearrange_func1)
 
        if not entity:
 
            entity = self._str2entity(name, self.NONALNUM_RE, 0)
 
            entity = self._str2entity(name, self.NONALNUM_RE, rearrange_func2)
 
        return entity
 

	
 
    def run(self, data):
 
        if ('payee' in data) and ('entity' not in data):
 
            data['entity'] = self._name2entity(data['payee'], 1)
 
            data['entity'] = self._name2entity(data['payee'], self._move_last_name, None)
 
        if ('corporation' in data) and ('corp_entity' not in data):
 
            data['corp_entity'] = self._name2entity(data['corporation'], 0)
 
            data['corp_entity'] = self._name2entity(
 
                data['corporation'], self._chop_corp_suffixes, self._chop_corp_suffixes)
tests/test_hooks.py
Show inline comments
...
 
@@ -27,6 +27,16 @@ def test_load_all():
 
    ('payee', 'A de B de la C', 'entity', 'de-la-C-A-de-B'),
 
    ('corporation', 'Company A', 'corp_entity', 'Company-A'),
 
    ('corporation', 'Company A 99', 'corp_entity', 'Company-A-99'),
 
    ('corporation', 'DX Co.', 'corp_entity', 'DX'),
 
    ('corporation', 'DX Company', 'corp_entity', 'DX'),
 
    ('corporation', 'DX Company Inc.', 'corp_entity', 'DX'),
 
    ('corporation', 'DX Corp', 'corp_entity', 'DX'),
 
    ('corporation', 'DX Corp LLC', 'corp_entity', 'DX'),
 
    ('corporation', 'DX Corporation', 'corp_entity', 'DX'),
 
    ('corporation', 'DX, Inc.', 'corp_entity', 'DX'),
 
    ('corporation', 'DX Incorporated', 'corp_entity', 'DX'),
 
    ('payee', 'Poe Inc', 'entity', 'Inc-Poe'),
 
    ('corporation', 'Silly Van', 'corp_entity', 'Silly-Van'),
 
])
 
def test_add_entity(in_key, payee, out_key, expected):
 
    data = {in_key: payee}
0 comments (0 inline, 0 general)