Patch commentary (free text; patch tools skip everything before the first
"diff --git" header, so this preamble does not affect how the patch applies).

import2ledger/hooks/add_entity.py
  * Adds the class constant AddEntityHook.COMPANY_SUFFIXES, a frozenset of
    lowercase words: co, company, corp, corporation, inc, incorporated, llc.
  * Moves the word-rotation logic that used to live inside _str2entity into a
    new method _move_last_name(parts). Its pivot starts at -2, which is the
    value the removed code computed for name_shifts == 1. If walking back over
    NAME_PREFIXES words runs off the front of the list (IndexError), the parts
    are returned unrotated.
  * Adds _chop_corp_suffixes(parts), which scans from the last word backwards
    and deletes the trailing run of words whose lowercase form is in
    COMPANY_SUFFIXES. The scan range stops before the first word, so the first
    word is never examined or removed. The list is modified in place and
    returned.
  * _str2entity now takes a callable (or None) instead of the integer
    name_shifts; when it is not None it is applied to the list of parts before
    they are joined with '-'.
  * _name2entity now takes two callables: the first is used with NONASCII_RE,
    the second with NONALNUM_RE when the first pass yields an empty entity.
  * run() passes (_move_last_name, None) for 'payee' and
    (_chop_corp_suffixes, _chop_corp_suffixes) for 'corporation'.

tests/test_hooks.py
  * Adds ten parametrized cases to test_add_entity: eight 'corporation' inputs
    that must reduce to 'DX', one 'payee' input ('Poe Inc' -> 'Inc-Poe'), and
    one 'corporation' input ('Silly Van' -> 'Silly-Van').

NOTE(review): the line structure and indentation below were restored from a
whitespace-collapsed copy of this patch, using Python syntax and the hunk
header line counts (which all match) as the guide. In _chop_corp_suffixes the
"else:" is laid out as a for/else, the reading under which del_from is always
bound even for a one-word name -- confirm against the upstream commit.

diff --git a/import2ledger/hooks/add_entity.py b/import2ledger/hooks/add_entity.py
index 90321b3ece7df147593b89152fe2d0a5fc131ee2..a6b44a86869c9273fe5aeabd7fce7c657a813e3f 100644
--- a/import2ledger/hooks/add_entity.py
+++ b/import2ledger/hooks/add_entity.py
@@ -9,6 +9,15 @@ class AddEntityHook:
         'la',
         'van',
     ])
+    COMPANY_SUFFIXES = frozenset([
+        'co',
+        'company',
+        'corp',
+        'corporation',
+        'inc',
+        'incorporated',
+        'llc',
+    ])
     NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
     NONALNUM_RE = re.compile(r'[^-\w]')
     OPEN_PARENS = ['\\(', '\\[', '\\{']
@@ -43,30 +52,46 @@ class AddEntityHook:
             if word:
                 yield word
 
-    def _str2entity(self, s, trim_re, name_shifts):
+    def _move_last_name(self, parts):
+        pivot = -2
+        try:
+            while parts[pivot].lower() in self.NAME_PREFIXES:
+                pivot -= 1
+        except IndexError:
+            pass
+        else:
+            pivot += 1
+            parts = parts[pivot:] + parts[:pivot]
+        return parts
+
+    def _chop_corp_suffixes(self, parts):
+        for index in range(-1, -len(parts), -1):
+            if parts[index].lower() not in self.COMPANY_SUFFIXES:
+                del_from = index + 1
+                break
+        else:
+            del_from = 1
+        if del_from != 0:
+            del parts[del_from:]
+        return parts
+
+    def _str2entity(self, s, trim_re, words_rearrange_func):
         parts = list(self._entity_parts(s, trim_re))
-        if name_shifts > 0:
-            pivot = -name_shifts - 1
-            try:
-                while parts[pivot].lower() in self.NAME_PREFIXES:
-                    pivot -= 1
-            except IndexError:
-                pass
-            else:
-                pivot += 1
-                parts = parts[pivot:] + parts[:pivot]
+        if words_rearrange_func is not None:
+            parts = words_rearrange_func(parts)
         return '-'.join(parts)
 
-    def _name2entity(self, name, name_shifts):
+    def _name2entity(self, name, rearrange_func1, rearrange_func2):
         name = self._remove_parens(name)
         name = self._destroke(name)
-        entity = self._str2entity(name, self.NONASCII_RE, name_shifts)
+        entity = self._str2entity(name, self.NONASCII_RE, rearrange_func1)
         if not entity:
-            entity = self._str2entity(name, self.NONALNUM_RE, 0)
+            entity = self._str2entity(name, self.NONALNUM_RE, rearrange_func2)
         return entity
 
     def run(self, data):
         if ('payee' in data) and ('entity' not in data):
-            data['entity'] = self._name2entity(data['payee'], 1)
+            data['entity'] = self._name2entity(data['payee'], self._move_last_name, None)
         if ('corporation' in data) and ('corp_entity' not in data):
-            data['corp_entity'] = self._name2entity(data['corporation'], 0)
+            data['corp_entity'] = self._name2entity(
+                data['corporation'], self._chop_corp_suffixes, self._chop_corp_suffixes)
diff --git a/tests/test_hooks.py b/tests/test_hooks.py
index 896fa3a7397416704672e941c5e7360197874f3a..bcb7e3ebe7c19575525a82455c18b9dd652349b7 100644
--- a/tests/test_hooks.py
+++ b/tests/test_hooks.py
@@ -27,6 +27,16 @@ def test_load_all():
     ('payee', 'A de B de la C', 'entity', 'de-la-C-A-de-B'),
     ('corporation', 'Company A', 'corp_entity', 'Company-A'),
     ('corporation', 'Company A 99', 'corp_entity', 'Company-A-99'),
+    ('corporation', 'DX Co.', 'corp_entity', 'DX'),
+    ('corporation', 'DX Company', 'corp_entity', 'DX'),
+    ('corporation', 'DX Company Inc.', 'corp_entity', 'DX'),
+    ('corporation', 'DX Corp', 'corp_entity', 'DX'),
+    ('corporation', 'DX Corp LLC', 'corp_entity', 'DX'),
+    ('corporation', 'DX Corporation', 'corp_entity', 'DX'),
+    ('corporation', 'DX, Inc.', 'corp_entity', 'DX'),
+    ('corporation', 'DX Incorporated', 'corp_entity', 'DX'),
+    ('payee', 'Poe Inc', 'entity', 'Inc-Poe'),
+    ('corporation', 'Silly Van', 'corp_entity', 'Silly-Van'),
 ])
 def test_add_entity(in_key, payee, out_key, expected):
     data = {in_key: payee}