From f888b13c56b0b2a6bb92ffbcd624c6dad002d83d 2017-10-22 18:05:56 From: Brett Smith Date: 2017-10-22 18:05:56 Subject: [PATCH] hooks.add_entity: Better handle common name prefix parts. Keep these with the name they're attached to, rather than breaking the name in the middle. --- diff --git a/import2ledger/hooks/add_entity.py b/import2ledger/hooks/add_entity.py index d0f4791453e7fe79c19d2e630a12fda4fb667294..d4f6191055304e7fdbafffabb9d797d75edaeee1 100644 --- a/import2ledger/hooks/add_entity.py +++ b/import2ledger/hooks/add_entity.py @@ -2,6 +2,11 @@ import re import unicodedata class AddEntityHook: + NAME_PREFIXES = frozenset([ + 'da', + 'de', + 'van', + ]) NONASCII_RE = re.compile(r'[^-A-Za-z0-9]') NONALNUM_RE = re.compile(r'[^-\w]') OPEN_PARENS = ['\\(', '\\[', '\\{'] @@ -41,6 +46,8 @@ class AddEntityHook: if not parts: return '' parts.insert(0, parts.pop()) + if parts[-1].lower() in self.NAME_PREFIXES: + parts.insert(0, parts.pop()) return '-'.join(parts) def run(self, data): diff --git a/tests/test_hooks.py b/tests/test_hooks.py index d9f68a3d0d4c1b2a2fce5c059c1443318bcc85e3..270209f5f1b525ae2ec00ae781ca9b8555c547d8 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -19,6 +19,9 @@ def test_load_all(): ('Alex(Nickname) Smith', 'Smith-Alex'), ('稲荷', '稲荷'), ('Pøweł', 'Powel'), + ('Elyse Jan Smith', 'Smith-Elyse-Jan'), + ('Jan van Smith', 'van-Smith-Jan'), + ('Francis da Silva', 'da-Silva-Francis'), ]) def test_add_entity(payee, expected): data = {'payee': payee}