Changeset - f888b13c56b0
[Not reviewed]
0 2 0
Brett Smith - 7 years ago 2017-10-22 18:05:56
brettcsmith@brettcsmith.org
hooks.add_entity: Better handle common name prefix parts.

Keep these with the name they're attached to, rather than breaking the name
in the middle.
2 files changed with 10 insertions and 0 deletions:
0 comments (0 inline, 0 general)
import2ledger/hooks/add_entity.py
Show inline comments
 
import re
 
import unicodedata
 

	
 
class AddEntityHook:
 
    # Lowercase name particles. _str2entity compares the lowercased form of
    # a name part against this set to decide whether to move that part too.
    NAME_PREFIXES = frozenset([
 
        'da',
 
        'de',
 
        'van',
 
    ])
 
    # Matches any single character that is not an ASCII letter, an ASCII
    # digit, or a hyphen.
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
 
    # Matches any single character that is neither a hyphen nor a `\w` word
    # character (for str patterns, `\w` also covers non-ASCII letters/digits).
    NONALNUM_RE = re.compile(r'[^-\w]')
 
    # Regex-escaped opening bracket characters: ( [ {
    OPEN_PARENS = ['\\(', '\\[', '\\{']
 
    # Regex-escaped closing bracket characters: ) ] }
    CLOSE_PARENS = ['\\)', '\\]', '\\}']
 
    # Regex fragment matching a (possibly empty) run of characters that are
    # none of the brackets above; built as one negated character class.
    # NOTE(review): presumably combined into a larger pattern elsewhere in
    # the class -- that code is not visible here.
    NO_PARENS = '[^{}]*'.format(''.join(OPEN_PARENS + CLOSE_PARENS))
 

	
...
 
@@ -38,12 +43,14 @@ class AddEntityHook:
 

	
 
    def _str2entity(self, s, trim_re):
        """Return the hyphen-joined entity string built from the parts of ``s``.

        ``s`` and ``trim_re`` are handed unchanged to ``self._entity_parts``,
        and whatever parts it produces are reordered as follows:

        1. The last part is moved to the front.
        2. If the part that is now last is, ignoring case, one of
           ``NAME_PREFIXES``, it is moved to the front as well, so that it
           ends up directly before the part moved in step 1.

        An empty string is returned when there are no parts at all.
        """
        pieces = list(self._entity_parts(s, trim_re))
        if len(pieces) == 0:
            return ''

        # Rotate right by one: the final part becomes the leading part.
        pieces = pieces[-1:] + pieces[:-1]

        # A prefix left at the tail belongs with the part that was just moved
        # forward, so rotate once more to put it in front of that part.
        if pieces[-1].lower() in self.NAME_PREFIXES:
            pieces = pieces[-1:] + pieces[:-1]

        return '-'.join(pieces)
 

	
 
    def run(self, data):
 
        if ('payee' in data) and ('entity' not in data):
 
            payee = self._remove_parens(data['payee'])
 
            payee = self._destroke(payee)
tests/test_hooks.py
Show inline comments
...
 
@@ -16,12 +16,15 @@ def test_load_all():
 
    ('Dakota D.  Doe', 'Doe-Dakota-D'),
 
    ('Björk', 'Bjork'),
 
    ('Fran Doe-Smith', 'Doe-Smith-Fran'),
 
    ('Alex(Nickname) Smith', 'Smith-Alex'),
 
    ('稲荷', '稲荷'),
 
    ('Pøweł', 'Powel'),
 
    ('Elyse Jan Smith', 'Smith-Elyse-Jan'),
 
    ('Jan van Smith', 'van-Smith-Jan'),
 
    ('Francis da Silva', 'da-Silva-Francis'),
 
])
 
def test_add_entity(payee, expected):
    """Check that running AddEntityHook on a payee sets the expected entity.

    The hook is built from an empty ``argparse.Namespace`` and run against a
    dict containing only the ``payee`` key; afterwards the dict's ``entity``
    value must equal ``expected``.
    """
    entry = {'payee': payee}
    add_entity.AddEntityHook(argparse.Namespace()).run(entry)
    assert entry['entity'] == expected
0 comments (0 inline, 0 general)