Changeset - f888b13c56b0
[Not reviewed]
0 2 0
Brett Smith - 7 years ago 2017-10-22 18:05:56
brettcsmith@brettcsmith.org
hooks.add_entity: Better handle common name prefix parts.

Keep these with the name they're attached to, rather than breaking the name
in the middle.
2 files changed with 10 insertions and 0 deletions:
0 comments (0 inline, 0 general)
import2ledger/hooks/add_entity.py
Show inline comments
 
import re
 
import unicodedata
 

	
 
class AddEntityHook:
 
    # Lowercase name particles that _str2entity() keeps in front of the
    # surname; words are lowercased before being checked against this set.
    NAME_PREFIXES = frozenset([
        'da',
        'de',
        'van',
    ])
    # Matches any single character that is not a hyphen, ASCII letter, or
    # digit.  run() tries this stricter pattern first.
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
    # Matches any single character that is not a hyphen or a \w word
    # character.  run() falls back to it when the ASCII-only pass leaves
    # nothing.
    NONALNUM_RE = re.compile(r'[^-\w]')
    # Regex-escaped opening and closing bracket characters.
    OPEN_PARENS = ['\\(', '\\[', '\\{']
    CLOSE_PARENS = ['\\)', '\\]', '\\}']
    # Regex fragment matching a (possibly empty) run of characters that are
    # none of the brackets above.
    # NOTE(review): presumably consumed by _remove_parens() -- confirm.
    NO_PARENS = '[^{}]*'.format(''.join(OPEN_PARENS + CLOSE_PARENS))
 

	
 
    def __init__(self, config):
 
        pass
 

	
 
    def _remove_parens(self, s):
 
        last_s = None
 
        while s != last_s:
...
 
@@ -32,22 +37,24 @@ class AddEntityHook:
 
    def _entity_parts(self, s, trim_re):
 
        for word in s.split():
 
            word = unicodedata.normalize('NFKD', word)
 
            word = trim_re.sub('', word)
 
            if word:
 
                yield word
 

	
 
    def _str2entity(self, s, trim_re):
 
        parts = list(self._entity_parts(s, trim_re))
 
        if not parts:
 
            return ''
 
        parts.insert(0, parts.pop())
 
        if parts[-1].lower() in self.NAME_PREFIXES:
 
            parts.insert(0, parts.pop())
 
        return '-'.join(parts)
 

	
 
    def run(self, data):
 
        if ('payee' in data) and ('entity' not in data):
 
            payee = self._remove_parens(data['payee'])
 
            payee = self._destroke(payee)
 
            entity = self._str2entity(payee, self.NONASCII_RE)
 
            if not entity:
 
                entity = self._str2entity(payee, self.NONALNUM_RE)
 
            data['entity'] = entity
tests/test_hooks.py
Show inline comments
...
 
@@ -10,24 +10,27 @@ from import2ledger.hooks import add_entity, default_date
 
def test_load_all():
    """AddEntityHook must be among the hooks that load_all() yields."""
    assert add_entity.AddEntityHook in list(hooks.load_all())
 

	
 
@pytest.mark.parametrize('payee,expected', [
    ('Alex Smith', 'Smith-Alex'),
    ('Dakota D.  Doe', 'Doe-Dakota-D'),
    ('Björk', 'Bjork'),
    ('Fran Doe-Smith', 'Doe-Smith-Fran'),
    ('Alex(Nickname) Smith', 'Smith-Alex'),
    ('稲荷', '稲荷'),
    ('Pøweł', 'Powel'),
    ('Elyse Jan Smith', 'Smith-Elyse-Jan'),
    ('Jan van Smith', 'van-Smith-Jan'),
    ('Francis da Silva', 'da-Silva-Francis'),
])
def test_add_entity(payee, expected):
    """Running the hook on a payee-only record sets the expected entity."""
    record = {'payee': payee}
    add_entity.AddEntityHook(argparse.Namespace()).run(record)
    assert record['entity'] == expected
 

	
 

	
 
class DefaultDateConfig:
 
    ONE_DAY = datetime.timedelta(days=1)
 

	
 
    def __init__(self, start_date=None):
0 comments (0 inline, 0 general)