Changeset - f888b13c56b0
[Not reviewed]
0 2 0
Brett Smith - 7 years ago 2017-10-22 18:05:56
brettcsmith@brettcsmith.org
hooks.add_entity: Better handle common name prefix parts.

Keep these with the name they're attached to, rather than breaking the name
in the middle.
2 files changed with 10 insertions and 0 deletions:
0 comments (0 inline, 0 general)
import2ledger/hooks/add_entity.py
Show inline comments
 
import re
 
import unicodedata
 

	
 
class AddEntityHook:
    """Hook that derives an ``entity`` identifier from an entry's payee.

    The payee has bracketed asides removed and stroked letters replaced by
    their base letters. Its words are then cleaned and joined with hyphens,
    with the surname (the last word, plus any name prefixes directly in
    front of it) moved to the front: ``'Jan van Smith'`` becomes
    ``'van-Smith-Jan'``.
    """

    # Lowercase particles that belong to the surname that follows them.
    NAME_PREFIXES = frozenset([
        'da',
        'de',
        'van',
    ])
    # Matches every character that is not an ASCII letter, digit, or hyphen.
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
    # Matches every character that is not a word character or hyphen.
    NONALNUM_RE = re.compile(r'[^-\w]')
    # Regex-escaped bracket characters; the two lists pair up by position.
    OPEN_PARENS = ['\\(', '\\[', '\\{']
    CLOSE_PARENS = ['\\)', '\\]', '\\}']
    # Matches a run of characters containing no bracket of any kind.
    NO_PARENS = '[^{}]*'.format(''.join(OPEN_PARENS + CLOSE_PARENS))
    # Suffix of Unicode character names that _destroke_chr strips.
    _STROKE_SUFFIX = ' WITH STROKE'

    def __init__(self, config):
        """Accept the configuration object; this hook does not use it."""
        pass

    def _remove_parens(self, s):
        """Return ``s`` with every bracketed group removed.

        Groups are removed innermost first and the pass is repeated until
        the string stops changing, so nested groups disappear as well. If
        nothing but whitespace would remain, ``s`` is returned unchanged so
        that a fully bracketed payee still produces an entity.
        """
        original = s
        last_s = None
        while s != last_s:
            last_s = s
            for open_c, close_c in zip(self.OPEN_PARENS, self.CLOSE_PARENS):
                s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
        # The loop only exits once s == last_s, so the fallback has to use
        # the text as it was before any group was stripped.
        return s if s.strip() else original

    def _destroke_chr(self, c):
        """Return the base character of ``c`` if ``c`` is a stroked letter.

        A character whose Unicode name ends in ``' WITH STROKE'`` is replaced
        by the character named by the rest of that name. Any other character
        is returned unchanged, as is a stroked character whose shortened name
        does not identify a character.
        """
        name = unicodedata.name(c, '')
        if not name.endswith(self._STROKE_SUFFIX):
            return c
        try:
            return unicodedata.lookup(name[:-len(self._STROKE_SUFFIX)])
        except KeyError:
            # The shortened name is not a real character name (for example
            # 'LATIN LETTER TWO'), so keep the character as it is.
            return c

    def _destroke(self, s):
        """Return ``s`` with every stroked letter replaced by its base letter."""
        return ''.join(self._destroke_chr(c) for c in s)

    def _entity_parts(self, s, trim_re):
        """Yield the cleaned, non-empty words of ``s``.

        Each whitespace-separated word is NFKD-normalized and then has every
        match of ``trim_re`` removed. Words that end up empty are skipped.
        """
        for word in s.split():
            # Decomposing first splits accents off as separate combining
            # characters, which trim_re can then strip from the base letter.
            word = unicodedata.normalize('NFKD', word)
            word = trim_re.sub('', word)
            if word:
                yield word

    def _str2entity(self, s, trim_re):
        """Return the hyphen-joined entity for ``s``, surname first.

        The surname is the last word together with every NAME_PREFIXES word
        directly in front of it, so ``'Jan van de Berg'`` becomes
        ``'van-de-Berg-Jan'``. Returns ``''`` when no word survives cleaning.
        """
        parts = list(self._entity_parts(s, trim_re))
        if not parts:
            return ''
        # Walk left from the last word over any run of prefix words so that
        # the whole surname moves to the front as one unit.
        start = len(parts) - 1
        while start > 0 and parts[start - 1].lower() in self.NAME_PREFIXES:
            start -= 1
        return '-'.join(parts[start:] + parts[:start])

    def run(self, data):
        """Set ``data['entity']`` from ``data['payee']`` if it is not yet set.

        ``data`` is modified in place. Nothing happens when there is no
        ``'payee'`` key or when an ``'entity'`` key already exists.
        """
        if ('payee' in data) and ('entity' not in data):
            payee = self._remove_parens(data['payee'])
            payee = self._destroke(payee)
            entity = self._str2entity(payee, self.NONASCII_RE)
            if not entity:
                # Nothing ASCII survived (e.g. a name written entirely in a
                # non-Latin script); retry keeping any word character.
                entity = self._str2entity(payee, self.NONALNUM_RE)
            data['entity'] = entity
tests/test_hooks.py
Show inline comments
 
import argparse
 
import datetime
 
import itertools
 

	
 
import pytest
 

	
 
from import2ledger import hooks
 
from import2ledger.hooks import add_entity, default_date
 

	
 
def test_load_all():
    """AddEntityHook must be among the hooks that hooks.load_all() yields."""
    loaded = tuple(hooks.load_all())
    assert add_entity.AddEntityHook in loaded
 

	
 
@pytest.mark.parametrize('payee,expected', [
    ('Alex Smith', 'Smith-Alex'),
    ('Dakota D.  Doe', 'Doe-Dakota-D'),
    ('Björk', 'Bjork'),
    ('Fran Doe-Smith', 'Doe-Smith-Fran'),
    ('Alex(Nickname) Smith', 'Smith-Alex'),
    ('稲荷', '稲荷'),
    ('Pøweł', 'Powel'),
    ('Elyse Jan Smith', 'Smith-Elyse-Jan'),
    ('Jan van Smith', 'van-Smith-Jan'),
    ('Francis da Silva', 'da-Silva-Francis'),
])
def test_add_entity(payee, expected):
    """Running the hook on a payee-only entry must add the expected entity."""
    record = {'payee': payee}
    add_entity.AddEntityHook(argparse.Namespace()).run(record)
    assert record['entity'] == expected
 

	
 

	
 
class DefaultDateConfig:
    """Test double for a config object that hands out consecutive dates.

    The first get_default_date() call returns the start date, and every
    later call returns the day after the previous result.
    """

    ONE_DAY = datetime.timedelta(days=1)

    def __init__(self, start_date=None):
        """Remember the day before ``start_date`` (default 2016-03-05)."""
        first = datetime.date(2016, 3, 5) if start_date is None else start_date
        # Store one day early so the first call lands on the start date.
        self.date = first - self.ONE_DAY

    def get_default_date(self, section_name=None):
        """Advance the stored date by one day and return it.

        ``section_name`` is accepted but not used.
        """
        self.date = self.date + self.ONE_DAY
        return self.date
 

	
 

	
 
class TestDefaultDate:
    """Checks of DefaultDateHook against the DefaultDateConfig test double."""

    def test_simple_case(self):
        """An entry without a date gets the config's default date."""
        wanted = datetime.date(2016, 2, 4)
        record = {}
        default_date.DefaultDateHook(DefaultDateConfig(wanted)).run(record)
        assert record['date'] == wanted

    def test_no_caching(self):
        """Two runs of one hook must give the two entries different dates."""
        hook = default_date.DefaultDateHook(DefaultDateConfig())
        first, second = {}, {}
        for record in (first, second):
            hook.run(record)
        assert first['date'] != second['date']

    def test_no_override(self):
        """An entry that already has a date keeps that exact object."""
        wanted = datetime.date(2016, 2, 6)
        later = wanted + datetime.timedelta(days=300)
        hook = default_date.DefaultDateHook(DefaultDateConfig(later))
        record = {'date': wanted}
        hook.run(record)
        assert record['date'] is wanted
0 comments (0 inline, 0 general)