Changeset - e3dceb601c00
[Not reviewed]
0 3 0
Brett Smith - 4 years ago 2020-06-11 14:46:06
brettcsmith@brettcsmith.org
filters: Add iter_unique() function.
3 files changed with 21 insertions and 11 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/filters.py
Show inline comments
 
"""filters.py - Common filters for postings"""
 
# Copyright © 2020  Brett Smith
 
#
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU Affero General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 

	
 
import datetime
 
import re
 

	
 
from beancount.core import data as bc_data
 

	
 
from . import data
 
from . import rtutil
 

	
 
from typing import (
 
    cast,
 
    Hashable,
 
    Iterable,
 
    Iterator,
 
    Optional,
 
    Pattern,
 
    Set,
 
    TypeVar,
 
    Union,
 
)
 
from .beancount_types import (
 
    Directive,
 
    Entries,
 
    MetaKey,
 
    MetaValue,
 
    Transaction,
 
)
 

	
 
# Saying Optional works around <https://github.com/python/mypy/issues/8768>.
# HashT is the item type that iter_unique() accepts and yields.
HashT = TypeVar('HashT', bound=Optional[Hashable])
# Any iterable of postings: the input and output type of the filter_*
# functions in this module.
Postings = Iterable[data.Posting]
# A regular expression, given either as pattern text or as a compiled pattern.
Regexp = Union[str, Pattern]
 

	
 
def audit_date(entries: Entries) -> Optional[datetime.date]:
    """Return the date of the first audit directive in ``entries``

    An audit directive is a Beancount ``Custom`` directive whose type is
    ``conservancy_beancount_audit``. Returns None when there is none.
    """
    audit_dates = (
        directive.date
        for directive in entries
        if isinstance(directive, bc_data.Custom)
        and directive.type == 'conservancy_beancount_audit'  # type:ignore[attr-defined]
    )
    return next(audit_dates, None)
 

	
 
def filter_meta_equal(postings: Postings, key: MetaKey, value: MetaValue) -> Postings:
    """Yield the postings whose ``key`` metadata equals ``value``

    Postings that do not have ``key`` in their metadata are skipped.
    """
    for posting in postings:
        try:
            is_equal = posting.meta[key] == value
        except KeyError:
            # No such metadata on this posting, so it cannot match.
            continue
        if is_equal:
            yield posting
 

	
 
def filter_meta_match(postings: Postings, key: MetaKey, regexp: Regexp) -> Postings:
    """Yield the postings whose ``key`` metadata is matched by ``regexp``

    The pattern is applied with ``re.search``, so it may match anywhere in
    the value. Postings without ``key``, or whose value is of a type that
    cannot be searched, are skipped.
    """
    for posting in postings:
        try:
            found = re.search(regexp, posting.meta[key])
        except (KeyError, TypeError):
            # KeyError: no such metadata. TypeError: the value is not text.
            continue
        if found:
            yield posting
 

	
 
def filter_for_rt_id(postings: Postings, ticket_id: Union[int, str]) -> Postings:
    """Filter postings by their primary RT ticket

    This function yields postings where the *first* link in the rt-id
    metadata matches the given ticket number. Later links are not considered.
    """
    return filter_meta_match(
        postings,
        'rt-id',
        rtutil.RT.metadata_regexp(ticket_id, first_link_only=True),
    )
 

	
 
def iter_unique(seq: Iterable[HashT]) -> Iterator[HashT]:
    """Yield each distinct item of ``seq`` once, in first-seen order

    Items must be hashable. The input is consumed lazily, one item at a time.
    """
    already_yielded: Set[HashT] = set()
    for candidate in seq:
        if candidate in already_yielded:
            continue
        already_yielded.add(candidate)
        yield candidate
 

	
 
def remove_opening_balance_txn(entries: Entries) -> Optional[Transaction]:
    """Take the opening balance transaction out of entries loaded by Beancount

    Returns the removed transaction, or None if there was none to remove.
    The ``entries`` argument is modified in-place: the transaction's slot is
    overwritten with a placeholder ``Custom`` directive that carries the same
    metadata and date.

    Tools like accrual-report care more about finding and reporting related
    transactions than about total account balances. The opening balance
    transaction does not carry the same metadata documentation as typical
    transactions, so it is usually easiest to filter it out before
    cross-referencing transactions by metadata.

    At most one transaction is removed, because that is fastest for the
    common case.
    """
    for position, candidate in enumerate(entries):
        if not isinstance(candidate, bc_data.Transaction):
            continue
        txn = cast(Transaction, candidate)
        if not data.is_opening_balance_txn(txn):
            continue
        # Deleting from the beginning of a list is O(n) slow in Python:
        # <https://wiki.python.org/moin/TimeComplexity>
        # So overwrite the slot with a placeholder directive instead.
        # The type:ignore is because of the funny way Beancount builds
        # directives.
        entries[position] = bc_data.Custom(  # type:ignore[operator]
            txn.meta, txn.date, "Removed opening balances", [],
        )
        return txn
    return None
conservancy_beancount/reports/accrual.py
Show inline comments
...
 
@@ -114,203 +114,197 @@ from .. import books
 
from .. import cliutil
 
from .. import config as configmod
 
from .. import data
 
from .. import filters
 
from .. import rtutil
 

	
 
# Name of this reporting tool.
PROGNAME = 'accrual-report'

# Constrained type variable: either a single amount or a whole balance.
CompoundAmount = TypeVar('CompoundAmount', data.Amount, core.Balance)
# Groups of accrual postings, keyed by a metadata value (or None).
PostGroups = Mapping[Optional[MetaValue], 'AccrualPostings']
# NOTE(review): presumably a ticket record as returned by the RT client -
# confirm against the code that uses it.
RTObject = Mapping[str, str]
# Unconstrained type variable for the generic helpers in this module.
T = TypeVar('T')

# Module-level logger. BaseReport derives a child logger per report class.
logger = logging.getLogger('conservancy_beancount.reports.accrual')
 

	
 
class Sentinel:
    """Marker type whose instances carry no data and act as placeholder values"""
 

	
 

	
 
class Account(NamedTuple):
    """Settings for one accrual account: its name and its aging thresholds"""
    # Account name, e.g. 'Assets:Receivable'.
    name: str
    # Aging thresholds for the account.
    # NOTE(review): presumably ages in days, listed largest first - confirm.
    aging_thresholds: Sequence[int]
 

	
 

	
 
class AccrualAccount(enum.Enum):
    """The accounts that accrual reporting covers

    Each member's value is an ``Account`` tuple with the account name and its
    aging thresholds.
    """
    # Note the aging report uses the same order accounts are defined here.
    # See AgingODS.start_spreadsheet().
    RECEIVABLE = Account('Assets:Receivable', [365, 120, 90, 60])
    PAYABLE = Account('Liabilities:Payable', [365, 90, 60, 30])

    @classmethod
    def account_names(cls) -> Iterator[str]:
        """Iterate over every member's account name, in definition order"""
        for member in cls:
            yield member.value.name

    @classmethod
    def by_account(cls, name: data.Account) -> 'AccrualAccount':
        """Return the first member whose account ``name`` is under

        Raises ValueError when ``name`` is not under any member's account.
        """
        found = next(
            (member for member in cls if name.is_under(member.value.name)),
            None,
        )
        if found is None:
            raise ValueError(f"unrecognized account {name!r}")
        return found

    @classmethod
    def classify(cls, related: core.RelatedPostings) -> 'AccrualAccount':
        """Return the first member that every posting in ``related`` is under

        Raises ValueError when no single member covers all of the postings.
        """
        def all_under(prefix: str) -> bool:
            return all(post.account.is_under(prefix) for post in related)

        found = next(
            (member for member in cls if all_under(member.value.name)),
            None,
        )
        if found is None:
            raise ValueError("unrecognized account set in related postings")
        return found

    @property
    def normalize_amount(self) -> Callable[[T], T]:
        """The amount-normalizing function for this member's account"""
        account_name = self.value.name
        return core.normalize_amount_func(account_name)
 

	
 

	
 
class AccrualPostings(core.RelatedPostings):
    """Related postings plus accrual attributes summarized across them

    Attributes set by ``__init__``:

    * ``account``: the account shared by every posting, else ``INCONSISTENT``
    * ``accrual_type``: the ``AccrualAccount`` for ``account``, or None when
      ``account`` is ``INCONSISTENT``
    * ``entity``: the one entity named by the considered postings, else
      ``INCONSISTENT``
    * ``invoice``: the first invoice link shared by every posting, else
      ``INCONSISTENT``
    * ``end_balance``: the balance at cost, normalized for the accrual type
    """
    __slots__ = (
        'accrual_type',
        'end_balance',
        'account',
        'entity',
        'invoice',
    )
    # Placeholder stored in an attribute when the postings do not agree on
    # one value for it (or when there are no values at all).
    INCONSISTENT = Sentinel()

    def __init__(self,
                 source: Iterable[data.Posting]=(),
                 *,
                 _can_own: bool=False,
    ) -> None:
        """Collect postings from ``source`` and compute the summary attributes

        ``_can_own`` is passed through to the parent class unchanged.
        """
        super().__init__(source, _can_own=_can_own)
        self.account = self._single_item(post.account for post in self)
        # The annotations below tell mypy the types of values that are set
        # differently in each branch.
        if isinstance(self.account, Sentinel):
            # No single account: leave amounts as-is and consider the entity
            # of every posting.
            self.accrual_type: Optional[AccrualAccount] = None
            norm_func: Callable[[T], T] = lambda x: x
            entity_pred: Callable[[data.Posting], bool] = bool
        else:
            self.accrual_type = AccrualAccount.by_account(self.account)
            norm_func = self.accrual_type.normalize_amount
            # Only postings with a positive normalized amount decide the
            # entity.
            entity_pred = lambda post: norm_func(post.units).number > 0
        self.entity = self._single_item(self.entities(entity_pred))
        self.invoice = self._single_item(self.first_links('invoice'))
        self.end_balance = norm_func(self.balance_at_cost())

    def _single_item(self, seq: Iterable[T]) -> Union[T, Sentinel]:
        """Return the one value that all of ``seq`` is equal to

        Returns ``INCONSISTENT`` when ``seq`` is empty or when any item
        differs from the first one.
        """
        items = iter(seq)
        try:
            first = next(items)
        except StopIteration:
            return self.INCONSISTENT
        if all(item == first for item in items):
            return first
        return self.INCONSISTENT

    def entities(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[MetaValue]:
        """Iterate over the distinct entities of postings accepted by ``pred``

        Each 'entity' metadata value is yielded once, in first-seen order.
        Postings without entity metadata are skipped.
        """
        return filters.iter_unique(
            post.meta['entity']
            for post in self
            if pred(post) and 'entity' in post.meta
        )

    def first_links(self, key: MetaKey, default: Optional[str]=None) -> Iterator[Optional[str]]:
        """Iterate over the first link in the ``key`` metadata of every posting

        ``default`` is passed to each posting's ``meta.first_link`` call.
        """
        return (post.meta.first_link(key, default) for post in self)

    def make_consistent(self) -> Iterator[Tuple[MetaValue, 'AccrualPostings']]:
        """Split these postings into groups that agree on their attributes

        When the account, entity, and invoice are all consistent, this yields
        the single pair ``(self.invoice, self)``. Otherwise the postings are
        regrouped by account, invoice, and entity, and each group is yielded
        as a ``(group_key, postings)`` pair, where ``postings`` is a new
        instance of this class.
        """
        account_ok = isinstance(self.account, str)
        entity_ok = isinstance(self.entity, str)
        # `'/' in self.invoice` is just our heuristic to ensure that the
        # invoice metadata is "unique enough," and not just a placeholder
        # value like "FIXME". It can be refined if needed.
        invoice_ok = isinstance(self.invoice, str) and '/' in self.invoice
        if account_ok and entity_ok and invoice_ok:
            yield (self.invoice, self)
            return
        groups = collections.defaultdict(list)
        for post in self:
            # Where the group as a whole has no usable value, fall back to
            # the posting's own metadata, then to a fixed placeholder.
            post_invoice = self.invoice if invoice_ok else (
                post.meta.get('invoice') or 'BlankInvoice'
            )
            post_entity = self.entity if entity_ok else (
                post.meta.get('entity') or 'BlankEntity'
            )
            groups[f'{post.account} {post_invoice} {post_entity}'].append(post)
        type_self = type(self)
        for group_key, posts in groups.items():
            yield group_key, type_self(posts, _can_own=True)

    def is_paid(self, default: Optional[bool]=None) -> Optional[bool]:
        """Return ``end_balance.le_zero()``

        Returns ``default`` instead when the postings have no accrual type.
        """
        if self.accrual_type is None:
            return default
        else:
            return self.end_balance.le_zero()

    def is_zero(self, default: Optional[bool]=None) -> Optional[bool]:
        """Return ``end_balance.is_zero()``

        Returns ``default`` instead when the postings have no accrual type.
        """
        if self.accrual_type is None:
            return default
        else:
            return self.end_balance.is_zero()

    def since_last_nonzero(self) -> 'AccrualPostings':
        """Return the postings that follow the last zero balance

        Each posting is paired with a balance by ``iter_with_balance()``.
        This finds the last posting whose paired balance is zero and returns
        the postings after it. It returns ``self`` unchanged when there are
        no postings, when no balance is zero, or when the final posting is
        the one with the zero balance.
        """
        last_zero_index: Optional[int] = None
        final_index: Optional[int] = None
        for final_index, (_, balance) in enumerate(self.iter_with_balance()):
            if balance.is_zero():
                last_zero_index = final_index
        if last_zero_index is None or last_zero_index == final_index:
            return self
        return self[last_zero_index + 1:]
 

	
 

	
 
class BaseReport:
    """Base class for reports that print lines of text for groups of postings

    Subclasses implement ``_report``. ``run`` calls it for every group and
    prints the lines it returns to ``out_file``.
    """

    def __init__(self, out_file: TextIO) -> None:
        """Remember the output file and set up a logger named for the class"""
        self.logger = logger.getChild(type(self).__name__)
        self.out_file = out_file

    def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
        """Return the report lines for one group; subclasses must override

        ``index`` is the position of the group within the run.
        """
        raise NotImplementedError("BaseReport._report")

    def run(self, groups: PostGroups) -> None:
        """Print the report lines of every group in ``groups`` to ``out_file``"""
        for position, group_key in enumerate(groups):
            report_lines = self._report(groups[group_key], position)
            for text in report_lines:
                print(text, file=self.out_file)
 

	
 

	
 
class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
    COLUMNS = [
 
        'Date',
 
        'Entity',
 
        'Invoice Amount',
 
        'Booked Amount',
 
        'Project',
 
        'Ticket',
 
        'Invoice',
 
        'Approval',
 
        'Contract',
 
        'Purchase Order',
 
    ]
 
    COL_COUNT = len(COLUMNS)
 

	
 
    def __init__(self,
 
                 rt_client: rt.Rt,
 
                 date: datetime.date,
 
                 logger: logging.Logger,
 
    ) -> None:
 
        super().__init__()
 
        self.rt_client = rt_client
 
        self.rt_wrapper = rtutil.RT(self.rt_client)
 
        self.date = date
 
        self.logger = logger
 

	
 
    def init_styles(self) -> None:
 
        super().init_styles()
 
        self.style_widecol = self.replace_child(
 
            self.document.automaticstyles,
 
            odf.style.Style,
 
            name='WideCol',
tests/test_filters.py
Show inline comments
...
 
@@ -90,96 +90,99 @@ def test_filter_meta_equal(cc_txn_pair, key, value, expected_indexes):
 
    ('receipt', 'Receipt', range(3)),
 
    ('statement', '.', [4]),
 
    ('metadate', 'foo', ()),
 
    ('BadKey', '.', ()),
 
    ('emptykey', '.', ()),
 
])
 
def test_filter_meta_match(cc_txn_pair, key, regexp, expected_indexes):
 
    postings = data.Posting.from_entries(cc_txn_pair)
 
    actual = filters.filter_meta_match(postings, key, regexp)
 
    check_filter(actual, cc_txn_pair, expected_indexes)
 

	
 
@pytest.mark.parametrize('ticket_id,expected_indexes', [
    (550, range(5)),
    ('550', range(5)),
    (55, ()),
    ('55', ()),
    (50, ()),
    ('.', ()),
])
def test_filter_for_rt_id(cc_txn_pair, ticket_id, expected_indexes):
    """Only the exact ticket number, given as int or str, selects postings"""
    matched = filters.filter_for_rt_id(
        data.Posting.from_entries(cc_txn_pair),
        ticket_id,
    )
    check_filter(matched, cc_txn_pair, expected_indexes)
 

	
 
@pytest.mark.parametrize('rt_id', [
    'rt:450/',
    ' rt:450 rt:540',
    'rt://ticket/450',
    'rt://ticket/450/',
    ' rt://ticket/450',
    'rt://ticket/450 rt://ticket/540',
])
def test_filter_for_rt_id_syntax_variations(rt_id):
    """Every supported way of writing a link to ticket 450 is recognized"""
    txn_meta = {'rt-id': rt_id}
    txn = testutil.Transaction(**txn_meta, postings=[
        ('Income:Donations', -10),
        ('Assets:Cash', 10),
    ])
    entries = [txn]
    matched = filters.filter_for_rt_id(data.Posting.from_entries(entries), 450)
    check_filter(matched, entries, range(2))
 

	
 
def test_filter_for_rt_id_uses_first_link_only():
    """A ticket that only appears as a later rt-id link must not match"""
    entries = [testutil.Transaction(postings=[
        ('Income:Donations', -10, {'rt-id': 'rt:1 rt:350'}),
        ('Assets:Cash', 10, {'rt-id': 'rt://ticket/2 rt://ticket/350'}),
    ])]
    postings = data.Posting.from_entries(entries)
    actual = filters.filter_for_rt_id(postings, 350)
    # Ticket 350 is the second link on both postings, so nothing is expected.
    check_filter(actual, entries, ())
 

	
 
@pytest.mark.parametrize('opening_txn', [
    testutil.OpeningBalance(),
    None,
])
def test_remove_opening_balance_txn(opening_txn):
    """The opening balance transaction, if present, is removed and returned"""
    ordinary_postings = [
        ('Income:Donations', -50),
        ('Expenses:Other', 75),
    ]
    entries = [
        testutil.Transaction(postings=[
            (account, amount),
            ('Assets:Checking', -amount),
        ])
        for account, amount in ordinary_postings
    ]
    if opening_txn is not None:
        entries.insert(1, opening_txn)
    removed = filters.remove_opening_balance_txn(entries)
    assert removed is opening_txn
    assert opening_txn not in entries
    # Directives without postings (such as the placeholder) contribute none.
    remaining_accounts = [
        post.account
        for entry in entries
        for post in getattr(entry, 'postings', ())
    ]
    assert not any(
        account.startswith('Equity:') for account in remaining_accounts
    )
 

	
 
@pytest.mark.parametrize('entry', [
    bc_data.Custom({}, testutil.FY_START_DATE, 'conservancy_beancount_audit', []),
    None,
])
def test_audit_date(entry):
    """audit_date() finds the audit directive's date, or None without one"""
    dates = testutil.date_seq()
    donations_open = bc_data.Open({}, next(dates), 'Income:Donations', ['USD'], None)
    cash_open = bc_data.Open({}, next(dates), 'Assets:Cash', ['USD'], None)
    donation_txn = testutil.Transaction(postings=[
        ('Income:Donations', -10),
        ('Assets:Cash', 10),
    ])
    entries = [donations_open, cash_open, donation_txn]
    if entry is not None:
        entries.append(entry)
    found = filters.audit_date(entries)
    if entry is not None:
        assert found == entry.date
    else:
        assert found is None
 

	
 
def test_iter_unique():
    """Repeated items come out once each, in first-seen order"""
    source = '1213231'
    unique_items = list(filters.iter_unique(source))
    assert unique_items == ['1', '2', '3']
0 comments (0 inline, 0 general)