Changeset - 71d671e493ad
[Not reviewed]
0 3 0
Brett Smith - 4 years ago 2020-06-14 12:53:27
brettcsmith@brettcsmith.org
data: Add Metadata.human_name() classmethod.
3 files changed with 50 insertions and 12 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/data.py
Show inline comments
 
"""Enhanced Beancount data structures for Conservancy
 

	
 
The classes in this module are interface-compatible with Beancount's core data
 
structures, and provide additional business logic that we want to use
 
throughout Conservancy tools.
 
"""
 
# Copyright © 2020  Brett Smith
 
#
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU Affero General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 

	
 
import collections
 
import datetime
 
import decimal
 
import functools
 
import re
 

	
 
from beancount.core import account as bc_account
 
from beancount.core import amount as bc_amount
 
from beancount.core import convert as bc_convert
 
from beancount.core import position as bc_position
 

	
 
from typing import (
 
    cast,
 
    overload,
 
    Callable,
 
    Hashable,
 
    Iterable,
 
    Iterator,
 
    MutableMapping,
 
    Optional,
 
    Sequence,
 
    TypeVar,
 
    Union,
 
)
 

	
 
from .beancount_types import (
 
    Directive,
 
    MetaKey,
 
    MetaValue,
 
    Posting as BasePosting,
 
    Transaction,
 
)
 

	
 
# Type alias: either a decimal.Decimal or a plain int.
# NOTE(review): presumably used to annotate numbers that get combined with
# Decimal values — confirm against the callers.
DecimalCompat = Union[decimal.Decimal, int]
 

	
 
# Immutable set of metadata key names.
# NOTE(review): judging by the constant's name, these are the keys whose
# values hold links to documents — confirm against the code that uses it.
LINK_METADATA = frozenset({
    'approval',
    'check',
    'contract',
    'invoice',
    'purchase-order',
    'receipt',
    'rt-id',
    'statement',
})
 

	
 
class Account(str):
 
    """Account name string
 

	
 
    This is a string that names an account, like Assets:Bank:Checking
 
    or Income:Donations. This class provides additional methods for common
 
    account name parsing and queries.
 
    """
...
 
@@ -168,139 +169,162 @@ class Account(str):
 
        elif stop is None:
 
            return self[self._find_part_slice(start)]
 
        else:
 
            part_slice = slice(start, stop)
 
        return self.split(self.SEP)[part_slice]
 

	
 
    def root_part(self, count: int=1) -> str:
        """Return the leading part(s) of the account name as a string

        ``count`` is the number of parts to keep, counted from the start of
        the name. The end of the result comes from
        ``_find_part_slice(count - 1)``; if that lookup raises IndexError,
        the whole account name is returned.
        """
        try:
            end = self._find_part_slice(count - 1).stop
        except IndexError:
            return self
        return self[:end]
 

	
 

	
 
class Amount(bc_amount.Amount):
    """Beancount amount after processing

    Beancount's native Amount class declares number to be Optional[Decimal],
    because the number is None when Beancount first parses a posting that does
    not have an amount, because the user wants it to be automatically balanced.

    As part of the loading process, Beancount replaces those None numbers
    with the calculated amount, so it will always be a Decimal. This class
    overrides the type declaration accordingly, so the type checker knows
    that our code doesn't have to consider the possibility that number is
    None.
    """
    number: decimal.Decimal

    # beancount.core.amount._Amount is the plain namedtuple.
    # beancount.core.amount.Amount adds instance methods to it.
    # That Amount.__new__ calls `_Amount.__new__` directly, which confuses
    # type checking. See <https://github.com/python/mypy/issues/1279>.
    # It works fine if you use super(), which is better practice anyway.
    # So we override __new__ just to call _Amount.__new__ this way:
    # super(bc_amount.Amount, Amount) starts the method lookup at the class
    # that follows bc_amount.Amount in the MRO.
    def __new__(cls, number: decimal.Decimal, currency: str) -> 'Amount':
        return super(bc_amount.Amount, Amount).__new__(cls, number, currency)
 

	
 

	
 
class Metadata(MutableMapping[MetaKey, MetaValue]):
    """Dict-like view of transaction or posting metadata

    An instance keeps a reference to the metadata mapping it is given and
    forwards the standard mapping operations to it. It adds helpers for
    reading link metadata and for naming metadata keys in reports.
    """
    __slots__ = ('meta',)
    _HUMAN_NAMES: MutableMapping[MetaKey, str] = {
        # Start with the names that the generic conversion in human_name()
        # would not produce. human_name() adds every other name it computes,
        # so this dict is also its cache.
        'check-id': 'Check Number',
        'paypal-id': 'PayPal ID',
        'rt-id': 'Ticket',
    }

    def __init__(self, source: MutableMapping[MetaKey, MetaValue]) -> None:
        # Hold on to the caller's mapping itself (no copy), so changes made
        # through this object are made to that mapping.
        self.meta = source

    def __iter__(self) -> Iterator[MetaKey]:
        return iter(self.meta)

    def __len__(self) -> int:
        return len(self.meta)

    def __getitem__(self, key: MetaKey) -> MetaValue:
        return self.meta[key]

    def __setitem__(self, key: MetaKey, value: MetaValue) -> None:
        self.meta[key] = value

    def __delitem__(self, key: MetaKey) -> None:
        del self.meta[key]

    def get_links(self, key: MetaKey) -> Sequence[str]:
        """Return the whitespace-separated words stored under ``key``

        The value is read directly from the wrapped mapping. A missing key
        yields an empty tuple. A value that is not a string raises TypeError.
        """
        try:
            found = self.meta[key]
        except KeyError:
            return ()
        if not isinstance(found, str):
            raise TypeError("{} metadata is a {}, not str".format(
                key, type(found).__name__,
            ))
        return found.split()

    @overload
    def first_link(self, key: MetaKey, default: None=None) -> Optional[str]: ...

    @overload
    def first_link(self, key: MetaKey, default: str) -> str: ...

    def first_link(self, key: MetaKey, default: Optional[str]=None) -> Optional[str]:
        """Return the first link stored under ``key``, or ``default``

        ``default`` is returned when get_links() has nothing to offer (the
        key is missing or the value has no words) and when it raises
        TypeError because the value is not a string.
        """
        try:
            links = self.get_links(key)
            return links[0]
        except (IndexError, TypeError):
            return default

    @classmethod
    def human_name(cls, key: MetaKey) -> str:
        """Return the "human" version of a metadata name

        Special cases come from ``_HUMAN_NAMES``. Any other key has its
        hyphens replaced with spaces and is titlecased, and a trailing
        "Id" word is spelled "ID". Computed names are stored in
        ``_HUMAN_NAMES`` so later calls return them directly. The return
        value is suitable for using in reports.
        """
        known_names = cls._HUMAN_NAMES
        if key not in known_names:
            titled = key.replace('-', ' ').title()
            known_names[key] = re.sub(r'\bId$', 'ID', titled)
        return known_names[key]
 

	
 

	
 
class PostingMeta(Metadata):
 
    """Combined access to posting metadata with its parent transaction metadata
 

	
 
    This lets you access posting metadata through a single dict-like object.
 
    If you try to look up metadata that doesn't exist on the posting, it will
 
    look for the value in the parent transaction metadata instead.
 

	
 
    You can set and delete metadata as well. Changes only affect the metadata
 
    of the posting, never the transaction. Changes are propagated to the
 
    underlying Beancount data structures.
 

	
 
    Functionally, you can think of this as identical to:
 

	
 
      collections.ChainMap(post.meta, txn.meta)
 

	
 
    Under the hood, this class does a little extra work to avoid creating
 
    posting metadata if it doesn't have to.
 
    """
 
    __slots__ = ('txn', 'index', 'post')
 

	
 
    def __init__(self,
                 txn: Transaction,
                 index: int,
                 post: Optional[BasePosting]=None,
    ) -> None:
        """Set up metadata access for one posting of a transaction

        ``index`` is the posting's position in ``txn.postings``. Pass
        ``post`` to use that posting object instead of looking it up by
        index.
        """
        posting = txn.postings[index] if post is None else post
        self.txn = txn
        self.index = index
        self.post = posting
        if posting.meta is None:
            # The posting has no metadata of its own, so read straight from
            # the transaction's metadata.
            self.meta = txn.meta
        else:
            # Posting metadata is searched first, then the transaction's.
            self.meta = collections.ChainMap(posting.meta, txn.meta)
 

	
 
    def __getitem__(self, key: MetaKey) -> MetaValue:
        """Look up ``key``, with the transaction payee standing in for 'entity'

        The normal lookup is tried first. Only when it raises KeyError, the
        key is 'entity', and the transaction has a payee, is the payee
        returned instead. Otherwise the KeyError propagates.
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            if key != 'entity' or self.txn.payee is None:
                raise
            return self.txn.payee
 

	
 
    def __setitem__(self, key: MetaKey, value: MetaValue) -> None:
 
        if self.post.meta is None:
 
            self.post = self.post._replace(meta={key: value})
conservancy_beancount/reports/accrual.py
Show inline comments
...
 
@@ -228,107 +228,110 @@ class AccrualPostings(core.RelatedPostings):
 
            )
 
            groups[f'{post.account} {post_invoice} {post_entity}'].append(post)
 
        type_self = type(self)
 
        for group_key, posts in groups.items():
 
            yield group_key, type_self(posts, _can_own=True)
 

	
 
    def is_paid(self, default: Optional[bool]=None) -> Optional[bool]:
        """Return ``end_balance.le_zero()``, or ``default`` without an accrual type

        ``default`` is returned, and the balance is not consulted, when
        ``accrual_type`` is None.
        """
        if self.accrual_type is not None:
            return self.end_balance.le_zero()
        return default
 

	
 
    def is_zero(self, default: Optional[bool]=None) -> Optional[bool]:
        """Return ``end_balance.is_zero()``, or ``default`` without an accrual type

        ``default`` is returned, and the balance is not consulted, when
        ``accrual_type`` is None.
        """
        if self.accrual_type is not None:
            return self.end_balance.is_zero()
        return default
 

	
 
    def since_last_nonzero(self) -> 'AccrualPostings':
        """Return the postings that follow the last zero running balance

        Walks ``iter_with_balance()`` and remembers the last position where
        the running balance is zero. The postings after that position are
        returned as a slice of ``self``. ``self`` itself is returned when
        the balance is never zero, when it is zero after the final posting,
        and when there are no postings at all.
        """
        # Explicit None sentinels replace the earlier approach of catching
        # the NameError raised by never-assigned loop variables.
        last_zero = None
        last_index = None
        for last_index, (_, balance) in enumerate(self.iter_with_balance()):
            if balance.is_zero():
                last_zero = last_index
        if last_zero is None or last_zero == last_index:
            return self
        return self[last_zero + 1:]
 

	
 
    @property
    def rt_id(self) -> Union[str, None, Sentinel]:
        """The 'rt-id' first links of these postings, reduced by _single_item()"""
        first_links = self.first_meta_links('rt-id', None)
        return self._single_item(first_links)
 

	
 

	
 
class BaseReport:
    """Base class for reports that print text lines to a file

    Subclasses implement ``_report()``. ``run()`` calls it once per group
    and prints every line it yields to ``out_file``.
    """
    def __init__(self, out_file: TextIO) -> None:
        self.out_file = out_file
        # Child of the module logger, named after the concrete report class.
        report_name = type(self).__name__
        self.logger = logger.getChild(report_name)

    def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
        """Yield the report lines for one group; subclasses must override"""
        raise NotImplementedError("BaseReport._report")

    def run(self, groups: PostGroups) -> None:
        """Print the report lines of every group, in iteration order"""
        for position, group_key in enumerate(groups):
            for text in self._report(groups[group_key], position):
                print(text, file=self.out_file)
 

	
 

	
 
class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
    # Metadata keys that write_row() turns into one column of links each.
    DOC_COLUMNS = [
        'rt-id',
        'invoice',
        'approval',
        'contract',
        'purchase-order',
    ]
    # Header text for the sheet's columns, in order. start_spreadsheet()
    # writes these as the first row of every sheet.
    # NOTE(review): as written, this list holds both literal names
    # ('Entity', 'Project', 'Ticket', 'Invoice', ...) and the human_name()
    # calls that evaluate to those same names, so each of them appears
    # twice. This looks like the before and after sides of a change shown
    # together — confirm which set is intended and drop the other.
    COLUMNS = [
        'Date',
        'Entity',
        data.Metadata.human_name('entity'),
        'Invoice Amount',
        'Booked Amount',
        'Project',
        'Ticket',
        'Invoice',
        'Approval',
        'Contract',
        'Purchase Order',
        data.Metadata.human_name('project'),
        *(data.Metadata.human_name(key) for key in DOC_COLUMNS),
    ]
    # start_spreadsheet() creates one TableColumn per entry in COLUMNS.
    COL_COUNT = len(COLUMNS)
 

	
 
    def __init__(self,
                 rt_wrapper: rtutil.RT,
                 date: datetime.date,
                 logger: logging.Logger,
    ) -> None:
        """Store the report date and logger; the base class gets ``rt_wrapper``"""
        super().__init__(rt_wrapper)
        self.logger = logger
        self.date = date
 

	
 
    def init_styles(self) -> None:
        """Add the 'WideCol' table-column style after the inherited styles

        The style is kept as ``self.style_widecol`` and sets a column width
        of 1.25in.
        """
        super().init_styles()
        widecol = self.style_widecol = self.replace_child(
            self.document.automaticstyles,
            odf.style.Style,
            name='WideCol',
        )
        widecol.setAttribute('family', 'table-column')
        column_props = odf.style.TableColumnProperties(columnwidth='1.25in')
        widecol.addElement(column_props)
 

	
 
    def section_key(self, row: AccrualPostings) -> Optional[data.Account]:
        """Return the row's account when it is a string, otherwise None"""
        if not isinstance(row.account, str):
            return None
        return row.account
 

	
 
    def start_spreadsheet(self) -> None:
        """Create one sheet per AccrualAccount member, each with a header row

        Every sheet gets ``COL_COUNT`` columns. All but the first use the
        wide column style. The header row holds the ``COLUMNS`` names in
        bold, and ``lock_first_row()`` is called after it is added.
        """
        for accrual_type in AccrualAccount:
            self.use_sheet(accrual_type.name.title())
            for col_number in range(self.COL_COUNT):
                if col_number == 0:
                    col_style = ''
                else:
                    col_style = self.style_widecol
                self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
            header_cells = [
                self.string_cell(title, stylename=self.style_bold)
                for title in self.COLUMNS
            ]
            self.add_row(*header_cells)
            self.lock_first_row()
 

	
 
    def start_section(self, key: Optional[data.Account]) -> None:
 
        if key is None:
 
            return
 
        self.age_thresholds = list(AccrualAccount.by_account(key).value.aging_thresholds)
 
        self.age_balances = [core.MutableBalance() for _ in self.age_thresholds]
 
        accrual_date = self.date - datetime.timedelta(days=self.age_thresholds[-1])
...
 
@@ -365,101 +368,98 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
                age_range = f"Over {age_text}"
 
            else:
 
                age_range = f"{age_text}–{last_age_text}"
 
            self.add_row(
 
                self.string_cell(
 
                    f"Total Aged {age_range}: ",
 
                    stylename=text_style,
 
                    numbercolumnsspanned=text_span,
 
                ),
 
                *(odf.table.TableCell() for _ in range(1, text_span)),
 
                self.balance_cell(balance),
 
            )
 
            last_age_text = age_text
 
            total_balance += balance
 
        self.add_row(
 
            self.string_cell(
 
                "Total Unpaid: ",
 
                stylename=text_style,
 
                numbercolumnsspanned=text_span,
 
            ),
 
            *(odf.table.TableCell() for _ in range(1, text_span)),
 
            self.balance_cell(total_balance),
 
        )
 

	
 
    def write_row(self, row: AccrualPostings) -> None:
        """Add one spreadsheet row for a group of accrual postings

        When ``row.end_balance.ge_zero()`` is true, the ending balance is
        also added to the first entry of ``self.age_balances`` whose
        threshold the row's age meets. If the age meets no threshold, the
        method returns without writing a row.
        """
        # Age in days from the first posting's date to self.date.
        age = (self.date - row[0].meta.date).days
        if row.end_balance.ge_zero():
            for index, threshold in enumerate(self.age_thresholds):
                if age >= threshold:
                    self.age_balances[index] += row.end_balance
                    break
            else:
                # The loop ended without a break: the age is below every
                # threshold, so this group is left out of the sheet.
                return
        raw_balance = row.balance()
        if row.accrual_type is not None:
            raw_balance = row.accrual_type.normalize_amount(raw_balance)
        # Only show the raw balance when it differs from the ending balance;
        # otherwise that cell is an empty TableCell.
        if raw_balance == row.end_balance:
            amount_cell = odf.table.TableCell()
        else:
            amount_cell = self.balance_cell(raw_balance)
        # Collect the distinct 'project' values. Missing or falsy values
        # become None and are then discarded.
        projects = {post.meta.get('project') or None for post in row}
        projects.discard(None)
        # NOTE(review): the five explicit meta_links_cell(...) calls below
        # use the same keys as DOC_COLUMNS, and the generator after them
        # emits a cell for every DOC_COLUMNS key again, so each document
        # column is written twice. This looks like the before and after
        # sides of a change shown together — confirm which form is intended
        # and keep it in step with COLUMNS.
        self.add_row(
            self.date_cell(row[0].meta.date),
            self.multiline_cell(row.entities()),
            amount_cell,
            self.balance_cell(row.end_balance),
            self.multiline_cell(sorted(projects)),
            self.meta_links_cell(row.all_meta_links('rt-id')),
            self.meta_links_cell(row.all_meta_links('invoice')),
            self.meta_links_cell(row.all_meta_links('approval')),
            self.meta_links_cell(row.all_meta_links('contract')),
            self.meta_links_cell(row.all_meta_links('purchase-order')),
            *(self.meta_links_cell(row.all_meta_links(key))
              for key in self.DOC_COLUMNS),
        )
 

	
 

	
 
class AgingReport(BaseReport):
    """Report that writes groups of accrual postings through an AgingODS"""
    def __init__(self,
                 rt_wrapper: rtutil.RT,
                 out_file: BinaryIO,
                 date: Optional[datetime.date]=None,
    ) -> None:
        """Prepare the spreadsheet writer

        ``date`` is the date passed on to AgingODS; it defaults to today.
        ``out_file`` is the binary file that run() saves the spreadsheet to.
        """
        report_date = datetime.date.today() if date is None else date
        self.out_bin = out_file
        self.logger = logger.getChild(type(self).__name__)
        self.ods = AgingODS(rt_wrapper, report_date, self.logger)

    def run(self, groups: PostGroups) -> None:
        """Select, sort, and write the groups that belong in the report

        Groups whose is_zero() is true are skipped. Every other group is
        cut down to the postings since its balance was last zero. Groups
        with an accrual type are filtered further by posting date and
        amount. Whatever is non-empty and not zero afterwards is sorted by
        account, first posting date, and entity, then written and saved.
        """
        rows: List[AccrualPostings] = []
        for group in groups.values():
            if group.is_zero():
                # Cheap shortcut: skip the slicing below for groups that
                # would not be reported anyway.
                continue
            accrual_type = group.accrual_type
            if accrual_type is None:
                candidate = group.since_last_nonzero()
            else:
                # Keep postings dated on or before the cutoff (the report
                # date minus the last aging threshold), plus any posting
                # whose normalized amount is negative. This covers an
                # invoice that has several postings over time, where the
                # too-recent ones should not be reported.
                oldest_threshold = accrual_type.value.aging_thresholds[-1]
                cutoff_date = self.ods.date - datetime.timedelta(
                    days=oldest_threshold,
                )
                candidate = AccrualPostings(
                    post for post in group.since_last_nonzero()
                    if post.meta.date <= cutoff_date
                    or accrual_type.normalize_amount(post.units.number) < 0
                )
            if candidate and not candidate.is_zero():
                rows.append(candidate)
        # When a group's entity is inconsistent, sort on all of its
        # entities joined together instead.
        rows.sort(key=lambda related: (
            related.account,
            related[0].meta.date,
            (related.entity
             if related.entity is not related.INCONSISTENT
             else '\0'.join(related.entities())),
        ))
        self.ods.write(rows)
        self.ods.save_file(self.out_bin)
tests/test_data_metadata.py
Show inline comments
...
 
@@ -44,48 +44,62 @@ def test_metadata_transforms_source():
 
    'link1  link2',
 
    ' link1  link2   link3    ',
 
])
 
def test_get_links(value):
 
    meta = data.Metadata({'key': value})
 
    assert list(meta.get_links('key')) == value.split()
 

	
 
def test_get_links_missing():
    """get_links() gives an empty result for a key that is not set"""
    empty_meta = data.Metadata({})
    links = empty_meta.get_links('key')
    assert not links
 

	
 
@pytest.mark.parametrize('value', testutil.NON_STRING_METADATA_VALUES)
def test_get_links_bad_type(value):
    """get_links() raises TypeError for a metadata value that is not a string"""
    bad_meta = data.Metadata({'key': value})
    with pytest.raises(TypeError):
        bad_meta.get_links('key')
 

	
 
def test_first_link_from_txn(simple_txn):
    """first_link('note') for posting 0 of the fixture returns 'txn'"""
    posting_meta = data.PostingMeta(simple_txn, 0)
    link = posting_meta.first_link('note')
    assert link == 'txn'
 

	
 
def test_first_link_from_post_override(simple_txn):
    """first_link('note') for posting 1 of the fixture returns 'donation'"""
    posting_meta = data.PostingMeta(simple_txn, 1)
    link = posting_meta.first_link('note')
    assert link == 'donation'
 

	
 
def test_first_link_is_only_link(simple_txn):
    """first_link('extra') for posting 1 of the fixture returns 'Extra'"""
    posting_meta = data.PostingMeta(simple_txn, 1)
    link = posting_meta.first_link('extra')
    assert link == 'Extra'
 

	
 
def test_first_link_nonexistent_metadata(simple_txn):
    """first_link() returns None for a key that is not set anywhere"""
    posting_meta = data.PostingMeta(simple_txn, 1)
    link = posting_meta.first_link('Nonexistent')
    assert link is None
 

	
 
def test_first_link_nonexistent_default(simple_txn):
    """first_link() returns the given default for a key that is not set"""
    posting_meta = data.PostingMeta(simple_txn, 1)
    link = posting_meta.first_link('Nonexistent', 'missing')
    assert link == 'missing'
 

	
 
@pytest.mark.parametrize('meta_value', testutil.NON_STRING_METADATA_VALUES)
def test_first_link_bad_type_metadata(simple_txn, meta_value):
    """first_link() returns None when the metadata value is not a string"""
    simple_txn.meta['badmeta'] = meta_value
    posting_meta = data.PostingMeta(simple_txn, 1)
    link = posting_meta.first_link('badmeta')
    assert link is None
 

	
 
@pytest.mark.parametrize('meta_value', testutil.NON_STRING_METADATA_VALUES)
def test_first_link_bad_type_default(simple_txn, meta_value):
    """first_link() returns the given default when the value is not a string"""
    simple_txn.meta['badmeta'] = meta_value
    posting_meta = data.PostingMeta(simple_txn, 1)
    link = posting_meta.first_link('badmeta', '_')
    assert link == '_'
 

	
 
@pytest.mark.parametrize('meta_name,expected', [
    ('approval', 'Approval'),
    ('bank-id', 'Bank ID'),
    ('bank-statement', 'Bank Statement'),
    ('check-id', 'Check Number'),
    ('paypal-id', 'PayPal ID'),
    ('purchase-order', 'Purchase Order'),
    ('receipt', 'Receipt'),
    ('rt-id', 'Ticket'),
    ('tax-statement', 'Tax Statement'),
])
def test_human_name(meta_name, expected):
    """human_name() gives the expected report name, on every lookup"""
    assert data.Metadata.human_name(meta_name) == expected
    # human_name() stores computed names and returns the stored value on
    # later calls, so ask a second time to cover that path too.
    assert data.Metadata.human_name(meta_name) == expected
0 comments (0 inline, 0 general)