Changeset - c0a2d1c07024
[Not reviewed]
0 2 0
Brett Smith - 4 years ago 2020-07-01 14:54:58
brettcsmith@brettcsmith.org
accrual: Rip out unnecessary functionality.

Now that make_consistent is really robust, there's much less need to do all
the consistency checking that was done in AccrualPostings.__init__. I expect
this will provide a performance benefit for large reports, since we'll be
calculating data for many fewer accrual groups. The only performance penalty
I see is that the aging report has to calculate the balance three times for
each row it reports, twice in write() and once in write_row(), but that
seems okay and can be cached separately if needed.
2 files changed with 69 insertions and 245 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/accrual.py
Show inline comments
...
 
@@ -127,10 +127,6 @@ T = TypeVar('T')
 

	
 
logger = logging.getLogger('conservancy_beancount.reports.accrual')
 

	
 
class Sentinel:
 
    pass
 

	
 

	
 
class Account(NamedTuple):
 
    name: str
 
    aging_thresholds: Sequence[int]
...
 
@@ -167,57 +163,7 @@ class AccrualAccount(enum.Enum):
 

	
 

	
 
class AccrualPostings(core.RelatedPostings):
 
    __slots__ = (
 
        'accrual_type',
 
        'date',
 
        'end_balance',
 
        'account',
 
        'entity',
 
        'invoice',
 
    )
 
    INCONSISTENT = Sentinel()
 

	
 
    def __init__(self,
 
                 source: Iterable[data.Posting]=(),
 
                 *,
 
                 _can_own: bool=False,
 
    ) -> None:
 
        super().__init__(source, _can_own=_can_own)
 
        self.account = self._single_item(post.account for post in self)
 
        if isinstance(self.account, Sentinel):
 
            self.accrual_type: Optional[AccrualAccount] = None
 
            norm_func: Callable[[T], T] = lambda x: x
 
            accrual_pred: Callable[[data.Posting], bool] = bool
 
        else:
 
            self.accrual_type = AccrualAccount.by_account(self.account)
 
            norm_func = self.accrual_type.normalize_amount
 
            accrual_pred = lambda post: norm_func(post.units).number > 0
 
            if not any(accrual_pred(post) for post in self):
 
                accrual_pred = bool
 
        self.date = self._single_item(self.dates(accrual_pred))
 
        self.entity = self._single_item(self.entities(accrual_pred))
 
        self.invoice = self._single_item(self.first_meta_links('invoice', None))
 
        self.end_balance = norm_func(self.balance_at_cost())
 

	
 
    def _single_item(self, seq: Iterable[T]) -> Union[T, Sentinel]:
 
        items = iter(seq)
 
        try:
 
            item1 = next(items)
 
        except StopIteration:
 
            all_same = False
 
        else:
 
            all_same = all(item == item1 for item in items)
 
        return item1 if all_same else self.INCONSISTENT
 

	
 
    def dates(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[datetime.date]:
 
        return filters.iter_unique(post.meta.date for post in self if pred(post))
 

	
 
    def entities(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[MetaValue]:
 
        return filters.iter_unique(
 
            post.meta['entity']
 
            for post in self
 
            if pred(post) and 'entity' in post.meta
 
        )
 
    __slots__ = ()
 

	
 
    @classmethod
 
    def make_consistent(cls,
...
 
@@ -266,31 +212,13 @@ class AccrualPostings(core.RelatedPostings):
 
            if pay_posts:
 
                yield key, cls(pay_posts, _can_own=True)
 

	
 
    def is_paid(self, default: Optional[bool]=None) -> Optional[bool]:
 
        if self.accrual_type is None:
 
            return default
 
        else:
 
            return self.end_balance.le_zero()
 

	
 
    def is_zero(self, default: Optional[bool]=None) -> Optional[bool]:
 
        if self.accrual_type is None:
 
            return default
 
        else:
 
            return self.end_balance.is_zero()
 

	
 
    def since_last_nonzero(self) -> 'AccrualPostings':
 
        for index, (post, balance) in enumerate(self.iter_with_balance()):
 
            if balance.is_zero():
 
                start_index = index
 
    def is_paid(self) -> Optional[bool]:
 
        try:
 
            empty = start_index == index
 
        except NameError:
 
            empty = True
 
        return self if empty else self[start_index + 1:]
 

	
 
    @property
 
    def rt_id(self) -> Union[str, None, Sentinel]:
 
        return self._single_item(self.first_meta_links('rt-id', None))
 
            accrual_type = AccrualAccount.classify(self)
 
        except ValueError:
 
            return None
 
        else:
 
            return accrual_type.normalize_amount(self.balance()).le_zero()
 

	
 

	
 
class BaseReport:
...
 
@@ -307,7 +235,7 @@ class BaseReport:
 
                print(line, file=self.out_file)
 

	
 

	
 
class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
class AgingODS(core.BaseODS[AccrualPostings, data.Account]):
 
    DOC_COLUMNS = [
 
        'rt-id',
 
        'invoice',
...
 
@@ -334,11 +262,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
        self.date = date
 
        self.logger = logger
 

	
 
    def section_key(self, row: AccrualPostings) -> Optional[data.Account]:
 
        if isinstance(row.account, str):
 
            return row.account
 
        else:
 
            return None
 
    def section_key(self, row: AccrualPostings) -> data.Account:
 
        return row[0].account
 

	
 
    def start_spreadsheet(self) -> None:
 
        for accrual_type in AccrualAccount:
...
 
@@ -357,9 +282,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
            ))
 
            self.lock_first_row()
 

	
 
    def start_section(self, key: Optional[data.Account]) -> None:
 
        if key is None:
 
            return
 
    def start_section(self, key: data.Account) -> None:
 
        self.norm_func = core.normalize_amount_func(key)
 
        self.age_thresholds = list(AccrualAccount.by_account(key).value.aging_thresholds)
 
        self.age_balances = [core.MutableBalance() for _ in self.age_thresholds]
 
        accrual_date = self.date - datetime.timedelta(days=self.age_thresholds[-1])
...
 
@@ -374,9 +298,7 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
        ))
 
        self.add_row()
 

	
 
    def end_section(self, key: Optional[data.Account]) -> None:
 
        if key is None:
 
            return
 
    def end_section(self, key: data.Account) -> None:
 
        total_balance = core.MutableBalance()
 
        text_style = self.merge_styles(self.style_bold, self.style_endtext)
 
        text_span = 4
...
 
@@ -418,28 +340,30 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]):
 
        )
 

	
 
    def write_row(self, row: AccrualPostings) -> None:
 
        age = (self.date - row[0].meta.date).days
 
        if row.end_balance.ge_zero():
 
        row_date = row[0].meta.date
 
        row_balance = self.norm_func(row.balance_at_cost())
 
        age = (self.date - row_date).days
 
        if row_balance.ge_zero():
 
            for index, threshold in enumerate(self.age_thresholds):
 
                if age >= threshold:
 
                    self.age_balances[index] += row.end_balance
 
                    self.age_balances[index] += row_balance
 
                    break
 
            else:
 
                return
 
        raw_balance = row.balance()
 
        if row.accrual_type is not None:
 
            raw_balance = row.accrual_type.normalize_amount(raw_balance)
 
        if raw_balance == row.end_balance:
 
        raw_balance = self.norm_func(row.balance())
 
        if raw_balance == row_balance:
 
            amount_cell = odf.table.TableCell()
 
        else:
 
            amount_cell = self.balance_cell(raw_balance)
 
        projects = {post.meta.get('project') or None for post in row}
 
        entities = row.meta_values('entity')
 
        entities.discard(None)
 
        projects = row.meta_values('project')
 
        projects.discard(None)
 
        self.add_row(
 
            self.date_cell(row[0].meta.date),
 
            self.multiline_cell(row.entities()),
 
            self.date_cell(row_date),
 
            self.multiline_cell(sorted(entities)),
 
            amount_cell,
 
            self.balance_cell(row.end_balance),
 
            self.balance_cell(row_balance),
 
            self.multiline_cell(sorted(projects)),
 
            *(self.meta_links_cell(row.all_meta_links(key))
 
              for key in self.DOC_COLUMNS),
...
 
@@ -459,35 +383,12 @@ class AgingReport(BaseReport):
 
        self.ods = AgingODS(rt_wrapper, date, self.logger)
 

	
 
    def run(self, groups: PostGroups) -> None:
 
        rows: List[AccrualPostings] = []
 
        for group in groups.values():
 
            if group.is_zero():
 
                # Cheap optimization: don't slice and dice groups we're not
 
                # going to report anyway.
 
                continue
 
            elif group.accrual_type is None:
 
                group = group.since_last_nonzero()
 
            else:
 
                # Filter out new accruals after the report date.
 
                # e.g., cover the case that the same invoices has multiple
 
                # postings over time, and we don't want to report too-recent
 
                # ones.
 
                cutoff_date = self.ods.date - datetime.timedelta(
 
                    days=group.accrual_type.value.aging_thresholds[-1],
 
                )
 
                group = AccrualPostings(
 
                    post for post in group.since_last_nonzero()
 
                    if post.meta.date <= cutoff_date
 
                    or group.accrual_type.normalize_amount(post.units.number) < 0
 
                )
 
            if group and not group.is_zero():
 
                rows.append(group)
 
        rows.sort(key=lambda related: (
 
            related.account,
 
            related[0].meta.date,
 
            ('\0'.join(related.entities())
 
             if related.entity is related.INCONSISTENT
 
             else related.entity),
 
        rows = [group for group in groups.values()
 
                if not group.balance_at_cost().is_zero()]
 
        rows.sort(key=lambda group: (
 
            group[0].account,
 
            group[0].meta.date,
 
            abs(sum(amt.number for amt in group.balance_at_cost().values())),
 
        ))
 
        self.ods.write(rows)
 
        self.ods.save_file(self.out_bin)
...
 
@@ -495,12 +396,14 @@ class AgingReport(BaseReport):
 

	
 
class BalanceReport(BaseReport):
 
    def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
 
        posts = posts.since_last_nonzero()
 
        date_s = posts[0].meta.date.strftime('%Y-%m-%d')
 
        meta = posts[0].meta
 
        date_s = meta.date.strftime('%Y-%m-%d')
 
        entity_s = meta.get('entity', '<no entity>')
 
        invoice_s = meta.get('invoice', '<no invoice>')
 
        balance_s = posts.balance_at_cost().format(zero="Zero balance")
 
        if index:
 
            yield ""
 
        yield f"{posts.invoice}:"
 
        yield f"{entity_s} {invoice_s}:"
 
        yield f"  {balance_s} outstanding since {date_s}"
 

	
 

	
...
 
@@ -523,10 +426,12 @@ class OutgoingReport(BaseReport):
 
        self.rt_client = rt_wrapper.rt
 

	
 
    def _primary_rt_id(self, posts: AccrualPostings) -> rtutil.TicketAttachmentIds:
 
        rt_id = posts.rt_id
 
        rt_ids = posts.first_meta_links('rt-id')
 
        rt_id = next(rt_ids, None)
 
        rt_id2 = next(rt_ids, None)
 
        if rt_id is None:
 
            raise ValueError("no rt-id links found")
 
        elif isinstance(rt_id, Sentinel):
 
        elif rt_id2 is not None:
 
            raise ValueError("multiple rt-id links found")
 
        parsed = rtutil.RT.parse(rt_id)
 
        if parsed is None:
...
 
@@ -535,7 +440,6 @@ class OutgoingReport(BaseReport):
 
            return parsed
 

	
 
    def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]:
 
        posts = posts.since_last_nonzero()
 
        try:
 
            ticket_id, _ = self._primary_rt_id(posts)
 
            ticket = self.rt_client.get_ticket(ticket_id)
...
 
@@ -545,9 +449,13 @@ class OutgoingReport(BaseReport):
 
            ticket = None
 
            errmsg = error.args[0]
 
        if ticket is None:
 
            meta = posts[0].meta
 
            self.logger.error(
 
                "can't generate outgoings report for %s because no RT ticket available: %s",
 
                posts.invoice, errmsg,
 
                "can't generate outgoings report for %s %s %s because no RT ticket available: %s",
 
                meta.date.isoformat(),
 
                meta.get('entity', '<no entity>'),
 
                meta.get('invoice', '<no invoice>'),
 
                errmsg,
 
            )
 
            return
 

	
...
 
@@ -566,10 +474,11 @@ class OutgoingReport(BaseReport):
 
            )
 
            requestor = f'{requestor_name} <{rt_requestor["EmailAddress"]}>'.strip()
 

	
 
        balance_s = posts.end_balance.format(None)
 
        balance = -posts.balance_at_cost()
 
        balance_s = balance.format(None)
 
        raw_balance = -posts.balance()
 
        payment_amount = raw_balance.format('¤¤ #,##0.00')
 
        if raw_balance != posts.end_balance:
 
        if raw_balance != balance:
 
            payment_amount += f' ({balance_s})'
 
            balance_s = f'{raw_balance} ({balance_s})'
 

	
...
 
@@ -776,14 +685,15 @@ def main(arglist: Optional[Sequence[str]]=None,
 
    groups: PostGroups
 
    if args.report_type is None or args.report_type is ReportType.OUTGOING:
 
        groups = dict(AccrualPostings.group_by_first_meta_link(postings, 'rt-id'))
 
        if (args.report_type is None
 
            and len(groups) == 1
 
            and all(group.accrual_type is AccrualAccount.PAYABLE
 
                    and not group.is_paid()
 
                    and key  # Make sure we have a usable rt-id
 
                    for key, group in groups.items())
 
        ):
 
            args.report_type = ReportType.OUTGOING
 
        if args.report_type is None and len(groups) == 1:
 
            key = next(iter(groups))
 
            group = groups[key]
 
            account = group[0].account
 
            if (AccrualAccount.by_account(account) is AccrualAccount.PAYABLE
 
                and all(post.account == account for post in group)
 
                and not group.balance().ge_zero()
 
                and key):  # Make sure we have a usable rt-id
 
                args.report_type = ReportType.OUTGOING
 
    if args.report_type is not ReportType.OUTGOING:
 
        groups = dict(AccrualPostings.make_consistent(postings))
 
    if args.report_type is not ReportType.AGING:
tests/test_reports_accrual.py
Show inline comments
...
 
@@ -263,82 +263,6 @@ def test_report_type_by_unknown_name(arg):
 
    with pytest.raises(ValueError):
 
        accrual.ReportType.by_name(arg)
 

	
 
@pytest.mark.parametrize('acct_name', ACCOUNTS)
 
def test_accrual_postings_consistent_account(acct_name):
 
    meta = {'invoice': '{acct_name} invoice.pdf'}
 
    txn = testutil.Transaction(postings=[
 
        (acct_name, 50, meta),
 
        (acct_name, 25, meta),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    assert related.account == acct_name
 

	
 
def test_accrual_postings_entity():
 
    txn = testutil.Transaction(postings=[
 
        (ACCOUNTS[0], 25, {'entity': 'Accruee'}),
 
        (ACCOUNTS[0], -15, {'entity': 'Payee15'}),
 
        (ACCOUNTS[0], -10, {'entity': 'Payee10'}),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    assert related.entity == 'Accruee'
 
    assert set(related.entities()) == {'Accruee', 'Payee10', 'Payee15'}
 

	
 
def test_accrual_postings_entities():
 
    txn = testutil.Transaction(postings=[
 
        (ACCOUNTS[0], 25, {'entity': 'Accruee'}),
 
        (ACCOUNTS[0], -15, {'entity': 'Payee15'}),
 
        (ACCOUNTS[0], -10, {'entity': 'Payee10'}),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    actual = related.entities()
 
    assert next(actual, None) == 'Accruee'
 
    assert set(actual) == {'Payee10', 'Payee15'}
 

	
 
def test_accrual_postings_entities_no_duplicates():
 
    txn = testutil.Transaction(postings=[
 
        (ACCOUNTS[0], 25, {'entity': 'Accruee'}),
 
        (ACCOUNTS[0], -15, {'entity': 'Accruee'}),
 
        (ACCOUNTS[0], -10, {'entity': 'Other'}),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    actual = related.entities()
 
    assert next(actual, None) == 'Accruee'
 
    assert next(actual, None) == 'Other'
 
    assert next(actual, None) is None
 

	
 
def test_accrual_postings_inconsistent_account():
 
    meta = {'invoice': 'invoice.pdf'}
 
    txn = testutil.Transaction(postings=[
 
        (acct_name, index, meta)
 
        for index, acct_name in enumerate(ACCOUNTS)
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    assert related.account is related.INCONSISTENT
 

	
 
def test_accrual_postings_rt_id():
 
    txn = testutil.Transaction(postings=[
 
        (ACCOUNTS[0], 10, {'rt-id': 'rt:90'}),
 
        (ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:92'}),
 
        (ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:94 rt:92'}),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    assert related.rt_id == 'rt:90'
 

	
 
def test_accrual_postings_rt_id_inconsistent():
 
    txn = testutil.Transaction(postings=[
 
        (ACCOUNTS[0], 10, {'rt-id': 'rt:96'}),
 
        (ACCOUNTS[0], 10, {'rt-id': 'rt:98 rt:96'}),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    assert related.rt_id is related.INCONSISTENT
 

	
 
def test_accrual_postings_rt_id_none():
 
    txn = testutil.Transaction(postings=[
 
        (ACCOUNTS[0], 10),
 
    ])
 
    related = accrual.AccrualPostings(data.Posting.from_txn(txn))
 
    assert related.rt_id is None
 

	
 
@pytest.mark.parametrize('acct_name,invoice,day', testutil.combine_values(
 
    ACCOUNTS,
 
    ['FIXME', '', None, *testutil.NON_STRING_METADATA_VALUES],
...
 
@@ -353,18 +277,13 @@ def test_make_consistent_bad_invoice(acct_name, invoice, day):
 
        (acct_name, index * mult, {'invoice': invoice, 'entity': f'BadInvoice{day}'})
 
        for index in range(1, 4)
 
    ])
 
    consistent = dict(accrual.AccrualPostings.make_consistent(data.Posting.from_txn(txn)))
 
    postings = data.Posting.from_txn(txn)
 
    consistent = dict(accrual.AccrualPostings.make_consistent(iter(postings)))
 
    assert len(consistent) == 1
 
    key = next(iter(consistent))
 
    assert acct_name in key
 
    if invoice:
 
        assert str(invoice) in key
 
    actual = consistent[key]
 
    assert actual
 
    _, actual = consistent.popitem()
 
    assert len(actual) == 3
 
    for act_post, exp_post in zip(actual, txn.postings):
 
        assert act_post.units == exp_post.units
 
        assert act_post.meta.get('invoice') == invoice
 
    for act_post, exp_post in zip(actual, postings):
 
        assert act_post == exp_post
 

	
 
def test_make_consistent_across_accounts():
 
    invoice = 'Invoices/CrossAccount.pdf'
...
 
@@ -376,7 +295,6 @@ def test_make_consistent_across_accounts():
 
    assert len(consistent) == len(ACCOUNTS)
 
    for key, posts in consistent.items():
 
        assert len(posts) == 1
 
        assert posts.account in key
 

	
 
def test_make_consistent_both_invoice_and_account():
 
    txn = testutil.Transaction(date=datetime.date(2019, 2, 2), postings=[
...
 
@@ -386,7 +304,6 @@ def test_make_consistent_both_invoice_and_account():
 
    assert len(consistent) == len(ACCOUNTS)
 
    for key, posts in consistent.items():
 
        assert len(posts) == 1
 
        assert posts.account in key
 

	
 
@pytest.mark.parametrize('acct_name', ACCOUNTS)
 
def test_make_consistent_across_entity(acct_name):
...
 
@@ -399,10 +316,6 @@ def test_make_consistent_across_entity(acct_name):
 
    assert len(consistent) == 3
 
    for key, posts in consistent.items():
 
        assert len(posts) == 1
 
        entities = posts.entities()
 
        assert next(entities, None) == posts.entity
 
        assert next(entities, None) is None
 
        assert posts.entity in key
 

	
 
@pytest.mark.parametrize('acct_name', ACCOUNTS)
 
def test_make_consistent_entity_differs_accrual_payment(acct_name):
...
 
@@ -440,7 +353,7 @@ def test_make_consistent_by_date_with_exact_payment():
 
    actual = [group for _, group in
 
              accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))]
 
    assert len(actual) == 2
 
    assert actual[0].is_zero()
 
    assert sum(post.units.number for post in actual[0]) == 0
 
    assert len(actual[1]) == 1
 
    assert actual[1][0].meta.date.day == 3
 

	
...
 
@@ -470,7 +383,7 @@ def test_make_consistent_by_date_with_overpayment():
 
    actual = [group for _, group in
 
              accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))]
 
    assert len(actual) == 2
 
    assert actual[0].is_zero()
 
    assert sum(post.units.number for post in actual[0]) == 0
 
    assert len(actual[1]) == 2
 
    assert actual[1][0].meta.date.day == 2
 
    assert actual[1][0].units.number == -25
...
 
@@ -658,7 +571,8 @@ def test_outgoing_report_without_rt_id(accrual_postings, caplog):
 
    assert caplog.records
 
    log = caplog.records[0]
 
    assert log.message.startswith(
 
        f"can't generate outgoings report for {invoice} because no RT ticket available:",
 
        f"can't generate outgoings report for 2010-05-15 MatchingProgram {invoice}"
 
        " because no RT ticket available:",
 
    )
 
    assert not output.getvalue()
 

	
...
 
@@ -761,7 +675,7 @@ def test_output_payments_when_only_match(arglist, expect_invoice):
 
    assert not errors.getvalue()
 
    assert retcode == 0
 
    check_output(output, [
 
        rf'^{re.escape(expect_invoice)}:$',
 
        rf'^EarlyBird {re.escape(expect_invoice)}:$',
 
        r' outstanding since ',
 
    ])
 

	
0 comments (0 inline, 0 general)