From c0a2d1c07024213b48734a11724d798852642de4 2020-07-01 14:54:58 From: Brett Smith Date: 2020-07-01 14:54:58 Subject: [PATCH] accrual: Rip out unnecessary functionality. Now that make_consistent is really robust, there's much less need to do all the consistency checking that was done in AccrualPostings.__init__. I expect this will provide a performance benefit for large reports, since we'll be calculating data for many fewer accrual groups. The only performance penalty I see is that the aging report has to calculate the balance three times for each row it reports, twice in write() and once in write_row(), but that seems okay and can be cached separately if needed. --- diff --git a/conservancy_beancount/reports/accrual.py b/conservancy_beancount/reports/accrual.py index ec25bf0bff424f5b2c3c49514aaa480f1d830106..b8a69b80d35e65f0693c391852d6b396da96ae4c 100644 --- a/conservancy_beancount/reports/accrual.py +++ b/conservancy_beancount/reports/accrual.py @@ -127,10 +127,6 @@ T = TypeVar('T') logger = logging.getLogger('conservancy_beancount.reports.accrual') -class Sentinel: - pass - - class Account(NamedTuple): name: str aging_thresholds: Sequence[int] @@ -167,57 +163,7 @@ class AccrualAccount(enum.Enum): class AccrualPostings(core.RelatedPostings): - __slots__ = ( - 'accrual_type', - 'date', - 'end_balance', - 'account', - 'entity', - 'invoice', - ) - INCONSISTENT = Sentinel() - - def __init__(self, - source: Iterable[data.Posting]=(), - *, - _can_own: bool=False, - ) -> None: - super().__init__(source, _can_own=_can_own) - self.account = self._single_item(post.account for post in self) - if isinstance(self.account, Sentinel): - self.accrual_type: Optional[AccrualAccount] = None - norm_func: Callable[[T], T] = lambda x: x - accrual_pred: Callable[[data.Posting], bool] = bool - else: - self.accrual_type = AccrualAccount.by_account(self.account) - norm_func = self.accrual_type.normalize_amount - accrual_pred = lambda post: norm_func(post.units).number > 0 - if not any(accrual_pred(post) for post in self): - accrual_pred = bool - self.date = self._single_item(self.dates(accrual_pred)) - self.entity = self._single_item(self.entities(accrual_pred)) - self.invoice = self._single_item(self.first_meta_links('invoice', None)) - self.end_balance = norm_func(self.balance_at_cost()) - - def _single_item(self, seq: Iterable[T]) -> Union[T, Sentinel]: - items = iter(seq) - try: - item1 = next(items) - except StopIteration: - all_same = False - else: - all_same = all(item == item1 for item in items) - return item1 if all_same else self.INCONSISTENT - - def dates(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[datetime.date]: - return filters.iter_unique(post.meta.date for post in self if pred(post)) - - def entities(self, pred: Callable[[data.Posting], bool]=bool) -> Iterator[MetaValue]: - return filters.iter_unique( - post.meta['entity'] - for post in self - if pred(post) and 'entity' in post.meta - ) + __slots__ = () @classmethod def make_consistent(cls, @@ -266,31 +212,13 @@ class AccrualPostings(core.RelatedPostings): if pay_posts: yield key, cls(pay_posts, _can_own=True) - def is_paid(self, default: Optional[bool]=None) -> Optional[bool]: - if self.accrual_type is None: - return default - else: - return self.end_balance.le_zero() - - def is_zero(self, default: Optional[bool]=None) -> Optional[bool]: - if self.accrual_type is None: - return default - else: - return self.end_balance.is_zero() - - def since_last_nonzero(self) -> 'AccrualPostings': - for index, (post, balance) in enumerate(self.iter_with_balance()): - if balance.is_zero(): - start_index = index + def is_paid(self) -> Optional[bool]: try: - empty = start_index == index - except NameError: - empty = True - return self if empty else self[start_index + 1:] - - @property - def rt_id(self) -> Union[str, None, Sentinel]: - return self._single_item(self.first_meta_links('rt-id', None)) + accrual_type = AccrualAccount.classify(self) + except ValueError: + return None + else: + return accrual_type.normalize_amount(self.balance()).le_zero() class BaseReport: @@ -307,7 +235,7 @@ class BaseReport: print(line, file=self.out_file) -class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]): +class AgingODS(core.BaseODS[AccrualPostings, data.Account]): DOC_COLUMNS = [ 'rt-id', 'invoice', @@ -334,11 +262,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]): self.date = date self.logger = logger - def section_key(self, row: AccrualPostings) -> Optional[data.Account]: - if isinstance(row.account, str): - return row.account - else: - return None + def section_key(self, row: AccrualPostings) -> data.Account: + return row[0].account def start_spreadsheet(self) -> None: for accrual_type in AccrualAccount: @@ -357,9 +282,8 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]): )) self.lock_first_row() - def start_section(self, key: Optional[data.Account]) -> None: - if key is None: - return + def start_section(self, key: data.Account) -> None: + self.norm_func = core.normalize_amount_func(key) self.age_thresholds = list(AccrualAccount.by_account(key).value.aging_thresholds) self.age_balances = [core.MutableBalance() for _ in self.age_thresholds] accrual_date = self.date - datetime.timedelta(days=self.age_thresholds[-1]) @@ -374,9 +298,7 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]): )) self.add_row() - def end_section(self, key: Optional[data.Account]) -> None: - if key is None: - return + def end_section(self, key: data.Account) -> None: total_balance = core.MutableBalance() text_style = self.merge_styles(self.style_bold, self.style_endtext) text_span = 4 @@ -418,28 +340,30 @@ class AgingODS(core.BaseODS[AccrualPostings, Optional[data.Account]]): ) def write_row(self, row: AccrualPostings) -> None: - age = (self.date - row[0].meta.date).days - if row.end_balance.ge_zero(): + row_date = row[0].meta.date + row_balance = self.norm_func(row.balance_at_cost()) + age = (self.date - row_date).days + if row_balance.ge_zero(): for index, threshold in enumerate(self.age_thresholds): if age >= threshold: - self.age_balances[index] += row.end_balance + self.age_balances[index] += row_balance break else: return - raw_balance = row.balance() - if row.accrual_type is not None: - raw_balance = row.accrual_type.normalize_amount(raw_balance) - if raw_balance == row.end_balance: + raw_balance = self.norm_func(row.balance()) + if raw_balance == row_balance: amount_cell = odf.table.TableCell() else: amount_cell = self.balance_cell(raw_balance) - projects = {post.meta.get('project') or None for post in row} + entities = row.meta_values('entity') + entities.discard(None) + projects = row.meta_values('project') projects.discard(None) self.add_row( - self.date_cell(row[0].meta.date), - self.multiline_cell(row.entities()), + self.date_cell(row_date), + self.multiline_cell(sorted(entities)), amount_cell, - self.balance_cell(row.end_balance), + self.balance_cell(row_balance), self.multiline_cell(sorted(projects)), *(self.meta_links_cell(row.all_meta_links(key)) for key in self.DOC_COLUMNS), @@ -459,35 +383,12 @@ class AgingReport(BaseReport): self.ods = AgingODS(rt_wrapper, date, self.logger) def run(self, groups: PostGroups) -> None: - rows: List[AccrualPostings] = [] - for group in groups.values(): - if group.is_zero(): - # Cheap optimization: don't slice and dice groups we're not - # going to report anyway. - continue - elif group.accrual_type is None: - group = group.since_last_nonzero() - else: - # Filter out new accruals after the report date. - # e.g., cover the case that the same invoices has multiple - # postings over time, and we don't want to report too-recent - # ones. - cutoff_date = self.ods.date - datetime.timedelta( - days=group.accrual_type.value.aging_thresholds[-1], - ) - group = AccrualPostings( - post for post in group.since_last_nonzero() - if post.meta.date <= cutoff_date - or group.accrual_type.normalize_amount(post.units.number) < 0 - ) - if group and not group.is_zero(): - rows.append(group) - rows.sort(key=lambda related: ( - related.account, - related[0].meta.date, - ('\0'.join(related.entities()) - if related.entity is related.INCONSISTENT - else related.entity), + rows = [group for group in groups.values() + if not group.balance_at_cost().is_zero()] + rows.sort(key=lambda group: ( + group[0].account, + group[0].meta.date, + abs(sum(amt.number for amt in group.balance_at_cost().values())), )) self.ods.write(rows) self.ods.save_file(self.out_bin) @@ -495,12 +396,14 @@ class AgingReport(BaseReport): class BalanceReport(BaseReport): def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]: - posts = posts.since_last_nonzero() - date_s = posts[0].meta.date.strftime('%Y-%m-%d') + meta = posts[0].meta + date_s = meta.date.strftime('%Y-%m-%d') + entity_s = meta.get('entity', '') + invoice_s = meta.get('invoice', '') balance_s = posts.balance_at_cost().format(zero="Zero balance") if index: yield "" - yield f"{posts.invoice}:" + yield f"{entity_s} {invoice_s}:" yield f" {balance_s} outstanding since {date_s}" @@ -523,10 +426,12 @@ class OutgoingReport(BaseReport): self.rt_client = rt_wrapper.rt def _primary_rt_id(self, posts: AccrualPostings) -> rtutil.TicketAttachmentIds: - rt_id = posts.rt_id + rt_ids = posts.first_meta_links('rt-id') + rt_id = next(rt_ids, None) + rt_id2 = next(rt_ids, None) if rt_id is None: raise ValueError("no rt-id links found") - elif isinstance(rt_id, Sentinel): + elif rt_id2 is not None: raise ValueError("multiple rt-id links found") parsed = rtutil.RT.parse(rt_id) if parsed is None: @@ -535,7 +440,6 @@ class OutgoingReport(BaseReport): return parsed def _report(self, posts: AccrualPostings, index: int) -> Iterable[str]: - posts = posts.since_last_nonzero() try: ticket_id, _ = self._primary_rt_id(posts) ticket = self.rt_client.get_ticket(ticket_id) @@ -545,9 +449,13 @@ class OutgoingReport(BaseReport): ticket = None errmsg = error.args[0] if ticket is None: + meta = posts[0].meta self.logger.error( - "can't generate outgoings report for %s because no RT ticket available: %s", - posts.invoice, errmsg, + "can't generate outgoings report for %s %s %s because no RT ticket available: %s", + meta.date.isoformat(), + meta.get('entity', ''), + meta.get('invoice', ''), + errmsg, ) return @@ -566,10 +474,11 @@ class OutgoingReport(BaseReport): ) requestor = f'{requestor_name} <{rt_requestor["EmailAddress"]}>'.strip() - balance_s = posts.end_balance.format(None) + balance = -posts.balance_at_cost() + balance_s = balance.format(None) raw_balance = -posts.balance() payment_amount = raw_balance.format('¤¤ #,##0.00') - if raw_balance != posts.end_balance: + if raw_balance != balance: payment_amount += f' ({balance_s})' balance_s = f'{raw_balance} ({balance_s})' @@ -776,14 +685,15 @@ def main(arglist: Optional[Sequence[str]]=None, groups: PostGroups if args.report_type is None or args.report_type is ReportType.OUTGOING: groups = dict(AccrualPostings.group_by_first_meta_link(postings, 'rt-id')) - if (args.report_type is None - and len(groups) == 1 - and all(group.accrual_type is AccrualAccount.PAYABLE - and not group.is_paid() - and key # Make sure we have a usable rt-id - for key, group in groups.items()) - ): - args.report_type = ReportType.OUTGOING + if args.report_type is None and len(groups) == 1: + key = next(iter(groups)) + group = groups[key] + account = group[0].account + if (AccrualAccount.by_account(account) is AccrualAccount.PAYABLE + and all(post.account == account for post in group) + and not group.balance().ge_zero() + and key): # Make sure we have a usable rt-id + args.report_type = ReportType.OUTGOING if args.report_type is not ReportType.OUTGOING: groups = dict(AccrualPostings.make_consistent(postings)) if args.report_type is not ReportType.AGING: diff --git a/tests/test_reports_accrual.py b/tests/test_reports_accrual.py index 4854363e21399218f6cc2346c937f9307242cd4d..8ecac99fce758fd1f82237cc6bd000f284a65568 100644 --- a/tests/test_reports_accrual.py +++ b/tests/test_reports_accrual.py @@ -263,82 +263,6 @@ def test_report_type_by_unknown_name(arg): with pytest.raises(ValueError): accrual.ReportType.by_name(arg) -@pytest.mark.parametrize('acct_name', ACCOUNTS) -def test_accrual_postings_consistent_account(acct_name): - meta = {'invoice': '{acct_name} invoice.pdf'} - txn = testutil.Transaction(postings=[ - (acct_name, 50, meta), - (acct_name, 25, meta), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - assert related.account == acct_name - -def test_accrual_postings_entity(): - txn = testutil.Transaction(postings=[ - (ACCOUNTS[0], 25, {'entity': 'Accruee'}), - (ACCOUNTS[0], -15, {'entity': 'Payee15'}), - (ACCOUNTS[0], -10, {'entity': 'Payee10'}), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - assert related.entity == 'Accruee' - assert set(related.entities()) == {'Accruee', 'Payee10', 'Payee15'} - -def test_accrual_postings_entities(): - txn = testutil.Transaction(postings=[ - (ACCOUNTS[0], 25, {'entity': 'Accruee'}), - (ACCOUNTS[0], -15, {'entity': 'Payee15'}), - (ACCOUNTS[0], -10, {'entity': 'Payee10'}), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - actual = related.entities() - assert next(actual, None) == 'Accruee' - assert set(actual) == {'Payee10', 'Payee15'} - -def test_accrual_postings_entities_no_duplicates(): - txn = testutil.Transaction(postings=[ - (ACCOUNTS[0], 25, {'entity': 'Accruee'}), - (ACCOUNTS[0], -15, {'entity': 'Accruee'}), - (ACCOUNTS[0], -10, {'entity': 'Other'}), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - actual = related.entities() - assert next(actual, None) == 'Accruee' - assert next(actual, None) == 'Other' - assert next(actual, None) is None - -def test_accrual_postings_inconsistent_account(): - meta = {'invoice': 'invoice.pdf'} - txn = testutil.Transaction(postings=[ - (acct_name, index, meta) - for index, acct_name in enumerate(ACCOUNTS) - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - assert related.account is related.INCONSISTENT - -def test_accrual_postings_rt_id(): - txn = testutil.Transaction(postings=[ - (ACCOUNTS[0], 10, {'rt-id': 'rt:90'}), - (ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:92'}), - (ACCOUNTS[0], 10, {'rt-id': 'rt:90 rt:94 rt:92'}), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - assert related.rt_id == 'rt:90' - -def test_accrual_postings_rt_id_inconsistent(): - txn = testutil.Transaction(postings=[ - (ACCOUNTS[0], 10, {'rt-id': 'rt:96'}), - (ACCOUNTS[0], 10, {'rt-id': 'rt:98 rt:96'}), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - assert related.rt_id is related.INCONSISTENT - -def test_accrual_postings_rt_id_none(): - txn = testutil.Transaction(postings=[ - (ACCOUNTS[0], 10), - ]) - related = accrual.AccrualPostings(data.Posting.from_txn(txn)) - assert related.rt_id is None - @pytest.mark.parametrize('acct_name,invoice,day', testutil.combine_values( ACCOUNTS, ['FIXME', '', None, *testutil.NON_STRING_METADATA_VALUES], @@ -353,18 +277,13 @@ def test_make_consistent_bad_invoice(acct_name, invoice, day): (acct_name, index * mult, {'invoice': invoice, 'entity': f'BadInvoice{day}'}) for index in range(1, 4) ]) - consistent = dict(accrual.AccrualPostings.make_consistent(data.Posting.from_txn(txn))) + postings = data.Posting.from_txn(txn) + consistent = dict(accrual.AccrualPostings.make_consistent(iter(postings))) assert len(consistent) == 1 - key = next(iter(consistent)) - assert acct_name in key - if invoice: - assert str(invoice) in key - actual = consistent[key] - assert actual + _, actual = consistent.popitem() assert len(actual) == 3 - for act_post, exp_post in zip(actual, txn.postings): - assert act_post.units == exp_post.units - assert act_post.meta.get('invoice') == invoice + for act_post, exp_post in zip(actual, postings): + assert act_post == exp_post def test_make_consistent_across_accounts(): invoice = 'Invoices/CrossAccount.pdf' @@ -376,7 +295,6 @@ def test_make_consistent_across_accounts(): assert len(consistent) == len(ACCOUNTS) for key, posts in consistent.items(): assert len(posts) == 1 - assert posts.account in key def test_make_consistent_both_invoice_and_account(): txn = testutil.Transaction(date=datetime.date(2019, 2, 2), postings=[ @@ -386,7 +304,6 @@ def test_make_consistent_both_invoice_and_account(): assert len(consistent) == len(ACCOUNTS) for key, posts in consistent.items(): assert len(posts) == 1 - assert posts.account in key @pytest.mark.parametrize('acct_name', ACCOUNTS) def test_make_consistent_across_entity(acct_name): @@ -399,10 +316,6 @@ def test_make_consistent_across_entity(acct_name): assert len(consistent) == 3 for key, posts in consistent.items(): assert len(posts) == 1 - entities = posts.entities() - assert next(entities, None) == posts.entity - assert next(entities, None) is None - assert posts.entity in key @pytest.mark.parametrize('acct_name', ACCOUNTS) def test_make_consistent_entity_differs_accrual_payment(acct_name): @@ -440,7 +353,7 @@ def test_make_consistent_by_date_with_exact_payment(): actual = [group for _, group in accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))] assert len(actual) == 2 - assert actual[0].is_zero() + assert sum(post.units.number for post in actual[0]) == 0 assert len(actual[1]) == 1 assert actual[1][0].meta.date.day == 3 @@ -470,7 +383,7 @@ def test_make_consistent_by_date_with_overpayment(): actual = [group for _, group in accrual.AccrualPostings.make_consistent(data.Posting.from_entries(entries))] assert len(actual) == 2 - assert actual[0].is_zero() + assert sum(post.units.number for post in actual[0]) == 0 assert len(actual[1]) == 2 assert actual[1][0].meta.date.day == 2 assert actual[1][0].units.number == -25 @@ -658,7 +571,8 @@ def test_outgoing_report_without_rt_id(accrual_postings, caplog): assert caplog.records log = caplog.records[0] assert log.message.startswith( - f"can't generate outgoings report for {invoice} because no RT ticket available:", + f"can't generate outgoings report for 2010-05-15 MatchingProgram {invoice}" + " because no RT ticket available:", ) assert not output.getvalue() @@ -761,7 +675,7 @@ def test_output_payments_when_only_match(arglist, expect_invoice): assert not errors.getvalue() assert retcode == 0 check_output(output, [ - rf'^{re.escape(expect_invoice)}:$', + rf'^EarlyBird {re.escape(expect_invoice)}:$', r' outstanding since ', ])