Changeset - 6c7603fa6c73
[Not reviewed]
0 3 0
Brett Smith - 4 years ago 2020-07-21 02:45:14
brettcsmith@brettcsmith.org
ledger: Add options to control account totals display.
3 files changed with 140 insertions and 52 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/ledger.py
Show inline comments
 
"""ledger.py - General ledger report from Beancount
 

	
 
This tool produces a spreadsheet that shows postings in Beancount, organized
 
by account.
 

	
 
Specify the date range you want to report with the ``--begin`` and ``--end``
 
options.
 

	
 
Select the accounts you want to report with the ``--account`` option. You can
 
specify this option multiple times. The report will include at least one sheet
 
for each account you specify. Subaccounts will be reported on that sheet as
 
well.
 

	
 
Select the postings you want to report by passing metadata search terms in
 
``name=value`` format.
 

	
 
Run ``ledger-report --help`` for abbreviations and other options.
 

	
 
Examples
 
--------
 

	
 
Report all activity related to a given project::
 

	
 
    ledger-report project=NAME
 

	
 
Get all Assets postings for a given month to help with reconciliation::
 

	
 
    ledger-report -a Assets -b 2018-05-01 -e 2018-06-01
 
"""
 
# Copyright © 2020  Brett Smith
 
#
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU Affero General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 

	
 
import argparse
 
import collections
 
import datetime
 
import enum
 
import itertools
 
import operator
 
import logging
 
import sys
 

	
 
from typing import (
 
    Callable,
 
    Dict,
 
    Iterable,
 
    Iterator,
 
    List,
 
    Mapping,
 
    Optional,
 
    Sequence,
 
    Set,
 
    TextIO,
 
    Tuple,
 
    Union,
 
)
 

	
 
from pathlib import Path
 

	
 
import odf.table  # type:ignore[import]
 

	
 
from beancount.core import data as bc_data
 
from beancount.parser import printer as bc_printer
 

	
 
from . import core
 
from .. import books
 
from .. import cliutil
 
from .. import config as configmod
 
from .. import data
 
from .. import ranges
 
from .. import rtutil
 

	
 
PostTally = List[Tuple[int, data.Account]]
 

	
 
PROGNAME = 'ledger-report'
 
logger = logging.getLogger('conservancy_beancount.reports.ledger')
 

	
 
class LedgerODS(core.BaseODS[data.Posting, data.Account]):
 
    CORE_COLUMNS: Sequence[str] = [
 
        'Date',
 
        data.Metadata.human_name('entity'),
 
        'Description',
 
        'Original Amount',
 
        'Booked Amount',
 
    ]
 
    ACCOUNT_COLUMNS: Dict[str, Sequence[str]] = collections.OrderedDict([
 
        ('Income', ['project', 'rt-id', 'receipt', 'income-type', 'memo']),
 
        ('Expenses', ['project', 'rt-id', 'receipt', 'approval', 'expense-allocation']),
 
        ('Equity', ['rt-id']),
 
        ('Assets:Receivable', ['project', 'rt-id', 'invoice', 'approval', 'contract', 'purchase-order']),
 
        ('Liabilities:Payable', ['project', 'rt-id', 'invoice', 'approval', 'contract', 'purchase-order']),
 
        ('Assets:PayPal', ['rt-id', 'paypal-id', 'receipt', 'approval']),
 
        ('Assets', ['rt-id', 'receipt', 'approval', 'bank-statement']),
 
        ('Liabilities', ['rt-id', 'receipt', 'approval', 'bank-statement']),
 
    ])
 
    # Excel 2003 was limited to 65,536 rows per worksheet.
 
    # While we can probably count on all our users supporting more modern
 
    # formats (Excel 2007 supports over 1 million rows per worksheet),
 
    # keeping the default limit conservative seems good to avoid running into
 
    # other limits (like the number of hyperlinks per worksheet), plus just
 
    # better for human organization and readability.
 
    SHEET_SIZE = 65500
 

	
 
    def __init__(self,
 
                 start_date: datetime.date,
 
                 stop_date: datetime.date,
 
                 accounts: Optional[Sequence[str]]=None,
 
                 rt_wrapper: Optional[rtutil.RT]=None,
 
                 sheet_size: Optional[int]=None,
 
                 totals_with_entries: Optional[Sequence[str]]=None,
 
                 totals_without_entries: Optional[Sequence[str]]=None,
 
    ) -> None:
 
        if sheet_size is None:
 
            sheet_size = self.SHEET_SIZE
 
        if totals_with_entries is None:
 
            totals_with_entries = [s for s in self.ACCOUNT_COLUMNS if ':' not in s]
 
        if totals_without_entries is None:
 
            totals_without_entries = totals_with_entries
 
        super().__init__(rt_wrapper)
 
        self.date_range = ranges.DateRange(start_date, stop_date)
 
        self.sheet_size = sheet_size
 
        self.totals_with_entries = totals_with_entries
 
        self.totals_without_entries = totals_without_entries
 

	
 
        if accounts is None:
 
            self.accounts = set(data.Account.iter_accounts())
 
            self.required_sheet_names = list(self.ACCOUNT_COLUMNS)
 
        else:
 
            self.accounts = set()
 
            self.required_sheet_names = []
 
            for acct_spec in accounts:
 
                subaccounts = frozenset(data.Account.iter_accounts_by_hierarchy(acct_spec))
 
                if subaccounts:
 
                    self.accounts.update(subaccounts)
 
                    self._require_sheet(acct_spec)
 
                else:
 
                    account_roots_map = collections.defaultdict(list)
 
                    for account in data.Account.iter_accounts_by_classification(acct_spec):
 
                        self.accounts.add(account)
 
                        account_roots_map[account.root_part()].append(account)
 
                    if not account_roots_map:
 
                        raise ValueError("unknown account name or classification", acct_spec)
 
                    for root_part, accounts in account_roots_map.items():
 
                        start_count = min(account.count_parts() for account in accounts)
 
                        for count in range(start_count, 1, -1):
 
                            target = accounts[0].root_part(count)
 
                            if all(acct.root_part(count) == target for acct in accounts):
 
                                self._require_sheet(target)
 
                                break
 
                        else:
 
                            self._require_sheet(root_part)
 

	
 
    def _require_sheet(self, new_sheet: str) -> None:
 
        for index, sheet in enumerate(self.required_sheet_names):
 
            if new_sheet == sheet:
 
                break
 
            elif new_sheet.startswith(sheet):
 
                self.required_sheet_names.insert(index, new_sheet)
 
                break
 
        else:
 
            self.required_sheet_names.append(new_sheet)
 

	
 
    def init_styles(self) -> None:
 
        super().init_styles()
 
        self.amount_column = self.column_style(1.2)
 
        self.default_column = self.column_style(1.5)
 
        self.column_styles: Mapping[str, Union[str, odf.style.Style]] = {
 
            'Date': '',
 
            'Description': self.column_style(2),
 
            'Original Amount': self.amount_column,
 
            'Booked Amount': self.amount_column,
 
            data.Metadata.human_name('project'): self.amount_column,
 
            data.Metadata.human_name('rt-id'): self.amount_column,
 
        }
 

	
 
    @classmethod
 
    def _group_tally(
 
            cls,
 
            tally_by_account: PostTally,
 
            key: Callable[[data.Account], Optional[str]],
 
    ) -> Dict[str, PostTally]:
 
        retval: Dict[str, PostTally] = collections.defaultdict(list)
 
        for count, account in tally_by_account:
 
            item_key = key(account)
 
            if item_key is not None:
 
                retval[item_key].append((count, account))
 
        return retval
 

	
 
    @classmethod
 
    def _split_sheet(
 
            cls,
 
            tally_by_account: PostTally,
 
            sheet_size: int,
 
            sheet_name: str,
 
    ) -> Iterator[str]:
 
        total = 0
 
        for index, (count, account) in enumerate(tally_by_account):
 
            total += count
 
            if total > sheet_size:
 
                break
 
        else:
 
            # All the accounts fit in this sheet.
 
            yield sheet_name
 
            return
 
        if index == 0 and len(tally_by_account) == 1:
 
            # With one account, we can't split any further, so warn and stop.
 
            logger.warning(
 
                "%s has %s rows, over size %s",
 
                account, f'{count:,g}', f'{sheet_size:,g}',
 
            )
 
            yield sheet_name
 
            return
 
        group_func = operator.methodcaller('root_part', sheet_name.count(':') + 2)
 
        maybe_split = cls._group_tally(tally_by_account[:index], group_func)
 
        must_split = cls._group_tally(tally_by_account[index:], group_func)
 
        for subkey, must_split_tally in sorted(must_split.items()):
 
            split_names = cls._split_sheet(
 
                maybe_split.get(subkey, []) + must_split_tally, sheet_size, subkey,
 
            )
 
            # We must be willing to split out at least as many sheets as there
 
            # are accounts that didn't fit. Do that first.
 
            yield from itertools.islice(split_names, len(must_split_tally))
 
            # After that, we can be in one of two cases:
 
            # 1. There is no next sheet. All the accounts, including the
 
            #    maybe_splits and must_splits, fit on planned subsheets.
 
            #    Update state to note we don't need a sheet for them anymore.
 
            # 2. The next sheet is named `subkey`, and is planned to include
 
            #    all of our maybe_split accounts. However, we don't need to
 
            #    yield that sheet name, because those accounts already fit in
 
            #    the sheet we're planning, and it would be a needless split.
 
            next_sheet_name = next(split_names, None)
 
            if next_sheet_name is None:
 
                maybe_split.pop(subkey, None)
 
            else:
 
                assert next_sheet_name == subkey
 
                assert not any(split_names)
 
        if maybe_split:
 
            yield sheet_name
 

	
 
    @classmethod
 
    def plan_sheets(
 
            cls,
 
            tally_by_account: Mapping[data.Account, int],
 
            base_sheets: Sequence[str],
 
            sheet_size: int,
 
    ) -> Sequence[str]:
 
        sorted_tally: PostTally = [
 
            (count, account)
 
            for account, count in tally_by_account.items()
 
        ]
 
        sorted_tally.sort()
 
        split_tally = cls._group_tally(
 
            sorted_tally,
 
            operator.methodcaller('is_under', *base_sheets),
 
        )
 
        return [
 
            sheet_name
 
            for key in base_sheets
 
            for sheet_name in cls._split_sheet(split_tally[key], sheet_size, key)
 
        ]
 

	
 
    def section_key(self, row: data.Posting) -> data.Account:
 
        return row.account
 

	
 
    def start_sheet(self, sheet_name: str) -> None:
 
        self.use_sheet(sheet_name.replace(':', ' '))
 
        columns_key = data.Account(sheet_name).is_under(*self.ACCOUNT_COLUMNS)
 
        # columns_key must not be None because ACCOUNT_COLUMNS has an entry
 
        # for all five root accounts.
 
        assert columns_key is not None
 
        self.metadata_columns = self.ACCOUNT_COLUMNS[columns_key]
 
        self.sheet_columns: Sequence[str] = [
 
            *self.CORE_COLUMNS,
 
            *(data.Metadata.human_name(meta_key) for meta_key in self.metadata_columns),
 
        ]
 
        for col_name in self.sheet_columns:
 
            self.sheet.addElement(odf.table.TableColumn(
 
                stylename=self.column_styles.get(col_name, self.default_column),
 
            ))
 
        self.add_row(*(
 
            self.string_cell(col_name, stylename=self.style_bold)
 
            for col_name in self.sheet_columns
 
        ))
 
        self.lock_first_row()
 

	
 
    def _report_section_balance(self, key: data.Account, date_key: str) -> None:
 
        related = self.account_groups[key]
 
        if date_key == 'start':
 
            if not key.keeps_balance():
 
                return
 
            date = self.date_range.start
 
            balance = related.start_bal
 
            description = "Opening Balance"
 
        else:
 
            date = self.date_range.stop
 
            if key.keeps_balance():
 
                balance = related.stop_bal
 
                description = "Ending Balance"
 
            else:
 
                balance = related.period_bal
 
                description = "Period Total"
 
        self.add_row(
 
            self.date_cell(date, stylename=self.merge_styles(
 
                self.style_bold, self.style_date,
 
            )),
 
            odf.table.TableCell(),
 
            self.string_cell(description, stylename=self.style_bold),
 
            odf.table.TableCell(),
 
            self.balance_cell(self.norm_func(balance), stylename=self.style_bold),
 
        )
 

	
 
    def start_section(self, key: data.Account) -> None:
 
    def start_section(self, key: data.Account, *, force_total: bool=False) -> None:
 
        self.add_row()
 
        self.add_row(
 
            odf.table.TableCell(),
 
            self.string_cell(
 
                f"{key} Ledger"
 
                f" From {self.date_range.start.isoformat()}"
 
                f" To {self.date_range.stop.isoformat()}",
 
                stylename=self.style_bold,
 
                numbercolumnsspanned=len(self.sheet_columns) - 1,
 
            ),
 
        )
 
        self.norm_func = core.normalize_amount_func(key)
 
        self._report_section_balance(key, 'start')
 
        if force_total or key.is_under(*self.totals_with_entries):
 
            self._report_section_balance(key, 'start')
 

	
 
    def end_section(self, key: data.Account) -> None:
 
        self._report_section_balance(key, 'stop')
 

	
 
    def write_row(self, row: data.Posting) -> None:
 
        if row.cost is None:
 
            amount_cell = odf.table.TableCell()
 
        else:
 
            amount_cell = self.currency_cell(self.norm_func(row.units))
 
        self.add_row(
 
            self.date_cell(row.meta.date),
 
            self.string_cell(row.meta.get('entity') or ''),
 
            self.string_cell(row.meta.txn.narration),
 
            amount_cell,
 
            self.currency_cell(self.norm_func(row.at_cost())),
 
            *(self.meta_links_cell(row.meta.report_links(key))
 
              if key in data.LINK_METADATA
 
              else self.string_cell(row.meta.get(key, ''))
 
              for key in self.metadata_columns),
 
        )
 

	
 
    def write_balance_sheet(self) -> None:
 
        self.use_sheet("Balance")
 
        self.sheet.addElement(odf.table.TableColumn(stylename=self.column_style(3)))
 
        self.sheet.addElement(odf.table.TableColumn(stylename=self.column_style(1.5)))
 
        self.add_row(
 
            self.string_cell("Account", stylename=self.style_bold),
 
            self.string_cell("Balance", stylename=self.style_bold),
 
        )
 
        self.lock_first_row()
 
        self.add_row()
 
        self.add_row(self.string_cell(
 
            f"Ledger From {self.date_range.start.isoformat()}"
 
            f" To {self.date_range.stop.isoformat()}",
 
            stylename=self.merge_styles(self.style_centertext, self.style_bold),
 
            numbercolumnsspanned=2,
 
        ))
 
        self.add_row()
 
        for account, balance in core.account_balances(self.account_groups):
 
            if account is core.OPENING_BALANCE_NAME:
 
                text = f"Balance as of {self.date_range.start.isoformat()}"
 
                style = self.merge_styles(self.style_bold, self.style_endtext)
 
            elif account is core.ENDING_BALANCE_NAME:
 
                text = f"Balance as of {self.date_range.stop.isoformat()}"
 
                style = self.merge_styles(self.style_bold, self.style_endtext)
 
            else:
 
                text = account
 
                style = self.style_endtext
 
            self.add_row(
 
                self.string_cell(text, stylename=style),
 
                self.balance_cell(-balance, stylename=style),
 
            )
 

	
 
    def write(self, rows: Iterable[data.Posting]) -> None:
 
        related_cls = core.PeriodPostings.with_start_date(self.date_range.start)
 
        self.account_groups = dict(related_cls.group_by_account(
 
            post for post in rows if post.meta.date < self.date_range.stop
 
        ))
 
        for empty_acct in self.accounts.difference(self.account_groups):
 
            self.account_groups[empty_acct] = related_cls()
 
        self.write_balance_sheet()
 
        tally_by_account_iter = (
 
            (account, len(self.account_groups[account]))
 
            for account in self.accounts
 
        )
 
        tally_by_account = {
 
            # 3 for the rows generated by start_section+end_section
 
            account: count + 3
 
            for account, count in tally_by_account_iter
 
        }
 
        sheet_names = self.plan_sheets(
 
            tally_by_account, self.required_sheet_names, self.sheet_size,
 
        )
 
        using_sheet_index = -1
 
        for sheet_index, account in core.sort_and_filter_accounts(
 
                tally_by_account, sheet_names,
 
        ):
 
            while using_sheet_index < sheet_index:
 
                using_sheet_index += 1
 
                self.start_sheet(sheet_names[using_sheet_index])
 
            postings = self.account_groups[account]
 
            if postings:
 
                super().write(postings)
 
            elif account.is_open_on_date(self.date_range.start):
 
                self.start_section(account)
 
            elif not account.is_open_on_date(self.date_range.start):
 
                pass
 
            elif account.is_under(*self.totals_without_entries):
 
                self.start_section(account, force_total=True)
 
                self.end_section(account)
 
        for index in range(using_sheet_index + 1, len(sheet_names)):
 
            self.start_sheet(sheet_names[index])
 

	
 

	
 
class ReturnFlag(enum.IntFlag):
 
    LOAD_ERRORS = 1
 
    NOTHING_TO_REPORT = 8
 

	
 

	
 
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser(prog=PROGNAME)
 
    cliutil.add_version_argument(parser)
 
    parser.add_argument(
 
        '--begin', '--start', '-b',
 
        dest='start_date',
 
        metavar='DATE',
 
        type=cliutil.date_arg,
 
        help="""Date to start reporting entries, inclusive, in YYYY-MM-DD format.
 
The default is one year ago.
 
""")
 
    parser.add_argument(
 
        '--end', '--stop', '-e',
 
        dest='stop_date',
 
        metavar='DATE',
 
        type=cliutil.date_arg,
 
        help="""Date to stop reporting entries, exclusive, in YYYY-MM-DD format.
 
The default is a year after the start date, or 30 days from today if the start
 
date was also not specified.
 
""")
 
    parser.add_argument(
 
        '--account', '-a',
 
        dest='accounts',
 
        metavar='ACCOUNT',
 
        action='append',
 
        help="""Show this account in the report. You can specify this option
 
multiple times. You can specify a part of the account hierarchy, or an account
 
classification from metadata. If not specified, the default set adapts to your
 
search criteria.
 
""")
 
    parser.add_argument(
 
        '--show-totals', '-S',
 
        metavar='ACCOUNT',
 
        action='append',
 
        help="""When entries for this account appear in the report, include
 
account balance(s) as well. You can specify this option multiple times. Pass in
 
a part of the account hierarchy. The default is all accounts.
 
""")
 
    parser.add_argument(
 
        '--add-totals', '-T',
 
        metavar='ACCOUNT',
 
        action='append',
 
        help="""When an account could be included in the report but does not
 
have any entries in the date range, include a header and account balance(s) for
 
it. You can specify this option multiple times. Pass in a part of the account
 
hierarchy. The default set adapts to your search criteria.
 
""")
 
    parser.add_argument(
 
        '--sheet-size', '--size',
 
        metavar='SIZE',
 
        type=int,
 
        default=LedgerODS.SHEET_SIZE,
 
        help="""Try to limit sheets to this many rows. The report will
 
automatically create new sheets to make this happen. When that's not possible,
 
it will issue a warning.
 
""")
 
    parser.add_argument(
 
        '--output-file', '-O',
 
        metavar='PATH',
 
        type=Path,
 
        help="""Write the report to this file, or stdout when PATH is `-`.
 
The default is `LedgerReport_<StartDate>_<StopDate>.ods`.
 
""")
 
    cliutil.add_loglevel_argument(parser)
 
    parser.add_argument(
 
        'search_terms',
 
        metavar='FILTER',
 
        type=cliutil.SearchTerm.arg_parser('project', 'rt-id'),
 
        nargs=argparse.ZERO_OR_MORE,
 
        help="""Report on postings that match this criteria. The format is
 
NAME=TERM. TERM is a link or word that must exist in a posting's NAME
 
metadata to match. A single ticket number is a shortcut for
 
`rt-id=rt:NUMBER`. Any other word is a shortcut for `project=TERM`.
 
""")
 
    args = parser.parse_args(arglist)
 
    if args.add_totals is None and args.search_terms:
 
        args.add_totals = []
 
    if args.accounts is None:
 
        if any(term.meta_key == 'project' for term in args.search_terms):
 
            args.accounts = [
 
                'Income',
 
                'Expenses',
 
                'Assets:Receivable',
 
                'Assets:Prepaid',
 
                'Liabilities:UnearnedIncome',
 
                'Liabilities:Payable',
 
            ]
 
        else:
 
            args.accounts = list(LedgerODS.ACCOUNT_COLUMNS)
 
    return args
 

	
 
def diff_year(date: datetime.date, diff: int) -> datetime.date:
 
    new_year = date.year + diff
 
    try:
 
        return date.replace(year=new_year)
 
    except ValueError:
 
        # The original date is Feb 29, which doesn't exist in the new year.
 
        if diff < 0:
 
            return datetime.date(new_year, 2, 28)
 
        else:
 
            return datetime.date(new_year, 3, 1)
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
 
         stdout: TextIO=sys.stdout,
 
         stderr: TextIO=sys.stderr,
 
         config: Optional[configmod.Config]=None,
 
) -> int:
 
    args = parse_arguments(arglist)
 
    cliutil.set_loglevel(logger, args.loglevel)
 
    if config is None:
 
        config = configmod.Config()
 
        config.load_file()
 

	
 
    today = datetime.date.today()
 
    if args.start_date is None:
 
        args.start_date = diff_year(today, -1)
 
        if args.stop_date is None:
 
            args.stop_date = today + datetime.timedelta(days=30)
 
    elif args.stop_date is None:
 
        args.stop_date = diff_year(args.start_date, 1)
 

	
 
    returncode = 0
 
    books_loader = config.books_loader()
 
    if books_loader is None:
 
        entries, load_errors, options = books.Loader.load_none(config.config_file_path())
 
    else:
 
        entries, load_errors, options = books_loader.load_fy_range(args.start_date, args.stop_date)
 
    for error in load_errors:
 
        bc_printer.print_error(error, file=stderr)
 
        returncode |= ReturnFlag.LOAD_ERRORS
 

	
 
    data.Account.load_from_books(entries, options)
 
    postings = data.Posting.from_entries(entries)
 
    for search_term in args.search_terms:
 
        postings = search_term.filter_postings(postings)
 

	
 
    rt_wrapper = config.rt_wrapper()
 
    if rt_wrapper is None:
 
        logger.warning("could not initialize RT client; spreadsheet links will be broken")
 
    try:
 
        report = LedgerODS(
 
            args.start_date,
 
            args.stop_date,
 
            args.accounts,
 
            rt_wrapper,
 
            args.sheet_size,
 
            args.show_totals,
 
            args.add_totals,
 
        )
 
    except ValueError as error:
 
        logger.error("%s: %r", *error.args)
 
        return 2
 
    report.write(postings)
 
    if not any(report.account_groups.values()):
 
        logger.warning("no matching postings found to report")
 
        returncode |= ReturnFlag.NOTHING_TO_REPORT
 

	
 
    if args.output_file is None:
 
        out_dir_path = config.repository_path() or Path()
 
        args.output_file = out_dir_path / 'LedgerReport_{}_{}.ods'.format(
 
            args.start_date.isoformat(), args.stop_date.isoformat(),
 
        )
 
        logger.info("Writing report to %s", args.output_file)
 
    ods_file = cliutil.bytes_output(args.output_file, stdout)
 
    report.save_file(ods_file)
 
    return 0 if returncode == 0 else 16 + returncode
 

	
 
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
 

	
 
if __name__ == '__main__':
 
    exit(entry_point())
setup.py
Show inline comments
 
#!/usr/bin/env python3
 

	
 
from setuptools import setup
 

	
 
setup(
 
    name='conservancy_beancount',
 
    description="Plugin, library, and reports for reading Conservancy's books",
 
    version='1.5.11',
 
    version='1.5.12',
 
    author='Software Freedom Conservancy',
 
    author_email='info@sfconservancy.org',
 
    license='GNU AGPLv3+',
 

	
 
    install_requires=[
 
        'babel>=2.6',  # Debian:python3-babel
 
        'beancount>=2.2',  # Debian:beancount
 
        # 1.4.1 crashes when trying to save some documents.
 
        'odfpy>=1.4.0,!=1.4.1',  # Debian:python3-odf
 
        'PyYAML>=3.0',  # Debian:python3-yaml
 
        'regex',  # Debian:python3-regex
 
        'rt>=2.0',
 
    ],
 
    setup_requires=[
 
        'pytest-mypy',
 
        'pytest-runner',  # Debian:python3-pytest-runner
 
    ],
 
    tests_require=[
 
        'mypy>=0.770',  # Debian:python3-mypy
 
        'pytest',  # Debian:python3-pytest
 
    ],
 

	
 
    packages=[
 
        'conservancy_beancount',
 
        'conservancy_beancount.plugin',
 
        'conservancy_beancount.reports',
 
    ],
 
    entry_points={
 
        'console_scripts': [
 
            'accrual-report = conservancy_beancount.reports.accrual:entry_point',
 
            'fund-report = conservancy_beancount.reports.fund:entry_point',
 
            'ledger-report = conservancy_beancount.reports.ledger:entry_point',
 
            'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
 
        ],
 
    },
 
)
tests/test_reports_ledger.py
Show inline comments
 
"""test_reports_ledger.py - Unit tests for general ledger report"""
 
# Copyright © 2020  Brett Smith
 
#
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU Affero General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 

	
 
import collections
 
import contextlib
 
import copy
 
import datetime
 
import io
 
import re
 

	
 
import pytest
 

	
 
from . import testutil
 

	
 
import odf.table
 
import odf.text
 

	
 
from beancount.core import data as bc_data
 
from beancount import loader as bc_loader
 
from conservancy_beancount import data
 
from conservancy_beancount.reports import core
 
from conservancy_beancount.reports import ledger
 

	
 
clean_account_meta = contextlib.contextmanager(testutil.clean_account_meta)
 

	
 
Acct = data.Account
 

	
 
_ledger_load = bc_loader.load_file(testutil.test_path('books/ledger.beancount'))
 
DEFAULT_REPORT_SHEETS = [
 
    'Balance',
 
    'Income',
 
    'Expenses',
 
    'Equity',
 
    'Assets:Receivable',
 
    'Liabilities:Payable',
 
    'Assets:PayPal',
 
    'Assets',
 
    'Liabilities',
 
]
 
PROJECT_REPORT_SHEETS = DEFAULT_REPORT_SHEETS[:5] + [
 
    'Assets:Prepaid',
 
    'Liabilities:UnearnedIncome',
 
    'Liabilities:Payable',
 
]
 
del PROJECT_REPORT_SHEETS[3]
 
OVERSIZE_RE = re.compile(
 
    r'^([A-Za-z0-9:]+) has ([0-9,]+) rows, over size ([0-9,]+)$'
 
)
 
START_DATE = datetime.date(2018, 3, 1)
 
MID_DATE = datetime.date(2019, 3, 1)
 
STOP_DATE = datetime.date(2020, 3, 1)
 

	
 
@pytest.fixture
 
def ledger_entries():
 
    return copy.deepcopy(_ledger_load[0])
 

	
 
def iter_accounts(entries):
 
    for entry in entries:
 
        if isinstance(entry, bc_data.Open):
 
            yield entry.account
 

	
 
class NotFound(Exception): pass
 
class NoSheet(NotFound): pass
 
class NoHeader(NotFound): pass
 

	
 
class ExpectedPostings(core.RelatedPostings):
 
    @classmethod
 
    def find_section(cls, ods, account):
 
        for sheet in ods.getElementsByType(odf.table.Table):
 
            sheet_account = sheet.getAttribute('name').replace(' ', ':')
 
            if sheet_account and account.is_under(sheet_account):
 
                break
 
        else:
 
            raise NoSheet(account)
 
        rows = iter(sheet.getElementsByType(odf.table.TableRow))
 
        for row in rows:
 
            cells = row.childNodes
 
            if len(cells) == 2 and cells[-1].text.startswith(f'{account} '):
 
                break
 
        else:
 
            raise NoHeader(account)
 
        return rows
 

	
 
    @classmethod
 
    def check_not_in_report(cls, ods, *accounts):
 
        for account in accounts:
 
            with pytest.raises(NotFound):
 
                cls.find_section(ods, data.Account(account))
 

	
 
    @classmethod
 
    def check_in_report(cls, ods, account, start_date=START_DATE, end_date=STOP_DATE):
 
        date = end_date + datetime.timedelta(days=1)
 
        txn = testutil.Transaction(date=date, postings=[
 
            (account, 0),
 
        ])
 
        related = cls(data.Posting.from_txn(txn))
 
        related.check_report(ods, start_date, end_date)
 

	
 
    def slice_date_range(self, start_date, end_date):
 
        postings = enumerate(self)
 
        for start_index, post in postings:
 
            if start_date <= post.meta.date:
 
                break
 
        else:
 
            start_index += 1
 
        if end_date <= post.meta.date:
 
            end_index = start_index
 
        else:
 
            for end_index, post in postings:
 
                if end_date <= post.meta.date:
 
                    break
 
            else:
 
                end_index = None
 
        return (self[:start_index].balance_at_cost(),
 
                self[start_index:end_index])
 

	
 
    def check_report(self, ods, start_date, end_date):
 
    def check_report(self, ods, start_date, end_date, expect_totals=True):
 
        account = self[0].account
 
        norm_func = core.normalize_amount_func(account)
 
        open_bal, expect_posts = self.slice_date_range(start_date, end_date)
 
        open_bal = norm_func(open_bal)
 
        for sheet in ods.getElementsByType(odf.table.Table):
 
            sheet_account = sheet.getAttribute('name').replace(' ', ':')
 
            if sheet_account and account.is_under(sheet_account):
 
                break
 
        else:
 
            raise NoSheet(account)
 
        rows = iter(sheet.getElementsByType(odf.table.TableRow))
 
        for row in rows:
 
            cells = row.childNodes
 
            if len(cells) == 2 and cells[-1].text.startswith(f'{account} '):
 
                break
 
        else:
 
            if expect_posts:
 
                raise NoHeader(account)
 
            else:
 
                return
 
        closing_bal = norm_func(expect_posts.balance_at_cost())
 
        if account.is_under('Assets', 'Liabilities'):
 
        rows = self.find_section(ods, account)
 
        if expect_totals and account.is_under('Assets', 'Liabilities'):
 
            opening_row = testutil.ODSCell.from_row(next(rows))
 
            assert opening_row[0].value == start_date
 
            assert opening_row[4].text == open_bal.format(None, empty='0', sep='\0')
 
            closing_bal += open_bal
 
        for expected in expect_posts:
 
            cells = iter(testutil.ODSCell.from_row(next(rows)))
 
            assert next(cells).value == expected.meta.date
 
            assert next(cells).text == (expected.meta.get('entity') or '')
 
            assert next(cells).text == (expected.meta.txn.narration or '')
 
            if expected.cost is None:
 
                assert not next(cells).text
 
                assert next(cells).value == norm_func(expected.units.number)
 
            else:
 
                assert next(cells).value == norm_func(expected.units.number)
 
                assert next(cells).value == norm_func(expected.at_cost().number)
 
        closing_row = testutil.ODSCell.from_row(next(rows))
 
        assert closing_row[0].value == end_date
 
        empty = '$0.00' if expect_posts else '0'
 
        assert closing_row[4].text == closing_bal.format(None, empty=empty, sep='\0')
 
        if expect_totals:
 
            closing_row = testutil.ODSCell.from_row(next(rows))
 
            assert closing_row[0].value == end_date
 
            empty = '$0.00' if expect_posts else '0'
 
            assert closing_row[4].text == closing_bal.format(None, empty=empty, sep='\0')
 

	
 

	
 
def get_sheet_names(ods):
 
    return [sheet.getAttribute('name').replace(' ', ':')
 
            for sheet in ods.getElementsByType(odf.table.Table)]
 

	
 
def check_oversize_logs(caplog, accounts, sheet_size):
 
    actual = {}
 
    for log in caplog.records:
 
        match = OVERSIZE_RE.match(log.message)
 
        if match:
 
            assert int(match.group(3).replace(',', '')) == sheet_size
 
            actual[match.group(1)] = int(match.group(2).replace(',', ''))
 
    expected = {name: size for name, size in accounts.items() if size > sheet_size}
 
    assert actual == expected
 

	
 
def test_plan_sheets_no_change():
 
    have = {
 
        Acct('Assets:Cash'): 10,
 
        Acct('Income:Donations'): 20,
 
    }
 
    want = ['Assets', 'Income']
 
    actual = ledger.LedgerODS.plan_sheets(have, want.copy(), 100)
 
    assert actual == want
 

	
 
@pytest.mark.parametrize('have', [
 
    {},
 
    {Acct('Income:Other'): 10},
 
    {Acct('Assets:Checking'): 20, Acct('Expenses:Other'): 15},
 
])
 
def test_plan_sheets_includes_accounts_without_transactions(have):
 
    want = ['Assets', 'Income', 'Expenses']
 
    actual = ledger.LedgerODS.plan_sheets(have, want.copy(), 100)
 
    assert actual == want
 

	
 
def test_plan_sheets_single_split():
 
    have = {
 
        Acct('Assets:Cash'): 60,
 
        Acct('Assets:Checking'): 80,
 
        Acct('Income:Donations'): 50,
 
        Acct('Expenses:Travel'): 90,
 
        Acct('Expenses:FilingFees'): 25,
 
    }
 
    want = ['Assets', 'Income', 'Expenses']
 
    actual = ledger.LedgerODS.plan_sheets(have, want, 100)
 
    assert actual == [
 
        'Assets:Checking',
 
        'Assets',
 
        'Income',
 
        'Expenses:Travel',
 
        'Expenses',
 
    ]
 

	
 
def test_plan_sheets_split_subtree():
 
    have = {
 
        Acct('Assets:Bank1:Checking'): 80,
 
        Acct('Assets:Bank1:Savings'): 10,
 
        Acct('Assets:Cash:USD'): 20,
 
        Acct('Assets:Cash:EUR'): 15,
 
    }
 
    actual = ledger.LedgerODS.plan_sheets(have, ['Assets'], 100)
 
    assert actual == ['Assets:Bank1', 'Assets']
 

	
 
def test_plan_sheets_ambiguous_split():
 
    have = {
 
        Acct('Assets:Bank1:Checking'): 80,
 
        Acct('Assets:Bank1:Savings'): 40,
 
        Acct('Assets:Receivable:Accounts'): 40,
 
        Acct('Assets:Cash'): 10,
 
    }
 
    actual = ledger.LedgerODS.plan_sheets(have, ['Assets'], 100)
 
    # :Savings cannot fit with :Checking, so it's important that the return
 
    # value disambiguate that.
 
    assert actual == ['Assets:Bank1:Checking', 'Assets']
 

	
 
def test_plan_sheets_oversize(caplog):
 
    have = {
 
        Acct('Assets:Checking'): 150,
 
        Acct('Assets:Cash'): 50,
 
    }
 
    actual = ledger.LedgerODS.plan_sheets(have, ['Assets'], 100)
 
    assert actual == ['Assets:Checking', 'Assets']
 
    check_oversize_logs(caplog, have, 100)
 

	
 
def test_plan_sheets_all_oversize(caplog):
 
    have = {
 
        Acct('Assets:Checking'): 150,
 
        Acct('Assets:Cash'): 150,
 
    }
 
    actual = ledger.LedgerODS.plan_sheets(have, ['Assets'], 100)
 
    # In this case, each account should appear in alphabetical order.
 
    assert actual == ['Assets:Cash', 'Assets:Checking']
 
    check_oversize_logs(caplog, have, 100)
 

	
 
def test_plan_sheets_full_split_required(caplog):
 
    have = {
 
        Acct('Assets:Bank:Savings'): 98,
 
        Acct('Assets:Bank:Checking'): 96,
 
        Acct('Assets:Bank:Investment'): 94,
 
    }
 
    actual = ledger.LedgerODS.plan_sheets(have, ['Assets'], 100)
 
    assert actual == ['Assets:Bank:Checking', 'Assets:Bank:Savings', 'Assets']
 
    assert not caplog.records
 

	
 
def build_report(ledger_entries, start_date, stop_date, *args, **kwargs):
 
    postings = list(data.Posting.from_entries(iter(ledger_entries)))
 
    with clean_account_meta():
 
        data.Account.load_openings_and_closings(iter(ledger_entries))
 
        report = ledger.LedgerODS(start_date, stop_date, *args, **kwargs)
 
        report.write(iter(postings))
 
    return postings, report
 

	
 
@pytest.mark.parametrize('start_date,stop_date', [
 
    (START_DATE, STOP_DATE),
 
    (START_DATE, MID_DATE),
 
    (MID_DATE, STOP_DATE),
 
    (START_DATE.replace(month=6), START_DATE.replace(month=12)),
 
    (STOP_DATE, STOP_DATE.replace(month=12)),
 
])
 
def test_date_range_report(ledger_entries, start_date, stop_date):
 
    postings = list(data.Posting.from_entries(iter(ledger_entries)))
 
    with clean_account_meta():
 
        data.Account.load_openings_and_closings(iter(ledger_entries))
 
        report = ledger.LedgerODS(start_date, stop_date)
 
        report.write(iter(postings))
 
    for _, expected in ExpectedPostings.group_by_account(postings):
 
        expected.check_report(report.document, start_date, stop_date)
 
    postings, report = build_report(ledger_entries, start_date, stop_date)
 
    expected = dict(ExpectedPostings.group_by_account(postings))
 
    for account in iter_accounts(ledger_entries):
 
        try:
 
            related = expected[account]
 
        except KeyError:
 
            ExpectedPostings.check_in_report(report.document, account, start_date, stop_date)
 
        else:
 
            related.check_report(report.document, start_date, stop_date)
 

	
 
@pytest.mark.parametrize('tot_accts', [
 
    (),
 
    ('Assets', 'Liabilities'),
 
    ('Income', 'Expenses'),
 
    ('Assets', 'Liabilities', 'Income', 'Expenses'),
 
])
 
def test_report_filter_totals(ledger_entries, tot_accts):
 
    postings, report = build_report(ledger_entries, START_DATE, STOP_DATE,
 
                                    totals_with_entries=tot_accts,
 
                                    totals_without_entries=tot_accts)
 
    expected = dict(ExpectedPostings.group_by_account(postings))
 
    for account in iter_accounts(ledger_entries):
 
        expect_totals = account.startswith(tot_accts)
 
        if account in expected and expected[account][-1].meta.date >= START_DATE:
 
            expected[account].check_report(report.document, START_DATE, STOP_DATE,
 
                                           expect_totals=expect_totals)
 
        elif expect_totals:
 
            ExpectedPostings.check_in_report(report.document, account)
 
        else:
 
            ExpectedPostings.check_not_in_report(report.document, account)
 

	
 
@pytest.mark.parametrize('accounts', [
 
    ('Income', 'Expenses'),
 
    ('Assets:Receivable', 'Liabilities:Payable'),
 
])
 
def test_account_names_report(ledger_entries, accounts):
 
    postings = list(data.Posting.from_entries(iter(ledger_entries)))
 
    with clean_account_meta():
 
        data.Account.load_openings_and_closings(iter(ledger_entries))
 
        report = ledger.LedgerODS(START_DATE, STOP_DATE, accounts=accounts)
 
        report.write(iter(postings))
 
    for key, expected in ExpectedPostings.group_by_account(postings):
 
        should_find = key.startswith(accounts)
 
        try:
 
            expected.check_report(report.document, START_DATE, STOP_DATE)
 
        except NotFound:
 
            assert not should_find
 
    postings, report = build_report(ledger_entries, START_DATE, STOP_DATE, accounts)
 
    expected = dict(ExpectedPostings.group_by_account(postings))
 
    for account in iter_accounts(ledger_entries):
 
        if account.startswith(accounts):
 
            expected[account].check_report(report.document, START_DATE, STOP_DATE)
 
        else:
 
            assert should_find
 
            ExpectedPostings.check_not_in_report(report.document, account)
 

	
 
def run_main(arglist, config=None):
 
    if config is None:
 
        config = testutil.TestConfig(
 
            books_path=testutil.test_path('books/ledger.beancount'),
 
            rt_client=testutil.RTClient(),
 
        )
 
    arglist.insert(0, '--output-file=-')
 
    output = io.BytesIO()
 
    errors = io.StringIO()
 
    with clean_account_meta():
 
        retcode = ledger.main(arglist, output, errors, config)
 
    output.seek(0)
 
    return retcode, output, errors
 

	
 
def test_main(ledger_entries):
 
    retcode, output, errors = run_main([
 
        '-b', START_DATE.isoformat(),
 
        '-e', STOP_DATE.isoformat(),
 
    ])
 
    output.seek(0)
 
    assert not errors.getvalue()
 
    assert retcode == 0
 
    ods = odf.opendocument.load(output)
 
    assert get_sheet_names(ods) == DEFAULT_REPORT_SHEETS[:]
 
    postings = data.Posting.from_entries(ledger_entries)
 
    for _, expected in ExpectedPostings.group_by_account(postings):
 
        expected.check_report(ods, START_DATE, STOP_DATE)
 
    postings = data.Posting.from_entries(iter(ledger_entries))
 
    expected = dict(ExpectedPostings.group_by_account(postings))
 
    for account in iter_accounts(ledger_entries):
 
        try:
 
            expected[account].check_report(ods, START_DATE, STOP_DATE)
 
        except KeyError:
 
            ExpectedPostings.check_in_report(ods, account)
 

	
 
@pytest.mark.parametrize('acct_arg', [
 
    'Liabilities',
 
    'Accounts payable',
 
])
 
def test_main_account_limit(ledger_entries, acct_arg):
 
    retcode, output, errors = run_main([
 
        '-a', acct_arg,
 
        '-b', START_DATE.isoformat(),
 
        '-e', STOP_DATE.isoformat(),
 
    ])
 
    assert not errors.getvalue()
 
    assert retcode == 0
 
    ods = odf.opendocument.load(output)
 
    assert get_sheet_names(ods) == ['Balance', 'Liabilities']
 
    postings = data.Posting.from_entries(ledger_entries)
 
    for account, expected in ExpectedPostings.group_by_account(postings):
 
        if account == 'Liabilities:UnearnedIncome':
 
            should_find = acct_arg == 'Liabilities'
 
        else:
 
            should_find = account.startswith('Liabilities')
 
        try:
 
            expected.check_report(ods, START_DATE, STOP_DATE)
 
        except NotFound:
 
            assert not should_find
 
        else:
 
            assert should_find
 

	
 
def test_main_account_classification_splits_hierarchy(ledger_entries):
 
    retcode, output, errors = run_main([
 
        '-a', 'Cash',
 
        '-b', START_DATE.isoformat(),
 
        '-e', STOP_DATE.isoformat(),
 
    ])
 
    assert not errors.getvalue()
 
    assert retcode == 0
 
    ods = odf.opendocument.load(output)
 
    assert get_sheet_names(ods) == ['Balance', 'Assets']
 
    postings = data.Posting.from_entries(ledger_entries)
 
    for account, expected in ExpectedPostings.group_by_account(postings):
 
        should_find = (account == 'Assets:Checking' or account == 'Assets:PayPal')
 
        try:
 
            expected.check_report(ods, START_DATE, STOP_DATE)
 
        except NotFound:
 
            assert not should_find, f"{account} not found in report"
 
        else:
 
            assert should_find, f"{account} in report but should be excluded"
 

	
 
@pytest.mark.parametrize('project,start_date,stop_date', [
 
    ('eighteen', START_DATE, MID_DATE.replace(day=30)),
 
    ('nineteen', MID_DATE, STOP_DATE),
 
])
 
def test_main_project_report(ledger_entries, project, start_date, stop_date):
 
    postings = data.Posting.from_entries(ledger_entries)
 
    postings = data.Posting.from_entries(iter(ledger_entries))
 
    for key, related in ExpectedPostings.group_by_meta(postings, 'project'):
 
        if key == project:
 
            break
 
    assert key == project
 
    retcode, output, errors = run_main([
 
        f'--begin={start_date.isoformat()}',
 
        f'--end={stop_date.isoformat()}',
 
        project,
 
    ])
 
    assert not errors.getvalue()
 
    assert retcode == 0
 
    ods = odf.opendocument.load(output)
 
    assert get_sheet_names(ods) == PROJECT_REPORT_SHEETS[:]
 
    for _, expected in ExpectedPostings.group_by_account(related):
 
        expected.check_report(ods, start_date, stop_date)
 
    expected = dict(ExpectedPostings.group_by_account(related))
 
    for account in iter_accounts(ledger_entries):
 
        try:
 
            expected[account].check_report(ods, start_date, stop_date)
 
        except KeyError:
 
            ExpectedPostings.check_not_in_report(ods, account)
 

	
 
@pytest.mark.parametrize('arg', [
 
    'Assets:NoneSuchBank',
 
    'Funny money',
 
])
 
def test_main_invalid_account(caplog, arg):
 
    retcode, output, errors = run_main(['-a', arg])
 
    assert retcode == 2
 
    assert any(log.message.endswith(f': {arg!r}') for log in caplog.records)
 

	
 
def test_main_no_postings(caplog):
 
    retcode, output, errors = run_main(['NonexistentProject'])
 
    assert retcode == 24
 
    assert any(log.levelname == 'WARNING' for log in caplog.records)
0 comments (0 inline, 0 general)