From ccbc447a353d7d6d908a0a7c95b26bf784fc3b40 2021-02-24 18:15:33 From: Brett Smith Date: 2021-02-24 18:15:33 Subject: [PATCH] query: Start new reporting tool. Ultimately this is going to be a tool that can generate nicely-formatted spreadsheets from arbitrary bean-queries. This initial version doesn't generate spreadsheets yet, but it does integrate our usual books-loading tools and rewrite rules into existing bean-query functionality, so it's a start. It also has some of the query building and parsing that higher-level spreadsheets will need. --- diff --git a/conservancy_beancount/reports/query.py b/conservancy_beancount/reports/query.py new file mode 100644 index 0000000000000000000000000000000000000000..302423f5c9335d45da502cbfb9c289912d0fca8c --- /dev/null +++ b/conservancy_beancount/reports/query.py @@ -0,0 +1,236 @@ +"""query.py - Report arbitrary queries with advanced loading and formatting""" +# Copyright © 2021 Brett Smith +# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0 +# +# Full copyright and licensing details can be found at toplevel file +# LICENSE.txt in the repository. + +import argparse +import collections +import datetime +import enum +import functools +import locale +import logging +import re +import sys + +from typing import ( + cast, + Callable, + Dict, + Iterable, + Iterator, + Mapping, + Optional, + Sequence, + TextIO, + Tuple, + Union, +) +from ..beancount_types import ( + MetaValue, + Posting, + Transaction, +) + +from decimal import Decimal +from pathlib import Path + +import beancount.query.shell as bc_query +import beancount.query.query_parser as bc_query_parser + +from . import core +from . import rewrite +from .. import books +from .. import cliutil +from .. import config as configmod +from .. import data + +PROGNAME = 'query-report' +QUERY_PARSER = bc_query_parser.Parser() +logger = logging.getLogger('conservancy_beancount.reports.query') + +class BooksLoader: + """Closure to load books with a zero-argument callable + + This matches the load interface that BQLShell expects. + """ + def __init__( + self, + books_loader: Optional[books.Loader], + start_date: Optional[datetime.date]=None, + stop_date: Optional[datetime.date]=None, + rewrite_rules: Sequence[rewrite.RewriteRuleset]=(), + ) -> None: + self.books_loader = books_loader + self.start_date = start_date + self.stop_date = stop_date + self.rewrite_rules = rewrite_rules + + def __call__(self) -> books.LoadResult: + result = books.Loader.dispatch(self.books_loader, self.start_date, self.stop_date) + for index, entry in enumerate(result.entries): + # entry might not be a Transaction; we catch that later. + # The type ignores are because the underlying Beancount type isn't + # type-checkable. + postings = data.Posting.from_txn(entry) # type:ignore[arg-type] + for ruleset in self.rewrite_rules: + postings = ruleset.rewrite(postings) + try: + result.entries[index] = entry._replace(postings=list(postings)) # type:ignore[call-arg] + except AttributeError: + pass + return result + + +class BQLShell(bc_query.BQLShell): + pass + + +class JoinOperator(enum.Enum): + AND = 'AND' + OR = 'OR' + + def join(self, parts: Iterable[str]) -> str: + return f' {self.value} '.join(parts) + + +class ReportFormat(enum.Enum): + TEXT = 'text' + TXT = TEXT + CSV = 'csv' + # ODS = 'ods' + + +def _date_condition( + date: Union[int, datetime.date], + year_to_date: Callable[[int], datetime.date], + op: str, +) -> str: + if isinstance(date, int): + date = year_to_date(date) + return f'date {op} {date.isoformat()}' + +def build_query( + args: argparse.Namespace, + fy: books.FiscalYear, + in_file: Optional[TextIO]=None, +) -> Optional[str]: + if not args.query: + args.query = [] if in_file is None else [line[:-1] for line in in_file] + if not any(re.search(r'\S', s) for s in args.query): + return None + plain_query = ' '.join(args.query) + try: + QUERY_PARSER.parse(plain_query) + except bc_query_parser.ParseError: + conds = [f'({args.join.join(args.query)})'] + if args.start_date is not None: + conds.append(_date_condition(args.start_date, fy.first_date, '>=')) + if args.stop_date is not None: + conds.append(_date_condition(args.stop_date, fy.next_fy_date, '<')) + return f'SELECT * WHERE {" AND ".join(conds)}' + else: + return plain_query + +def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace: + parser = argparse.ArgumentParser(prog=PROGNAME) + cliutil.add_version_argument(parser) + parser.add_argument( + '--begin', '--start', '-b', + dest='start_date', + metavar='DATE', + type=cliutil.year_or_date_arg, + help="""Begin loading entries from this fiscal year. When query-report +builds the query, it will include a condition `date >= DATE`. +""") + parser.add_argument( + '--end', '--stop', '-e', + dest='stop_date', + metavar='DATE', + type=cliutil.year_or_date_arg, + help="""End loading entries from this fiscal year. When query-report +builds the query, it will include a condition `date < DATE`. If you specify a +begin date but not an end date, the default end date will be the end of the +fiscal year of the begin date. +""") + cliutil.add_rewrite_rules_argument(parser) + format_arg = cliutil.EnumArgument(ReportFormat) + parser.add_argument( + '--report-type', '--format', '-t', '-f', + metavar='TYPE', + type=format_arg.enum_type, + help="""Format of report to generate. Choices are +{format_arg.choices_str()}. Default is guessed from your output filename +extension, or 'text' if that fails. +""") + parser.add_argument( + '--output-file', '-O', '-o', + metavar='PATH', + type=Path, + help="""Write the report to this file, or stdout when PATH is `-`. +The default is stdout for text and CSV reports, and a generated filename for +ODS reports. +""") + join_arg = cliutil.EnumArgument(JoinOperator) + parser.add_argument( + '--join', '-j', + metavar='OP', + type=join_arg.enum_type, + default=JoinOperator.AND, + help="""When you specify multiple WHERE conditions on the command line +and let query-report build the query, join conditions with this operator. +Choices are {join_arg.choices_str()}. Default 'and'. +""") + cliutil.add_loglevel_argument(parser) + parser.add_argument( + 'query', + nargs=argparse.ZERO_OR_MORE, + help="""Query to run non-interactively. You can specify a full query +you write yourself, or conditions to follow WHERE and let query-report build +the rest of the query. +""") + args = parser.parse_args(arglist) + return args + +def main(arglist: Optional[Sequence[str]]=None, + stdout: TextIO=sys.stdout, + stderr: TextIO=sys.stderr, + config: Optional[configmod.Config]=None, +) -> int: + args = parse_arguments(arglist) + cliutil.set_loglevel(logger, args.loglevel) + if config is None: + config = configmod.Config() + config.load_file() + + fy = config.fiscal_year_begin() + if args.stop_date is None and args.start_date is not None: + args.stop_date = fy.next_fy_date(args.start_date) + query = build_query(args, fy, None if sys.stdin.isatty() else sys.stdin) + is_interactive = query is None and sys.stdin.isatty() + if args.report_type is None: + try: + args.report_type = ReportFormat[args.output_file.suffix[1:].upper()] + except (AttributeError, KeyError): + args.report_type = ReportFormat.TEXT # if is_interactive else ReportFormat.ODS + load_func = BooksLoader( + config.books_loader(), + args.start_date, + args.stop_date, + [rewrite.RewriteRuleset.from_yaml(path) for path in args.rewrite_rules], + ) + shell = BQLShell(is_interactive, load_func, stdout, args.report_type.value) + shell.on_Reload() + if query is None: + shell.cmdloop() + else: + shell.onecmd(query) + + return cliutil.ExitCode.OK + +entry_point = cliutil.make_entry_point(__name__, PROGNAME) + +if __name__ == '__main__': + exit(entry_point()) diff --git a/setup.py b/setup.py index 9e661f166c050d94741fce67303dc8a8fffd84be..20992512260a39dd6838c19c6c6007795c16fbdb 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup setup( name='conservancy_beancount', description="Plugin, library, and reports for reading Conservancy's books", - version='1.18.1', + version='1.18.2', author='Software Freedom Conservancy', author_email='info@sfconservancy.org', license='GNU AGPLv3+', @@ -53,6 +53,7 @@ setup( 'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point', 'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point', 'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point', + 'query-report = conservancy_beancount.reports.query:entry_point', 'reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point', 'reconcile-statement = conservancy_beancount.reconcile.statement:entry_point', 'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point', diff --git a/tests/test_reports_query.py b/tests/test_reports_query.py new file mode 100644 index 0000000000000000000000000000000000000000..113407b029858c5095e0a73c9663612472c772a9 --- /dev/null +++ b/tests/test_reports_query.py @@ -0,0 +1,223 @@ +"""test_reports_query.py - Unit tests for query report""" +# Copyright © 2021 Brett Smith +# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0 +# +# Full copyright and licensing details can be found at toplevel file +# LICENSE.txt in the repository. + +import argparse +import collections +import copy +import csv +import datetime +import io +import itertools +import re + +import pytest + +from . import testutil + +from conservancy_beancount.books import FiscalYear +from conservancy_beancount.reports import query as qmod + +from decimal import Decimal + +class MockRewriteRuleset: + def __init__(self, multiplier=2): + self.multiplier = multiplier + + def rewrite(self, posts): + for post in posts: + number, currency = post.units + number *= self.multiplier + yield post._replace(units=testutil.Amount(number, currency)) + + +@pytest.fixture(scope='module') +def fy(): + return FiscalYear(3, 1) + +def pipe_main(arglist, config): + stdout = io.StringIO() + stderr = io.StringIO() + returncode = qmod.main(arglist, stdout, stderr, config) + return returncode, stdout, stderr + +def query_args(query=None, start_date=None, stop_date=None, join='AND'): + join = qmod.JoinOperator[join] + return argparse.Namespace(**locals()) + +def test_books_loader_empty(): + result = qmod.BooksLoader(None)() + assert not result.entries + assert len(result.errors) == 1 + +def test_books_loader_plain(): + books_path = testutil.test_path(f'books/books/2018.beancount') + loader = testutil.TestBooksLoader(books_path) + result = qmod.BooksLoader(loader)() + assert not result.errors + assert result.entries + min_date = datetime.date(2018, 3, 1) + assert all(ent.date >= min_date for ent in result.entries) + +def test_books_loader_rewrites(): + rewrites = [MockRewriteRuleset()] + books_path = testutil.test_path(f'books/books/2018.beancount') + loader = testutil.TestBooksLoader(books_path) + result = qmod.BooksLoader(loader, None, None, rewrites)() + assert not result.errors + assert result.entries + numbers = frozenset( + abs(post.units.number) + for entry in result.entries + for post in getattr(entry, 'postings', ()) + ) + assert numbers + assert all(abs(number) >= 40 for number in numbers) + +@pytest.mark.parametrize('file_s', [None, '', ' \n \n\n']) +def test_build_query_empty(fy, file_s): + args = query_args() + if file_s is None: + query = qmod.build_query(args, fy) + else: + with io.StringIO(file_s) as qfile: + query = qmod.build_query(args, fy, qfile) + assert query is None + +@pytest.mark.parametrize('query_str', [ + 'SELECT * WHERE date >= 2018-03-01', + 'select *', + 'JOURNAL "Income:Donations"', + 'journal', + 'BALANCES FROM year=2018', + 'balances', +]) +def test_build_query_in_arglist(fy, query_str): + args = query_args(query_str.split(), testutil.PAST_DATE, testutil.FUTURE_DATE) + assert qmod.build_query(args, fy) == query_str + +@pytest.mark.parametrize('count,join_op', enumerate(qmod.JoinOperator, 1)) +def test_build_query_where_arglist_conditions(fy, count, join_op): + conds = ['account ~ "^Income:"', 'year >= 2018'][:count] + args = query_args(conds, join=join_op.name) + query = qmod.build_query(args, fy) + assert query.startswith('SELECT ') + cond_index = query.index(' WHERE ') + 7 + assert query[cond_index:] == '({})'.format(join_op.join(conds)) + +@pytest.mark.parametrize('argname,date_arg', itertools.product( + ['start_date', 'stop_date'], + [testutil.FY_START_DATE, testutil.FY_START_DATE.year], +)) +def test_build_query_one_date_arg(fy, argname, date_arg): + query_kwargs = { + argname: date_arg, + 'query': ['flag = "*"', 'flag = "!"'], + 'join': 'OR', + } + args = query_args(**query_kwargs) + query = qmod.build_query(args, fy) + assert query.startswith('SELECT ') + cond_index = query.index(' WHERE ') + 7 + if argname == 'start_date': + expect_op = '>=' + year_to_date = fy.first_date + else: + expect_op = '<' + year_to_date = fy.next_fy_date + if not isinstance(date_arg, datetime.date): + date_arg = year_to_date(date_arg) + assert query[cond_index:] == '({}) AND date {} {}'.format( + ' OR '.join(query_kwargs['query']), expect_op, date_arg.isoformat(), + ) + +@pytest.mark.parametrize('start_date,stop_date', itertools.product( + [testutil.PAST_DATE, testutil.PAST_DATE.year], + [testutil.FUTURE_DATE, testutil.FUTURE_DATE.year], +)) +def test_build_query_two_date_args(fy, start_date, stop_date): + args = query_args(['account ~ "^Equity:"'], start_date, stop_date, 'AND') + query = qmod.build_query(args, fy) + assert query.startswith('SELECT ') + cond_index = query.index(' WHERE ') + 7 + if isinstance(start_date, int): + start_date = fy.first_date(start_date) + if isinstance(stop_date, int): + stop_date = fy.next_fy_date(stop_date) + assert query[cond_index:] == '({}) AND date >= {} AND date < {}'.format( + args.query[0], start_date.isoformat(), stop_date.isoformat(), + ) + +def test_build_query_plain_from_file(fy): + with io.StringIO("SELECT *\n WHERE account ~ '^Assets:';\n") as qfile: + query = qmod.build_query(query_args(), fy, qfile) + assert re.fullmatch(r"SELECT \*\s+WHERE account ~ '\^Assets:';\s*", query) + +def test_build_query_from_file_where_clauses(fy): + conds = ["account ~ '^Income:'", "account ~ '^Expenses:'"] + args = query_args(None, testutil.PAST_DATE, testutil.FUTURE_DATE, 'OR') + with io.StringIO(''.join(f'{s}\n' for s in conds)) as qfile: + query = qmod.build_query(args, fy, qfile) + assert query.startswith('SELECT ') + cond_index = query.index(' WHERE ') + 7 + assert query[cond_index:] == '({}) AND date >= {} AND date < {}'.format( + ' OR '.join(conds), + testutil.PAST_DATE.isoformat(), + testutil.FUTURE_DATE.isoformat(), + ) + +@pytest.mark.parametrize('arglist,fy', testutil.combine_values( + [['--report-type', 'text'], ['--format=text'], ['-f', 'txt']], + range(2018, 2021), +)) +def test_text_query(arglist, fy): + books_path = testutil.test_path(f'books/books/{fy}.beancount') + config = testutil.TestConfig(books_path=books_path) + arglist += ['select', 'date,', 'narration,', 'account,', 'position'] + returncode, stdout, stderr = pipe_main(arglist, config) + assert returncode == 0 + stdout.seek(0) + lines = iter(stdout) + next(lines); next(lines) # Skip header + for count, line in enumerate(lines, 1): + assert re.match(rf'^{fy}-\d\d-\d\d\s+{fy} donation\b', line) + assert count >= 2 + +@pytest.mark.parametrize('arglist,fy', testutil.combine_values( + [['--format=csv'], ['-f', 'csv'], ['-t', 'csv']], + range(2018, 2021), +)) +def test_csv_query(arglist, fy): + books_path = testutil.test_path(f'books/books/{fy}.beancount') + config = testutil.TestConfig(books_path=books_path) + arglist += ['select', 'date,', 'narration,', 'account,', 'position'] + returncode, stdout, stderr = pipe_main(arglist, config) + assert returncode == 0 + stdout.seek(0) + for count, row in enumerate(csv.DictReader(stdout), 1): + assert re.fullmatch(rf'{fy}-\d\d-\d\d', row['date']) + assert row['narration'] == f'{fy} donation' + assert count >= 2 + +@pytest.mark.parametrize('end_index', range(3)) +def test_rewrite_query(end_index): + books_path = testutil.test_path(f'books/books/2018.beancount') + config = testutil.TestConfig(books_path=books_path) + accounts = ['Assets', 'Income'] + expected = frozenset(accounts[:end_index]) + rewrite_paths = [ + testutil.test_path(f'userconfig/Rewrite{s}.yml') + for s in expected + ] + arglist = [f'--rewrite-rules={path}' for path in rewrite_paths] + arglist.append('--format=txt') + arglist.append('select any_meta("root") as root') + returncode, stdout, stderr = pipe_main(arglist, config) + assert returncode == 0 + stdout.seek(0) + actual = frozenset(line.rstrip('\n') for line in stdout) + assert expected.issubset(actual) + assert frozenset(accounts).difference(expected).isdisjoint(actual) diff --git a/tests/userconfig/RewriteAssets.yml b/tests/userconfig/RewriteAssets.yml new file mode 100644 index 0000000000000000000000000000000000000000..1f128da3d401841d24836b6d041031ebc00414ff --- /dev/null +++ b/tests/userconfig/RewriteAssets.yml @@ -0,0 +1,2 @@ +- if: [.account in Assets] + then: [root = Assets] diff --git a/tests/userconfig/RewriteIncome.yml b/tests/userconfig/RewriteIncome.yml new file mode 100644 index 0000000000000000000000000000000000000000..f20fee0c596c1ae9fde7a05eafadc54717c1a30f --- /dev/null +++ b/tests/userconfig/RewriteIncome.yml @@ -0,0 +1,2 @@ +- if: [.account in Income] + then: [root = Income]