Changeset - ccbc447a353d
[Not reviewed]
0 1 4
Brett Smith - 3 years ago 2021-02-24 18:15:33
brettcsmith@brettcsmith.org
query: Start new reporting tool.

Ultimately this is going to be a tool that can generate nicely-formatted
spreadsheets from arbitrary bean-queries. This initial version doesn't
generate spreadsheets yet, but it does integrate our usual books-loading
tools and rewrite rules into existing bean-query functionality, so it's a
start. It also has some of the query building and parsing that higher-level
spreadsheets will need.
5 files changed with 465 insertions and 1 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/query.py
Show inline comments
 
new file 100644
 
"""query.py - Report arbitrary queries with advanced loading and formatting"""
 
# Copyright © 2021  Brett Smith
 
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 
#
 
# Full copyright and licensing details can be found at toplevel file
 
# LICENSE.txt in the repository.
 

	
 
import argparse
 
import collections
 
import datetime
 
import enum
 
import functools
 
import locale
 
import logging
 
import re
 
import sys
 

	
 
from typing import (
 
    cast,
 
    Callable,
 
    Dict,
 
    Iterable,
 
    Iterator,
 
    Mapping,
 
    Optional,
 
    Sequence,
 
    TextIO,
 
    Tuple,
 
    Union,
 
)
 
from ..beancount_types import (
 
    MetaValue,
 
    Posting,
 
    Transaction,
 
)
 

	
 
from decimal import Decimal
 
from pathlib import Path
 

	
 
import beancount.query.shell as bc_query
 
import beancount.query.query_parser as bc_query_parser
 

	
 
from . import core
 
from . import rewrite
 
from .. import books
 
from .. import cliutil
 
from .. import config as configmod
 
from .. import data
 

	
 
PROGNAME = 'query-report'
 
QUERY_PARSER = bc_query_parser.Parser()
 
logger = logging.getLogger('conservancy_beancount.reports.query')
 

	
 
class BooksLoader:
 
    """Closure to load books with a zero-argument callable
 

	
 
    This matches the load interface that BQLShell expects.
 
    """
 
    def __init__(
 
            self,
 
            books_loader: Optional[books.Loader],
 
            start_date: Optional[datetime.date]=None,
 
            stop_date: Optional[datetime.date]=None,
 
            rewrite_rules: Sequence[rewrite.RewriteRuleset]=(),
 
    ) -> None:
 
        self.books_loader = books_loader
 
        self.start_date = start_date
 
        self.stop_date = stop_date
 
        self.rewrite_rules = rewrite_rules
 

	
 
    def __call__(self) -> books.LoadResult:
 
        result = books.Loader.dispatch(self.books_loader, self.start_date, self.stop_date)
 
        for index, entry in enumerate(result.entries):
 
            # entry might not be a Transaction; we catch that later.
 
            # The type ignores are because the underlying Beancount type isn't
 
            # type-checkable.
 
            postings = data.Posting.from_txn(entry)  # type:ignore[arg-type]
 
            for ruleset in self.rewrite_rules:
 
                postings = ruleset.rewrite(postings)
 
            try:
 
                result.entries[index] = entry._replace(postings=list(postings))  # type:ignore[call-arg]
 
            except AttributeError:
 
                pass
 
        return result
 

	
 

	
 
class BQLShell(bc_query.BQLShell):
 
    pass
 

	
 

	
 
class JoinOperator(enum.Enum):
 
    AND = 'AND'
 
    OR = 'OR'
 

	
 
    def join(self, parts: Iterable[str]) -> str:
 
        return f' {self.value} '.join(parts)
 

	
 

	
 
class ReportFormat(enum.Enum):
 
    TEXT = 'text'
 
    TXT = TEXT
 
    CSV = 'csv'
 
    # ODS = 'ods'
 

	
 

	
 
def _date_condition(
 
        date: Union[int, datetime.date],
 
        year_to_date: Callable[[int], datetime.date],
 
        op: str,
 
) -> str:
 
    if isinstance(date, int):
 
        date = year_to_date(date)
 
    return f'date {op} {date.isoformat()}'
 

	
 
def build_query(
 
        args: argparse.Namespace,
 
        fy: books.FiscalYear,
 
        in_file: Optional[TextIO]=None,
 
) -> Optional[str]:
 
    if not args.query:
 
        args.query = [] if in_file is None else [line[:-1] for line in in_file]
 
        if not any(re.search(r'\S', s) for s in args.query):
 
            return None
 
    plain_query = ' '.join(args.query)
 
    try:
 
        QUERY_PARSER.parse(plain_query)
 
    except bc_query_parser.ParseError:
 
        conds = [f'({args.join.join(args.query)})']
 
        if args.start_date is not None:
 
            conds.append(_date_condition(args.start_date, fy.first_date, '>='))
 
        if args.stop_date is not None:
 
            conds.append(_date_condition(args.stop_date, fy.next_fy_date, '<'))
 
        return f'SELECT * WHERE {" AND ".join(conds)}'
 
    else:
 
        return plain_query
 

	
 
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser(prog=PROGNAME)
 
    cliutil.add_version_argument(parser)
 
    parser.add_argument(
 
        '--begin', '--start', '-b',
 
        dest='start_date',
 
        metavar='DATE',
 
        type=cliutil.year_or_date_arg,
 
        help="""Begin loading entries from this fiscal year. When query-report
 
builds the query, it will include a condition `date >= DATE`.
 
""")
 
    parser.add_argument(
 
        '--end', '--stop', '-e',
 
        dest='stop_date',
 
        metavar='DATE',
 
        type=cliutil.year_or_date_arg,
 
        help="""End loading entries from this fiscal year. When query-report
 
builds the query, it will include a condition `date < DATE`. If you specify a
 
begin date but not an end date, the default end date will be the end of the
 
fiscal year of the begin date.
 
""")
 
    cliutil.add_rewrite_rules_argument(parser)
 
    format_arg = cliutil.EnumArgument(ReportFormat)
 
    parser.add_argument(
 
        '--report-type', '--format', '-t', '-f',
 
        metavar='TYPE',
 
        type=format_arg.enum_type,
 
        help="""Format of report to generate. Choices are
 
{format_arg.choices_str()}. Default is guessed from your output filename
 
extension, or 'text' if that fails.
 
""")
 
    parser.add_argument(
 
        '--output-file', '-O', '-o',
 
        metavar='PATH',
 
        type=Path,
 
        help="""Write the report to this file, or stdout when PATH is `-`.
 
The default is stdout for text and CSV reports, and a generated filename for
 
ODS reports.
 
""")
 
    join_arg = cliutil.EnumArgument(JoinOperator)
 
    parser.add_argument(
 
        '--join', '-j',
 
        metavar='OP',
 
        type=join_arg.enum_type,
 
        default=JoinOperator.AND,
 
        help="""When you specify multiple WHERE conditions on the command line
 
and let query-report build the query, join conditions with this operator.
 
Choices are {join_arg.choices_str()}. Default 'and'.
 
""")
 
    cliutil.add_loglevel_argument(parser)
 
    parser.add_argument(
 
        'query',
 
        nargs=argparse.ZERO_OR_MORE,
 
        help="""Query to run non-interactively. You can specify a full query
 
you write yourself, or conditions to follow WHERE and let query-report build
 
the rest of the query.
 
""")
 
    args = parser.parse_args(arglist)
 
    return args
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
 
         stdout: TextIO=sys.stdout,
 
         stderr: TextIO=sys.stderr,
 
         config: Optional[configmod.Config]=None,
 
) -> int:
 
    args = parse_arguments(arglist)
 
    cliutil.set_loglevel(logger, args.loglevel)
 
    if config is None:
 
        config = configmod.Config()
 
        config.load_file()
 

	
 
    fy = config.fiscal_year_begin()
 
    if args.stop_date is None and args.start_date is not None:
 
        args.stop_date = fy.next_fy_date(args.start_date)
 
    query = build_query(args, fy, None if sys.stdin.isatty() else sys.stdin)
 
    is_interactive = query is None and sys.stdin.isatty()
 
    if args.report_type is None:
 
        try:
 
            args.report_type = ReportFormat[args.output_file.suffix[1:].upper()]
 
        except (AttributeError, KeyError):
 
            args.report_type = ReportFormat.TEXT # if is_interactive else ReportFormat.ODS
 
    load_func = BooksLoader(
 
        config.books_loader(),
 
        args.start_date,
 
        args.stop_date,
 
        [rewrite.RewriteRuleset.from_yaml(path) for path in args.rewrite_rules],
 
    )
 
    shell = BQLShell(is_interactive, load_func, stdout, args.report_type.value)
 
    shell.on_Reload()
 
    if query is None:
 
        shell.cmdloop()
 
    else:
 
        shell.onecmd(query)
 

	
 
    return cliutil.ExitCode.OK
 

	
 
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
 

	
 
if __name__ == '__main__':
 
    exit(entry_point())
setup.py
Show inline comments
...
 
@@ -5,7 +5,7 @@ from setuptools import setup
 
setup(
 
    name='conservancy_beancount',
 
    description="Plugin, library, and reports for reading Conservancy's books",
 
    version='1.18.1',
 
    version='1.18.2',
 
    author='Software Freedom Conservancy',
 
    author_email='info@sfconservancy.org',
 
    license='GNU AGPLv3+',
...
 
@@ -53,6 +53,7 @@ setup(
 
            'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
 
            'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
 
            'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
 
            'query-report = conservancy_beancount.reports.query:entry_point',
 
            'reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point',
 
            'reconcile-statement = conservancy_beancount.reconcile.statement:entry_point',
 
            'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
tests/test_reports_query.py
Show inline comments
 
new file 100644
 
"""test_reports_query.py - Unit tests for query report"""
 
# Copyright © 2021  Brett Smith
 
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 
#
 
# Full copyright and licensing details can be found at toplevel file
 
# LICENSE.txt in the repository.
 

	
 
import argparse
 
import collections
 
import copy
 
import csv
 
import datetime
 
import io
 
import itertools
 
import re
 

	
 
import pytest
 

	
 
from . import testutil
 

	
 
from conservancy_beancount.books import FiscalYear
 
from conservancy_beancount.reports import query as qmod
 

	
 
from decimal import Decimal
 

	
 
class MockRewriteRuleset:
 
    def __init__(self, multiplier=2):
 
        self.multiplier = multiplier
 

	
 
    def rewrite(self, posts):
 
        for post in posts:
 
            number, currency = post.units
 
            number *= self.multiplier
 
            yield post._replace(units=testutil.Amount(number, currency))
 

	
 

	
 
@pytest.fixture(scope='module')
 
def fy():
 
    return FiscalYear(3, 1)
 

	
 
def pipe_main(arglist, config):
 
    stdout = io.StringIO()
 
    stderr = io.StringIO()
 
    returncode = qmod.main(arglist, stdout, stderr, config)
 
    return returncode, stdout, stderr
 

	
 
def query_args(query=None, start_date=None, stop_date=None, join='AND'):
 
    join = qmod.JoinOperator[join]
 
    return argparse.Namespace(**locals())
 

	
 
def test_books_loader_empty():
 
    result = qmod.BooksLoader(None)()
 
    assert not result.entries
 
    assert len(result.errors) == 1
 

	
 
def test_books_loader_plain():
 
    books_path = testutil.test_path(f'books/books/2018.beancount')
 
    loader = testutil.TestBooksLoader(books_path)
 
    result = qmod.BooksLoader(loader)()
 
    assert not result.errors
 
    assert result.entries
 
    min_date = datetime.date(2018, 3, 1)
 
    assert all(ent.date >= min_date for ent in result.entries)
 

	
 
def test_books_loader_rewrites():
 
    rewrites = [MockRewriteRuleset()]
 
    books_path = testutil.test_path(f'books/books/2018.beancount')
 
    loader = testutil.TestBooksLoader(books_path)
 
    result = qmod.BooksLoader(loader, None, None, rewrites)()
 
    assert not result.errors
 
    assert result.entries
 
    numbers = frozenset(
 
        abs(post.units.number)
 
        for entry in result.entries
 
        for post in getattr(entry, 'postings', ())
 
    )
 
    assert numbers
 
    assert all(abs(number) >= 40 for number in numbers)
 

	
 
@pytest.mark.parametrize('file_s', [None, '', ' \n \n\n'])
 
def test_build_query_empty(fy, file_s):
 
    args = query_args()
 
    if file_s is None:
 
        query = qmod.build_query(args, fy)
 
    else:
 
        with io.StringIO(file_s) as qfile:
 
            query = qmod.build_query(args, fy, qfile)
 
    assert query is None
 

	
 
@pytest.mark.parametrize('query_str', [
 
    'SELECT * WHERE date >= 2018-03-01',
 
    'select *',
 
    'JOURNAL "Income:Donations"',
 
    'journal',
 
    'BALANCES FROM year=2018',
 
    'balances',
 
])
 
def test_build_query_in_arglist(fy, query_str):
 
    args = query_args(query_str.split(), testutil.PAST_DATE, testutil.FUTURE_DATE)
 
    assert qmod.build_query(args, fy) == query_str
 

	
 
@pytest.mark.parametrize('count,join_op', enumerate(qmod.JoinOperator, 1))
 
def test_build_query_where_arglist_conditions(fy, count, join_op):
 
    conds = ['account ~ "^Income:"', 'year >= 2018'][:count]
 
    args = query_args(conds, join=join_op.name)
 
    query = qmod.build_query(args, fy)
 
    assert query.startswith('SELECT ')
 
    cond_index = query.index(' WHERE ') + 7
 
    assert query[cond_index:] == '({})'.format(join_op.join(conds))
 

	
 
@pytest.mark.parametrize('argname,date_arg', itertools.product(
 
    ['start_date', 'stop_date'],
 
    [testutil.FY_START_DATE, testutil.FY_START_DATE.year],
 
))
 
def test_build_query_one_date_arg(fy, argname, date_arg):
 
    query_kwargs = {
 
        argname: date_arg,
 
        'query': ['flag = "*"', 'flag = "!"'],
 
        'join': 'OR',
 
    }
 
    args = query_args(**query_kwargs)
 
    query = qmod.build_query(args, fy)
 
    assert query.startswith('SELECT ')
 
    cond_index = query.index(' WHERE ') + 7
 
    if argname == 'start_date':
 
        expect_op = '>='
 
        year_to_date = fy.first_date
 
    else:
 
        expect_op = '<'
 
        year_to_date = fy.next_fy_date
 
    if not isinstance(date_arg, datetime.date):
 
        date_arg = year_to_date(date_arg)
 
    assert query[cond_index:] == '({}) AND date {} {}'.format(
 
        ' OR '.join(query_kwargs['query']), expect_op, date_arg.isoformat(),
 
    )
 

	
 
@pytest.mark.parametrize('start_date,stop_date', itertools.product(
 
    [testutil.PAST_DATE, testutil.PAST_DATE.year],
 
    [testutil.FUTURE_DATE, testutil.FUTURE_DATE.year],
 
))
 
def test_build_query_two_date_args(fy, start_date, stop_date):
 
    args = query_args(['account ~ "^Equity:"'], start_date, stop_date, 'AND')
 
    query = qmod.build_query(args, fy)
 
    assert query.startswith('SELECT ')
 
    cond_index = query.index(' WHERE ') + 7
 
    if isinstance(start_date, int):
 
        start_date = fy.first_date(start_date)
 
    if isinstance(stop_date, int):
 
        stop_date = fy.next_fy_date(stop_date)
 
    assert query[cond_index:] == '({}) AND date >= {} AND date < {}'.format(
 
        args.query[0], start_date.isoformat(), stop_date.isoformat(),
 
    )
 

	
 
def test_build_query_plain_from_file(fy):
 
    with io.StringIO("SELECT *\n WHERE account ~ '^Assets:';\n") as qfile:
 
        query = qmod.build_query(query_args(), fy, qfile)
 
    assert re.fullmatch(r"SELECT \*\s+WHERE account ~ '\^Assets:';\s*", query)
 

	
 
def test_build_query_from_file_where_clauses(fy):
 
    conds = ["account ~ '^Income:'", "account ~ '^Expenses:'"]
 
    args = query_args(None, testutil.PAST_DATE, testutil.FUTURE_DATE, 'OR')
 
    with io.StringIO(''.join(f'{s}\n' for s in conds)) as qfile:
 
        query = qmod.build_query(args, fy, qfile)
 
    assert query.startswith('SELECT ')
 
    cond_index = query.index(' WHERE ') + 7
 
    assert query[cond_index:] == '({}) AND date >= {} AND date < {}'.format(
 
        ' OR '.join(conds),
 
        testutil.PAST_DATE.isoformat(),
 
        testutil.FUTURE_DATE.isoformat(),
 
    )
 

	
 
@pytest.mark.parametrize('arglist,fy', testutil.combine_values(
 
    [['--report-type', 'text'], ['--format=text'], ['-f', 'txt']],
 
    range(2018, 2021),
 
))
 
def test_text_query(arglist, fy):
 
    books_path = testutil.test_path(f'books/books/{fy}.beancount')
 
    config = testutil.TestConfig(books_path=books_path)
 
    arglist += ['select', 'date,', 'narration,', 'account,', 'position']
 
    returncode, stdout, stderr = pipe_main(arglist, config)
 
    assert returncode == 0
 
    stdout.seek(0)
 
    lines = iter(stdout)
 
    next(lines); next(lines)  # Skip header
 
    for count, line in enumerate(lines, 1):
 
        assert re.match(rf'^{fy}-\d\d-\d\d\s+{fy} donation\b', line)
 
    assert count >= 2
 

	
 
@pytest.mark.parametrize('arglist,fy', testutil.combine_values(
 
    [['--format=csv'], ['-f', 'csv'], ['-t', 'csv']],
 
    range(2018, 2021),
 
))
 
def test_csv_query(arglist, fy):
 
    books_path = testutil.test_path(f'books/books/{fy}.beancount')
 
    config = testutil.TestConfig(books_path=books_path)
 
    arglist += ['select', 'date,', 'narration,', 'account,', 'position']
 
    returncode, stdout, stderr = pipe_main(arglist, config)
 
    assert returncode == 0
 
    stdout.seek(0)
 
    for count, row in enumerate(csv.DictReader(stdout), 1):
 
        assert re.fullmatch(rf'{fy}-\d\d-\d\d', row['date'])
 
        assert row['narration'] == f'{fy} donation'
 
    assert count >= 2
 

	
 
@pytest.mark.parametrize('end_index', range(3))
 
def test_rewrite_query(end_index):
 
    books_path = testutil.test_path(f'books/books/2018.beancount')
 
    config = testutil.TestConfig(books_path=books_path)
 
    accounts = ['Assets', 'Income']
 
    expected = frozenset(accounts[:end_index])
 
    rewrite_paths = [
 
        testutil.test_path(f'userconfig/Rewrite{s}.yml')
 
        for s in expected
 
    ]
 
    arglist = [f'--rewrite-rules={path}' for path in rewrite_paths]
 
    arglist.append('--format=txt')
 
    arglist.append('select any_meta("root") as root')
 
    returncode, stdout, stderr = pipe_main(arglist, config)
 
    assert returncode == 0
 
    stdout.seek(0)
 
    actual = frozenset(line.rstrip('\n') for line in stdout)
 
    assert expected.issubset(actual)
 
    assert frozenset(accounts).difference(expected).isdisjoint(actual)
tests/userconfig/RewriteAssets.yml
Show inline comments
 
new file 100644
 
- if: [.account in Assets]
 
  then: [root = Assets]
tests/userconfig/RewriteIncome.yml
Show inline comments
 
new file 100644
 
- if: [.account in Income]
 
  then: [root = Income]
0 comments (0 inline, 0 general)