Changeset - b599ddee5de9
[Not reviewed]
0 1 0
Brett Smith - 3 years ago 2021-03-06 14:33:10
brettcsmith@brettcsmith.org
query: Skip rewrite rule logic when none are loaded.

This saves a few seconds of load time for the user on each run and is easy
to implement, so it's worth it.
1 file changed with 4 insertions and 0 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/query.py
Show inline comments
 
"""query.py - Report arbitrary queries with advanced loading and formatting"""
 
# Copyright © 2021  Brett Smith
 
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 
#
 
# Full copyright and licensing details can be found at toplevel file
 
# LICENSE.txt in the repository.
 

	
 
import argparse
 
import contextlib
 
import datetime
 
import enum
 
import itertools
 
import logging
 
import re
 
import sys
 

	
 
from typing import (
 
    cast,
 
    AbstractSet,
 
    Callable,
 
    Dict,
 
    Iterable,
 
    Iterator,
 
    Mapping,
 
    NamedTuple,
 
    Optional,
 
    Sequence,
 
    TextIO,
 
    Tuple,
 
    Type,
 
    Union,
 
)
 
from ..beancount_types import (
 
    MetaValue,
 
    Posting,
 
    Transaction,
 
)
 

	
 
from decimal import Decimal
 
from pathlib import Path
 

	
 
import beancount.query.numberify as bc_query_numberify
 
import beancount.query.query_compile as bc_query_compile
 
import beancount.query.query_env as bc_query_env
 
import beancount.query.query_execute as bc_query_execute
 
import beancount.query.query_parser as bc_query_parser
 
import beancount.query.query_render as bc_query_render
 
import beancount.query.shell as bc_query_shell
 

	
 
from . import core
 
from . import rewrite
 
from .. import books
 
from .. import cliutil
 
from .. import config as configmod
 
from .. import data
 

	
 
# Everything yielded by iterating bean-query's TargetsEnvironment.columns and
# TargetsEnvironment.functions (presumably the built-in column and function
# names). build_query() passes a --select field found in this set through to
# the query untouched instead of wrapping it in ANY_META().
BUILTIN_FIELDS: AbstractSet[str] = frozenset(itertools.chain(
    bc_query_env.TargetsEnvironment.columns,  # type:ignore[has-type]
    bc_query_env.TargetsEnvironment.functions,  # type:ignore[has-type]
))
# Program name handed to argparse in parse_arguments().
PROGNAME = 'query-report'
# One shared parser instance. build_query() uses it to decide whether the
# user's input already parses as a complete query.
QUERY_PARSER = bc_query_parser.Parser()
logger = logging.getLogger('conservancy_beancount.reports.query')

# Annotation aliases for the two arguments the BQLShell._render_* methods
# receive: the (name, type) pairs describing result columns, and the result
# rows themselves.
RowTypes = Sequence[Tuple[str, Type]]
Rows = Sequence[NamedTuple]
 

	
 
class BooksLoader:
    """Remember how to load the books so they can be loaded with a bare call

    BQLShell expects its load function to take no arguments. An instance of
    this class stores the loader, the date range, and the rewrite rules up
    front, and does the actual work when it is called.
    """
    def __init__(
            self,
            books_loader: Optional[books.Loader],
            start_date: Optional[datetime.date]=None,
            stop_date: Optional[datetime.date]=None,
            rewrite_rules: Sequence[rewrite.RewriteRuleset]=(),
    ) -> None:
        self.books_loader = books_loader
        self.start_date = start_date
        self.stop_date = stop_date
        self.rewrite_rules = rewrite_rules

    def __call__(self) -> books.LoadResult:
        """Load the books and return them with any rewrite rules applied

        When rewrite rules are set, each loaded entry's postings are run
        through every ruleset in order and the entry is replaced in place in
        the result's entries list. With no rules, the load result is returned
        exactly as the loader produced it.
        """
        logger.debug("BooksLoader called")
        loaded = books.Loader.dispatch(self.books_loader, self.start_date, self.stop_date)
        logger.debug("books loaded from Beancount")
        if not self.rewrite_rules:
            # Nothing to rewrite, so skip walking the entries entirely.
            return loaded

        for position, entry in enumerate(loaded.entries):
            # Not every entry is necessarily a Transaction. Rather than
            # checking here, an AttributeError from the replacement step
            # below is treated as "leave this entry alone".
            # The type ignores are needed because the underlying Beancount
            # type isn't type-checkable.
            rewritten = data.Posting.from_txn(entry)  # type:ignore[arg-type]
            for ruleset in self.rewrite_rules:
                rewritten = ruleset.rewrite(rewritten)
            with contextlib.suppress(AttributeError):
                loaded.entries[position] = entry._replace(postings=list(rewritten))  # type:ignore[call-arg]
        logger.debug("rewrite rules applied")
        return loaded
 

	
 

	
 
class BQLShell(bc_query_shell.BQLShell):
    # on_Select is documented with comments rather than a docstring.
    # NOTE(review): the parent shell may use on_* docstrings as interactive
    # help text, so adding one could change `help` output - confirm before
    # converting this to a docstring.
    #
    # on_Select runs one SELECT statement and writes the result to the
    # shell's output. The renderer is the method named `_render_FORMAT`,
    # where FORMAT is the shell's `format` variable. An unknown format, or
    # any exception raised while compiling, executing, or numberifying the
    # query, is logged as an error and ends the call without rendering.
    def on_Select(self, statement: str) -> None:
        output_format: str = self.vars['format']
        render_func = getattr(self, f'_render_{output_format}', None)
        if render_func is None:
            logger.error("unknown output format %r", output_format)
            return
        is_ods = output_format == 'ods'

        try:
            logger.debug("compiling query")
            query = bc_query_compile.compile(
                statement, self.env_targets, self.env_postings, self.env_entries,
            )
            logger.debug("executing query")
            row_types, rows = bc_query_execute.execute_query(
                query, self.entries, self.options_map,
            )
            # ODS output is never numberified here, whatever the setting.
            if self.vars['numberify'] and not is_ods:
                logger.debug("numberifying query")
                row_types, rows = bc_query_numberify.numberify_results(
                    row_types, rows, self.options_map['dcontext'].build(),
                )
        except Exception as error:
            # Report the problem to the user. The traceback is only attached
            # when debug logging is enabled.
            message = str(error)
            logger.error(message, exc_info=logger.isEnabledFor(logging.DEBUG))
            return

        # An empty result prints a placeholder, except for ODS, which always
        # goes through its renderer.
        if rows or is_ods:
            logger.debug("rendering query as %s", output_format)
            render_func(row_types, rows)
        else:
            print("(empty)", file=self.outfile)

    def _render_csv(self, row_types: RowTypes, rows: Rows) -> None:
        """Write the query results to the shell's output file as CSV"""
        dcontext = self.options_map['dcontext']
        expand = self.vars['expand']
        bc_query_render.render_csv(row_types, rows, dcontext, self.outfile, expand)

    def _render_text(self, row_types: RowTypes, rows: Rows) -> None:
        """Write the query results as a text table

        An interactive shell sends the table through the pager, which is
        closed when rendering finishes. Otherwise the table goes directly to
        the shell's output file.
        """
        with contextlib.ExitStack() as stack:
            output = (
                stack.enter_context(self.get_pager())
                if self.is_interactive
                else self.outfile
            )
            shell_vars = self.vars
            bc_query_render.render_text(
                row_types,
                rows,
                self.options_map['dcontext'],
                output,
                shell_vars['expand'],
                shell_vars['boxed'],
                shell_vars['spaced'],
            )
 

	
 

	
 
class JoinOperator(enum.Enum):
    """Boolean operators that can combine separate query conditions"""
    AND = 'AND'
    OR = 'OR'

    def join(self, parts: Iterable[str]) -> str:
        """Return the parts joined by this operator, padded with single spaces"""
        separator = ' ' + self.value + ' '
        return separator.join(parts)
 

	
 

	
 
class ReportFormat(enum.Enum):
    """Output formats the report can be written in"""
    TEXT = 'text'
    # Same value as TEXT, which makes TXT an alias of that member rather
    # than a separate format.
    TXT = 'text'
    CSV = 'csv'
    # ODS = 'ods'
 

	
 

	
 
def _date_condition(
 
        date: Union[int, datetime.date],
 
        year_to_date: Callable[[int], datetime.date],
 
        op: str,
 
) -> str:
 
    if isinstance(date, int):
 
        date = year_to_date(date)
 
    return f'date {op} {date.isoformat()}'
 

	
 
def build_query(
        args: argparse.Namespace,
        fy: books.FiscalYear,
        in_file: Optional[TextIO]=None,
) -> Optional[str]:
    """Return the query string to run, or None when there is no query text

    The query text comes from `args.query`. When that is empty, it is read
    from `in_file` instead, one item per line, and stored back on
    `args.query`.

    If the text joined with spaces parses as a complete query, it is returned
    unchanged. In that case `--join` and `--select` make no sense, and
    ValueError is raised if either was given.

    Otherwise each item of `args.query` is treated as a WHERE condition. The
    conditions are combined with `args.join` (which is set to AND when it was
    not given), date conditions are added for `args.start_date` and
    `args.stop_date` when present, and a SELECT statement is built around
    them. Integer years in the date arguments are converted to dates using
    `fy`.
    """
    if not args.query:
        if in_file is None:
            args.query = []
        else:
            # Strip only the line terminator. Slicing off the last character
            # unconditionally would eat a real character from a final line
            # that does not end with a newline.
            args.query = [line.rstrip('\n') for line in in_file]
    plain_query = ' '.join(args.query)
    if not plain_query or plain_query.isspace():
        return None
    try:
        QUERY_PARSER.parse(plain_query)
    except bc_query_parser.ParseError:
        # Not a full query on its own, so build one around the conditions.
        if args.join is None:
            args.join = JoinOperator.AND
        select = [
            'date',
            'ANY_META("entity") as entity',
            'narration',
            'position',
            'COST(position)',
            # A requested field that is not a built-in name and looks like a
            # metadata key is looked up with ANY_META. Hyphens are replaced
            # in the column alias. Anything else is passed through as
            # written.
            *(f'ANY_META("{field}") AS {field.replace("-", "_")}'
              if field not in BUILTIN_FIELDS
              and re.fullmatch(r'[a-z][-_A-Za-z0-9]*', field)
              else field
              for field in args.select),
        ]
        # Parenthesize the user's conditions so that an OR join cannot
        # swallow the date conditions appended with AND below.
        conds = [f'({args.join.join(args.query)})']
        if args.start_date is not None:
            conds.append(_date_condition(args.start_date, fy.first_date, '>='))
        if args.stop_date is not None:
            conds.append(_date_condition(args.stop_date, fy.next_fy_date, '<'))
        return f'SELECT {", ".join(select)} WHERE {" AND ".join(conds)}'
    else:
        if args.join:
            raise ValueError("cannot specify --join with a full query")
        if args.select:
            raise ValueError("cannot specify --select with a full query")
        return plain_query
 

	
 
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser(prog=PROGNAME)
 
    cliutil.add_version_argument(parser)
 
    cliutil.add_loglevel_argument(parser)
 
    parser.add_argument(
 
        '--begin', '--start', '-b',
 
        dest='start_date',
 
        metavar='DATE',
 
        type=cliutil.year_or_date_arg,
 
        help="""Begin loading entries from this fiscal year. When query-report
 
builds the query, it will include a condition `date >= DATE`.
 
""")
 
    parser.add_argument(
 
        '--end', '--stop', '-e',
 
        dest='stop_date',
 
        metavar='DATE',
 
        type=cliutil.year_or_date_arg,
 
        help="""End loading entries from this fiscal year. When query-report
 
builds the query, it will include a condition `date < DATE`. If you specify a
 
begin date but not an end date, the default end date will be the end of the
 
fiscal year of the begin date.
 
""")
 
    cliutil.add_rewrite_rules_argument(parser)
 
    format_arg = cliutil.EnumArgument(ReportFormat)
 
    parser.add_argument(
 
        '--report-type', '--format', '-t', '-f',
 
        metavar='TYPE',
 
        type=format_arg.enum_type,
 
        help="""Format of report to generate. Choices are
 
{format_arg.choices_str()}. Default is guessed from your output filename
 
extension, or 'text' if that fails.
 
""")
 
    parser.add_argument(
 
        '--output-file', '-O', '-o',
 
        metavar='PATH',
 
        type=Path,
 
        help="""Write the report to this file, or stdout when PATH is `-`.
 
The default is stdout for text and CSV reports, and a generated filename for
 
ODS reports.
 
""")
 

	
 
    query_group = parser.add_argument_group("query options", """
 
You can write a single full query as a command line argument (like bean-query),
 
or you can write individual WHERE condition(s) as arguments. If you write
 
WHERE conditions, these options are used to build the rest of the query.
 
""")
 
    join_arg = cliutil.EnumArgument(JoinOperator)
 
    query_group.add_argument(
 
        '--select', '-s',
 
        metavar='COLUMN',
 
        default=[],
 
        action=cliutil.ExtendAction,
 
        help="""Columns to select. You can write these as comma-separated
 
names, and/or specify the option more than once. You can specify both
 
bean-query's built-in column names (like `account` and `flag`) and metadata
 
keys.
 
""")
 
    query_group.add_argument(
 
        '--group-by', '-g',
 
        metavar='COLUMN',
 
        help="""Group output by this column
 
""")
 
    # query_group.add_argument(
 
    #     '--order-by', '--sort', '-r',
 
    #     metavar='COLUMN',
 
    #     help="""Order output by this column
0 comments (0 inline, 0 general)