Changeset - 6fa12789661e
[Not reviewed]
0 3 0
Brett Smith - 3 years ago 2021-03-09 15:39:12
brettcsmith@brettcsmith.org
query: Improve formatting of ODS output.

* Provide dedicated formatting for more Beancount types.
* Improve code to determine when we're looking up link metadata
and should format output as links.
3 files changed with 194 insertions and 76 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/core.py
Show inline comments
...
 
@@ -1368,7 +1368,7 @@ class BaseODS(BaseSpreadsheet[RT, ST], metaclass=abc.ABCMeta):
 
            )
 
            return self.multiline_cell(lines, **attrs)
 

	
 
    def currency_cell(self, amount: data.Amount, **attrs: Any) -> odf.table.TableCell:
 
    def currency_cell(self, amount: bc_amount._Amount, **attrs: Any) -> odf.table.TableCell:
 
        if 'stylename' not in attrs:
 
            attrs['stylename'] = self.currency_style(amount.currency)
 
        number, currency = amount
conservancy_beancount/reports/query.py
Show inline comments
...
 
@@ -33,6 +33,7 @@ from typing import (
 
    Union,
 
)
 
from ..beancount_types import (
 
    MetaKey,
 
    MetaValue,
 
    Posting,
 
    Transaction,
...
 
@@ -41,6 +42,8 @@ from ..beancount_types import (
 
from decimal import Decimal
 
from pathlib import Path
 
from beancount.core.amount import _Amount as BeancountAmount
 
from beancount.core.inventory import Inventory
 
from beancount.core.position import _Position as Position
 

	
 
import beancount.query.numberify as bc_query_numberify
 
import beancount.query.query_compile as bc_query_compile
...
 
@@ -59,10 +62,6 @@ from .. import config as configmod
 
from .. import data
 
from .. import rtutil
 

	
 
BUILTIN_FIELDS: AbstractSet[str] = frozenset(itertools.chain(
 
    bc_query_env.TargetsEnvironment.columns,  # type:ignore[has-type]
 
    bc_query_env.TargetsEnvironment.functions,  # type:ignore[has-type]
 
))
 
PROGNAME = 'query-report'
 
logger = logging.getLogger('conservancy_beancount.reports.query')
 

	
...
 
@@ -80,6 +79,12 @@ EnvironmentFunctions = Dict[
 
RowTypes = Sequence[Tuple[str, Type]]
 
Rows = Sequence[NamedTuple]
 
Store = List[Any]
 
QueryExpression = Union[
 
    bc_query_parser.Column,
 
    bc_query_parser.Constant,
 
    bc_query_parser.Function,
 
    bc_query_parser.UnaryOp,
 
]
 
QueryStatement = Union[
 
    bc_query_parser.Balances,
 
    bc_query_parser.Journal,
...
 
@@ -124,14 +129,45 @@ class BooksLoader:
 

	
 

	
 
class QueryODS(core.BaseODS[NamedTuple, None]):
 
    META_FNAMES = frozenset([
 
        'any_meta',
 
        'entry_meta',
 
        'meta',
 
        'meta_docs',
 
        'str_meta',
 
    ])
 

	
 
    def is_empty(self) -> bool:
 
        return not self.sheet.childNodes
 

	
 
    def section_key(self, row: NamedTuple) -> None:
 
        return None
 

	
 
    def _generic_cell(self, value: Any) -> odf.table.TableCell:
 
        if isinstance(value, Iterable) and not isinstance(value, (str, tuple)):
 
            return self.multiline_cell(value)
 
        else:
 
            return self.string_cell('' if value is None else str(value))
 

	
 
    def _inventory_cell(self, value: Inventory) -> odf.table.TableCell:
 
        return self.balance_cell(core.Balance(pos.units for pos in value))
 

	
 
    def _link_string_cell(self, value: str) -> odf.table.TableCell:
 
        return self.meta_links_cell(value.split())
 

	
 
    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
 
        return self._cell_type(type(value))(value)
 

	
 
    def _position_cell(self, value: Position) -> odf.table.TableCell:
 
        return self.currency_cell(value.units)
 

	
 
    def _cell_type(self, row_type: Type) -> CellFunc:
 
        if issubclass(row_type, BeancountAmount):
 
        """Return a function to create a cell, for non-metadata row types."""
 
        if issubclass(row_type, Inventory):
 
            return self._inventory_cell
 
        elif issubclass(row_type, Position):
 
            return self._position_cell
 
        elif issubclass(row_type, BeancountAmount):
 
            return self.currency_cell
 
        elif issubclass(row_type, (int, float, Decimal)):
 
            return self.float_cell
...
 
@@ -142,49 +178,85 @@ class QueryODS(core.BaseODS[NamedTuple, None]):
 
        else:
 
            return self._generic_cell
 

	
 
    def _generic_cell(self, value: Any) -> odf.table.TableCell:
 
        return self.string_cell('' if value is None else str(value))
 

	
 
    def _link_cell(self, value: MetaValue) -> odf.table.TableCell:
 
        if isinstance(value, str):
 
            return self.meta_links_cell(value.split())
 
    def _link_cell_type(self, row_type: Type) -> CellFunc:
 
        """Return a function to create a cell from metadata with documentation links."""
 
        if issubclass(row_type, str):
 
            return self._link_string_cell
 
        elif issubclass(row_type, tuple):
 
            return self._generic_cell
 
        elif issubclass(row_type, Iterable):
 
            return self.meta_links_cell
 
        else:
 
            return self._generic_cell(value)
 

	
 
    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
 
        return self._cell_type(type(value))(value)
 
            return self._generic_cell
 

	
 
    def _cell_types(self, row_types: RowTypes) -> Iterator[CellFunc]:
 
        for name, row_type in row_types:
 
            if row_type is object:
 
                if name.replace('_', '-') in data.LINK_METADATA:
 
                    yield self._link_cell
 
                else:
 
                    yield self._metadata_cell
 
            else:
 
    def _meta_target(self, target: QueryExpression) -> Optional[MetaKey]:
 
        """Return the metadata key looked up by this target, if any
 

	
 
        This function takes a parsed target (i.e., what we're SELECTing) and
 
        recurses it to see whether it's looking up any metadata. If so, it
 
        returns the key of that metadata. Otherwise it returns None.
 
        """
 
        if isinstance(target, bc_query_parser.UnaryOp):
 
            return self._meta_target(target.operand)
 
        elif not isinstance(target, bc_query_parser.Function):
 
            return None
 
        try:
 
            operand = target.operands[0]
 
        except IndexError:
 
            return None
 
        if (target.fname in self.META_FNAMES
 
            and isinstance(operand, bc_query_parser.Constant)):
 
            return operand.value  # type:ignore[no-any-return]
 
        else:
 
            for operand in target.operands:
 
                retval = self._meta_target(operand)
 
                if retval is not None:
 
                    break
 
            return retval
 

	
 
    def _cell_types(self, statement: QueryStatement, row_types: RowTypes) -> Iterator[CellFunc]:
 
        """Return functions to create table cells from result rows
 

	
 
        Given a parsed query and the types of return rows, yields a function
 
        to create a cell for each column in the row, in order. The returned
 
        functions vary in order to provide the best available formatting for
 
        different data types.
 
        """
 
        if (isinstance(statement, bc_query_parser.Select)
 
            and isinstance(statement.targets, Sequence)):
 
            targets = [t.expression for t in statement.targets]
 
        else:
 
            # Synthesize something that makes clear we're not loading metadata.
 
            targets = [bc_query_parser.Column(name) for name, _ in row_types]
 
        for target, (_, row_type) in zip(targets, row_types):
 
            meta_key = self._meta_target(target)
 
            if meta_key is None:
 
                yield self._cell_type(row_type)
 
            elif meta_key in data.LINK_METADATA:
 
                yield self._link_cell_type(row_type)
 
            else:
 
                yield self._metadata_cell
 

	
 
    def write_query(self, row_types: RowTypes, rows: Rows) -> None:
 
    def write_query(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        if self.is_empty():
 
            self.sheet.setAttribute('name', "Query 1")
 
        else:
 
            self.use_sheet(f"Query {len(self.document.spreadsheet.childNodes) + 1}")
 
        for name, row_type in row_types:
 
            if row_type is object or issubclass(row_type, str):
 
                col_width = 2.0
 
            elif issubclass(row_type, BeancountAmount):
 
            if issubclass(row_type, datetime.date):
 
                col_width = 1.0
 
            elif issubclass(row_type, (BeancountAmount, Inventory, Position)):
 
                col_width = 1.5
 
            else:
 
                col_width = 1.0
 
                col_width = 2.0
 
            col_style = self.column_style(col_width)
 
            self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
 
        self.add_row(*(
 
            self.string_cell(data.Metadata.human_name(name.replace('_', '-')),
 
                             stylename=self.style_bold)
 
            self.string_cell(data.Metadata.human_name(name), stylename=self.style_bold)
 
            for name, _ in row_types
 
        ))
 
        self.lock_first_row()
 
        cell_funcs = list(self._cell_types(row_types))
 
        cell_funcs = list(self._cell_types(statement, row_types))
 
        for row in rows:
 
            self.add_row(*(
 
                cell_func(value)
...
 
@@ -238,7 +310,7 @@ class AggregateSet(bc_query_compile.EvalAggregator):
 

	
 
    def update(self, store: Store, context: Context) -> None:
 
        value, = self.eval_args(context)
 
        if isinstance(value, Sequence) and not isinstance(value, str):
 
        if isinstance(value, Sequence) and not isinstance(value, (str, tuple)):
 
            store[self.handle].update(value)
 
        else:
 
            store[self.handle].add(value)
...
 
@@ -304,9 +376,9 @@ class BQLShell(bc_query_shell.BQLShell):
 
            print("(empty)", file=self.outfile)
 
        else:
 
            logger.debug("rendering query as %s", output_format)
 
            render_func(row_types, rows)
 
            render_func(statement, row_types, rows)
 

	
 
    def _render_csv(self, row_types: RowTypes, rows: Rows) -> None:
 
    def _render_csv(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        bc_query_render.render_csv(
 
            row_types,
 
            rows,
...
 
@@ -315,11 +387,15 @@ class BQLShell(bc_query_shell.BQLShell):
 
            self.vars['expand'],
 
        )
 

	
 
    def _render_ods(self, row_types: RowTypes, rows: Rows) -> None:
 
        self.ods.write_query(row_types, rows)
 
        logger.info("results saved in sheet %s", self.ods.sheet.getAttribute('name'))
 
    def _render_ods(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        self.ods.write_query(statement, row_types, rows)
 
        logger.info(
 
            "%s rows of results saved in sheet %s",
 
            len(rows),
 
            self.ods.sheet.getAttribute('name'),
 
        )
 

	
 
    def _render_text(self, row_types: RowTypes, rows: Rows) -> None:
 
    def _render_text(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        with contextlib.ExitStack() as stack:
 
            if self.is_interactive:
 
                output = stack.enter_context(self.get_pager())
...
 
@@ -394,9 +470,7 @@ ODS reports.
 
        help="""Query to run non-interactively. If none is provided, and
 
standard input is not a terminal, reads the query from stdin instead.
 
""")
 

	
 
    args = parser.parse_args(arglist)
 
    return args
 
    return parser.parse_args(arglist)
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
 
         stdout: TextIO=sys.stdout,
tests/test_reports_query.py
Show inline comments
...
 
@@ -21,6 +21,7 @@ import pytest
 
from . import testutil
 

	
 
from beancount.core import data as bc_data
 
from beancount.query import query_parser as bc_query_parser
 
from conservancy_beancount.books import FiscalYear
 
from conservancy_beancount.reports import query as qmod
 
from conservancy_beancount import rtutil
...
 
@@ -38,6 +39,10 @@ class MockRewriteRuleset:
 
            yield post._replace(units=testutil.Amount(number, currency))
 

	
 

	
 
@pytest.fixture(scope='module')
 
def qparser():
 
    return bc_query_parser.Parser()
 

	
 
@pytest.fixture(scope='module')
 
def rt():
 
    return rtutil.RT(testutil.RTClient())
...
 
@@ -130,40 +135,47 @@ def test_rewrite_query(end_index):
 
    assert expected.issubset(actual)
 
    assert frozenset(accounts).difference(expected).isdisjoint(actual)
 

	
 
def test_ods_amount_formatting():
 
def test_ods_amount_formatting(qparser):
 
    statement = qparser.parse('SELECT UNITS(position)')
 
    row_types = [('amount', bc_data.Amount)]
 
    row_source = [(testutil.Amount(12),), (testutil.Amount(1480, 'JPY'),)]
 
    ods = qmod.QueryODS()
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
    assert next(actual)[0].text == 'Amount'
 
    assert next(actual)[0].text == '$12.00'
 
    assert next(actual)[0].text == 'Â¥1,480'
 
    assert next(actual, None) is None
 

	
 
def test_ods_datetime_formatting():
 
def test_ods_datetime_formatting(qparser):
 
    statement = qparser.parse('SELECT date')
 
    row_types = [('date', datetime.date)]
 
    row_source = [(testutil.PAST_DATE,), (testutil.FUTURE_DATE,)]
 
    ods = qmod.QueryODS()
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
    assert next(actual)[0].text == 'Date'
 
    assert next(actual)[0].text == testutil.PAST_DATE.isoformat()
 
    assert next(actual)[0].text == testutil.FUTURE_DATE.isoformat()
 
    assert next(actual, None) is None
 

	
 
@pytest.mark.parametrize('meta_key,header_text', [
 
    ('check', 'Check'),
 
    ('purchase-order', 'Purchase Order'),
 
    ('rt-id', 'Ticket'),
 
@pytest.mark.parametrize('meta_key,meta_func', [
 
    ('check', 'ANY_META'),
 
    ('purchase-order', 'META'),
 
    ('rt-id', 'META_DOCS'),
 
])
 
def test_ods_link_formatting(rt, meta_key, header_text):
 
    row_types = [(meta_key.replace('-', '_'), object)]
 
    row_source = [('rt:1/5',), ('rt:3 Checks/9.pdf',)]
 
def test_ods_link_formatting(qparser, rt, meta_key, meta_func):
 
    meta_func_returns_list = meta_func == 'META_DOCS'
 
    statement = qparser.parse(f'SELECT {meta_func}({meta_key!r}) AS docs')
 
    row_types = [('docs', list if meta_func_returns_list else str)]
 
    row_source = [
 
        (s.split() if meta_func_returns_list else s,)
 
        for s in ['rt:1/5', 'rt:3 Checks/9.pdf']
 
    ]
 
    ods = qmod.QueryODS(rt)
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    rows = iter(ods.document.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
 
    assert next(rows).text == header_text
 
    assert next(rows).text == 'Docs'
 
    actual = iter(
 
        [link.text for link in row.getElementsByType(odf.text.A)]
 
        for row in rows
...
 
@@ -172,50 +184,54 @@ def test_ods_link_formatting(rt, meta_key, header_text):
 
    assert next(actual) == ['rt:3', '9.pdf']
 
    assert next(actual, None) is None
 

	
 
def test_ods_meta_formatting():
 
    row_types = [('metadata', object)]
 
def test_ods_meta_formatting(qparser):
 
    statement = qparser.parse('SELECT ANY_META("entity") AS entity')
 
    row_types = [('entity', object)]
 
    row_source = [(testutil.Amount(14),), (None,), ('foo bar',)]
 
    ods = qmod.QueryODS()
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
    assert next(actual)[0].text == 'Metadata'
 
    assert next(actual)[0].text == 'Entity'
 
    assert next(actual)[0].text == '$14.00'
 
    assert next(actual)[0].text == ''
 
    assert next(actual)[0].text == 'foo bar'
 
    assert next(actual, None) is None
 

	
 
def test_ods_multicolumn_write(rt):
 
    row_types = [('date', datetime.date), ('rt-id', object), ('desc', str)]
 
def test_ods_multicolumn_write(qparser, rt):
 
    statement = qparser.parse(
 
        'SELECT MIN(date) AS date, SET(META_DOCS("rt-id")) AS tix, STR_META("entity") AS entity',
 
    )
 
    row_types = [('date', datetime.date), ('tix', set), ('entity', str)]
 
    row_source = [
 
        (testutil.PAST_DATE, 'rt:1', 'aaa'),
 
        (testutil.FY_START_DATE, 'rt:2', 'bbb'),
 
        (testutil.FUTURE_DATE, 'rt:3', 'ccc'),
 
        (testutil.PAST_DATE, {'rt:1'}, 'AA'),
 
        (testutil.FY_START_DATE, {'rt:2'}, 'BB'),
 
        (testutil.FUTURE_DATE, {'rt:3', 'rt:4'}, 'CC'),
 
    ]
 
    ods = qmod.QueryODS(rt)
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, list(row_types), list(row_source))
 
    actual = iter(
 
        cell.text
 
        for row in testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
        for cell in row
 
    )
 
    assert next(actual) == 'Date'
 
    assert next(actual) == 'Ticket'
 
    assert next(actual) == 'Desc'
 
    for expected, _ in row_types:
 
        assert next(actual) == expected.title()
 
    assert next(actual) == testutil.PAST_DATE.isoformat()
 
    assert next(actual) == 'rt:1'
 
    assert next(actual) == 'aaa'
 
    assert next(actual) == 'AA'
 
    assert next(actual) == testutil.FY_START_DATE.isoformat()
 
    assert next(actual) == 'rt:2'
 
    assert next(actual) == 'bbb'
 
    assert next(actual) == 'BB'
 
    assert next(actual) == testutil.FUTURE_DATE.isoformat()
 
    assert next(actual) == 'rt:3'
 
    assert next(actual) == 'ccc'
 
    assert frozenset(next(actual).split('\0')) == row_source[-1][1]
 
    assert next(actual) == 'CC'
 
    assert next(actual, None) is None
 

	
 
def test_ods_is_empty():
 
def test_ods_is_empty(qparser):
 
    statement = qparser.parse('SELECT * WHERE date < 1900-01-01')
 
    ods = qmod.QueryODS()
 
    assert ods.is_empty()
 
    ods.write_query([], [])
 
    ods.write_query(statement, [], [])
 
    assert not ods.is_empty()
 

	
 
@pytest.mark.parametrize('fy,account,amt_prefix', [
...
 
@@ -232,8 +248,9 @@ def test_ods_output(fy, account, amt_prefix):
 
    ]
 
    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
 
    assert returncode == 0
 
    stdout.seek(0)
 
    ods_doc = odf.opendocument.load(stdout)
 
    with stdout:
 
        stdout.seek(0)
 
        ods_doc = odf.opendocument.load(stdout)
 
    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
 
    next(rows)  # Skip header row
 
    amt_pattern = rf'^{re.escape(amt_prefix)}\d'
...
 
@@ -243,3 +260,30 @@ def test_ods_output(fy, account, amt_prefix):
 
        assert narration.text.startswith(f'{fy} ')
 
        assert re.match(amt_pattern, amount.text)
 
    assert count
 

	
 
def test_ods_aggregate_output():
 
    books_path = testutil.test_path(f'books/books/2020.beancount')
 
    config = testutil.TestConfig(books_path=books_path)
 
    arglist = [
 
        '-O', '-',
 
        '-f', 'ods',
 
        'SELECT account, SET(narration), SUM(UNITS(position))',
 
        'WHERE date >= 2020-04-01 AND date <= 2020-04-02',
 
        'GROUP BY account ORDER BY account ASC',
 
    ]
 
    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
 
    assert returncode == 0
 
    with stdout:
 
        stdout.seek(0)
 
        ods_doc = odf.opendocument.load(stdout)
 
    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
 
    next(rows)  # Skip header row
 
    actual = {}
 
    for row in rows:
 
        acct, descs, balance = row.childNodes
 
        actual[acct.text] = (frozenset(descs.text.split('\0')), balance.text)
 
    in_desc = {'2020 donation'}
 
    ex_desc = {'2020 bank maintenance fee'}
 
    assert actual['Income:Donations'] == (in_desc, '$20.20')
 
    assert actual['Expenses:BankingFees'] == (ex_desc, '$1.00')
 
    assert actual['Assets:Checking'] == (in_desc | ex_desc, '($21.20)')
0 comments (0 inline, 0 general)