Changeset - 6fa12789661e
[Not reviewed]
0 3 0
Brett Smith - 3 years ago 2021-03-09 15:39:12
brettcsmith@brettcsmith.org
query: Improve formatting of ODS output.

* Provide dedicated formatting for more Beancount types.
* Improve code to determine when we're looking up link metadata
and should format output as links.
3 files changed with 194 insertions and 76 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/core.py
Show inline comments
...
 
@@ -1369,5 +1369,5 @@ class BaseODS(BaseSpreadsheet[RT, ST], metaclass=abc.ABCMeta):
 
            return self.multiline_cell(lines, **attrs)
 

	
 
    def currency_cell(self, amount: data.Amount, **attrs: Any) -> odf.table.TableCell:
 
    def currency_cell(self, amount: bc_amount._Amount, **attrs: Any) -> odf.table.TableCell:
 
        if 'stylename' not in attrs:
 
            attrs['stylename'] = self.currency_style(amount.currency)
conservancy_beancount/reports/query.py
Show inline comments
...
 
@@ -34,4 +34,5 @@ from typing import (
 
)
 
from ..beancount_types import (
 
    MetaKey,
 
    MetaValue,
 
    Posting,
...
 
@@ -42,4 +43,6 @@ from decimal import Decimal
 
from pathlib import Path
 
from beancount.core.amount import _Amount as BeancountAmount
 
from beancount.core.inventory import Inventory
 
from beancount.core.position import _Position as Position
 

	
 
import beancount.query.numberify as bc_query_numberify
...
 
@@ -60,8 +63,4 @@ from .. import data
 
from .. import rtutil
 

	
 
BUILTIN_FIELDS: AbstractSet[str] = frozenset(itertools.chain(
 
    bc_query_env.TargetsEnvironment.columns,  # type:ignore[has-type]
 
    bc_query_env.TargetsEnvironment.functions,  # type:ignore[has-type]
 
))
 
PROGNAME = 'query-report'
 
logger = logging.getLogger('conservancy_beancount.reports.query')
...
 
@@ -81,4 +80,10 @@ RowTypes = Sequence[Tuple[str, Type]]
 
Rows = Sequence[NamedTuple]
 
Store = List[Any]
 
QueryExpression = Union[
 
    bc_query_parser.Column,
 
    bc_query_parser.Constant,
 
    bc_query_parser.Function,
 
    bc_query_parser.UnaryOp,
 
]
 
QueryStatement = Union[
 
    bc_query_parser.Balances,
...
 
@@ -125,4 +130,12 @@ class BooksLoader:
 

	
 
class QueryODS(core.BaseODS[NamedTuple, None]):
 
    META_FNAMES = frozenset([
 
        'any_meta',
 
        'entry_meta',
 
        'meta',
 
        'meta_docs',
 
        'str_meta',
 
    ])
 

	
 
    def is_empty(self) -> bool:
 
        return not self.sheet.childNodes
...
 
@@ -131,6 +144,29 @@ class QueryODS(core.BaseODS[NamedTuple, None]):
 
        return None
 

	
 
    def _generic_cell(self, value: Any) -> odf.table.TableCell:
 
        if isinstance(value, Iterable) and not isinstance(value, (str, tuple)):
 
            return self.multiline_cell(value)
 
        else:
 
            return self.string_cell('' if value is None else str(value))
 

	
 
    def _inventory_cell(self, value: Inventory) -> odf.table.TableCell:
 
        return self.balance_cell(core.Balance(pos.units for pos in value))
 

	
 
    def _link_string_cell(self, value: str) -> odf.table.TableCell:
 
        return self.meta_links_cell(value.split())
 

	
 
    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
 
        return self._cell_type(type(value))(value)
 

	
 
    def _position_cell(self, value: Position) -> odf.table.TableCell:
 
        return self.currency_cell(value.units)
 

	
 
    def _cell_type(self, row_type: Type) -> CellFunc:
 
        if issubclass(row_type, BeancountAmount):
 
        """Return a function to create a cell, for non-metadata row types."""
 
        if issubclass(row_type, Inventory):
 
            return self._inventory_cell
 
        elif issubclass(row_type, Position):
 
            return self._position_cell
 
        elif issubclass(row_type, BeancountAmount):
 
            return self.currency_cell
 
        elif issubclass(row_type, (int, float, Decimal)):
...
 
@@ -143,27 +179,64 @@ class QueryODS(core.BaseODS[NamedTuple, None]):
 
            return self._generic_cell
 

	
 
    def _generic_cell(self, value: Any) -> odf.table.TableCell:
 
        return self.string_cell('' if value is None else str(value))
 

	
 
    def _link_cell(self, value: MetaValue) -> odf.table.TableCell:
 
        if isinstance(value, str):
 
            return self.meta_links_cell(value.split())
 
    def _link_cell_type(self, row_type: Type) -> CellFunc:
 
        """Return a function to create a cell from metadata with documentation links."""
 
        if issubclass(row_type, str):
 
            return self._link_string_cell
 
        elif issubclass(row_type, tuple):
 
            return self._generic_cell
 
        elif issubclass(row_type, Iterable):
 
            return self.meta_links_cell
 
        else:
 
            return self._generic_cell(value)
 

	
 
    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
 
        return self._cell_type(type(value))(value)
 
            return self._generic_cell
 

	
 
    def _cell_types(self, row_types: RowTypes) -> Iterator[CellFunc]:
 
        for name, row_type in row_types:
 
            if row_type is object:
 
                if name.replace('_', '-') in data.LINK_METADATA:
 
                    yield self._link_cell
 
                else:
 
                    yield self._metadata_cell
 
            else:
 
    def _meta_target(self, target: QueryExpression) -> Optional[MetaKey]:
 
        """Return the metadata key looked up by this target, if any
 

	
 
        This function takes a parsed target (i.e., what we're SELECTing) and
 
        recurses it to see whether it's looking up any metadata. If so, it
 
        returns the key of that metadata. Otherwise it returns None.
 
        """
 
        if isinstance(target, bc_query_parser.UnaryOp):
 
            return self._meta_target(target.operand)
 
        elif not isinstance(target, bc_query_parser.Function):
 
            return None
 
        try:
 
            operand = target.operands[0]
 
        except IndexError:
 
            return None
 
        if (target.fname in self.META_FNAMES
 
            and isinstance(operand, bc_query_parser.Constant)):
 
            return operand.value  # type:ignore[no-any-return]
 
        else:
 
            for operand in target.operands:
 
                retval = self._meta_target(operand)
 
                if retval is not None:
 
                    break
 
            return retval
 

	
 
    def _cell_types(self, statement: QueryStatement, row_types: RowTypes) -> Iterator[CellFunc]:
 
        """Return functions to create table cells from result rows
 

	
 
        Given a parsed query and the types of return rows, yields a function
 
        to create a cell for each column in the row, in order. The returned
 
        functions vary in order to provide the best available formatting for
 
        different data types.
 
        """
 
        if (isinstance(statement, bc_query_parser.Select)
 
            and isinstance(statement.targets, Sequence)):
 
            targets = [t.expression for t in statement.targets]
 
        else:
 
            # Synthesize something that makes clear we're not loading metadata.
 
            targets = [bc_query_parser.Column(name) for name, _ in row_types]
 
        for target, (_, row_type) in zip(targets, row_types):
 
            meta_key = self._meta_target(target)
 
            if meta_key is None:
 
                yield self._cell_type(row_type)
 
            elif meta_key in data.LINK_METADATA:
 
                yield self._link_cell_type(row_type)
 
            else:
 
                yield self._metadata_cell
 

	
 
    def write_query(self, row_types: RowTypes, rows: Rows) -> None:
 
    def write_query(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        if self.is_empty():
 
            self.sheet.setAttribute('name', "Query 1")
...
 
@@ -171,19 +244,18 @@ class QueryODS(core.BaseODS[NamedTuple, None]):
 
            self.use_sheet(f"Query {len(self.document.spreadsheet.childNodes) + 1}")
 
        for name, row_type in row_types:
 
            if row_type is object or issubclass(row_type, str):
 
                col_width = 2.0
 
            elif issubclass(row_type, BeancountAmount):
 
            if issubclass(row_type, datetime.date):
 
                col_width = 1.0
 
            elif issubclass(row_type, (BeancountAmount, Inventory, Position)):
 
                col_width = 1.5
 
            else:
 
                col_width = 1.0
 
                col_width = 2.0
 
            col_style = self.column_style(col_width)
 
            self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
 
        self.add_row(*(
 
            self.string_cell(data.Metadata.human_name(name.replace('_', '-')),
 
                             stylename=self.style_bold)
 
            self.string_cell(data.Metadata.human_name(name), stylename=self.style_bold)
 
            for name, _ in row_types
 
        ))
 
        self.lock_first_row()
 
        cell_funcs = list(self._cell_types(row_types))
 
        cell_funcs = list(self._cell_types(statement, row_types))
 
        for row in rows:
 
            self.add_row(*(
...
 
@@ -239,5 +311,5 @@ class AggregateSet(bc_query_compile.EvalAggregator):
 
    def update(self, store: Store, context: Context) -> None:
 
        value, = self.eval_args(context)
 
        if isinstance(value, Sequence) and not isinstance(value, str):
 
        if isinstance(value, Sequence) and not isinstance(value, (str, tuple)):
 
            store[self.handle].update(value)
 
        else:
...
 
@@ -305,7 +377,7 @@ class BQLShell(bc_query_shell.BQLShell):
 
        else:
 
            logger.debug("rendering query as %s", output_format)
 
            render_func(row_types, rows)
 
            render_func(statement, row_types, rows)
 

	
 
    def _render_csv(self, row_types: RowTypes, rows: Rows) -> None:
 
    def _render_csv(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        bc_query_render.render_csv(
 
            row_types,
...
 
@@ -316,9 +388,13 @@ class BQLShell(bc_query_shell.BQLShell):
 
        )
 

	
 
    def _render_ods(self, row_types: RowTypes, rows: Rows) -> None:
 
        self.ods.write_query(row_types, rows)
 
        logger.info("results saved in sheet %s", self.ods.sheet.getAttribute('name'))
 
    def _render_ods(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        self.ods.write_query(statement, row_types, rows)
 
        logger.info(
 
            "%s rows of results saved in sheet %s",
 
            len(rows),
 
            self.ods.sheet.getAttribute('name'),
 
        )
 

	
 
    def _render_text(self, row_types: RowTypes, rows: Rows) -> None:
 
    def _render_text(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
 
        with contextlib.ExitStack() as stack:
 
            if self.is_interactive:
...
 
@@ -395,7 +471,5 @@ ODS reports.
 
standard input is not a terminal, reads the query from stdin instead.
 
""")
 

	
 
    args = parser.parse_args(arglist)
 
    return args
 
    return parser.parse_args(arglist)
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
tests/test_reports_query.py
Show inline comments
...
 
@@ -22,4 +22,5 @@ from . import testutil
 

	
 
from beancount.core import data as bc_data
 
from beancount.query import query_parser as bc_query_parser
 
from conservancy_beancount.books import FiscalYear
 
from conservancy_beancount.reports import query as qmod
...
 
@@ -39,4 +40,8 @@ class MockRewriteRuleset:
 

	
 

	
 
@pytest.fixture(scope='module')
 
def qparser():
 
    return bc_query_parser.Parser()
 

	
 
@pytest.fixture(scope='module')
 
def rt():
...
 
@@ -131,9 +136,10 @@ def test_rewrite_query(end_index):
 
    assert frozenset(accounts).difference(expected).isdisjoint(actual)
 

	
 
def test_ods_amount_formatting():
 
def test_ods_amount_formatting(qparser):
 
    statement = qparser.parse('SELECT UNITS(position)')
 
    row_types = [('amount', bc_data.Amount)]
 
    row_source = [(testutil.Amount(12),), (testutil.Amount(1480, 'JPY'),)]
 
    ods = qmod.QueryODS()
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
    assert next(actual)[0].text == 'Amount'
...
 
@@ -142,9 +148,10 @@ def test_ods_amount_formatting():
 
    assert next(actual, None) is None
 

	
 
def test_ods_datetime_formatting():
 
def test_ods_datetime_formatting(qparser):
 
    statement = qparser.parse('SELECT date')
 
    row_types = [('date', datetime.date)]
 
    row_source = [(testutil.PAST_DATE,), (testutil.FUTURE_DATE,)]
 
    ods = qmod.QueryODS()
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
    assert next(actual)[0].text == 'Date'
...
 
@@ -153,16 +160,21 @@ def test_ods_datetime_formatting():
 
    assert next(actual, None) is None
 

	
 
@pytest.mark.parametrize('meta_key,header_text', [
 
    ('check', 'Check'),
 
    ('purchase-order', 'Purchase Order'),
 
    ('rt-id', 'Ticket'),
 
@pytest.mark.parametrize('meta_key,meta_func', [
 
    ('check', 'ANY_META'),
 
    ('purchase-order', 'META'),
 
    ('rt-id', 'META_DOCS'),
 
])
 
def test_ods_link_formatting(rt, meta_key, header_text):
 
    row_types = [(meta_key.replace('-', '_'), object)]
 
    row_source = [('rt:1/5',), ('rt:3 Checks/9.pdf',)]
 
def test_ods_link_formatting(qparser, rt, meta_key, meta_func):
 
    meta_func_returns_list = meta_func == 'META_DOCS'
 
    statement = qparser.parse(f'SELECT {meta_func}({meta_key!r}) AS docs')
 
    row_types = [('docs', list if meta_func_returns_list else str)]
 
    row_source = [
 
        (s.split() if meta_func_returns_list else s,)
 
        for s in ['rt:1/5', 'rt:3 Checks/9.pdf']
 
    ]
 
    ods = qmod.QueryODS(rt)
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    rows = iter(ods.document.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
 
    assert next(rows).text == header_text
 
    assert next(rows).text == 'Docs'
 
    actual = iter(
 
        [link.text for link in row.getElementsByType(odf.text.A)]
...
 
@@ -173,11 +185,12 @@ def test_ods_link_formatting(rt, meta_key, header_text):
 
    assert next(actual, None) is None
 

	
 
def test_ods_meta_formatting():
 
    row_types = [('metadata', object)]
 
def test_ods_meta_formatting(qparser):
 
    statement = qparser.parse('SELECT ANY_META("entity") AS entity')
 
    row_types = [('entity', object)]
 
    row_source = [(testutil.Amount(14),), (None,), ('foo bar',)]
 
    ods = qmod.QueryODS()
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, row_types, row_source)
 
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
 
    assert next(actual)[0].text == 'Metadata'
 
    assert next(actual)[0].text == 'Entity'
 
    assert next(actual)[0].text == '$14.00'
 
    assert next(actual)[0].text == ''
...
 
@@ -185,13 +198,16 @@ def test_ods_meta_formatting():
 
    assert next(actual, None) is None
 

	
 
def test_ods_multicolumn_write(rt):
 
    row_types = [('date', datetime.date), ('rt-id', object), ('desc', str)]
 
def test_ods_multicolumn_write(qparser, rt):
 
    statement = qparser.parse(
 
        'SELECT MIN(date) AS date, SET(META_DOCS("rt-id")) AS tix, STR_META("entity") AS entity',
 
    )
 
    row_types = [('date', datetime.date), ('tix', set), ('entity', str)]
 
    row_source = [
 
        (testutil.PAST_DATE, 'rt:1', 'aaa'),
 
        (testutil.FY_START_DATE, 'rt:2', 'bbb'),
 
        (testutil.FUTURE_DATE, 'rt:3', 'ccc'),
 
        (testutil.PAST_DATE, {'rt:1'}, 'AA'),
 
        (testutil.FY_START_DATE, {'rt:2'}, 'BB'),
 
        (testutil.FUTURE_DATE, {'rt:3', 'rt:4'}, 'CC'),
 
    ]
 
    ods = qmod.QueryODS(rt)
 
    ods.write_query(row_types, row_source)
 
    ods.write_query(statement, list(row_types), list(row_source))
 
    actual = iter(
 
        cell.text
...
 
@@ -199,22 +215,22 @@ def test_ods_multicolumn_write(rt):
 
        for cell in row
 
    )
 
    assert next(actual) == 'Date'
 
    assert next(actual) == 'Ticket'
 
    assert next(actual) == 'Desc'
 
    for expected, _ in row_types:
 
        assert next(actual) == expected.title()
 
    assert next(actual) == testutil.PAST_DATE.isoformat()
 
    assert next(actual) == 'rt:1'
 
    assert next(actual) == 'aaa'
 
    assert next(actual) == 'AA'
 
    assert next(actual) == testutil.FY_START_DATE.isoformat()
 
    assert next(actual) == 'rt:2'
 
    assert next(actual) == 'bbb'
 
    assert next(actual) == 'BB'
 
    assert next(actual) == testutil.FUTURE_DATE.isoformat()
 
    assert next(actual) == 'rt:3'
 
    assert next(actual) == 'ccc'
 
    assert frozenset(next(actual).split('\0')) == row_source[-1][1]
 
    assert next(actual) == 'CC'
 
    assert next(actual, None) is None
 

	
 
def test_ods_is_empty():
 
def test_ods_is_empty(qparser):
 
    statement = qparser.parse('SELECT * WHERE date < 1900-01-01')
 
    ods = qmod.QueryODS()
 
    assert ods.is_empty()
 
    ods.write_query([], [])
 
    ods.write_query(statement, [], [])
 
    assert not ods.is_empty()
 

	
...
 
@@ -233,6 +249,7 @@ def test_ods_output(fy, account, amt_prefix):
 
    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
 
    assert returncode == 0
 
    stdout.seek(0)
 
    ods_doc = odf.opendocument.load(stdout)
 
    with stdout:
 
        stdout.seek(0)
 
        ods_doc = odf.opendocument.load(stdout)
 
    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
 
    next(rows)  # Skip header row
...
 
@@ -244,2 +261,29 @@ def test_ods_output(fy, account, amt_prefix):
 
        assert re.match(amt_pattern, amount.text)
 
    assert count
 

	
 
def test_ods_aggregate_output():
 
    books_path = testutil.test_path(f'books/books/2020.beancount')
 
    config = testutil.TestConfig(books_path=books_path)
 
    arglist = [
 
        '-O', '-',
 
        '-f', 'ods',
 
        'SELECT account, SET(narration), SUM(UNITS(position))',
 
        'WHERE date >= 2020-04-01 AND date <= 2020-04-02',
 
        'GROUP BY account ORDER BY account ASC',
 
    ]
 
    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
 
    assert returncode == 0
 
    with stdout:
 
        stdout.seek(0)
 
        ods_doc = odf.opendocument.load(stdout)
 
    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
 
    next(rows)  # Skip header row
 
    actual = {}
 
    for row in rows:
 
        acct, descs, balance = row.childNodes
 
        actual[acct.text] = (frozenset(descs.text.split('\0')), balance.text)
 
    in_desc = {'2020 donation'}
 
    ex_desc = {'2020 bank maintenance fee'}
 
    assert actual['Income:Donations'] == (in_desc, '$20.20')
 
    assert actual['Expenses:BankingFees'] == (ex_desc, '$1.00')
 
    assert actual['Assets:Checking'] == (in_desc | ex_desc, '($21.20)')
0 comments (0 inline, 0 general)