Changeset - ef03893bfed3
[Not reviewed]
0 2 0
Brett Smith - 3 years ago 2021-03-12 15:56:43
brettcsmith@brettcsmith.org
query: Convert query functions that return List to Set.

Beancount's built-in renderers expect this and are better equipped for it.
2 files changed with 27 insertions and 34 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reports/query.py
Show inline comments
...
 
@@ -112,236 +112,234 @@ SENTINEL = object()
 
# Module-level logger, named after this module's import path.
logger = logging.getLogger('conservancy_beancount.reports.query')

# A callable that takes a single value and returns an ODF table cell.
CellFunc = Callable[[Any], odf.table.TableCell]
# Mapping from a function lookup key to the EvalFunction subclass that
# implements it.
EnvironmentFunctions = Dict[
    # The real key type is something like:
    #   Union[str, Tuple[str, Type, ...]]
    # but there are two issues with that. One, you can't use an Ellipsis in a
    # Tuple like that, so there's no short way to declare this. Two, Beancount
    # doesn't declare it anyway, and mypy infers it as Sequence[object]. So
    # just use that.
    Sequence[object],
    Type[bc_query_compile.EvalFunction],
]
# Sequence of (str, type) pairs - presumably one (column name, column type)
# pair per result column; confirm against the callers.
RowTypes = Sequence[Tuple[str, Type]]
# Sequence of named-tuple rows.
Rows = Sequence[NamedTuple]
# What an RT ticket lookup yields: a mapping of ticket fields, or None.
# RTField.load() treats any falsy value as "no ticket data".
RTResult = Optional[Mapping[Any, Any]]
# Result storage that aggregators index with the handle they were allocated.
Store = List[Any]
# Parser node classes accepted as a query expression.
QueryExpression = Union[
    bc_query_parser.Column,
    bc_query_parser.Constant,
    bc_query_parser.Function,
    bc_query_parser.UnaryOp,
]
# Parser node classes accepted as a top-level query statement.
QueryStatement = Union[
    bc_query_parser.Balances,
    bc_query_parser.Journal,
    bc_query_parser.Select,
]
 

	
 
# This class annotates the types that Beancount's RowContexts have when they're
# passed to EvalFunction.__call__(). These types get set across
# create_row_context and execute_query.
class PostingContext:
    """Type-annotation stand-in for the row context given to query functions.

    The body declares attribute types only; it assigns no values.
    """
    # The posting for the current row. Query functions in this module read
    # its ``meta`` mapping, which may be None.
    posting: Posting
    # The transaction that owns the posting. Its ``meta`` is the fallback
    # when the posting does not carry the requested metadata key.
    entry: Transaction
    balance: Inventory
    options_map: OptionsMap
    account_types: Mapping
    open_close_map: Mapping
    commodity_map: Mapping
    price_map: Mapping
    # Dynamically set by execute_query
    # Aggregators read their results back from it using their handle.
    store: Store
 

	
 

	
 
class MetaDocs(bc_query_env.AnyMeta):
    """Return a set of document links from metadata."""
    def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
        # super(AnyMeta, self) starts the lookup *after* AnyMeta, so this
        # calls the next __init__ in the MRO directly and lets us declare
        # our own return type.
        super(bc_query_env.AnyMeta, self).__init__(operands, set)
        # The second argument is our return type.
        # It should match the annotated return type of __call__.

    def __call__(self, context: PostingContext) -> Set[str]:
        """Split the metadata value on whitespace and return the unique links.

        Any value that is not a string (including a missing value) yields an
        empty set.
        """
        raw_value = super().__call__(context)
        if isinstance(raw_value, str):
            return set(raw_value.split())
        return set()
 

	
 

	
 
class RTField(NamedTuple):
    """Describe how to read one field out of an RT ticket mapping.

    ``key`` is the name looked up in the ticket mapping. ``parse``, when not
    None, converts the raw value. ``unset_value`` is the raw value that
    should be treated the same as a missing field.
    """
    key: str
    parse: Optional[Callable[[str], object]]
    unset_value: Optional[str] = None

    def load(self, rt_ticket: RTResult) -> object:
        """Return this field's value from ``rt_ticket``, or None if unset.

        None is returned when there is no ticket data, when the raw value is
        falsy, or when it equals ``unset_value``. Otherwise the raw value is
        returned, passed through ``parse`` if one is configured.
        """
        raw = None if not rt_ticket else rt_ticket.get(self.key)
        if not raw or raw == self.unset_value:
            return None
        return raw if self.parse is None else self.parse(raw)
 

	
 

	
 
class RTTicket(bc_query_compile.EvalFunction):
    """Look up a field from RT ticket(s) mentioned in metadata documentation"""
    # Argument types: RT field name, metadata key, maximum number of tickets.
    __intypes__ = [str, str, int]
    # FIELDS maps every accepted field name to the RTField that loads it.
    # Plain fields are returned as-is.
    FIELDS = {key: RTField(key, None) for key in [
        'AdminCc',
        'Cc',
        'Creator',
        'Owner',
        'Queue',
        'Status',
        'Subject',
        'Requestors',
    ]}
    # Integer fields; a raw '0' is treated as unset.
    FIELDS.update((key, RTField(key, int, '0')) for key in [
        'numerical_id',
        'FinalPriority',
        'InitialPriority',
        'Priority',
        'TimeEstimated',
        'TimeLeft',
        'TimeWorked',
    ])
    # Date/time fields; a raw 'Not set' is treated as unset.
    FIELDS.update((key, RTField(key, rtutil.RTDateTime, 'Not set')) for key in [
        'Created',
        'Due',
        'LastUpdated',
        'Resolved',
        'Started',
        'Starts',
        'Told',
    ])
    # Accept all-lowercase spellings, plus a few convenience aliases.
    FIELDS.update({key.lower(): value for key, value in FIELDS.items()})
    FIELDS['id'] = FIELDS['numerical_id']
    FIELDS['AdminCC'] = FIELDS['AdminCc']
    FIELDS['CC'] = FIELDS['Cc']
    # Declared but not assigned here: only classes built by with_client()
    # have an RT_CLIENT, and __init__ refuses to run without one.
    RT_CLIENT: ClassVar[rt.Rt]
    # _CACHES holds all of the caches for different RT instances that have
    # been passed through RTTicket.with_client().
    _CACHES: ClassVar[Dict[Hashable, MutableMapping[str, RTResult]]] = {}
    # _rt_cache is the cache specific to this RT_CLIENT.
    _rt_cache: ClassVar[MutableMapping[str, RTResult]] = {}

    @classmethod
    def with_client(cls, client: rt.Rt, cache_key: Hashable) -> Type['RTTicket']:
        """Return a subclass bound to ``client``.

        The subclass shares one ticket cache with every other subclass
        created with the same ``cache_key``.
        """
        return type(cls.__name__, (cls,), {
            'RT_CLIENT': client,
            '_rt_cache': cls._CACHES.setdefault(cache_key, {}),
        })

    def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
        """Validate constant arguments and supply the default limit.

        Raises RuntimeError when the class has no RT client, and ValueError
        when a constant field name or metadata key is not recognized.
        """
        if not hasattr(self, 'RT_CLIENT'):
            raise RuntimeError("no RT client available - cannot use rt_ticket()")
        rt_op, meta_op, *rest = operands
        # We have to evaluate the RT and meta keys on each call, because they
        # might themselves be dynamic. In the common case they're constants.
        # In that case, check for typos so we can report an error to the user
        # before execution even begins.
        if isinstance(rt_op, bc_query_compile.EvalConstant):
            self._rt_key(rt_op.value)
        if isinstance(meta_op, bc_query_compile.EvalConstant):
            self._meta_key(meta_op.value)
        if not rest:
            # No limit argument was given: default to effectively unlimited.
            operands.append(bc_query_compile.EvalConstant(sys.maxsize))
        # The second argument is our return type; it matches __call__.
        super().__init__(operands, set)

    def _rt_key(self, key: str) -> RTField:
        """Return the RTField for ``key``, or raise ValueError if unknown."""
        try:
            return self.FIELDS[key]
        except KeyError:
            raise ValueError(f"unknown RT ticket field {key!r}") from None

    def _meta_key(self, key: str) -> str:
        """Return ``key`` if it is a link metadata key, else raise ValueError."""
        if key in data.LINK_METADATA:
            return key
        else:
            raise ValueError(f"metadata key {key!r} does not contain documentation links")

    def __call__(self, context: PostingContext) -> Set[object]:
        """Return the set of field values from the tickets linked in metadata.

        The metadata value is read from the posting, falling back to the
        transaction when the posting does not have the key. At most ``limit``
        distinct tickets are consulted. Unset fields contribute nothing, and
        list-valued fields contribute each of their items.
        """
        rt_key: str
        meta_key: str
        limit: int
        rt_key, meta_key, limit = self.eval_args(context)
        rt_field = self._rt_key(rt_key)
        meta_key = self._meta_key(meta_key)
        if context.posting.meta is None:
            meta_value: Any = SENTINEL
        else:
            meta_value = context.posting.meta.get(meta_key, SENTINEL)
        if meta_value is SENTINEL:
            meta_value = context.entry.meta.get(meta_key)
        # A non-string value or a non-positive limit means "no links".
        if not isinstance(meta_value, str) or limit < 1:
            meta_value = ''
        ticket_ids: Set[str] = set()
        for link_s in meta_value.split():
            rt_id = rtutil.RT.parse(link_s)
            if rt_id is not None:
                ticket_ids.add(rt_id[0])
                if len(ticket_ids) >= limit:
                    break
        retval: Set[object] = set()
        for ticket_id in ticket_ids:
            try:
                rt_ticket = self._rt_cache[ticket_id]
            except KeyError:
                # Cache whatever the client returns, so a ticket is only
                # requested once per cache.
                rt_ticket = self.RT_CLIENT.get_ticket(ticket_id)
                self._rt_cache[ticket_id] = rt_ticket
            field_value = rt_field.load(rt_ticket)
            if field_value is None:
                pass
            elif isinstance(field_value, list):
                retval.update(field_value)
            else:
                retval.add(field_value)
        return retval
 

	
 

	
 
class StrMeta(bc_query_env.AnyMeta):
    """Looks up metadata like AnyMeta, then always returns a string."""
    def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
        # Start the lookup after AnyMeta so we can pass our own return type
        # (str) as the second argument.
        super(bc_query_env.AnyMeta, self).__init__(operands, str)

    def __call__(self, context: PostingContext) -> str:
        """Return the metadata value as a string; a None value becomes ''."""
        looked_up = super().__call__(context)
        return '' if looked_up is None else str(looked_up)
 

	
 

	
 
class AggregateSet(bc_query_compile.EvalAggregator):
    """Filter argument values that aren't unique."""
    __intypes__ = [object]

    def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
        super().__init__(operands, set)

    def allocate(self, allocator: bc_query_execute.Allocator) -> None:
        """Allocate and save an index handle into result storage."""
        self.handle = allocator.allocate()

    def initialize(self, store: Store) -> None:
        """Prepare result storage for a new aggregation."""
        store[self.handle] = self.dtype()
        # self.dtype() is our return type, aka the second argument to __init__
        # above, aka the annotated return type of __call__.

    def update(self, store: Store, context: PostingContext) -> None:
        """Update existing storage with new result data.

        Collection values are merged item by item; anything else is added
        as a single item. Strings and tuples count as single items.
        """
        value, = self.eval_args(context)
        # Sets are not Sequences, so check for them explicitly: functions
        # that return a set must be merged here, because a set is unhashable
        # and cannot itself be add()ed to the result.
        is_collection = isinstance(value, (set, frozenset)) or (
            isinstance(value, Sequence) and not isinstance(value, (str, tuple))
        )
        if is_collection:
            store[self.handle].update(value)
        else:
            store[self.handle].add(value)

    def __call__(self, context: PostingContext) -> set:
        """Return the result for an aggregation."""
        return context.store[self.handle]  # type:ignore[no-any-return]
 

	
 

	
 
class _EnvironmentMixin:
 
    functions: EnvironmentFunctions
tests/test_reports_query.py
Show inline comments
...
 
@@ -41,189 +41,184 @@ class MockRewriteRuleset:
 
            number, currency = post.units
 
            number *= self.multiplier
 
            yield post._replace(units=testutil.Amount(number, currency))
 

	
 

	
 
class RowContext(bc_query_execute.RowContext):
    """Row context for tests, built directly from an entry and a posting."""
    def __init__(self, entry, posting=None):
        super().__init__()
        # Set the two attributes the tests in this file supply; the RT
        # ticket function reads metadata from both of them.
        self.entry = entry
        self.posting = posting
 

	
 

	
 
@pytest.fixture(scope='module')
def qparser():
    """Provide one query parser instance, shared by the whole test module."""
    return bc_query_parser.Parser()
 

	
 
@pytest.fixture(scope='module')
def rt():
    """Provide an rtutil.RT wrapper around the test RT client (module-scoped)."""
    return rtutil.RT(testutil.RTClient())
 

	
 
@pytest.fixture(scope='module')
def ticket_query():
    """Provide an RTTicket class bound to a test RT client.

    The 'testfixture' cache key selects which ticket cache the class uses.
    """
    return qmod.RTTicket.with_client(testutil.RTClient(), 'testfixture')
 

	
 
def const_operands(*args):
    """Wrap each positional argument in an EvalConstant and return the list."""
    return list(map(bc_query_compile.EvalConstant, args))
 

	
 
def pipe_main(arglist, config, stdout_type=io.StringIO):
    """Run qmod.main() against in-memory streams.

    Returns a ``(returncode, stdout, stderr)`` tuple. ``stdout`` is an
    instance of ``stdout_type``; ``stderr`` is always an io.StringIO.
    """
    out_stream, err_stream = stdout_type(), io.StringIO()
    exit_code = qmod.main(arglist, out_stream, err_stream, config)
    return exit_code, out_stream, err_stream
 

	
 
def test_rt_ticket_unconfigured():
    """Constructing RTTicket directly, with no RT client, raises RuntimeError."""
    with pytest.raises(RuntimeError):
        qmod.RTTicket(const_operands('id', 'rt-id'))
 

	
 
@pytest.mark.parametrize('field_name', ['foo', 'bar'])
def test_rt_ticket_bad_field(ticket_query, field_name):
    """A constant RT field name that is not recognized raises ValueError."""
    with pytest.raises(ValueError):
        ticket_query(const_operands(field_name, 'rt-id'))
 

	
 
@pytest.mark.parametrize('meta_name', ['foo', 'bar'])
def test_rt_ticket_bad_metadata(ticket_query, meta_name):
    """A constant metadata key that is not a link key raises ValueError."""
    with pytest.raises(ValueError):
        ticket_query(const_operands('id', meta_name))
 

	
 
@pytest.mark.parametrize('field_name,meta_name,expected', [
    ('id', 'rt-id', {1}),
    ('Queue', 'approval', {'general'}),
    ('Requestors', 'invoice', {'mx1@example.org', 'requestor2@example.org'}),
    ('Due', 'tax-reporting', {datetime.datetime(2017, 1, 14, 12, 1, 0, tzinfo=UTC)}),
])
def test_rt_ticket_from_txn(ticket_query, field_name, meta_name, expected):
    """Ticket fields are found through transaction-level metadata.

    Only the transaction carries the link, so the function must fall back
    to it. The result is compared directly against the expected set.
    """
    func = ticket_query(const_operands(field_name, meta_name))
    txn = testutil.Transaction(**{meta_name: 'rt:1'}, postings=[
        ('Assets:Cash', 80),
    ])
    context = RowContext(txn, txn.postings[0])
    assert func(context) == expected
 

	
 
@pytest.mark.parametrize('field_name,meta_name,expected', [
    ('id', 'rt-id', {2}),
    ('Queue', 'approval', {'general'}),
    ('Requestors', 'invoice', {'mx2@example.org', 'requestor2@example.org'}),
    ('Due', 'tax-reporting', {datetime.datetime(2017, 1, 14, 12, 2, 0, tzinfo=UTC)}),
])
def test_rt_ticket_from_post(ticket_query, field_name, meta_name, expected):
    """Posting-level metadata takes precedence over the transaction's.

    The transaction links ticket 1 and the posting links ticket 2; the
    expected values all come from ticket 2.
    """
    func = ticket_query(const_operands(field_name, meta_name))
    txn = testutil.Transaction(**{meta_name: 'rt:1'}, postings=[
        ('Assets:Cash', 110, {meta_name: 'rt:2/8'}),
    ])
    context = RowContext(txn, txn.postings[0])
    assert func(context) == expected
 

	
 
@pytest.mark.parametrize('field_name,meta_name,expected,on_txn', [
    ('id', 'approval', {1, 2}, True),
    ('Queue', 'check', {'general'}, False),
    ('Requestors', 'invoice', {
        'mx1@example.org',
        'mx2@example.org',
        'requestor2@example.org',
    }, False),
])
def test_rt_ticket_multi_results(ticket_query, field_name, meta_name, expected, on_txn):
    """Values from several linked tickets are combined into one set.

    The metadata value links tickets 1 and 2 plus a non-RT document. Values
    shared by both tickets appear once, because the result is a set.
    """
    func = ticket_query(const_operands(field_name, meta_name))
    txn = testutil.Transaction(**{'rt-id': 'rt:1'}, postings=[
        ('Assets:Cash', 110, {'rt-id': 'rt:2'}),
    ])
    post = txn.postings[0]
    # Attach the links to either the transaction or the posting.
    meta = txn.meta if on_txn else post.meta
    meta[meta_name] = 'rt:1/2 Docs/12.pdf rt:2/8'
    context = RowContext(txn, post)
    assert func(context) == expected
 

	
 
@pytest.mark.parametrize('meta_value,on_txn', testutil.combine_values(
    ['', 'Docs/34.pdf', 'Docs/100.pdf Docs/120.pdf'],
    [True, False],
))
def test_rt_ticket_no_results(ticket_query, meta_value, on_txn):
    """Metadata with no RT links produces an empty set."""
    func = ticket_query(const_operands('Queue', 'check'))
    txn = testutil.Transaction(**{'rt-id': 'rt:1'}, postings=[
        ('Assets:Cash', 110, {'rt-id': 'rt:2'}),
    ])
    post = txn.postings[0]
    # Attach the value to either the transaction or the posting.
    meta = txn.meta if on_txn else post.meta
    meta['check'] = meta_value
    context = RowContext(txn, post)
    assert func(context) == set()
 

	
 
def test_rt_ticket_caches_tickets():
    """A ticket that was found once is served from the cache afterwards."""
    rt_client = testutil.RTClient()
    # Work on a copy so deleting a ticket does not affect other tests.
    rt_client.TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
    ticket_query = qmod.RTTicket.with_client(rt_client, 'cachetestA')
    func = ticket_query(const_operands('id', 'rt-id'))
    txn = testutil.Transaction(postings=[
        ('Assets:Cash', 160, {'rt-id': 'rt:3'}),
    ])
    context = RowContext(txn, txn.postings[0])
    assert func(context) == {3}
    # Remove the ticket from the client; the same result must still come back.
    del rt_client.TICKET_DATA['3']
    assert func(context) == {3}
 

	
 
def test_rt_ticket_caches_tickets_not_found():
    """A failed ticket lookup is cached too, so it is not retried."""
    rt_client = testutil.RTClient()
    # Work on a copy so removing a ticket does not affect other tests.
    rt_client.TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
    rt3 = rt_client.TICKET_DATA.pop('3')
    ticket_query = qmod.RTTicket.with_client(rt_client, 'cachetestB')
    func = ticket_query(const_operands('id', 'rt-id'))
    txn = testutil.Transaction(postings=[
        ('Assets:Cash', 160, {'rt-id': 'rt:3'}),
    ])
    context = RowContext(txn, txn.postings[0])
    assert func(context) == set()
    # Restore the ticket in the client; the cached miss must still win.
    rt_client.TICKET_DATA['3'] = rt3
    assert func(context) == set()
 

	
 
def test_books_loader_empty():
    """A BooksLoader built with None yields no entries and exactly one error."""
    result = qmod.BooksLoader(None)()
    assert not result.entries
    assert len(result.errors) == 1
 

	
 
def test_books_loader_plain():
    """Loading the 2018 test books gives entries and no errors.

    Every loaded entry is dated on or after 2018-03-01.
    """
    books_path = testutil.test_path('books/books/2018.beancount')
    loader = testutil.TestBooksLoader(books_path)
    result = qmod.BooksLoader(loader)()
    assert not result.errors
    assert result.entries
    min_date = datetime.date(2018, 3, 1)
    assert all(ent.date >= min_date for ent in result.entries)
 

	
 
def test_books_loader_rewrites():
    """Rewrite rulesets passed to BooksLoader are applied to the postings.

    With the mock ruleset in place, every posting amount in the loaded
    books is expected to be at least 40 in absolute value.
    """
    rewrites = [MockRewriteRuleset()]
    books_path = testutil.test_path('books/books/2018.beancount')
    loader = testutil.TestBooksLoader(books_path)
    result = qmod.BooksLoader(loader, None, None, rewrites)()
    assert not result.errors
    assert result.entries
    # Entries without postings contribute nothing.
    numbers = frozenset(
        abs(post.units.number)
        for entry in result.entries
        for post in getattr(entry, 'postings', ())
    )
    assert numbers
    assert all(abs(number) >= 40 for number in numbers)
 

	
 
@pytest.mark.parametrize('arglist,fy', testutil.combine_values(
    [['--report-type', 'text'], ['--format=text'], ['-f', 'txt']],
    range(2018, 2021),
))
def test_text_query(arglist, fy):
    """A text-format query prints one row per posting after two header lines.

    Every data row must start with a date in the fiscal year, followed by a
    narration that starts with the same year.
    """
    books_path = testutil.test_path(f'books/books/{fy}.beancount')
    config = testutil.TestConfig(books_path=books_path)
    # Build a new list instead of using +=. The parametrized list objects
    # may be shared between test cases, and extending one in place would
    # leak the query into later cases.
    arglist = [*arglist, 'select', 'date,', 'narration,', 'account,', 'position']
    returncode, stdout, stderr = pipe_main(arglist, config)
    assert returncode == 0
    stdout.seek(0)
    lines = iter(stdout)
    # Skip the two header lines.
    next(lines)
    next(lines)
    # Start at zero so empty output fails the final assertion instead of
    # raising NameError.
    count = 0
    for count, line in enumerate(lines, 1):
        assert re.match(rf'^{fy}-\d\d-\d\d\s+{fy} ', line)
    assert count >= 2
 

	
 
@pytest.mark.parametrize('arglist,fy', testutil.combine_values(
0 comments (0 inline, 0 general)