NPO-Accounting/conservancy_beancount Changeset - ef03893bfed3

Changeset - ef03893bfed3

Parent rev.

Child rev.

[Not reviewed]

0 2 0

Brett Smith - 3 years ago 2021-03-12 15:56:43
brettcsmith@brettcsmith.org

query: Convert query functions that return List to Set.

Beancount's built-in renderers expect this and are better equipped for it.

2 files changed with 27 insertions and 34 deletions:

conservancy_beancount/reports/query.py

tests/test_reports_query.py

0 comments (0 inline, 0 general)

conservancy_beancount/reports/query.py

➞

Show inline comments

@@ ... / @@ -112,236 +112,234 @@ SENTINEL = object() @@
 # Logger shared by everything in this module.
 logger = logging.getLogger('conservancy_beancount.reports.query')
 # A CellFunc takes one arbitrary value and returns an ODF table cell for it.
 CellFunc = Callable[[Any], odf.table.TableCell]
 # Maps a function key to the EvalFunction class registered under it.
 EnvironmentFunctions = Dict[
     # The real key type is something like:
     #   Union[str, Tuple[str, Type, ...]]
     # but two issues with that. One, you can't use Ellipses in a Tuple like
     # that, so there's no short way to declare this. Second, Beancount doesn't
     # declare it anyway, and mypy infers it as Sequence[object]. So just use
     # that.
     Sequence[object],
     Type[bc_query_compile.EvalFunction],
+]
 # (name, type) pairs - presumably one per result column; confirm with callers.
 RowTypes = Sequence[Tuple[str, Type]]
 # Result rows, each a named tuple.
 Rows = Sequence[NamedTuple]
 # What an RT ticket lookup yields: a mapping of ticket fields, or None.
 RTResult = Optional[Mapping[Any, Any]]
 # Storage that aggregators index with their allocated handle.
 Store = List[Any]
 # Any parser node that can appear as an expression in a query.
 QueryExpression = Union[
     bc_query_parser.Column,
     bc_query_parser.Constant,
     bc_query_parser.Function,
     bc_query_parser.UnaryOp,
+]
 # Any parser node that represents a whole query statement.
 QueryStatement = Union[
     bc_query_parser.Balances,
     bc_query_parser.Journal,
     bc_query_parser.Select,
+]
 # This class annotates the types that Beancount's RowContexts have when they're
 # passed to EvalFunction.__call__(). These types get set across
 # create_row_context and execute_query.
 class PostingContext:
     """Annotation-only description of the row context given to query functions.

     Nothing here is instantiated by this class itself; it only declares the
     attributes that functions in this module read from the context argument
     of ``__call__``.
     """
     posting: Posting
     entry: Transaction
     balance: Inventory
     options_map: OptionsMap
     account_types: Mapping
     open_close_map: Mapping
     commodity_map: Mapping
     price_map: Mapping
     # Dynamically set by execute_query
     store: Store
 class MetaDocs(bc_query_env.AnyMeta):
     """Return the set of document links found in a metadata value."""
     def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
         # Deliberately start the super() lookup *after* AnyMeta so we can
         # pass our own return type.
         super(bc_query_env.AnyMeta, self).__init__(operands, set)
         # The second argument is our return type.
         # It should match the annotated return type of __call__.
     def __call__(self, context: PostingContext) -> Set[str]:
         """Split the metadata value on whitespace and return the unique links.

         Any value that is not a string (including a missing one) yields an
         empty set.
         """
         raw_value = super().__call__(context)
         seq = raw_value.split() if isinstance(raw_value, str) else ''
         return set(seq)
 class RTField(NamedTuple):
     """Describe how to read one field out of an RT ticket mapping.

     ``key`` is the name to look up, ``parse`` optionally converts the raw
     value, and ``unset_value`` is the raw value that means "nothing set".
     """
     key: str
     parse: Optional[Callable[[str], object]]
     unset_value: Optional[str] = None
     def load(self, rt_ticket: RTResult) -> object:
         """Return this field's value from ``rt_ticket``, or None if unset."""
         if not rt_ticket:
             return None
         raw = rt_ticket.get(self.key)
         if not raw or raw == self.unset_value:
             return None
         return raw if self.parse is None else self.parse(raw)
 class RTTicket(bc_query_compile.EvalFunction):
     """Look up a field from RT ticket(s) mentioned in metadata documentation

     The operands are, in order: the RT field name, the metadata key whose
     value holds the links, and an optional limit on how many distinct tickets
     to consult. When the limit is omitted, ``sys.maxsize`` is used. The class
     must first be bound to an RT client with ``with_client()``.
     """
     # Operand types: RT field name, metadata key, ticket limit.
     __intypes__ = [str, str, int]
     # Fields whose raw value is returned without parsing.
     FIELDS = {key: RTField(key, None) for key in [
         'AdminCc',
         'Cc',
         'Creator',
         'Owner',
         'Queue',
         'Status',
         'Subject',
         'Requestors',
     ]}
     # Fields converted with int(); a raw '0' is treated as unset.
     FIELDS.update((key, RTField(key, int, '0')) for key in [
         'numerical_id',
         'FinalPriority',
         'InitialPriority',
         'Priority',
         'TimeEstimated',
         'TimeLeft',
         'TimeWorked',
     ])
     # Fields converted with rtutil.RTDateTime; a raw 'Not set' is treated as
     # unset.
     FIELDS.update((key, RTField(key, rtutil.RTDateTime, 'Not set')) for key in [
         'Created',
         'Due',
         'LastUpdated',
         'Resolved',
         'Started',
         'Starts',
         'Told',
     ])
     # Accept the all-lowercase spelling of every field name too.
     FIELDS.update({key.lower(): value for key, value in FIELDS.items()})
     # Extra aliases for commonly used spellings.
     FIELDS['id'] = FIELDS['numerical_id']
     FIELDS['AdminCC'] = FIELDS['AdminCc']
     FIELDS['CC'] = FIELDS['Cc']
     # Only declared here; it is assigned on the subclasses that with_client()
     # builds, which is what __init__ checks for.
     RT_CLIENT: ClassVar[rt.Rt]
     # _CACHES holds all of the caches for different RT instances that have
     # been passed through RTTicket.with_client().
     _CACHES: ClassVar[Dict[Hashable, MutableMapping[str, RTResult]]] = {}
     # _rt_cache is the cache specific to this RT_CLIENT.
     _rt_cache: ClassVar[MutableMapping[str, RTResult]] = {}
     @classmethod
     def with_client(cls, client: rt.Rt, cache_key: Hashable) -> Type['RTTicket']:
         """Return a subclass bound to ``client``.

         The subclass uses the ticket cache registered under ``cache_key``,
         creating an empty one the first time that key is seen.
         """
         return type(cls.__name__, (cls,), {
             'RT_CLIENT': client,
             '_rt_cache': cls._CACHES.setdefault(cache_key, {}),
         })
     def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
         """Validate constant operands and fill in the default ticket limit.

         Raises RuntimeError when the class has no RT client, and ValueError
         when a constant field name or metadata key is not recognized.
         """
         if not hasattr(self, 'RT_CLIENT'):
             raise RuntimeError("no RT client available - cannot use rt_ticket()")
         rt_op, meta_op, *rest = operands
         # We have to evaluate the RT and meta keys on each call, because they
         # might themselves be dynamic. In the common case they're constants.
         # In that case, check for typos so we can report an error to the user
         # before execution even begins.
         if isinstance(rt_op, bc_query_compile.EvalConstant):
             self._rt_key(rt_op.value)
         if isinstance(meta_op, bc_query_compile.EvalConstant):
             self._meta_key(meta_op.value)
         if not rest:
             # No limit operand was given. Note this appends to the caller's
             # list in place.
             operands.append(bc_query_compile.EvalConstant(sys.maxsize))
-        super().__init__(operands, list)
+        super().__init__(operands, set)
     def _rt_key(self, key: str) -> RTField:
         """Return the RTField registered for ``key`` or raise ValueError."""
         try:
             return self.FIELDS[key]
         except KeyError:
             raise ValueError(f"unknown RT ticket field {key!r}") from None
     def _meta_key(self, key: str) -> str:
         """Return ``key`` if it is in data.LINK_METADATA, else raise ValueError."""
         if key in data.LINK_METADATA:
             return key
         else:
             raise ValueError(f"metadata key {key!r} does not contain documentation links")
-    def __call__(self, context: PostingContext) -> list:
+    def __call__(self, context: PostingContext) -> Set[object]:
         rt_key: str
         meta_key: str
         limit: int
         rt_key, meta_key, limit = self.eval_args(context)
         rt_field = self._rt_key(rt_key)
         meta_key = self._meta_key(meta_key)
         # Posting metadata wins; SENTINEL marks "key absent on the posting".
         if context.posting.meta is None:
             meta_value: Any = SENTINEL
         else:
             meta_value = context.posting.meta.get(meta_key, SENTINEL)
         # Fall back to the transaction's metadata only when the posting has
         # no value at all for this key.
         if meta_value is SENTINEL:
             meta_value = context.entry.meta.get(meta_key)
         # A non-string value or a limit below 1 means no tickets to look up.
         if not isinstance(meta_value, str) or limit < 1:
             meta_value = ''
         # Collect distinct ticket ids, stopping once `limit` is reached.
         ticket_ids: Set[str] = set()
         for link_s in meta_value.split():
             rt_id = rtutil.RT.parse(link_s)
             if rt_id is not None:
                 # First element of the parse result - presumably the ticket
                 # id; verify against rtutil.RT.parse.
                 ticket_ids.add(rt_id[0])
                 if len(ticket_ids) >= limit:
                     break
-        retval: List[object] = []
+        retval: Set[object] = set()
         for ticket_id in ticket_ids:
             try:
                 rt_ticket = self._rt_cache[ticket_id]
             except KeyError:
                 # Whatever get_ticket() returns is cached as-is, so each
                 # ticket id is fetched at most once per cache.
                 rt_ticket = self.RT_CLIENT.get_ticket(ticket_id)
                 self._rt_cache[ticket_id] = rt_ticket
             field_value = rt_field.load(rt_ticket)
             if field_value is None:
                 pass
             elif isinstance(field_value, list):
                 # List-valued fields are flattened into the result.
-                retval.extend(field_value)
+                retval.update(field_value)
             else:
-                retval.append(field_value)
+                retval.add(field_value)
         return retval
 class StrMeta(bc_query_env.AnyMeta):
     """Fetch metadata the way AnyMeta does, but always hand back a string."""
     def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
         # Start the super() lookup after AnyMeta so we can declare ``str`` as
         # our return type.
         super(bc_query_env.AnyMeta, self).__init__(operands, str)
     def __call__(self, context: PostingContext) -> str:
         """Return the metadata value as text; a missing value becomes ''."""
         found = super().__call__(context)
         return '' if found is None else str(found)
 class AggregateSet(bc_query_compile.EvalAggregator):
     """Filter argument values that aren't unique."""
     __intypes__ = [object]
     def __init__(self, operands: List[bc_query_compile.EvalNode]) -> None:
         super().__init__(operands, set)
     def allocate(self, allocator: bc_query_execute.Allocator) -> None:
         """Allocate and save an index handle into result storage."""
         self.handle = allocator.allocate()
     def initialize(self, store: Store) -> None:
         """Prepare result storage for a new aggregation."""
         store[self.handle] = self.dtype()
         # self.dtype() is our return type, aka the second argument to __init__
         # above, aka the annotated return type of __call__.
     def update(self, store: Store, context: PostingContext) -> None:
         """Update existing storage with new result data.

         Multi-valued arguments are merged element by element; everything
         else is added as a single member. Strings and tuples count as single
         values even though they are sequences.
         """
         value, = self.eval_args(context)
         # Sets must be flattened too: they are not Sequences, and adding one
         # as a member would raise TypeError because sets are unhashable.
         # Query functions in this module return sets.
         is_multi = isinstance(value, set) or (
             isinstance(value, Sequence) and not isinstance(value, (str, tuple))
         )
         if is_multi:
             store[self.handle].update(value)
         else:
             store[self.handle].add(value)
     def __call__(self, context: PostingContext) -> set:
         """Return the result for an aggregation."""
         return context.store[self.handle]  # type:ignore[no-any-return]
 class _EnvironmentMixin:
     functions: EnvironmentFunctions

tests/test_reports_query.py

➞

Show inline comments

@@ ... / @@ -41,189 +41,184 @@ class MockRewriteRuleset: @@
             number, currency = post.units
             number *= self.multiplier
             yield post._replace(units=testutil.Amount(number, currency))
 class RowContext(bc_query_execute.RowContext):
     """Row context for tests that carries only an entry and a posting."""
     def __init__(self, entry, posting=None):
         super().__init__()
         self.entry, self.posting = entry, posting
 @pytest.fixture(scope='module')
 def qparser():
     """Provide one query parser for the whole test module."""
     parser = bc_query_parser.Parser()
     return parser
 @pytest.fixture(scope='module')
 def rt():
     """Provide an rtutil.RT wrapper around the test RT client."""
     client = testutil.RTClient()
     return rtutil.RT(client)
 @pytest.fixture(scope='module')
 def ticket_query():
     """Provide an RTTicket class bound to a test client and its own cache key."""
     client = testutil.RTClient()
     return qmod.RTTicket.with_client(client, 'testfixture')
 def const_operands(*args):
     """Wrap each argument in an EvalConstant and return them as a list."""
     return list(map(bc_query_compile.EvalConstant, args))
 def pipe_main(arglist, config, stdout_type=io.StringIO):
     """Run qmod.main() against in-memory streams.

     Returns a ``(returncode, stdout, stderr)`` tuple. ``stdout`` is an
     instance of ``stdout_type``; ``stderr`` is always an io.StringIO.
     """
     out_stream, err_stream = stdout_type(), io.StringIO()
     exit_code = qmod.main(arglist, out_stream, err_stream, config)
     return exit_code, out_stream, err_stream
 def test_rt_ticket_unconfigured():
     """The unbound RTTicket class refuses to be instantiated."""
     operands = const_operands('id', 'rt-id')
     with pytest.raises(RuntimeError):
         qmod.RTTicket(operands)
 @pytest.mark.parametrize('field_name', ['foo', 'bar'])
 def test_rt_ticket_bad_field(ticket_query, field_name):
     """An unknown constant RT field name is rejected at construction."""
     operands = const_operands(field_name, 'rt-id')
     with pytest.raises(ValueError):
         ticket_query(operands)
 @pytest.mark.parametrize('meta_name', ['foo', 'bar'])
 def test_rt_ticket_bad_metadata(ticket_query, meta_name):
     """A constant metadata key that holds no links is rejected at construction."""
     operands = const_operands('id', meta_name)
     with pytest.raises(ValueError):
         ticket_query(operands)
 @pytest.mark.parametrize('field_name,meta_name,expected', [
     ('id', 'rt-id', {1}),
     ('Queue', 'approval', {'general'}),
     ('Requestors', 'invoice', {'mx1@example.org', 'requestor2@example.org'}),
     ('Due', 'tax-reporting', {datetime.datetime(2017, 1, 14, 12, 1, 0, tzinfo=UTC)}),
 ])
 def test_rt_ticket_from_txn(ticket_query, field_name, meta_name, expected):
     """Links in transaction metadata are used when the posting has none."""
     func = ticket_query(const_operands(field_name, meta_name))
     txn = testutil.Transaction(**{meta_name: 'rt:1'}, postings=[
         ('Assets:Cash', 80),
     ])
     context = RowContext(txn, txn.postings[0])
     # The function returns a set, so compare against the set directly.
     assert func(context) == expected
 @pytest.mark.parametrize('field_name,meta_name,expected', [
     ('id', 'rt-id', {2}),
     ('Queue', 'approval', {'general'}),
     ('Requestors', 'invoice', {'mx2@example.org', 'requestor2@example.org'}),
     ('Due', 'tax-reporting', {datetime.datetime(2017, 1, 14, 12, 2, 0, tzinfo=UTC)}),
 ])
 def test_rt_ticket_from_post(ticket_query, field_name, meta_name, expected):
     """Links in posting metadata take precedence over the transaction's."""
     func = ticket_query(const_operands(field_name, meta_name))
     txn = testutil.Transaction(**{meta_name: 'rt:1'}, postings=[
         ('Assets:Cash', 110, {meta_name: 'rt:2/8'}),
     ])
     context = RowContext(txn, txn.postings[0])
     # The function returns a set, so compare against the set directly.
     assert func(context) == expected
 @pytest.mark.parametrize('field_name,meta_name,expected,on_txn', [
     ('id', 'approval', {1, 2}, True),
     ('Queue', 'check', {'general'}, False),
     ('Requestors', 'invoice', {
         'mx1@example.org',
         'mx2@example.org',
         'requestor2@example.org',
     }, False),
 ])
 def test_rt_ticket_multi_results(ticket_query, field_name, meta_name, expected, on_txn):
     """Values from several linked tickets are merged into one set."""
     func = ticket_query(const_operands(field_name, meta_name))
     txn = testutil.Transaction(**{'rt-id': 'rt:1'}, postings=[
         ('Assets:Cash', 110, {'rt-id': 'rt:2'}),
     ])
     post = txn.postings[0]
     meta = txn.meta if on_txn else post.meta
     # Two RT links with a non-RT document between them.
     meta[meta_name] = 'rt:1/2 Docs/12.pdf rt:2/8'
     context = RowContext(txn, post)
     # Sets are unordered and deduplicated, so no sorting is needed.
     assert func(context) == expected
 @pytest.mark.parametrize('meta_value,on_txn', testutil.combine_values(
     ['', 'Docs/34.pdf', 'Docs/100.pdf Docs/120.pdf'],
     [True, False],
 ))
 def test_rt_ticket_no_results(ticket_query, meta_value, on_txn):
     """Metadata without any RT links produces an empty result."""
     func = ticket_query(const_operands('Queue', 'check'))
     txn = testutil.Transaction(**{'rt-id': 'rt:1'}, postings=[
         ('Assets:Cash', 110, {'rt-id': 'rt:2'}),
     ])
     post = txn.postings[0]
     # Put the value under test on either the transaction or the posting.
     meta = txn.meta if on_txn else post.meta
     meta['check'] = meta_value
     context = RowContext(txn, post)
-    assert func(context) == []
+    assert func(context) == set()
 def test_rt_ticket_caches_tickets():
     """A ticket fetched once is still returned after the client loses it."""
     rt_client = testutil.RTClient()
     # Give this client its own copy so the deletion below stays local.
     rt_client.TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
     ticket_query = qmod.RTTicket.with_client(rt_client, 'cachetestA')
     func = ticket_query(const_operands('id', 'rt-id'))
     txn = testutil.Transaction(postings=[
         ('Assets:Cash', 160, {'rt-id': 'rt:3'}),
     ])
     context = RowContext(txn, txn.postings[0])
-    assert func(context) == [3]
+    assert func(context) == {3}
     # Remove the ticket from the client; the second call must hit the cache.
     del rt_client.TICKET_DATA['3']
-    assert func(context) == [3]
+    assert func(context) == {3}
 def test_rt_ticket_caches_tickets_not_found():
     """A failed lookup is cached too, so a later-added ticket stays unseen."""
     rt_client = testutil.RTClient()
     # Give this client its own copy so the changes below stay local.
     rt_client.TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
     rt3 = rt_client.TICKET_DATA.pop('3')
     ticket_query = qmod.RTTicket.with_client(rt_client, 'cachetestB')
     func = ticket_query(const_operands('id', 'rt-id'))
     txn = testutil.Transaction(postings=[
         ('Assets:Cash', 160, {'rt-id': 'rt:3'}),
     ])
     context = RowContext(txn, txn.postings[0])
-    assert func(context) == []
+    assert func(context) == set()
     # Restore the ticket; the cached miss must still win.
     rt_client.TICKET_DATA['3'] = rt3
-    assert func(context) == []
+    assert func(context) == set()
 def test_books_loader_empty():
     """A BooksLoader built from None reports one error and no entries."""
     load = qmod.BooksLoader(None)
     result = load()
     assert not result.entries
     assert len(result.errors) == 1
 def test_books_loader_plain():
     """Loading the 2018 books yields entries dated 2018-03-01 or later."""
     # A plain literal: the path has no placeholders, so no f-string is needed.
     books_path = testutil.test_path('books/books/2018.beancount')
     loader = testutil.TestBooksLoader(books_path)
     result = qmod.BooksLoader(loader)()
     assert not result.errors
     assert result.entries
     min_date = datetime.date(2018, 3, 1)
     assert all(ent.date >= min_date for ent in result.entries)
 def test_books_loader_rewrites():
     """Rewrite rulesets passed to BooksLoader are applied to loaded postings."""
     rewrites = [MockRewriteRuleset()]
     # A plain literal: the path has no placeholders, so no f-string is needed.
     books_path = testutil.test_path('books/books/2018.beancount')
     loader = testutil.TestBooksLoader(books_path)
     result = qmod.BooksLoader(loader, None, None, rewrites)()
     assert not result.errors
     assert result.entries
     # Entries without postings contribute nothing.
     numbers = frozenset(
         abs(post.units.number)
         for entry in result.entries
         for post in getattr(entry, 'postings', ())
     )
     assert numbers
     assert all(abs(number) >= 40 for number in numbers)
 @pytest.mark.parametrize('arglist,fy', testutil.combine_values(
     [['--report-type', 'text'], ['--format=text'], ['-f', 'txt']],
     range(2018, 2021),
 ))
 def test_text_query(arglist, fy):
     """Each way of asking for text output prints rows for the given year."""
     books_path = testutil.test_path(f'books/books/{fy}.beancount')
     config = testutil.TestConfig(books_path=books_path)
     # Build a new list instead of using +=, which would extend the list
     # object owned by the parametrize data in place.
     arglist = [*arglist, 'select', 'date,', 'narration,', 'account,', 'position']
     returncode, stdout, stderr = pipe_main(arglist, config)
     assert returncode == 0
     stdout.seek(0)
     lines = iter(stdout)
     # Skip header. The default keeps short output from raising StopIteration
     # here; the count assertion below reports it instead.
     next(lines, None)
     next(lines, None)
     # Start at zero so empty output fails the final assertion rather than
     # raising NameError on an unbound loop variable.
     count = 0
     for count, line in enumerate(lines, 1):
         assert re.match(rf'^{fy}-\d\d-\d\d\s+{fy} ', line)
     assert count >= 2
 @pytest.mark.parametrize('arglist,fy', testutil.combine_values(

0 comments (0 inline, 0 general)