From d8507a1a35a7dc991696f76b28fc72c9268a7022 2020-04-23 14:27:47
From: Brett Smith <brettcsmith@brettcsmith.org>
Date: 2020-04-23 14:27:47
Subject: [PATCH] rtutil: Add RTUtil.metadata_regexp() classmethod.

The accruals check script wants to be able to search for RT links in
all kinds of metadata, not just the rt-id metadata that the filter
currently handles.

---

diff --git a/conservancy_beancount/filters.py b/conservancy_beancount/filters.py
index 8cd92c6ba0c7168aa3786dbe46893d6e16cd6361..5bfba62c9b738fdcb766c532dbf334e73363d117 100644
--- a/conservancy_beancount/filters.py
+++ b/conservancy_beancount/filters.py
@@ -17,6 +17,7 @@
 import re
 
 from . import data
+from . import rtutil
 
 from typing import (
     Iterable,
@@ -53,5 +54,5 @@ def filter_for_rt_id(postings: Postings, ticket_id: Union[int, str]) -> Postings
     This functions yields postings where the *first* rt-id matches the given
     ticket number.
     """
-    regexp = r'^\s*rt:(?://ticket/)?{}\b'.format(re.escape(str(ticket_id)))
+    regexp = rtutil.RT.metadata_regexp(ticket_id, first_link_only=True)
     return filter_meta_match(postings, 'rt-id', regexp)
diff --git a/conservancy_beancount/rtutil.py b/conservancy_beancount/rtutil.py
index a0173baf79e45cbc64c2e6d41ca0bf7b67f32583..f8c79746fd7f2badf31b82ebb2515767639b195d 100644
--- a/conservancy_beancount/rtutil.py
+++ b/conservancy_beancount/rtutil.py
@@ -266,6 +266,35 @@ class RT:
     def exists(self, ticket_id: RTId, attachment_id: Optional[RTId]=None) -> bool:
         return self.url(ticket_id, attachment_id) is not None
 
+    @classmethod
+    def metadata_regexp(cls,
+                        ticket_id: RTId,
+                        attachment_id: Optional[RTId]=None,
+                        *,
+                        first_link_only: bool=False
+    ) -> str:
+        """Return a pattern to find RT links in metadata
+
+        Given a ticket ID and optional attachment ID, this method returns a
+        regular expression pattern that will find matching RT links in a
+        metadata value string, with or without the optional `//ticket/` and
+        `attachment(s)/` path parts or a trailing slash. A link only matches
+        when whitespace or the ends of the string delimit it.
+
+        If the keyword-only argument first_link_only is true, the pattern will
+        only match the first link in a metadata string. Otherwise the pattern
+        matches any link in the string (the default).
+        """
+        import re
+        prolog = r'^\s*' if first_link_only else r'(?:^|\s)'
+        # Escape the IDs so a string ID can never inject regexp syntax.
+        ticket = r'rt:(?://ticket/)?' + re.escape(str(ticket_id))
+        if attachment_id is None:
+            attachment = ''
+        else:
+            attachment = r'/(?:attachments?/)?' + re.escape(str(attachment_id))
+        return rf'{prolog}{ticket}{attachment}/?(?:$|\s)'
+
     @classmethod
     def parse(cls, s: str) -> Optional[Tuple[str, Optional[str]]]:
         for regexp in cls.PARSE_REGEXPS:
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 1c2cbc26155b6b13e7386efe430f92d84b038254..24b02c4edd7d461f94adb95bbc4f4736f1c68240 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -111,11 +111,9 @@ def test_filter_for_rt_id(cc_txn_pair, ticket_id, expected_indexes):
 
 @pytest.mark.parametrize('rt_id', [
     'rt:450/',
-    'rt:450/678',
     ' rt:450 rt:540',
     'rt://ticket/450',
     'rt://ticket/450/',
-    'rt://ticket/450/678',
     ' rt://ticket/450',
     'rt://ticket/450 rt://ticket/540',
 ])
diff --git a/tests/test_rtutil.py b/tests/test_rtutil.py
index c57bf40e17fd93d920f425b756d38617cbd0b306..1fed60fe632bc9feac68b8a172c368327350c755 100644
--- a/tests/test_rtutil.py
+++ b/tests/test_rtutil.py
@@ -15,6 +15,8 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import contextlib
+import itertools
+import re
 
 import pytest
 
@@ -62,6 +64,28 @@ def test_url(rt, ticket_id, attachment_id, expected):
         expected = DEFAULT_RT_URL + expected
     assert rt.url(ticket_id, attachment_id) == expected
 
+@pytest.mark.parametrize('attachment_id,first_link_only', itertools.product(
+    [245, None],
+    [True, False],
+))
+def test_metadata_regexp(rt, attachment_id, first_link_only):
+    """Check the pattern matches whole links only, honoring first_link_only."""
+    # Build the short and the long spelling of the same link target.
+    tail = '' if attachment_id is None else f'/{attachment_id}'
+    long_tail = '' if attachment_id is None else f'/attachments/{attachment_id}'
+    links = [f'rt:220{tail}', f'rt://ticket/220{long_tail}']
+    pattern = re.compile(
+        rt.metadata_regexp(220, attachment_id, first_link_only=first_link_only))
+    for link in links:
+        assert pattern.search(link)
+        assert pattern.search(f'{link} link2')
+        # Extra characters fused onto either end must not count as the link.
+        assert pattern.search(f'{link}0') is None
+        assert pattern.search(f'a{link}') is None
+        # A link that follows another word only matches in any-link mode.
+        trailing = pattern.search(f'link0 {link}')
+        assert (trailing is None) if first_link_only else trailing
+
 @pytest.mark.parametrize('attachment_id', [
     13,
     None,