NPO-Accounting/conservancy_beancount Changeset - 2eba5a554600

Changeset - 2eba5a554600

Parent rev.

Child rev.

[Not reviewed]

0 2 0

Brett Smith - 4 years ago 2020-08-30 14:03:57
brettcsmith@brettcsmith.org

extract_odf_links: Add --relative-to option.

This is just too convenient for our usual case where we expect
all links to be relative to a specific directory (the repository).

2 files changed with 54 insertions and 15 deletions:

conservancy_beancount/tools/extract_odf_links.py

tests/test_extract_odf_links.py

0 comments (0 inline, 0 general)

conservancy_beancount/tools/extract_odf_links.py

➞

Show inline comments

@@ ... @@
         dest='delimiter',
         const='\0',
         help="""Shortcut for --delimiter=\\0
 """)
     parser.add_argument(
         '--relative-to', '-r',
         metavar='PATH',
         type=Path,
         help="""Try to resolve all links relative to this path, rather than each
 spreadsheet's path
 """)
     parser.add_argument(
         'odf_paths',
         metavar='ODF_PATH',
         type=Path,
         nargs=argparse.ONE_OR_MORE,
         help="""ODF file(s) to extract links from. Note that %(prog)s cannot
 read from stdin because it needs to know document paths to resolve links.
         help="""ODF file(s) to extract links from
 """)
     return parser.parse_args(arglist)
     args = parser.parse_args(arglist)
     if args.relative_to is None:
         if any(path == cliutil.STDSTREAM_PATH for path in args.odf_paths):
             parser.error("--relative-to is required to read from stdin")
     elif args.relative_to.is_dir() or not args.relative_to.exists():
         args.relative_to /= 'PathStub.ods'
     return args
 def extract_links(odf_path: Path) -> Iterator[Path]:
     with odf_path.open('rb') as odf_file:
         odf_doc = odf.opendocument.load(odf_file)
 def extract_links(odf_doc: odf.opendocument.OpenDocument, rel_path: Path) -> Iterator[Path]:
     for a_elem in odf_doc.getElementsByType(odf.text.A):
         parts = urllib.parse.urlparse(a_elem.getAttribute('href') or '')
         if (parts.scheme and parts.scheme != 'file') or not parts.path:
             continue
         path = Path(urllib.parse.unquote(parts.path))
         if not path.is_absolute():
-            path = (odf_path / path).resolve()
+            path = (rel_path / path).resolve()
             try:
-                path.relative_to(odf_path)
+                path.relative_to(rel_path)
             except ValueError:
                 pass
             else:
@@ ... / @@ -113,7 +123,9 @@ def main(arglist: Optional[Sequence[str]]=None, @@
     links: Set[Path] = set()
     for odf_path in args.odf_paths:
         try:
             links.update(extract_links(odf_path))
             with cliutil.bytes_output(odf_path, sys.stdin, 'r') as odf_file:
                 odf_doc = odf.opendocument.load(odf_file)
             links.update(extract_links(odf_doc, args.relative_to or odf_path))
         except IOError as error:
             logger.error("error reading %s: %s", odf_path, error.strerror)
             returncode = os.EX_DATAERR

tests/test_extract_odf_links.py

➞

Show inline comments

@@ ... / @@ -15,6 +15,7 @@ @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 import io
 import sys
 import pytest
@@ ... / @@ -38,6 +39,14 @@ def expected_links(rel_path): @@
         for path in INCLUDED_FILE_LINKS
+    )
 def check_output(stdout, sep, rel_path):
     """Assert that captured output lists exactly the expected links.

     The text held by ``stdout`` is split on ``sep``. A single trailing
     empty field is discarded before the result is compared with
     ``expected_links(rel_path)``.
     """
     fields = stdout.getvalue().split(sep)
     # Drop the empty last field so it is not counted as a link.
     if fields and not fields[-1]:
         del fields[-1]
     wanted = expected_links(rel_path)
     # Compare the count first: the set comparison alone would not notice
     # a link that was written more than once.
     assert len(fields) == len(wanted)
     assert set(fields) == wanted
 @pytest.mark.parametrize('arglist,sep', [
     (['-0'], '\0'),
     (['-d', '\\v'], '\v'),
@@ ... / @@ -50,15 +59,33 @@ def test_extract_file_links(arglist, sep, caplog): @@
     exitcode = extract_odf_links.main(arglist, stdout, stderr)
     assert exitcode == 0
     assert not stderr.getvalue()
     actual = stdout.getvalue().split(sep)
     if actual and not actual[-1]:
         actual.pop()
     expected = expected_links(SRC_PATH.parent)
     assert len(actual) == len(expected)
     assert set(actual) == expected
     check_output(stdout, sep, SRC_PATH.parent)
     assert caplog.records
     assert any(
         log.levelname == 'WARNING'
         and log.message.endswith('/Bad Link.txt not found')
         for log in caplog.records
+    )
 @pytest.mark.parametrize('rel_path', [
     Path('/run'),
     Path('/tmp'),
 ])
 def test_extract_relative_to(rel_path):
     """Check link extraction from stdin with an explicit base path.

     The source spreadsheet is supplied as ``sys.stdin`` (the ``-``
     argument) and ``rel_path`` is passed on the command line; the
     NUL-separated output must match the links expected for ``rel_path``.
     """
     # NOTE(review): '--relative' presumably reaches the '--relative-to'
     # option through argparse prefix matching - confirm abbreviations
     # stay enabled on the parser.
     arglist = ['--relative', str(rel_path), '-0', '-']
     stdout = io.StringIO()
     stderr = io.StringIO()
     orig_stdin = sys.stdin
     # Open the document in a ``with`` block so the handle is closed even
     # when main() raises, rather than being leaked.
     with SRC_PATH.open('rb') as src_file:
         sys.stdin = src_file
         try:
             exitcode = extract_odf_links.main(arglist, stdout, stderr)
         finally:
             # Always restore the real stdin for the tests that follow.
             sys.stdin = orig_stdin
     assert exitcode == 0
     assert not stderr.getvalue()
     check_output(stdout, '\0', rel_path)
 def test_reading_stdin_requires_relative_to():
     """Passing only '-' as the arguments must exit with status 2."""
     stdin_only = ['-']
     with pytest.raises(SystemExit) as raised:
         extract_odf_links.main(stdin_only)
     # The first SystemExit argument carries the exit status.
     exit_status = raised.value.args[0]
     assert exit_status == 2

0 comments (0 inline, 0 general)