From 2eba5a5546009e1f044c6e4748e632168297482e 2020-08-30 14:03:57 From: Brett Smith Date: 2020-08-30 14:03:57 Subject: [PATCH] extract_odf_links: Add --relative-to option. This is just too convenient for our usual case where we expect all links to be relative to a specific directory (the repository). --- diff --git a/conservancy_beancount/tools/extract_odf_links.py b/conservancy_beancount/tools/extract_odf_links.py index 57fe52b428c8e27bab2cbc3aff7f98ab66ec8653..0659e58e435ef04ba6c3e1d26f1939b423892a35 100644 --- a/conservancy_beancount/tools/extract_odf_links.py +++ b/conservancy_beancount/tools/extract_odf_links.py @@ -72,29 +72,39 @@ supported in Python like \\n, \\t, \\0, \\u, etc. Default `%(default)s`. dest='delimiter', const='\0', help="""Shortcut for --delimiter=\\0 +""") + parser.add_argument( + '--relative-to', '-r', + metavar='PATH', + type=Path, + help="""Try to resolve all links relative to this path, rather than each +spreadsheet's path """) parser.add_argument( 'odf_paths', metavar='ODF_PATH', type=Path, nargs=argparse.ONE_OR_MORE, - help="""ODF file(s) to extract links from. Note that %(prog)s cannot -read from stdin because it needs to know document paths to resolve links. 
+ help="""ODF file(s) to extract links from """) - return parser.parse_args(arglist) + args = parser.parse_args(arglist) + if args.relative_to is None: + if any(path == cliutil.STDSTREAM_PATH for path in args.odf_paths): + parser.error("--relative-to is required to read from stdin") + elif args.relative_to.is_dir() or not args.relative_to.exists(): + args.relative_to /= 'PathStub.ods' + return args -def extract_links(odf_path: Path) -> Iterator[Path]: - with odf_path.open('rb') as odf_file: - odf_doc = odf.opendocument.load(odf_file) +def extract_links(odf_doc: odf.opendocument.OpenDocument, rel_path: Path) -> Iterator[Path]: for a_elem in odf_doc.getElementsByType(odf.text.A): parts = urllib.parse.urlparse(a_elem.getAttribute('href') or '') if (parts.scheme and parts.scheme != 'file') or not parts.path: continue path = Path(urllib.parse.unquote(parts.path)) if not path.is_absolute(): - path = (odf_path / path).resolve() + path = (rel_path / path).resolve() try: - path.relative_to(odf_path) + path.relative_to(rel_path) except ValueError: pass else: @@ -113,7 +123,9 @@ def main(arglist: Optional[Sequence[str]]=None, links: Set[Path] = set() for odf_path in args.odf_paths: try: - links.update(extract_links(odf_path)) + with cliutil.bytes_output(odf_path, sys.stdin, 'r') as odf_file: + odf_doc = odf.opendocument.load(odf_file) + links.update(extract_links(odf_doc, args.relative_to or odf_path)) except IOError as error: logger.error("error reading %s: %s", odf_path, error.strerror) returncode = os.EX_DATAERR diff --git a/tests/test_extract_odf_links.py b/tests/test_extract_odf_links.py index 46619ae30af84e332f8de5ab8d00164fbbf16989..1e4f2b8801f92b80a4622f963f6750c8ed6d7f28 100644 --- a/tests/test_extract_odf_links.py +++ b/tests/test_extract_odf_links.py @@ -15,6 +15,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>.
import io +import sys import pytest @@ -38,6 +39,14 @@ def expected_links(rel_path): for path in INCLUDED_FILE_LINKS ) +def check_output(stdout, sep, rel_path): + actual = stdout.getvalue().split(sep) + if actual and not actual[-1]: + actual.pop() + expected = expected_links(rel_path) + assert len(actual) == len(expected) + assert set(actual) == expected + @pytest.mark.parametrize('arglist,sep', [ (['-0'], '\0'), (['-d', '\\v'], '\v'), @@ -50,15 +59,33 @@ def test_extract_file_links(arglist, sep, caplog): exitcode = extract_odf_links.main(arglist, stdout, stderr) assert exitcode == 0 assert not stderr.getvalue() - actual = stdout.getvalue().split(sep) - if actual and not actual[-1]: - actual.pop() - expected = expected_links(SRC_PATH.parent) - assert len(actual) == len(expected) - assert set(actual) == expected + check_output(stdout, sep, SRC_PATH.parent) assert caplog.records assert any( log.levelname == 'WARNING' and log.message.endswith('/Bad Link.txt not found') for log in caplog.records ) + +@pytest.mark.parametrize('rel_path', [ + Path('/run'), + Path('/tmp'), +]) +def test_extract_relative_to(rel_path): + arglist = ['--relative', str(rel_path), '-0', '-'] + stdout = io.StringIO() + stderr = io.StringIO() + orig_stdin = sys.stdin + try: + sys.stdin = SRC_PATH.open('rb') + exitcode = extract_odf_links.main(arglist, stdout, stderr) + finally: + sys.stdin = orig_stdin + assert exitcode == 0 + assert not stderr.getvalue() + check_output(stdout, '\0', rel_path) + +def test_reading_stdin_requires_relative_to(): + with pytest.raises(SystemExit) as exc_check: + extract_odf_links.main(['-']) + assert exc_check.value.args[0] == 2