Changeset - b4dee2cf9ae9
[Not reviewed]
0 1 0
Brett Smith - 4 years ago 2020-08-10 14:49:03
brettcsmith@brettcsmith.org
extract_odf_links: Improve help text.
1 file changed with 2 insertions and 2 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/tools/extract_odf_links.py
Show inline comments
...
 
@@ -15,101 +15,101 @@ filesystem, and writes their full paths to stdout.
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 

	
 
import argparse
 
import logging
 
import os
 
import sys
 
import urllib.parse
 

	
 
from pathlib import Path
 
from zipfile import BadZipFile
 

	
 
import odf.opendocument  # type:ignore[import]
 
import odf.text  # type:ignore[import]
 

	
 
from typing import (
 
    Iterator,
 
    Optional,
 
    Sequence,
 
    Set,
 
    TextIO,
 
)
 

	
 
from .. import cliutil
 

	
 
PROGNAME = 'extract-odf-links'
 
logger = logging.getLogger('conservancy_beancount.tools.extract_odf_links')
 

	
 
def parse_delimiter(arg: str) -> str:
    """Decode Python backslash escapes in a command-line delimiter string.

    The argument is wrapped in double quotes and parsed as a Python string
    literal, so every escape Python understands (``\\n``, ``\\t``, ``\\0``,
    ``\\u2192``, ``\\"``, ...) is accepted.

    :param arg: The raw text given on the command line.
    :returns: The decoded string.
    :raises ValueError: If ``arg`` cannot be read as the body of a string
        literal, e.g. it ends with a lone backslash or has a truncated
        ``\\x`` escape. argparse reports this as an invalid argument value.
    """
    import ast
    import re

    # Escape only the double quotes that are not already escaped. Consuming
    # each backslash together with the character after it keeps existing
    # escapes (including \" and \\) intact. Blindly prefixing every quote
    # with a backslash would turn \" into \\", which closes the literal
    # early and lets the remainder be parsed as code.
    quoted = re.sub(
        r'\\.|"',
        lambda match: r'\"' if match.group() == '"' else match.group(),
        arg,
        flags=re.DOTALL,
    )
    try:
        # literal_eval only accepts literals, so nothing in arg can execute.
        retval = ast.literal_eval(f'"{quoted}"')
    except (SyntaxError, ValueError):
        retval = None
    if isinstance(retval, str):
        return retval
    raise ValueError(f"not a valid string: {arg!r}")
 

	
 
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
    """Build the command-line parser and parse ``arglist`` with it.

    Besides the options registered through ``cliutil.add_version_argument``
    and ``cliutil.add_loglevel_argument``, the parser accepts:

    * ``--delimiter``/``-d``: text written between links, decoded by
      ``parse_delimiter``.
    * ``--zero``/``--null``/``-z``/``-0``: stores a NUL character as the
      delimiter.
    * one or more ``ODF_PATH`` positionals, converted to ``Path`` objects.

    :param arglist: Arguments to parse. ``None`` is passed straight to
        ``ArgumentParser.parse_args``.
    :returns: The populated namespace.
    """
    parser = argparse.ArgumentParser(prog=PROGNAME)
    cliutil.add_version_argument(parser)
    cliutil.add_loglevel_argument(parser)
    parser.add_argument(
        '--delimiter', '-d',
        metavar='TEXT',
        type=parse_delimiter,
        # Kept in escaped form so %(default)s shows a readable backslash-n
        # in the help text; argparse runs string defaults through `type`.
        default='\\n',
        help="""String to output between links. Accepts all backslash escapes
supported in Python like \\n, \\t, \\0, \\u, etc. Default `%(default)s`.
""")
    parser.add_argument(
        '--zero', '--null', '-z', '-0',
        action='store_const',
        dest='delimiter',
        const='\0',
        help="""Shortcut for --delimiter=\\0
""")
    parser.add_argument(
        'odf_paths',
        metavar='ODF_PATH',
        type=Path,
        nargs=argparse.ONE_OR_MORE,
        help="""ODF file(s) to extract links from. Note that %(prog)s cannot
read from stdin because it needs to know document paths to resolve links.
""")
    return parser.parse_args(arglist)
 

	
 
def extract_links(odf_path: Path) -> Iterator[Path]:
    """Yield the filesystem path behind each local link in an ODF document.

    Every ``text:a`` element of the document is inspected. Links with a
    scheme other than ``file``, and links without a path component, are
    skipped. Absolute paths are yielded unchanged. Relative paths are joined
    onto ``odf_path`` and resolved; if the result is not under the
    document's directory a warning is logged, and the path is yielded
    regardless.

    :param odf_path: Location of the ODF file to read.
    :returns: An iterator of link targets, percent-decoded.
    """
    doc_dir = odf_path.parent.resolve()
    with odf_path.open('rb') as source:
        document = odf.opendocument.load(source)
    for anchor in document.getElementsByType(odf.text.A):
        href = anchor.getAttribute('href') or ''
        url = urllib.parse.urlparse(href)
        # Only scheme-less and file: links that carry a path are of interest.
        if not url.path or url.scheme not in ('', 'file'):
            continue
        path = Path(urllib.parse.unquote(url.path))
        if path.is_absolute():
            yield path
            continue
        path = (odf_path / path).resolve()
        try:
            path.relative_to(doc_dir)
        except ValueError:
            # Still yielded below; the warning only flags the odd location.
            logger.warning(f"link {path} is neither absolute nor relative to {odf_path}")
        yield path
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
 
         stdout: TextIO=sys.stdout,
 
         stderr: TextIO=sys.stderr,
 
) -> int:
 
    args = parse_arguments(arglist)
 
    cliutil.set_loglevel(logger, args.loglevel)
 

	
 
    returncode = 0
 
    links: Set[Path] = set()
 
    for odf_path in args.odf_paths:
 
        try:
 
            links.update(extract_links(odf_path))
 
        except IOError as error:
0 comments (0 inline, 0 general)