From 3219bf89d2464013bde44c2dcddb7fa6e53ade2f 2020-09-09 03:37:00
From: Brett Smith
Date: 2020-09-09 03:37:00
Subject: [PATCH] split_ods_links: New tool.

See docstring—this is mostly a post-filter to improve Excel compatibility.
---

diff --git a/conservancy_beancount/tools/split_ods_links.py b/conservancy_beancount/tools/split_ods_links.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a2f8fee4c9475983ec02953a2adb93bfe219cac
--- /dev/null
+++ b/conservancy_beancount/tools/split_ods_links.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""split_ods_links.py - Rewrite an ODS to have at most one link per cell
+
+This is useful when you plan to send the spreadsheet to an Excel user, which
+only supports one link per cell.
+"""
+# Copyright © 2020 Brett Smith
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see https://www.gnu.org/licenses/.
+
+import argparse
+import concurrent.futures as futmod
+import logging
+import os
+import sys
+
+from pathlib import Path
+from zipfile import BadZipFile
+
+import odf.opendocument  # type:ignore[import]
+import odf.table  # type:ignore[import]
+import odf.text  # type:ignore[import]
+
+from ..reports.core import BaseODS
+
+from typing import (
+    Iterator,
+    Optional,
+    Sequence,
+    TextIO,
+    Tuple,
+)
+
+from .. import cliutil
+
+PROGNAME = 'split-ods-links'
+logger = logging.getLogger('conservancy_beancount.tools.split_ods_links')
+
+class ODS(BaseODS[Tuple[None], None]):
+    """Load an existing ODS file and split multi-link cells across rows"""
+
+    def __init__(self, ods_path: Path) -> None:
+        super().__init__()
+        self.document = odf.opendocument.load(ods_path)
+        # Set to True once split_link_cells() has rewritten at least one row
+        self.dirty = False
+
+    def section_key(self, row: Tuple[None]) -> None:
+        # Not supported by this class.
+        raise NotImplementedError("split_ods_links.ODS.section_key")
+
+    def split_row_cells(self, row: odf.table.TableRow, count: int) -> Iterator[odf.table.TableRow]:
+        """Yield ``count`` copies of ``row``, each keeping one child per cell
+
+        The Nth copy keeps only the Nth child node of every cell. A cell with
+        fewer than N+1 children is replaced by an empty TableCell.
+        """
+        for row_index in range(count):
+            new_row = self.copy_element(row)
+            for cell_index, cell in enumerate(new_row.childNodes):
+                try:
+                    cell.childNodes = [cell.childNodes[row_index]]
+                except IndexError:
+                    new_row.childNodes[cell_index] = odf.table.TableCell()
+            yield new_row
+
+    def split_link_cells(self) -> None:
+        """Replace each row that has a multi-link cell with one row per child
+
+        Sets ``self.dirty`` when any row was replaced.
+        """
+        for sheet in self.document.spreadsheet.getElementsByType(odf.table.Table):
+            for row in sheet.getElementsByType(odf.table.TableRow):
+                cells = row.getElementsByType(odf.table.TableCell)
+                child_counts = [len(cell.childNodes) for cell in cells]
+                link_counts = [len(cell.getElementsByType(odf.text.A)) for cell in cells]
+                if any(count > 1 for count in link_counts):
+                    for new_row in self.split_row_cells(row, max(child_counts)):
+                        sheet.insertBefore(new_row, row)
+                    sheet.removeChild(row)
+                    self.dirty = True
+
+    @classmethod
+    def run_split(cls, path: Path, suffix: str) -> bool:
+        """Split links in the ODS at ``path``; save a copy if anything changed
+
+        The output name inserts ``suffix`` before the first ``.`` in the
+        filename, or appends it when the filename has no ``.`` at all.
+        Returns True if a modified spreadsheet was written.
+        """
+        ods = cls(path)
+        ods.split_link_cells()
+        if ods.dirty:
+            # Unlike str.replace('.', ...), partition still applies the suffix
+            # to a dotless filename instead of silently overwriting the input.
+            stem, dot, ext = path.name.partition('.')
+            out_path = path.with_name(f'{stem}{suffix}{dot}{ext}')
+            ods.save_path(out_path)
+        return ods.dirty
+
+
+def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
+    """Parse command line arguments for split-ods-links"""
+    parser = argparse.ArgumentParser(prog=PROGNAME)
+    cliutil.add_version_argument(parser)
+    cliutil.add_loglevel_argument(parser)
+    cliutil.add_jobs_argument(parser)
+    parser.add_argument(
+        '--suffix', '-s',
+        default='_split',
+        help="""Suffix to add to filenames for modified spreadsheets.
+Pass an empty string argument to overwrite the original spreadsheet.
+Default %(default)r.
+""")
+    parser.add_argument(
+        'ods_paths',
+        metavar='ODS_PATH',
+        type=Path,
+        nargs=argparse.ONE_OR_MORE,
+        help="""ODS file(s) to split links in
+""")
+    return parser.parse_args(arglist)
+
+def main(arglist: Optional[Sequence[str]]=None,
+         stdout: TextIO=sys.stdout,
+         stderr: TextIO=sys.stderr,
+) -> int:
+    """Split links in every ODS named on the command line
+
+    Returns 0 on success, or os.EX_DATAERR if any file could not be read
+    or parsed.
+    """
+    args = parse_arguments(arglist)
+    cliutil.set_loglevel(logger, args.loglevel)
+    # Submit the largest files first.
+    args.ods_paths.sort(key=lambda path: path.stat().st_size, reverse=True)
+
+    returncode = 0
+    max_procs = max(1, min(args.jobs, len(args.ods_paths)))
+    with futmod.ProcessPoolExecutor(max_procs) as pool:
+        # This must be an ordered list, not a set: the zip() below pairs each
+        # future with its path by position, and set iteration order is arbitrary.
+        procs = [pool.submit(ODS.run_split, path, args.suffix) for path in args.ods_paths]
+        for ods_path, proc in zip(args.ods_paths, procs):
+            try:
+                proc.result()
+            except IOError as error:
+                logger.error("error reading %s: %s", ods_path, error.strerror)
+                returncode = os.EX_DATAERR
+            except BadZipFile as error:
+                logger.error("error parsing %s: %s", ods_path, error.args[0])
+                returncode = os.EX_DATAERR
+    return returncode
+
+entry_point = cliutil.make_entry_point(__name__, PROGNAME)
+
+if __name__ == '__main__':
+    sys.exit(entry_point())
diff --git a/setup.py b/setup.py
index 97fb540a3a60f3ed1a953dd9e9a0ac74b2901b3d..e16c8a1af62e4e3e6465ee6a199cfcdced911880 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ from setuptools import setup
 setup(
     name='conservancy_beancount',
     description="Plugin, library, and reports for reading Conservancy's books",
-    version='1.9.7',
+    version='1.10.0',
     author='Software Freedom Conservancy',
     author_email='info@sfconservancy.org',
     license='GNU AGPLv3+',
@@ -44,6 +44,7 @@ setup(
             'fund-report = conservancy_beancount.reports.fund:entry_point',
             'ledger-report = conservancy_beancount.reports.ledger:entry_point',
             'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
+            'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
         ],
     },
 )