Changeset - 3219bf89d246
[Not reviewed]
0 1 1
Brett Smith - 4 years ago 2020-09-09 03:37:00
brettcsmith@brettcsmith.org
split_ods_links: New tool.

See docstring—this is mostly a post-filter to improve Excel compatibility.
2 files changed with 140 insertions and 1 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/tools/split_ods_links.py
Show inline comments
 
new file 100644
 
#!/usr/bin/env python3
 
"""split_ods_links.py - Rewrite an ODS to have at most one link per cell
 

	
 
This is useful when you plan to send the spreadsheet to an Excel user, because
Excel only supports one link per cell.
 
"""
 
# Copyright © 2020 Brett Smith
 
#
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU Affero General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU Affero General Public License for more details.
 
#
 
# You should have received a copy of the GNU Affero General Public License
 
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
 

	
 
import argparse
 
import concurrent.futures as futmod
 
import logging
 
import os
 
import sys
 

	
 
from pathlib import Path
 
from zipfile import BadZipFile
 

	
 
import odf.opendocument  # type:ignore[import]
 
import odf.table  # type:ignore[import]
 
import odf.text  # type:ignore[import]
 

	
 
from ..reports.core import BaseODS
 

	
 
from typing import (
 
    Iterator,
 
    Optional,
 
    Sequence,
 
    TextIO,
 
    Tuple,
 
)
 

	
 
from .. import cliutil
 

	
 
# Program name given to argparse (prog=) and to cliutil.make_entry_point.
PROGNAME = 'split-ods-links'
# Module logger: main() sets its level and logs per-file errors through it.
logger = logging.getLogger('conservancy_beancount.tools.split_ods_links')
 

	
 
class ODS(BaseODS[Tuple[None], None]):
    """An ODS spreadsheet loaded from disk whose multi-link rows can be split.

    ``dirty`` starts out False and is set to True once ``split_link_cells``
    has rewritten at least one row of the document.
    """

    def __init__(self, ods_path: Path) -> None:
        """Load the spreadsheet stored at ``ods_path``."""
        super().__init__()
        self.document = odf.opendocument.load(ods_path)
        self.dirty = False

    def section_key(self, row: Tuple[None]) -> None:
        """Unsupported in this tool: always raises NotImplementedError."""
        # presumably only defined because BaseODS requires it; verify against
        # the base class.
        raise NotImplementedError("split_ods_links.ODS.section_key")

    def split_row_cells(self, row: odf.table.TableRow, count: int) -> Iterator[odf.table.TableRow]:
        """Yield ``count`` copies of ``row``, each keeping one child per cell.

        The copy yielded at position ``i`` (counting from zero) keeps only
        child node ``i`` of every cell. Where a cell has no child at that
        position, the copy gets a newly constructed, empty ``TableCell`` in
        its place.
        """
        for row_index in range(count):
            new_row = self.copy_element(row)
            for cell_index, cell in enumerate(new_row.childNodes):
                try:
                    cell.childNodes = [cell.childNodes[row_index]]
                except IndexError:
                    # NOTE(review): the replacement cell is built without any
                    # of the original cell's attributes - confirm that is
                    # acceptable for styled or repeated cells.
                    new_row.childNodes[cell_index] = odf.table.TableCell()
            yield new_row

    def split_link_cells(self) -> None:
        """Replace each row that has a multi-link cell with several rows.

        A row is rewritten when any of its cells contains more than one
        ``text:a`` link element. It is replaced, in the same position, by as
        many rows as the largest number of child nodes found in any one of its
        cells (see ``split_row_cells``). ``self.dirty`` is set to True when at
        least one row was rewritten.
        """
        for sheet in self.document.spreadsheet.getElementsByType(odf.table.Table):
            for row in sheet.getElementsByType(odf.table.TableRow):
                cells = row.getElementsByType(odf.table.TableCell)
                if not any(len(cell.getElementsByType(odf.text.A)) > 1 for cell in cells):
                    continue
                row_count = max(len(cell.childNodes) for cell in cells)
                # getElementsByType searches the whole subtree, so the row may
                # be nested inside a grouping element (such as header rows)
                # instead of being a direct child of the sheet. Edit the tree
                # through the row's real parent so insertBefore/removeChild
                # can find it.
                parent = row.parentNode if row.parentNode is not None else sheet
                for new_row in self.split_row_cells(row, row_count):
                    parent.insertBefore(new_row, row)
                parent.removeChild(row)
                self.dirty = True

    @classmethod
    def run_split(cls, path: Path, suffix: str) -> bool:
        """Split the links in the spreadsheet at ``path`` and save any changes.

        When the document was modified, it is saved next to the original with
        ``suffix`` inserted between the filename's stem and its final
        extension (``report.ods`` becomes ``report_split.ods``). An empty
        ``suffix`` therefore saves over the original file. Nothing is written
        when no row needed splitting.

        Returns True if the document was modified, else False.
        """
        ods = cls(path)
        ods.split_link_cells()
        if ods.dirty:
            # Build the name from stem + suffix + extension rather than
            # replacing the first dot: that way a name without any dot still
            # gets the suffix (instead of silently overwriting the input), and
            # a name with several dots keeps them all in the stem.
            out_path = path.with_name(f'{path.stem}{suffix}{path.suffix}')
            ods.save_path(out_path)
        return ods.dirty
 

	
 

	
 
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
    """Parse the command line for this tool and return the resulting namespace.

    ``arglist`` is handed to ``ArgumentParser.parse_args`` unchanged. Besides
    the options registered by the ``cliutil`` helpers, the parser accepts
    ``--suffix``/``-s`` and one or more positional ODS paths, which are
    converted to ``Path`` objects and stored as ``ods_paths``.
    """
    suffix_help = """Suffix to add to filenames for modified spreadsheets.
Pass an empty string argument to overwrite the original spreadsheet.
Default %(default)r.
"""
    paths_help = """ODS file(s) to split links in
"""
    parser = argparse.ArgumentParser(prog=PROGNAME)
    # Options shared with other tools, registered in the same order as before.
    for add_shared_argument in (
            cliutil.add_version_argument,
            cliutil.add_loglevel_argument,
            cliutil.add_jobs_argument,
    ):
        add_shared_argument(parser)
    parser.add_argument('--suffix', '-s', default='_split', help=suffix_help)
    parser.add_argument(
        'ods_paths',
        metavar='ODS_PATH',
        type=Path,
        nargs=argparse.ONE_OR_MORE,
        help=paths_help,
    )
    return parser.parse_args(arglist)
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
         stdout: TextIO=sys.stdout,
         stderr: TextIO=sys.stderr,
) -> int:
    """Split links in every spreadsheet named on the command line.

    Each path is processed by ``ODS.run_split`` in a process pool of at most
    ``args.jobs`` workers (and never more workers than there are paths).

    ``arglist`` is passed to ``parse_arguments``. ``stdout`` and ``stderr``
    are accepted but not used by this function.

    Returns 0 on success, or ``os.EX_DATAERR`` if any spreadsheet could not be
    read or parsed. Failures are logged per file and do not stop the other
    files from being processed.
    """
    args = parse_arguments(arglist)
    cliutil.set_loglevel(logger, args.loglevel)

    def size_key(path: Path) -> int:
        # A path that cannot be stat'ed (for example, a missing file) must not
        # abort the whole run here. Sort it last and let run_split raise the
        # error, so it is reported per file below.
        try:
            return path.stat().st_size
        except OSError:
            return 0

    # Largest files first - presumably so the longest jobs start earliest.
    args.ods_paths.sort(key=size_key, reverse=True)

    returncode = 0
    max_procs = max(1, min(args.jobs, len(args.ods_paths)))
    with futmod.ProcessPoolExecutor(max_procs) as pool:
        # This must be a list, not a set: the futures are paired with their
        # paths by position below, and a set would iterate in arbitrary order
        # and attribute errors to the wrong file.
        procs = [pool.submit(ODS.run_split, path, args.suffix) for path in args.ods_paths]
        for ods_path, proc in zip(args.ods_paths, procs):
            try:
                proc.result()
            except IOError as error:
                logger.error("error reading %s: %s", ods_path, error.strerror)
                returncode = os.EX_DATAERR
            except BadZipFile as error:
                logger.error("error parsing %s: %s", ods_path, error.args[0])
                returncode = os.EX_DATAERR
    return returncode
 

	
 
# Callable that setup.py registers as the `split-ods-links` console script.
entry_point = cliutil.make_entry_point(__name__, PROGNAME)

if __name__ == '__main__':
    # Use sys.exit rather than the bare exit() helper: exit() is installed by
    # the site module for interactive use and is missing under `python -S`.
    sys.exit(entry_point())
setup.py
Show inline comments
...
 
@@ -5,7 +5,7 @@ from setuptools import setup
 
setup(
 
    name='conservancy_beancount',
 
    description="Plugin, library, and reports for reading Conservancy's books",
 
    version='1.9.7',
 
    version='1.10.0',
 
    author='Software Freedom Conservancy',
 
    author_email='info@sfconservancy.org',
 
    license='GNU AGPLv3+',
...
 
@@ -44,6 +44,7 @@ setup(
 
            'fund-report = conservancy_beancount.reports.fund:entry_point',
 
            'ledger-report = conservancy_beancount.reports.ledger:entry_point',
 
            'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
 
            'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
 
        ],
 
    },
 
)
0 comments (0 inline, 0 general)