diff --git a/conservancy_beancount/pdfforms/fill.py b/conservancy_beancount/pdfforms/fill.py new file mode 100644 index 0000000000000000000000000000000000000000..0dbb4955dd2b4aabdde7376d272ce43c3acd3e78 --- /dev/null +++ b/conservancy_beancount/pdfforms/fill.py @@ -0,0 +1,445 @@ +"""fill.py - PDF writer class""" +# Copyright © 2021 Brett Smith +# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0 +# +# Full copyright and licensing details can be found at toplevel file +# LICENSE.txt in the repository. + +import argparse +import contextlib +import inspect +import itertools +import logging +import os +import re +import subprocess +import sys + +from codecs import BOM_UTF16_BE +from pathlib import Path + +import yaml + +from pdfminer import psparser # type:ignore[import] +from pdfminer.pdfdocument import PDFDocument # type:ignore[import] +from pdfminer.pdfparser import PDFParser # type:ignore[import] +from pdfminer.pdftypes import resolve1 # type:ignore[import] + +from . import fields as fieldmod +from . import utils as pdfutils +from .. 
class PDFWriter:
    """Serialize plain Python objects as an FDF document.

    Each ``emit_*`` method is a generator of byte chunks for one kind of PDF
    object. ``emit`` picks the right method from the Python type of its
    argument, and ``write_document`` wraps one emitted object between
    ``HEADER`` and ``FOOTER``.
    """
    HEADER = b'''%FDF-1.2
%\xe2\xe3\xcf\xd3
1 0 obj
'''
    # The trailer has to be a dictionary whose Root entry refers to the
    # catalog, which HEADER opens as object 1. Readers locate the form data
    # through it.
    FOOTER = b'''
endobj
trailer
<</Root 1 0 R>>
%%EOF
'''
    # From the PDF spec section 7.3.5 "Name Objects": only regular characters
    # may appear literally inside a name. Anything outside the printable,
    # non-space ASCII range, the number sign itself, and the delimiter
    # characters must be written as `#` plus a two-digit hexadecimal code.
    LITERAL_ESC_RE = re.compile(rb'(?:[^\x21-\x7e]|[#%()/<>\[\]{}])+')
    # Characters that need a backslash inside a PDF literal string.
    STRING_ESC = {ord(c): f'\\{c}' for c in '()\\'}

    @staticmethod
    def escape_literal(match: Match[bytes]) -> bytes:
        """Return the ``#xx`` escape sequence for every byte in ``match``.

        Each byte is always written with exactly two hexadecimal digits, so
        bytes below 0x10 (such as a tab) come out as ``#09`` rather than the
        invalid ``#9``.
        """
        return b''.join(b'#%02x' % byte for byte in match.group(0))

    def emit_array(self, obj: Sequence[Any]) -> EmitBytes:
        """Yield a PDF array with one emitted item per line."""
        yield b'[\n'
        for item in obj:
            yield from self.emit(item)
            yield b'\n'
        yield b']'

    def emit_boolean(self, obj: bool) -> EmitBytes:
        """Yield the PDF keyword ``true`` or ``false``."""
        yield b'true' if obj else b'false'

    def emit_dictionary(self, obj: Mapping[str, Any]) -> EmitBytes:
        """Yield a PDF dictionary; keys are written as names."""
        yield b'<<\n'
        for key, value in obj.items():
            yield from self.emit_literal(key)
            yield b' '
            yield from self.emit(value)
            yield b'\n'
        yield b'>>'

    def emit_literal(self, obj: Union[str, psparser.PSLiteral]) -> EmitBytes:
        """Yield a PDF name (``/Name``) with all required ``#xx`` escapes.

        Raises UnicodeEncodeError if the name contains non-ASCII characters.
        """
        if isinstance(obj, psparser.PSLiteral):
            obj = cast(str, obj.name)
        yield b'/'
        yield self.LITERAL_ESC_RE.sub(self.escape_literal, obj.encode('ascii'))

    def emit_null(self, obj: None=None) -> EmitBytes:
        """Yield the PDF keyword ``null``."""
        yield b'null'

    def emit_number(self, obj: Union[int, float]) -> EmitBytes:
        """Yield the decimal text of a number.

        NOTE(review): str() switches to exponent notation for very large or
        very small floats (e.g. 1e-05), which PDF syntax does not accept.
        Callers should stick to ordinary magnitudes.
        """
        yield str(obj).encode('ascii')

    def emit_string(self, obj: str) -> EmitBytes:
        """Yield a parenthesized PDF literal string.

        Parentheses and backslashes are backslash-escaped first; the result
        is then converted to bytes by ``pdfutils.encode_text``.
        """
        yield b'('
        yield pdfutils.encode_text(obj.translate(self.STRING_ESC))
        yield b')'

    def emit(self, obj: Any) -> EmitBytes:
        """Yield the PDF serialization of ``obj``, dispatching on its type.

        Raises ValueError for ``bytes`` and for any type without a PDF
        representation here.
        """
        if obj is None:
            yield from self.emit_null(obj)
        # bool must be tested before int because bool is a subclass of int.
        elif isinstance(obj, bool):
            yield from self.emit_boolean(obj)
        elif isinstance(obj, psparser.PSLiteral):
            yield from self.emit_literal(obj)
        elif isinstance(obj, (int, float)):
            yield from self.emit_number(obj)
        # str and bytes must be tested before Sequence: both are sequences.
        elif isinstance(obj, str):
            yield from self.emit_string(obj)
        elif isinstance(obj, bytes):
            raise ValueError("can't emit raw bytes")
        elif isinstance(obj, Mapping):
            yield from self.emit_dictionary(obj)
        elif isinstance(obj, Sequence):
            yield from self.emit_array(obj)
        else:
            raise ValueError(f"don't know how to emit {type(obj).__name__}")

    def write_document(self, obj: Any, out_file: BinaryIO) -> None:
        """Write ``obj`` to ``out_file`` as object 1 of a complete FDF file."""
        out_file.write(self.HEADER)
        for out_bytes in self.emit(obj):
            out_file.write(out_bytes)
        out_file.write(self.FOOTER)
def _set_field_value(
        field: fieldmod.FormField,
        value: Any,
        yaml_index: int=-2,
        yaml_key: Optional[str]=None,
) -> Iterator[FillProblem]:
    """Validate ``value`` against ``field`` and store it when acceptable.

    Yields a FillProblem for each issue found: an ERROR when the field's
    class has no entry in SUPPORTED_VALUE_TYPES, an ERROR when the value's
    type is not accepted by that class, and a WARNING when the field is
    read-only. The value is passed to ``field.set_value`` unless a type
    mismatch was detected. A value of None skips the type checks.
    """
    def problem(level: int, errdesc: str) -> FillProblem:
        return FillProblem(level, yaml_index, yaml_key, errdesc)

    can_set = True
    if value is not None:
        field_cls = type(field)
        accepted = SUPPORTED_VALUE_TYPES.get(field_cls)
        if accepted is None:
            yield problem(
                logging.ERROR,
                "assigns a value to an unsupported field type",
            )
        else:
            can_set = isinstance(value, accepted)
            # A bool passes an isinstance check against int, so it only
            # counts when the field explicitly accepts bool.
            if can_set and isinstance(value, bool):
                can_set = any(issubclass(t, bool) for t in accepted)
            if not can_set:
                value_type = type(value).__name__
                yield problem(
                    logging.ERROR,
                    f"assigns a {value_type} value to a {field_cls.__name__}",
                )
    if field.is_readonly():
        yield problem(logging.WARNING, "assigns a value to a readonly field")
    if can_set:
        field.set_value(value)
def merge_form(
        form_fills: Sequence[FieldSource],
        form_source: Sequence[fieldmod.FieldSource],
) -> Tuple[Sequence[FieldSource], Sequence[FillProblem]]:
    """Apply YAML fill entries to the fields of an existing form.

    ``form_source`` is the form's field list; each entry is resolved and
    wrapped in a FormField. Each entry of ``form_fills`` is matched to a
    field by its ``fdf.name``. Returns the filled FDF data for the top-level
    fields, plus every problem found, in the order the fills were processed.
    """
    source_fields = [
        fieldmod.FormField.by_type(resolve1(source))
        for source in form_source
    ]
    fields_by_name = {
        name: form_field
        for top_field in source_fields
        for name, form_field in top_field.as_mapping()
    }

    def fill_problems(index: int, fill: FieldSource) -> Iterator[FillProblem]:
        # Handle one YAML entry; stop early if its target cannot be found.
        try:
            name = fill['fdf']['name']
        except KeyError:
            yield FillProblem(logging.ERROR, index, None, "has no FDF name")
            return
        if name not in fields_by_name:
            yield FillProblem(
                logging.ERROR, index, name,
                "refers to a field that does not exist in the source form",
            )
            return
        target = fields_by_name[name]

        # A missing or unrecognized YAML type simply skips the type check.
        expect_type = None
        try:
            expect_type = fieldmod.FieldType[fill['fdf']['type'].title()]
        except KeyError:
            pass
        if expect_type is not None:
            try:
                actual_type = target.field_type()
            except ValueError:
                type_name: Optional[str] = None
            else:
                type_name = actual_type.value
            if expect_type.value != type_name:
                yield FillProblem(
                    logging.WARNING, index, name,
                    f"has type {expect_type.name} but source has type {type_name}",
                )

        if 'value' in fill:
            yield from _set_field_value(target, fill['value'], index, name)

    problems: List[FillProblem] = [
        problem
        for index, fill in enumerate(form_fills)
        for problem in fill_problems(index, fill)
    ]
    filled = [top_field.as_filled_fdf() for top_field in source_fields]
    return (filled, problems)
def change_suffix(path: Path, suffix: str, backup: str='_filled') -> Path:
    """Return ``path`` renamed to end with ``suffix``.

    When ``path`` already has exactly that suffix, ``backup`` is inserted
    between the stem and the suffix so the result never equals the input.
    """
    if path.suffix != suffix:
        return path.with_suffix(suffix)
    new_name = ''.join([path.stem, backup, suffix])
    return path.with_name(new_name)
def main(arglist: Optional[Sequence[str]]=None,
         stdout: TextIO=sys.stdout,
         stderr: TextIO=sys.stderr,
) -> int:
    """Run pdfform-fill and return its exit status.

    Loads the YAML fill file, then either generates standalone FDF data (no
    PDF argument) or merges the fills into the form found in the PDF. Any
    problems are logged; depending on ``--force`` they abort the run with
    ``os.EX_DATAERR``. Output is piped through ``pdftk fill_form`` when a
    PDF was given and pdftk can be run, and is written as FDF otherwise.

    ``stdout`` receives the output when the output path is ``-`` and pdftk
    is not used. ``stderr`` is accepted but not used by this function.
    Returns ``os.EX_NOINPUT`` when the YAML or the form key cannot be used,
    pdftk's return code in fill mode, and ``os.EX_OK`` otherwise.
    """
    args = parse_arguments(arglist)
    cliutil.set_loglevel(logger, args.loglevel)

    with args.yaml_file.open() as yaml_file:
        try:
            yaml_source = yaml.safe_load(yaml_file)
        except yaml.error.YAMLError as error:
            logger.critical("error parsing %s: %s", args.yaml_file, error)
            return os.EX_NOINPUT
    # The top level may be a list or empty instead of a mapping. Check the
    # shape before calling .get() so that case is reported, not a traceback.
    if not (isinstance(yaml_source, Mapping)
            and isinstance(yaml_source.get('fields'), list)):
        logger.critical("YAML file does not include a list of fields")
        return os.EX_NOINPUT

    if args.pdf_file is None:
        fill_mode = False
        if args.form_key is None:
            args.form_key = yaml_source.get('form key', 'FDF')
        fields, problems = generate_form(yaml_source['fields'])
    else:
        with args.pdf_file.open('rb') as pdf_file:
            parser = PDFParser(pdf_file)
            pdf_doc = PDFDocument(parser)
            if args.form_key is None:
                try:
                    args.form_key = pdfutils.guess_form_key(pdf_doc)
                except ValueError as error:
                    logger.error("%s", error.args[0])
                    logger.info("you can specify a form key using --form-key")
                    return os.EX_NOINPUT
            # Merge while the PDF is still open.
            fields, problems = merge_form(
                yaml_source['fields'],
                resolve1(pdf_doc.catalog[args.form_key])['Fields'],
            )
        # Probe the executable the user asked for, not whatever `pdftk`
        # happens to be on $PATH, so the check matches what is run below.
        fill_mode = cliutil.can_run([str(args.pdftk), '--version'])
        if not fill_mode:
            logger.warning("cannot run pdftk to fill the PDF form; writing FDF instead")

    # -1 means "no problems"; real logging levels are all positive.
    worst_problem = -1
    for problem in problems:
        problem.log()
        worst_problem = max(worst_problem, problem.level)
    if args.force > 1:
        problems_fatal = False
    elif args.force == 1:
        problems_fatal = worst_problem > logging.WARNING
    else:
        problems_fatal = worst_problem >= 0
    if problems_fatal:
        return os.EX_DATAERR

    if args.output_file is None:
        args.output_file = change_suffix(
            args.pdf_file or args.yaml_file,
            '.pdf' if fill_mode else '.fdf',
        )
        logger.info("writing output to %s", args.output_file)
    out_writer = PDFWriter()
    # pdftk always expects form fill data to be under the `FDF` key,
    # regardless of what the original PDF uses.
    out_doc = {'FDF': {'Fields': fields}}
    pdftk: Optional['subprocess.Popen[bytes]'] = None
    with contextlib.ExitStack() as exit_stack:
        if fill_mode:
            pdftk = exit_stack.enter_context(subprocess.Popen([
                str(args.pdftk), str(args.pdf_file),
                'fill_form', '-',
                'output', str(args.output_file),
            ], stdin=subprocess.PIPE))
            out_file = exit_stack.enter_context(cast(BinaryIO, pdftk.stdin))
        else:
            out_file = cliutil.bytes_output(args.output_file, stdout)
        out_writer.write_document(out_doc, out_file)
    # Leaving the stack closes pdftk's stdin and then waits for the process,
    # so its return code is available here.
    if pdftk is None:
        return os.EX_OK
    return pdftk.returncode
topform.text1_1 -- fdf: - type: Tx - name: topform.text2_0 -- fdf: - type: Btn - name: topform.button2.button2_0 - options: ['1', 'Off'] -- fdf: - type: Btn - name: topform.button2.button2_1 - options: ['2', 'Off'] +from file: form1.fdf +fields: + - fdf: + type: Tx + name: topform.text1_0 + - fdf: + type: Btn + name: topform.button1.button1_0 + options: ['1', 'Off'] + - fdf: + type: Btn + name: topform.button1.button1_1 + options: ['2', 'Off'] + - fdf: + type: Tx + name: topform.text1_1 + - fdf: + type: Tx + name: topform.text2_0 + - fdf: + type: Btn + name: topform.button2.button2_0 + options: ['1', 'Off'] + - fdf: + type: Btn + name: topform.button2.button2_1 + options: ['2', 'Off'] + - fdf: + type: Btn + name: topform.submit diff --git a/tests/pdfforms/form1_fill.yml b/tests/pdfforms/form1_fill.yml new file mode 100644 index 0000000000000000000000000000000000000000..92f7584cbdca64c2f6128b6d50a9df55b29c7b84 --- /dev/null +++ b/tests/pdfforms/form1_fill.yml @@ -0,0 +1,32 @@ +from file: form1.fdf +fields: + - fdf: + type: Tx + name: topform.text1_0 + value: text 1.0 + - fdf: + type: Btn + name: topform.button1.button1_0 + options: ['1', 'Off'] + value: on + - fdf: + type: Btn + name: topform.button1.button1_1 + options: ['2', 'Off'] + - fdf: + type: Tx + name: topform.text1_1 + value: text 1.1 + - fdf: + type: Tx + name: topform.text2_0 + value: text 2.0 + - fdf: + type: Btn + name: topform.button2.button2_0 + options: ['1', 'Off'] + - fdf: + type: Btn + name: topform.button2.button2_1 + options: ['2', 'Off'] + value: on diff --git a/tests/test_pdfforms_extract.py b/tests/test_pdfforms_extract.py index 0f08fd6d96c26ba47d11be6cdd3a403208c91423..dbacd48bb2172b6b503ae95fb6c3095bcdea9420 100644 --- a/tests/test_pdfforms_extract.py +++ b/tests/test_pdfforms_extract.py @@ -21,7 +21,7 @@ def compare_to_yaml(actual, yaml_path, from_file, form_key): if isinstance(yaml_path, str): yaml_path = testutil.test_path(f'pdfforms/{yaml_path}') with yaml_path.open() as yaml_file: - 
expect_fields = yaml.safe_load(yaml_file) + expect_fields = yaml.safe_load(yaml_file)['fields'] assert actual.get('from file') == from_file assert actual.get('form key') == form_key for act_f, exp_f in itertools.zip_longest(actual.get('fields', ()), expect_fields): diff --git a/tests/test_pdfforms_fill.py b/tests/test_pdfforms_fill.py new file mode 100644 index 0000000000000000000000000000000000000000..bdec074a3ccaf8244fc14f4912b1c39a9646b0d6 --- /dev/null +++ b/tests/test_pdfforms_fill.py @@ -0,0 +1,381 @@ +"""test_pdfforms_writer.py - Unit tests for PDF writer""" +# Copyright © 2020 Brett Smith +# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0 +# +# Full copyright and licensing details can be found at toplevel file +# LICENSE.txt in the repository. + +import codecs +import io +import logging +import re +import shutil + +from pathlib import Path + +import pytest +import yaml + +from . import testutil +from pdfminer.pdfdocument import PDFDocument +from pdfminer.pdfparser import PDFParser +from pdfminer.pdftypes import resolve1 +from pdfminer.psparser import PSLiteral + +from conservancy_beancount.pdfforms import fill as fillmod + +PDFTK = shutil.which('pdftk') +# Per the PDF spec, 7.2.2 "Character Set" Table 1 +WHITESPACE = b'\x00\x09\x0A\x0C\x0D\x20' +WHITESPACE_RE = re.compile(b'[' + WHITESPACE + b']+') + +@pytest.fixture(scope='module') +def writer(): + return fillmod.PDFWriter() + +def expected_re(expected): + pattern = re.escape(expected) + # Unescape some things that don't strictly need to be escaped. + pattern = re.sub(rb'\\(<|>| )', rb'\1', pattern) + # Allow arbitrary whitespace around punctuation tokens. + pattern = re.sub(rb'(<<|>>|\\\[|\\\])', rb'\\s*\1\\s*', pattern) + # Allow any kind of whitespace where any is required. 
def utf16_str(s):
    """Return ``s`` as a parenthesized PDF string in UTF-16BE with a BOM."""
    payload = codecs.BOM_UTF16_BE + s.encode('utf-16be')
    return b'(%b)' % payload
def test_merge():
    """Filling form1 from form1_fill.yml sets every expected field value."""
    yaml_path = testutil.test_path('pdfforms/form1_fill.yml')
    with yaml_path.open() as yaml_file:
        form_yaml = yaml.safe_load(yaml_file)['fields']
    actual, errors = merge_form(form_yaml)
    assert not errors
    expected = {
        'text1_0': 'text 1.0',
        'button1_0': PSLiteral('1'),
        'button1_1': None,
        'text1_1': 'text 1.1',
        'text2_0': 'text 2.0',
        'button2_0': None,
        'button2_1': PSLiteral('2'),
    }
    # Walk the field tree breadth-first: `actual` doubles as the work queue,
    # growing with each visited field's kids.
    cursor = 0
    while cursor < len(actual):
        field = actual[cursor]
        cursor += 1
        try:
            expect_value = expected.pop(field['T'])
        except KeyError:
            # Not one of the fields under test.
            pass
        else:
            actual_value = field.get('V')
            if isinstance(expect_value, PSLiteral):
                assert actual_value.name == expect_value.name
            else:
                assert actual_value == expect_value
        actual.extend(field.get('Kids', ()))
    assert not expected, "not all expected fields found in filled form data"
def test_generate():
    """generate_form builds a parent field with one filled kid per entry."""
    text_fill = {'fdf': {'name': 'form.text', 'type': 'Tx'}, 'value': 'generated'}
    button_fill = {'fdf': {'name': 'form.button', 'type': 'Btn'}, 'value': True}
    actual, errors = fillmod.generate_form([text_fill, button_fill])
    assert not errors
    # Exactly one top-level field, and it carries no value of its own.
    [form_root] = actual
    assert form_root['T'] == 'form'
    assert 'V' not in form_root
    [text, checkbox] = form_root['Kids']
    assert text['T'] == 'text'
    assert text['V'] == 'generated'
    assert not text.get('Kids')
    assert checkbox['T'] == 'button'
    assert checkbox['V'].name == 'Yes'
    assert not checkbox.get('Kids')
def test_generate_invalid_field_type():
    """An unrecognized FDF type is reported with the bad type in the message."""
    bad_type = ''
    source = [{
        'fdf': {'name': 'badtype', 'type': bad_type},
        'value': 'unsupported type value',
    }]
    _, errors = fillmod.generate_form(source)
    assert errors
    found_msg = False
    for error in errors:
        assert error.level >= logging.ERROR
        assert error.yaml_index == 0
        assert error.name == 'badtype'
        # The error text quotes the type with repr(). Searching for the bare
        # empty string would match every message and prove nothing.
        found_msg = found_msg or repr(bad_type) in error.errdesc
    assert found_msg, "no errors mentioned unknown field type"
# pytest has no `skipUnless` mark (that name belongs to unittest); an unknown
# mark is silently ignored, so the skip condition must be spelled `skipif`.
@pytest.mark.skipif(not PDFTK, reason="need pdftk installed")
@pytest.mark.xfail(reason="`pdftk fill_form` expects a full PDF")
def test_main_fill_pdf():
    """Fill form1 through pdftk and look for the filled values in order."""
    arglist = [
        '--pdftk', PDFTK,
        '--output-file', '-',
        str(testutil.test_path('pdfforms/form1_fill.yml')),
        str(testutil.test_path('pdfforms/form1.fdf')),
    ]
    stdout = io.BytesIO()
    stderr = io.StringIO()
    retcode = fillmod.main(arglist, stdout, stderr)
    assert retcode == 0
    assert not stderr.getvalue()
    patterns = iter(expected_re(p) for p in [
        b'/T (text1_0)',
        b'/V (text 1.0)',
        b'/T (button1_0)',
        b'/V /1',
        b'/T (text2_0)',
        b'/V (text 2.0)',
        b'/T (button2_1)',
        b'/V /2',
    ])
    pattern = next(patterns)
    stdout.seek(0)
    # Each pattern must appear on a line after the previous one matched.
    # Running out of patterns means success; running out of lines first
    # means the current pattern was never found.
    for line in stdout:
        if re.search(pattern, line):
            try:
                pattern = next(patterns)
            except StopIteration:
                break
    else:
        pytest.fail(f"pattern {pattern!r} not found in FDF output")