"""test_pdfforms_writer.py - Unit tests for PDF writer""" # Copyright © 2020 Brett Smith # License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0 # # Full copyright and licensing details can be found at toplevel file # LICENSE.txt in the repository. import codecs import io import logging import re import shutil from pathlib import Path import pytest import yaml from . import testutil from pdfminer.pdfdocument import PDFDocument from pdfminer.pdfparser import PDFParser from pdfminer.pdftypes import resolve1 from pdfminer.psparser import PSLiteral from conservancy_beancount.pdfforms import fill as fillmod PDFTK = shutil.which('pdftk') # Per the PDF spec, 7.2.2 "Character Set" Table 1 WHITESPACE = b'\x00\x09\x0A\x0C\x0D\x20' WHITESPACE_RE = re.compile(b'[' + WHITESPACE + b']+') @pytest.fixture(scope='module') def writer(): return fillmod.PDFWriter() def expected_re(expected): pattern = re.escape(expected) # Unescape some things that don't strictly need to be escaped. pattern = re.sub(rb'\\(<|>| )', rb'\1', pattern) # Allow arbitrary whitespace around punctuation tokens. pattern = re.sub(rb'(<<|>>|\\\[|\\\])', rb'\\s*\1\\s*', pattern) # Allow any kind of whitespace where any is required. 
pattern = WHITESPACE_RE.sub(rb'\\s+', pattern) return pattern def utf16_str(s): return b''.join([ b'(', codecs.BOM_UTF16_BE, s.encode('utf-16be'), b')', ]) def open_pdf(source): if isinstance(source, Path): source = source.open('rb') else: source.seek(0) return PDFDocument(PDFParser(source)) def merge_form(yaml_fills, form_filename='form1.fdf', form_key='FDF'): with testutil.test_path(f'pdfforms/{form_filename}') as fdf_path: pdf = open_pdf(fdf_path) pdf_fields = resolve1(pdf.catalog[form_key])['Fields'] return fillmod.merge_form(yaml_fills, pdf_fields) @pytest.mark.parametrize('source,expected', [ (None, b'null'), (True, b'true'), (False, b'false'), (0, b'0'), (1, b'1'), (345, b'345'), (34.56, b'34.56'), ('', b'()'), ('ascii', b'(ascii)'), (')parens(', br'(\)parens\()'), ('UTF—16', utf16_str('UTF—16')), (')¤(', utf16_str(r'\)¤\(')), (PSLiteral('lit'), b'/lit'), (PSLiteral('# header'), b'/#23#20header'), ]) def test_write_scalar(writer, source, expected): actual = b''.join(writer.emit(source)).strip(WHITESPACE) assert actual == expected @pytest.mark.parametrize('source,expected', [ ([], b'[]'), ([1, 2, 3], b'[1 2 3]'), ([[1, 3], [2, 4], []], b'[[1 3][2 4][]]'), ({}, b'<<>>'), ({'Yes': True, 'No': False}, b'<>'), ({'Kids': [1, 2, 3]}, b'<>'), ]) def test_write_compound(writer, source, expected): pattern = expected_re(expected) actual = b''.join(writer.emit(source)) assert re.fullmatch(pattern, actual) def test_write_document(writer): pysrc = {'FDF': {'Fields': [ {'FT': PSLiteral('Tx'), 'T': 'text'}, {'FT': PSLiteral('Btn'), 'T': 'check'}, ]}} doc = io.BytesIO() writer.write_document(pysrc, doc) pdf = open_pdf(doc) assert len(pdf.catalog) == 1 actual = resolve1(pdf.catalog['FDF']) assert len(actual) == 1 f1, f2 = actual['Fields'] assert f1['FT'].name == 'Tx' assert f1['T'] == b'text' assert f2['FT'].name == 'Btn' assert f2['T'] == b'check' def test_merge(): with testutil.test_path('pdfforms/form1_fill.yml').open() as yaml_file: form_yaml = 
yaml.safe_load(yaml_file)['fields'] actual, errors = merge_form(form_yaml) assert not errors expected = { 'text1_0': 'text 1.0', 'button1_0': PSLiteral('1'), 'button1_1': None, 'text1_1': 'text 1.1', 'text2_0': 'text 2.0', 'button2_0': None, 'button2_1': PSLiteral('2'), } for field in actual: try: expect_value = expected.pop(field['T']) except KeyError: pass else: actual_value = field.get('V') if isinstance(expect_value, PSLiteral): assert actual_value.name == expect_value.name else: assert actual_value == expect_value actual.extend(field.get('Kids', ())) assert not expected, "not all expected fields found in filled form data" @pytest.mark.parametrize('name', [None, 'nonesuchfield']) def test_merge_bad_name(name): fill = {'fdf': {}} if name is not None: fill['fdf']['name'] = name _, errors = merge_form([fill]) error, = errors assert error.level >= logging.ERROR assert error.yaml_index == 0 assert error.name == name @pytest.mark.parametrize('name,yaml_type', [ ('topform.text1_0', 'Btn'), ('topform.button1.button1_0', 'Tx'), ]) def test_merge_yaml_wrong_type(name, yaml_type): fill = {'fdf': {'name': name, 'type': yaml_type}} _, errors = merge_form([fill]) error, = errors assert error.level >= logging.WARNING assert error.yaml_index == 0 assert error.name == name @pytest.mark.parametrize('value', ['', ' ', 'readwrite']) def test_merge_readonly_field(value): fill = [{ 'fdf': {'name': 'topform.text2_R'}, 'value': value, }] _, errors = merge_form(fill) error, = errors assert error.level >= logging.WARNING assert error.yaml_index == 0 assert error.name == 'topform.text2_R' @pytest.mark.parametrize('value', [None, True, 'Yes']) def test_merge_nonterminal_field(value): yaml_fills = [{ 'fdf': {'name': 'topform.button1'}, 'value': value, }] _, errors = merge_form(yaml_fills) if value is None: assert not errors else: error, = errors assert error.level >= logging.WARNING assert error.yaml_index == 0 assert error.name == 'topform.button1' @pytest.mark.parametrize('value', [None, 
True, 'Yes']) def test_merge_unsupported_field_type(value): yaml_fills = [{ 'fdf': {'name': 'topform.submit', 'type': 'Btn'}, 'value': value, }] _, errors = merge_form(yaml_fills) if value is None: assert not errors else: error, = errors assert error.level >= logging.WARNING assert error.yaml_index == 0 assert error.name == 'topform.submit' @pytest.mark.parametrize('value', [True, False, [], {}]) def test_merge_unsupported_text_value(value): yaml_fills = [{ 'fdf': {'name': 'topform.text1_0'}, 'value': value, }] _, errors = merge_form(yaml_fills) error, = errors assert error.level >= logging.ERROR assert error.yaml_index == 0 assert error.name == 'topform.text1_0' @pytest.mark.parametrize('value', ['', 'Off', 'Yes', [], {}]) def test_merge_unsupported_checkbox_value(value): yaml_fills = [{ 'fdf': {'name': 'topform.button1.button1_0'}, 'value': value, }] _, errors = merge_form(yaml_fills) error, = errors assert error.level >= logging.ERROR assert error.yaml_index == 0 assert error.name == 'topform.button1.button1_0' def test_generate(): source = [ {'fdf': {'name': 'form.text', 'type': 'Tx'}, 'value': 'generated'}, {'fdf': {'name': 'form.button', 'type': 'Btn'}, 'value': True}, ] actual, errors = fillmod.generate_form(source) assert not errors form_root, = actual assert form_root['T'] == 'form' assert 'V' not in form_root text, checkbox = form_root['Kids'] assert text['T'] == 'text' assert text['V'] == 'generated' assert not text.get('Kids') assert checkbox['T'] == 'button' assert checkbox['V'].name == 'Yes' assert not checkbox.get('Kids') @pytest.mark.parametrize('options,value', [ (['1'], True), (['1'], False), (['On', 'Off'], True), (['On', 'Off'], False), ]) def test_generate_checkbox_with_options(options, value): source = [{ 'fdf': {'name': 'cbox', 'type': 'Btn', 'options': options}, 'value': value, }] actual, errors = fillmod.generate_form(source) assert not errors assert actual[0]['V'].name == (options[0] if value else 'Off') 
@pytest.mark.parametrize('yaml_type', [None, 'Ch', 'Sig'])
def test_generate_unsupported_field_type(yaml_type):
    """A missing, 'Ch' or 'Sig' field type reports exactly one error."""
    source = [{
        'fdf': {'name': 'badtype', 'type': yaml_type},
        'value': 'unsupported type value',
    }]
    if yaml_type is None:
        # The None case exercises an absent 'type' key, not a null value.
        del source[0]['fdf']['type']
    _, errors = fillmod.generate_form(source)
    error, = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == 'badtype'


def test_generate_invalid_field_type():
    """An empty-string field type reports one or more errors."""
    source = [{
        'fdf': {'name': 'badtype', 'type': ''},
        'value': 'unsupported type value',
    }]
    _, errors = fillmod.generate_form(source)
    assert errors
    found_msg = False
    for error in errors:
        assert error.level >= logging.ERROR
        assert error.yaml_index == 0
        assert error.name == 'badtype'
        # NOTE(review): '' is a substring of every str, so this check is
        # vacuous as written. The substring that was meant to be searched
        # for cannot be determined from this file - restore it.
        found_msg = found_msg or '' in error.errdesc
    assert found_msg, "no errors mentioned unknown field type"


@pytest.mark.parametrize('value', [True, False, [], {}])
def test_generate_unsupported_text_value(value):
    """Boolean and container values for a text field report one error."""
    source = [{
        'fdf': {'name': 'badtext', 'type': 'Tx'},
        'value': value,
    }]
    _, errors = fillmod.generate_form(source)
    error, = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == 'badtext'


@pytest.mark.parametrize('value', ['', 'Off', 'Yes', [], {}])
def test_generate_unsupported_checkbox_value(value):
    """String and container values for a checkbox report one error."""
    source = [{
        'fdf': {'name': 'badbutton', 'type': 'Btn'},
        'value': value,
    }]
    _, errors = fillmod.generate_form(source)
    error, = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == 'badbutton'


# Field name/value tokens that filling in form1_fill.yml should produce,
# in the order they are expected to appear in the output.
_FORM1_FILL_TOKENS = (
    b'/T (text1_0)', b'/V (text 1.0)',
    b'/T (button1_0)', b'/V /1',
    b'/T (text2_0)', b'/V (text 2.0)',
    b'/T (button2_1)', b'/V /2',
)


def _assert_tokens_in_order(stream, tokens):
    """Fail the test unless every token is found in ``stream``, in order.

    ``stream`` is a seekable binary file object and ``tokens`` a non-empty
    sequence of bytes, each converted to a pattern with ``expected_re``.
    Every line is tested against the current pattern only, so a single
    line can satisfy at most one token.
    """
    patterns = iter(map(expected_re, tokens))
    pattern = next(patterns)
    stream.seek(0)
    for line in stream:
        if re.search(pattern, line):
            try:
                pattern = next(patterns)
            except StopIteration:
                # Every pattern has been matched.
                break
    else:
        # The stream ran out while a pattern was still outstanding.
        pytest.fail(f"pattern {pattern!r} not found in FDF output")


def test_main_generate_fdf():
    """main() with only a YAML argument writes the fill tokens to stdout."""
    arglist = [
        '--output-file=-',
        str(testutil.test_path('pdfforms/form1_fill.yml')),
    ]
    stdout = io.BytesIO()
    stderr = io.StringIO()
    retcode = fillmod.main(arglist, stdout, stderr)
    assert retcode == 0
    assert not stderr.getvalue()
    _assert_tokens_in_order(stdout, _FORM1_FILL_TOKENS)


@pytest.mark.skipif(not PDFTK, reason="need pdftk installed")
@pytest.mark.xfail(reason="`pdftk fill_form` expects a full PDF")
def test_main_fill_pdf():
    """main() with --pdftk and a form argument writes the fill tokens."""
    arglist = [
        '--pdftk', PDFTK,
        '--output-file', '-',
        str(testutil.test_path('pdfforms/form1_fill.yml')),
        str(testutil.test_path('pdfforms/form1.fdf')),
    ]
    stdout = io.BytesIO()
    stderr = io.StringIO()
    retcode = fillmod.main(arglist, stdout, stderr)
    assert retcode == 0
    assert not stderr.getvalue()
    _assert_tokens_in_order(stdout, _FORM1_FILL_TOKENS)