Files @ 5a8da108b983
Branch filter:

Location: NPO-Accounting/conservancy_beancount/tests/test_pdfforms_extract.py

bsturmfels
statement_reconciler: Add initial Chase bank CSV statement matching

We currently don't have many examples to work with, so haven't done any
significant testing of the matching accuracy between statement and books.
"""test_pdfforms_extract.py - Unit tests for PDF form extractor"""
# Copyright © 2020  Brett Smith
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
#
# Full copyright and licensing details can be found at toplevel file
# LICENSE.txt in the repository.

import io
import itertools

import pytest
import yaml

from . import testutil

from pathlib import Path

from conservancy_beancount.pdfforms import extract as extractmod

def compare_to_yaml(actual, yaml_path, from_file, form_key):
    """Assert that an extraction result matches a YAML fixture.

    ``actual`` is the mapping under test. ``yaml_path`` is either a string,
    which is resolved with ``testutil.test_path`` under ``pdfforms/``, or an
    object providing ``open()``. The fixture's ``fields`` list is compared
    against ``actual['fields']`` position by position; for each pair, only
    the keys present in the expected field are checked, so the actual field
    may carry extra keys. ``from_file`` and ``form_key`` are the expected
    values of ``actual['from file']`` and ``actual['form key']``.
    """
    if isinstance(yaml_path, str):
        yaml_path = testutil.test_path(f'pdfforms/{yaml_path}')
    with yaml_path.open() as yaml_file:
        expect_fields = yaml.safe_load(yaml_file)['fields']
    assert actual.get('from file') == from_file
    assert actual.get('form key') == form_key
    # Pad with a private sentinel rather than None so that a difference in
    # the number of fields is reported as a readable assertion failure
    # instead of an AttributeError/TypeError on the padding value.
    missing = object()
    paired = itertools.zip_longest(
        actual.get('fields', ()), expect_fields, fillvalue=missing,
    )
    for index, (act_f, exp_f) in enumerate(paired):
        assert exp_f is not missing, \
            f"extracted field #{index} has no counterpart in the fixture"
        assert act_f is not missing, \
            f"fixture field #{index} was not extracted"
        for key, exp_value in exp_f.items():
            assert act_f[key] == exp_value

@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_extract_from_path(fdf_filename, form_key, fields_yaml):
    """Extract through ``FormExtractor.from_path`` and check the result
    against the YAML fixture.
    """
    source_path = testutil.test_path(f'pdfforms/{fdf_filename}')
    # from_path() is used as a context manager around the extraction.
    with extractmod.FormExtractor.from_path(source_path) as form_extractor:
        extracted = form_extractor.extract()
    compare_to_yaml(extracted, fields_yaml, fdf_filename, form_key)

# Both fixtures are listed on a single test function. Previously this test
# was defined twice under the same name, so the later definition replaced the
# earlier one at import time and the form1.fdf case was never collected.
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
    ('form1_fill.fdf', 'FDF', 'form1_fill.yml'),
])
def test_extract_from_file(fdf_filename, form_key, fields_yaml):
    """Extract through ``FormExtractor.from_file`` using an open binary file
    and check the result against the YAML fixture.
    """
    with testutil.test_path(f'pdfforms/{fdf_filename}').open('rb') as fdf_file:
        extractor = extractmod.FormExtractor.from_file(fdf_file)
        # Extract while the file is still open.
        actual = extractor.extract()
    compare_to_yaml(actual, fields_yaml, fdf_filename, form_key)

@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_main(fdf_filename, form_key, fields_yaml):
    """Run the extractor's ``main`` on one FDF path and check that what it
    wrote to stdout parses as YAML matching the fixture.
    """
    cli_args = [str(testutil.test_path(f'pdfforms/{fdf_filename}'))]
    out_stream, err_stream = io.StringIO(), io.StringIO()
    exit_status = extractmod.main(cli_args, out_stream, err_stream)
    assert exit_status == 0
    assert not err_stream.getvalue()
    # Rewind so the YAML loader reads the captured output from the start.
    out_stream.seek(0)
    parsed_output = yaml.safe_load(out_stream)
    compare_to_yaml(parsed_output, fields_yaml, fdf_filename, form_key)