Changeset - 1c95c1b1b1fb
[Not reviewed]
0 1 1
Brett Smith - 3 years ago 2021-01-09 20:17:55
brettcsmith@brettcsmith.org
irs990scheduleA: New PDF extractor.
2 files changed with 85 insertions and 1 deletion:
0 comments (0 inline, 0 general)
conservancy_beancount/pdfforms/extract/irs990scheduleA.py
Show inline comments
 
new file 100644
 
"""irs990scheduleA.py - Extract IRS 990 Schedule A form data from the prior FY"""
 
# Copyright © 2021  Brett Smith
 
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 
#
 
# Full copyright and licensing details can be found at toplevel file
 
# LICENSE.txt in the repository.
 

	
 
import collections
 
import functools
 
import itertools
 
import logging
 

	
 
from . import FormExtractor, main
 
from .. import fields as fieldmod
 
from ... import cliutil
 

	
 
from typing import (
 
    Iterable,
 
    Iterator,
 
    Optional,
 
    Tuple,
 
)
 

	
 
# Program name handed to cliutil.make_entry_point() at the bottom of this module.
PROGNAME = 'pdfform-extract-irs990scheduleA'

# Module logger, registered under this module's dotted import path.
logger = logging.getLogger('conservancy_beancount.pdfforms.extract.irs990scheduleA')
 

	
 
def _make_shifts(
 
        key_fmt: str,
 
        start_count: int,
 
        shift_count: int=4,
 
        clear_count: int=2,
 
) -> Iterator[Tuple[str, Optional[str]]]:
 
    for index in range(start_count, start_count + shift_count):
 
        yield (key_fmt.format(index), key_fmt.format(index + 1))
 
    index += 1
 
    for index in range(index, index + clear_count):
 
        yield (key_fmt.format(index), None)
 

	
 
class IRS990ScheduleAExtractor(FormExtractor):
    """Form extractor that rewrites selected Schedule A page 2 field values.

    ``_FIELD_SOURCES`` maps each destination field name to the name of the
    field whose value it should receive, or to ``None`` when the destination
    has no source and is set to ``None``.
    """

    # Fields that are always set to None rather than copied from elsewhere.
    _BLANK_FIELDS = [
        'topmostSubform[0].Page2[0].Table_SectionA[0].Line5[0].f2_25[0]',
        'topmostSubform[0].Page2[0].Table_SectionA[0].Line6[0].f2_26[0]',
        'topmostSubform[0].Page2[0].Table_SectionB[0].Line11[0].f2_51[0]',
        'topmostSubform[0].Page2[0].f2_52[0]',
        'topmostSubform[0].Page2[0].f2_53[0]',
        'topmostSubform[0].Page2[0].c2_2[0]',
        'topmostSubform[0].Page2[0].c2_4[0]',
    ]

    # Each (format, first field number) row below expands through
    # _make_shifts() with its default counts; the blank fields follow, each
    # paired with None.
    _FIELD_SOURCES = dict(itertools.chain(
        itertools.chain.from_iterable(itertools.starmap(_make_shifts, [
            ('topmostSubform[0].Page2[0].Table_SectionA[0].Line1[0].f2_{}[0]', 1),
            ('topmostSubform[0].Page2[0].Table_SectionA[0].Line2[0].f2_{}[0]', 7),
            ('topmostSubform[0].Page2[0].Table_SectionA[0].Line3[0].f2_{}[0]', 13),
            ('topmostSubform[0].Page2[0].Table_SectionA[0].Line4[0].f2_{}[0]', 19),
            ('topmostSubform[0].Page2[0].Table_SectionB[0].Line7[0].f2_{}[0]', 27),
            ('topmostSubform[0].Page2[0].Table_SectionB[0].Line8[0].f2_{}[0]', 33),
            ('topmostSubform[0].Page2[0].Table_SectionB[0].Line9[0].f2_{}[0]', 39),
            ('topmostSubform[0].Page2[0].Table_SectionB[0].Line10[0].f2_{}[0]', 45),
        ])),
        zip(_BLANK_FIELDS, itertools.repeat(None)),
    ))
    # Part II Section C
    _FIELD_SOURCES.update({
        'topmostSubform[0].Page2[0].f2_54[0]': 'topmostSubform[0].Page2[0].f2_53[0]',
        'topmostSubform[0].Page2[0].c2_3[0]': 'topmostSubform[0].Page2[0].c2_2[0]',
        'topmostSubform[0].Page2[0].c2_5[0]': 'topmostSubform[0].Page2[0].c2_4[0]',
    })

    def _transform_fields(self, fields: Iterable[fieldmod.FormField]) -> None:
        """Apply ``_FIELD_SOURCES`` to ``fields`` in place.

        Every pair produced by each field's ``as_mapping()`` is indexed by
        its first element. All replacement values are read before any field
        is written, so a field that is both a source and a destination
        supplies its original value.
        """
        by_name = {}
        for field in fields:
            for name, entry in field.as_mapping():
                by_name[name] = entry

        # Phase 1: snapshot the value each destination will receive.
        pending = {}
        for dest, origin in self._FIELD_SOURCES.items():
            if origin is None:
                pending[dest] = None
            else:
                pending[dest] = by_name[origin].value()

        # Phase 2: write the snapshotted values to their destinations.
        for dest in pending:
            by_name[dest].set_value(pending[dest])
 

	
 

	
 
# Rebind this module's `main` to the `main` imported from the parent package,
# with `extract_cls` preset to the Schedule A extractor.
main = functools.partial(main, extract_cls=IRS990ScheduleAExtractor)

# NOTE(review): presumably make_entry_point() wraps this module's `main` for
# use as a console-script target — confirm against cliutil.
entry_point = cliutil.make_entry_point(__name__, PROGNAME)


# When executed as a script, run the entry point and exit with its result.
if __name__ == '__main__':

    exit(entry_point())
setup.py
Show inline comments
...
 
@@ -5,7 +5,7 @@ from setuptools import setup
 
setup(
 
    name='conservancy_beancount',
 
    description="Plugin, library, and reports for reading Conservancy's books",
 
-    version='1.15.2',
 
+    version='1.15.3',
 
    author='Software Freedom Conservancy',
 
    author_email='info@sfconservancy.org',
 
    license='GNU AGPLv3+',
...
 
@@ -50,6 +50,7 @@ setup(
 
            'ledger-report = conservancy_beancount.reports.ledger:entry_point',
 
            'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
 
            'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
 
+            'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
 
            'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
 
            'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
 
        ],
0 comments (0 inline, 0 general)