Changeset - eba4ed45b0ab
[Not reviewed]
0 1 2
Brett Smith - 3 years ago 2021-01-23 15:23:03
brettcsmith@brettcsmith.org
reconcile: Start module with PayPal reconciliation report.
3 files changed with 417 insertions and 1 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/__init__.py
Show inline comments
 
new file 100644
conservancy_beancount/reconcile/paypal.py
Show inline comments
 
new file 100644
 
#!/usr/bin/env python3
 
"""paypal.py - Reconcile PayPal account from CSV statement"""
 
# Copyright © 2021  Brett Smith
 
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 
#
 
# Full copyright and licensing details can be found at toplevel file
 
# LICENSE.txt in the repository.
 

	
 
import argparse
 
import collections
 
import csv
 
import datetime
 
import enum
 
import logging
 
import operator
 
import os
 
import sys
 
import unicodedata
 

	
 
from decimal import Decimal
 
from pathlib import Path
 

	
 
import odf.style  # type:ignore[import]
 
import odf.table  # type:ignore[import]
 

	
 
from beancount.parser import printer as bc_printer
 

	
 
from .. import books
 
from .. import cliutil
 
from .. import config as configmod
 
from .. import data
 
from ..ranges import DateRange
 
from ..reports import core
 

	
 
from typing import (
 
    Any,
 
    Hashable,
 
    Iterable,
 
    Iterator,
 
    List,
 
    Mapping,
 
    NamedTuple,
 
    Optional,
 
    Sequence,
 
    TextIO,
 
    Tuple,
 
    Union,
 
)
 

	
 
PROGNAME = 'reconcile-paypal'
 
logger = logging.getLogger('conservancy_beancount.reconcile.paypal')
 

	
 
class ReconcileProblems(enum.IntFlag):
 
    NOT_IN_STATEMENT = enum.auto()
 
    DUP_IN_STATEMENT = enum.auto()
 
    AMOUNT_DIFF = enum.auto()
 
    MONTH_DIFF = enum.auto()
 
    DATE_DIFF = enum.auto()
 

	
 

	
 
class PayPalPosting(NamedTuple):
 
    filename: str
 
    lineno: int
 
    txn_id: str
 
    date: datetime.date
 
    amount: data.Amount
 
    description: str
 
    entity: Optional[str]
 

	
 
    @classmethod
 
    def from_books(cls, post: data.Posting) -> 'PayPalPosting':
 
        return cls(
 
            post.meta.get('filename', ''),
 
            post.meta.get('lineno', 0),
 
            post.meta.get('paypal-id', '<missing paypal-id>'),
 
            post.meta.date,
 
            post.units,
 
            post.meta.txn.narration,
 
            post.meta.get('entity'),
 
        )
 

	
 
    @classmethod
 
    def from_statement(
 
            cls,
 
            row: Mapping[str, str],
 
            filename: str='<statement>',
 
            lineno: int=0,
 
    ) -> 'PayPalPosting':
 
        row_net = Decimal(row['Net'].replace(',', ''))
 
        return cls(
 
            filename,
 
            lineno,
 
            row['Transaction ID'],
 
            datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
 
            data.Amount(row_net, row['Currency']),
 
            row['Name'],
 
            None,
 
        )
 

	
 
    @classmethod
 
    def iter_statement(cls, path: Path) -> Iterator['PayPalPosting']:
 
        with path.open() as csv_file:
 
            # PayPal's CSVs start with a ZWNJ space.
 
            # Discard that, assuming it's still there.
 
            orig_pos = csv_file.tell()
 
            char1 = csv_file.read(1)
 
            if not unicodedata.category(char1).startswith('C'):
 
                csv_file.seek(orig_pos)
 
            in_csv = csv.DictReader(csv_file)
 
            filename = str(path)
 
            for lineno, row in enumerate(in_csv, 2):
 
                yield cls.from_statement(row, filename, lineno)
 

	
 
    def compare(self, other: 'PayPalPosting') -> int:
 
        retval = 0
 
        if (self.date.year != other.date.year
 
            or self.date.month != other.date.month):
 
            retval |= ReconcileProblems.MONTH_DIFF
 
            retval |= ReconcileProblems.DATE_DIFF
 
        elif self.date.day != other.date.day:
 
            retval |= ReconcileProblems.DATE_DIFF
 
        return retval
 

	
 

	
 
class PayPalReconciler:
 
    __slots__ = ['books_posts', 'statement_posts']
 

	
 
    def __init__(self) -> None:
 
        self.books_posts: List[PayPalPosting] = []
 
        self.statement_posts: List[PayPalPosting] = []
 

	
 
    def add_from_books(self, post: PayPalPosting) -> None:
 
        self.books_posts.append(post)
 

	
 
    def add_from_statement(self, post: PayPalPosting) -> None:
 
        self.statement_posts.append(post)
 

	
 
    def post_problems(self) -> Iterator[Tuple[PayPalPosting, int]]:
 
        try:
 
            statement_post = self.statement_posts[0]
 
        except IndexError:
 
            try:
 
                statement_post = self.books_posts[0]
 
            except IndexError:
 
                return
 
        for post in self.books_posts:
 
            yield (post, statement_post.compare(post))
 

	
 
    def problems(self) -> int:
 
        stmt_count = len(self.statement_posts)
 
        if stmt_count == 0:
 
            return ReconcileProblems.NOT_IN_STATEMENT
 
        elif stmt_count > 1:
 
            return ReconcileProblems.DUP_IN_STATEMENT
 
        else:
 
            balance = core.Balance(post.amount for post in self.books_posts)
 
            if not (balance - self.statement_posts[0].amount).is_zero():
 
                return ReconcileProblems.AMOUNT_DIFF
 
            worst_problem = 0
 
            for _, problems in self.post_problems():
 
                if problems & ~ReconcileProblems.DATE_DIFF:
 
                    return problems
 
                else:
 
                    worst_problem = worst_problem or problems
 
            return worst_problem
 

	
 
    def sort_key(self) -> Hashable:
 
        try:
 
            post = self.statement_posts[0]
 
        except IndexError:
 
            post = self.books_posts[0]
 
        return post.date
 

	
 

	
 
class PayPalReconciliationReport(core.BaseODS[PayPalPosting, str]):
 
    COLUMN_WIDTHS = {
 
        'Source': 1.5,
 
        'Transaction ID': 1.5,
 
        'Date': None,
 
        'Amount': 1,
 
        'Description': 2.5,
 
        'Entity': 1.5,
 
    }
 

	
 
    def __init__(self) -> None:
 
        super().__init__()
 
        self.bad_sheet = self.start_sheet("Reconciliation")
 
        self.good_sheet = self.start_sheet("Matched Postings")
 

	
 
    def init_styles(self) -> None:
 
        super().init_styles()
 
        self.error_style = self.bgcolor_style('#ffcc99')
 
        self.warning_style = self.bgcolor_style('#ffff00')
 
        self.endrow_style = self.border_style(core.Border.BOTTOM, '1pt')
 

	
 
    def start_sheet(self, name: str) -> odf.table.Table:
 
        self.use_sheet(name)
 
        for col_width in self.COLUMN_WIDTHS.values():
 
            col_style = None if col_width is None else self.column_style(col_width)
 
            self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
 
        self.add_row(*(
 
            self.string_cell(col_name, stylename=self.style_bold)
 
            for col_name in self.COLUMN_WIDTHS
 
        ))
 
        self.lock_first_row()
 
        return self.sheet
 

	
 
    def section_key(self, row: PayPalPosting) -> str:
 
        return row.txn_id
 

	
 
    def write_row(
 
            self,
 
            row: PayPalPosting,
 
            source_style: Union[None, str, odf.style.Style]=None,
 
            problems: int=0,
 
    ) -> None:
 
        source_style = self.merge_styles(source_style, self.style_endtext)
 
        if problems & ReconcileProblems.MONTH_DIFF:
 
            date_style: Optional[odf.style.Style] = self.error_style
 
        elif problems & ReconcileProblems.DATE_DIFF:
 
            date_style = self.warning_style
 
        else:
 
            date_style = None
 
        date_style = self.merge_styles(date_style, self.style_date)
 
        if problems & ReconcileProblems.AMOUNT_DIFF:
 
            amount_style: Optional[odf.style.Style] = self.error_style
 
        else:
 
            amount_style = None
 
        amount_style = self.merge_styles(
 
            amount_style, self.currency_style(row.amount.currency),
 
        )
 
        self.add_row(
 
            self.string_cell(f'{row.filename}:{row.lineno}', stylename=source_style),
 
            self.string_cell(row.txn_id),
 
            self.date_cell(row.date, stylename=date_style),
 
            self.currency_cell(row.amount, stylename=amount_style),
 
            self.string_cell(row.description),
 
            self.string_cell(row.entity or ''),
 
        )
 

	
 
    def write_missing(self, source: str, txn_id: str) -> None:
 
        self.add_row(
 
            self.string_cell(source, stylename=self.error_style),
 
            self.string_cell(txn_id),
 
            odf.table.TableCell(),
 
            odf.table.TableCell(),
 
            self.string_cell(f"not found in {source}"),
 
            odf.table.TableCell(),
 
        )
 

	
 
    def write_all(self, source: Mapping[str, PayPalReconciler]) -> None:
 
        for txn_id, reconciler in sorted(
 
                source.items(), key=lambda kv: kv[1].sort_key(),
 
        ):
 
            problems = reconciler.problems()
 
            if problems & ~ReconcileProblems.DATE_DIFF:
 
                self.sheet = self.bad_sheet
 
            else:
 
                self.sheet = self.good_sheet
 
            posts_iter = iter(reconciler.statement_posts)
 
            try:
 
                statement_post = next(posts_iter)
 
            except StopIteration:
 
                self.write_missing('statement', txn_id)
 
            else:
 
                self.write_row(statement_post)
 
                for post in posts_iter:
 
                    stmt_problems = statement_post.compare(post)
 
                    if post.amount != statement_post.amount:
 
                        stmt_problems |= ReconcileProblems.AMOUNT_DIFF
 
                    self.write_row(statement_post, self.error_style, stmt_problems)
 
            if self.sheet is self.bad_sheet:
 
                probs_iter = reconciler.post_problems()
 
                try:
 
                    post, post_problems = next(probs_iter)
 
                except StopIteration:
 
                    self.write_missing('books', txn_id)
 
                else:
 
                    self.write_row(post, None, problems | post_problems)
 
                    for post, post_problems in probs_iter:
 
                        self.write_row(post, None, problems | post_problems)
 
                for cell in self.sheet.lastChild.childNodes:
 
                    cell_style = self.merge_styles(
 
                        cell.getAttribute('stylename'),
 
                        self.endrow_style,
 
                    )
 
                    cell.setAttribute('stylename', cell_style)
 

	
 

	
 
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser()
 
    cliutil.add_version_argument(parser)
 
    cliutil.add_loglevel_argument(parser)
 
    parser.add_argument(
 
        '--account',
 
        default='Assets:PayPal',
 
        help="""Name of the Beancount account with PayPal postings.
 
Default %(default)r.
 
""")
 
    parser.add_argument(
 
        '--begin', '--start', '-b',
 
        dest='start_date',
 
        metavar='DATE',
 
        type=cliutil.date_arg,
 
        help="""Date to start reconciliation, inclusive, in YYYY-MM-DD format.
 
The default is the first date in the statement.
 
""")
 
    parser.add_argument(
 
        '--end', '--stop', '-e',
 
        dest='stop_date',
 
        metavar='DATE',
 
        type=cliutil.date_arg,
 
        help="""Date to stop reconciliation, exclusive, in YYYY-MM-DD format.
 
The default is the last date in the statement.
 
""")
 
    parser.add_argument(
 
        '--date-fuzz',
 
        metavar='DAYS',
 
        type=int,
 
        default=30,
 
        # Loading an extra month on each end should be more than enough for
 
        # real reconciliation. This switch is mostly a debug aid.
 
        help=argparse.SUPPRESS,
 
    )
 
    parser.add_argument(
 
        '--output-file', '-O',
 
        metavar='PATH',
 
        type=Path,
 
        help="""Write the report to this file, or stdout when PATH is `-`.
 
The default is `PayPalReconciliation_<StartDate>_<StopDate>.ods`.
 
""")
 
    parser.add_argument(
 
        'statement',
 
        type=Path,
 
        help="Path to the PayPal statement CSV",
 
    )
 
    return parser.parse_args(arglist)
 

	
 
def main(arglist: Optional[Sequence[str]]=None,
 
         stdout: TextIO=sys.stdout,
 
         stderr: TextIO=sys.stderr,
 
         config: Optional[configmod.Config]=None,
 
) -> int:
 
    args = parse_arguments(arglist)
 
    cliutil.set_loglevel(logger, args.loglevel)
 
    if config is None:
 
        config = configmod.Config()
 
        config.load_file()
 

	
 
    date_range = DateRange(
 
        args.start_date or datetime.date(datetime.MINYEAR, 1, 1),
 
        args.stop_date or datetime.date(datetime.MAXYEAR, 12, 1),
 
    )
 
    reconcilers: Mapping[str, PayPalReconciler] = collections.defaultdict(PayPalReconciler)
 
    for stmt_post in PayPalPosting.iter_statement(args.statement):
 
        if stmt_post.amount.number and stmt_post.date in date_range:
 
            reconcilers[stmt_post.txn_id].add_from_statement(stmt_post)
 
    if not reconcilers:
 
        logger.warning("no posting data read from statement")
 

	
 
    if args.start_date is None:
 
        args.start_date = min(
 
            post.date
 
            for rec in reconcilers.values()
 
            for post in rec.statement_posts
 
        )
 
    if args.stop_date is None:
 
        args.stop_date = max(
 
            post.date
 
            for rec in reconcilers.values()
 
            for post in rec.statement_posts
 
        ) + datetime.timedelta(days=1)
 

	
 
    returncode = os.EX_OK
 
    books_loader = config.books_loader()
 
    if books_loader is None:
 
        entries, load_errors, options = books.Loader.load_none(config.config_file_path())
 
        returncode = cliutil.ExitCode.NoConfiguration
 
    else:
 
        date_fuzz = datetime.timedelta(days=args.date_fuzz)
 
        entries, load_errors, options = books_loader.load_fy_range(
 
            args.start_date - date_fuzz, args.stop_date + date_fuzz,
 
        )
 
        if load_errors:
 
            returncode = cliutil.ExitCode.BeancountErrors
 
    for error in load_errors:
 
        bc_printer.print_error(error, file=stderr)
 

	
 
    date_range = DateRange(args.start_date, args.stop_date)
 
    for bean_post in data.Posting.from_entries(entries):
 
        if bean_post.account != args.account:
 
            continue
 
        paypal_post = PayPalPosting.from_books(bean_post)
 
        if paypal_post.txn_id in reconcilers or (
 
                paypal_post.amount.number and paypal_post.date in date_range
 
        ):
 
            reconcilers[paypal_post.txn_id].add_from_books(paypal_post)
 

	
 
    report = PayPalReconciliationReport()
 
    report.write_all(reconcilers)
 
    if args.output_file is None:
 
        args.output_file = Path('PayPalReconciliation_{}_{}.ods'.format(
 
            args.start_date.isoformat(),
 
            args.stop_date.isoformat(),
 
        ))
 
        logger.info("Writing report to %s", args.output_file)
 
    ods_file = cliutil.bytes_output(args.output_file, stdout)
 
    report.save_file(ods_file)
 
    return returncode
 

	
 
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
 

	
 
if __name__ == '__main__':
 
    exit(entry_point())
setup.py
Show inline comments
...
 
@@ -5,7 +5,7 @@ from setuptools import setup
 
setup(
 
    name='conservancy_beancount',
 
    description="Plugin, library, and reports for reading Conservancy's books",
 
    version='1.15.5',
 
    version='1.16.0',
 
    author='Software Freedom Conservancy',
 
    author_email='info@sfconservancy.org',
 
    license='GNU AGPLv3+',
...
 
@@ -35,6 +35,7 @@ setup(
 
        'conservancy_beancount.pdfforms',
 
        'conservancy_beancount.pdfforms.extract',
 
        'conservancy_beancount.plugin',
 
        'conservancy_beancount.reconcile',
 
        'conservancy_beancount.reports',
 
        'conservancy_beancount.tools',
 
    ],
...
 
@@ -52,6 +53,7 @@ setup(
 
            'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
 
            'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
 
            'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
 
            'reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point',
 
            'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
 
        ],
 
    },
0 comments (0 inline, 0 general)