Changeset - 20d242c7c753
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 2 years ago 2022-03-02 07:26:10
ben@sturm.com.au
reconcile: Fix a mutation bug causing not all matches to be passed through.
2 files changed with 54 insertions and 20 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -95,43 +95,46 @@ Other related problems we're not dealing with here:
 
import argparse
 
import collections
 
import copy
 
import csv
 
import datetime
 
import decimal
 
import io
 
import itertools
 
import logging
 
import os
 
import re
 
import sys
 
from typing import Callable, Dict, List, Tuple, TextIO
 
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO
 

	
 
from beancount import loader
 
from beancount.query.query import run_query
 
from colorama import Fore, Style  # type: ignore
 

	
 
from .. import cliutil
 
from .. import config as configmod
 

	
 
if not sys.warnoptions:
 
    import warnings
 
    # Disable annoying warning from thefuzz prompting for a C extension. The
 
    # current pure-Python implementation isn't a bottleneck for us.
 
    warnings.filterwarnings('ignore', category=UserWarning, module='thefuzz.fuzz')
 
from thefuzz import fuzz  # type: ignore
 

	
 
logger = logging.getLogger()
 
logger.setLevel(logging.INFO)
 

	
 
# Console logging.
 
logger.addHandler(logging.StreamHandler())
 
PROGNAME = 'reconcile-statement'
 
logger = logging.getLogger(__name__)
 

	
 
# Get some interesting feedback on call to RT with this:
 
# logger.setLevel(logging.DEBUG)
 
# logger.addHandler(logging.StreamHandler())
 

	
 
JUNK_WORDS = [
 
    'software',
 
    'freedom',
 
    'conservancy',
 
    'conse',
 
    'payment',
 
    'echeck',
 
    'bill',
 
    'debit',
 
    'wire',
 
    'credit',
...
 
@@ -448,36 +451,45 @@ def parse_path(path: str) -> str:
 

	
 
def parse_repo_relative_path(path: str) -> str:
 
    if not os.path.exists(path):
 
        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
 
    repo = os.getenv('CONSERVANCY_REPOSITORY')
 
    if not repo:
 
        raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.')
 
    if not path.startswith(repo):
 
        raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
 
    return path
 

	
 

	
 
def parse_args(argv: List[str]) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser(description='Reconciliation helper')
 
def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
 
    """decimal.Decimal can't parse numbers with thousands separator."""
 
    number_text = number_text.replace(',', '')
 
    return decimal.Decimal(number_text)
 

	
 

	
 
def parse_arguments(argv: List[str]) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
 
    cliutil.add_version_argument(parser)
 
    cliutil.add_loglevel_argument(parser)
 
    parser.add_argument('--beancount-file', required=True, type=parse_path)
 
    parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
 
    parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
 
    parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
 
    parser.add_argument('--grep-output-filename')
 
    # parser.add_argument('--report-group-regex')
 
    parser.add_argument('--show-reconciled-matches', action='store_true')
 
    parser.add_argument('--statement-balance', type=decimal.Decimal, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
 
    parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
 
    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
 
    return parser.parse_args(args=argv[1:])
 
    args = parser.parse_args(args=argv)
 
    return args
 

	
 

	
 
def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
 
    total_matched = decimal.Decimal(0)
 
    total_missing_from_books = decimal.Decimal(0)
 
    total_missing_from_statement = decimal.Decimal(0)
 
    for statement_entries, books_entries, _ in matches:
 
        if statement_entries and books_entries:
 
            total_matched += sum(c['amount'] for c in statement_entries)
 
        elif statement_entries:
 
            total_missing_from_books += sum(c['amount'] for c in statement_entries)
 
        else:
...
 
@@ -503,44 +515,51 @@ def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[
 
        r2['amount'] = total
 
        for i, r1 in enumerate(statement_trans):
 
            score, note = records_match(r1, r2)
 
            if score >= 0.5 and score >= best_match_score:
 
                matches_found += 1
 
                best_match_score = score
 
                best_match_index = i
 
                best_match_note = note
 
        if best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note or best_match_score > 0.8:
 
            matches.append(([statement_trans[best_match_index]], group_items, best_match_note))
 
            if best_match_index is not None:
 
                del statement_trans[best_match_index]
 
            for item in group_items:
 
                # TODO: Why?
 
                books_trans.remove(item)
 
        else:
 
            remaining_books_trans.append(r2)
 
    for r1 in statement_trans:
 
        remaining_statement_trans.append(r1)
 
    return matches, remaining_statement_trans, remaining_books_trans
 

	
 

	
 
def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]:
 
    matches: List[Tuple[List, List, List]] = []
 
    for r1 in statement_trans:
 
        matches.append(([r1], [], ['no match']))
 
    for r2 in books_trans:
 
        matches.append(([], [r2], ['no match']))
 
    return matches
 

	
 

	
 
def main(args: argparse.Namespace) -> None:
 
def main(arglist: Optional[Sequence[str]] = None,
 
         stdout: TextIO = sys.stdout,
 
         stderr: TextIO = sys.stderr,
 
         config: Optional[configmod.Config] = None,
 
         ) -> int:
 
    args = parse_arguments(arglist)
 
    cliutil.set_loglevel(logger, args.loglevel)
 
    if config is None:
 
        config = configmod.Config()
 
        config.load_file()
 

	
 
    # TODO: Should put in a sanity check to make sure the statement you're feeding
 
    # in matches the account you've provided.
 

	
 
    # TODO: Can we open the files first, then pass the streams on to the rest of the program?
 

	
 
    if 'AMEX' in args.account:
 
        validate_csv = validate_amex_csv
 
        standardize_statement_record = standardize_amex_record
 
    else:
 
        validate_csv = validate_fr_csv
 
        standardize_statement_record = standardize_fr_record
 

	
...
 
@@ -598,19 +617,16 @@ def main(args: argparse.Namespace) -> None:
 
    print('-' * 155)
 

	
 
    # Write statement metadata back to books
 
    metadata_to_apply = []
 
    for match in matches:
 
        metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
 
    if metadata_to_apply and not args.non_interactive:
 
        print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
 
        if input().lower() == 'y':
 
            write_metadata_to_books(metadata_to_apply)
 

	
 

	
 
if __name__ == '__main__':
 
    args = parse_args(sys.argv)
 
    main(args)
 
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
 

	
 
def entry_point():
 
    args = parse_args(sys.argv)
 
    main(args)
 
if __name__ == '__main__':
 
    exit(entry_point())
tests/test_reconcile.py
Show inline comments
...
 
@@ -299,12 +299,30 @@ def test_payee_not_considered_if_check_id_present():
 
        [S3],
 
        [B3_unmatched_check_id],
 
    )
 

	
 
def test_subset_sum_match():
 
    statement = [S4]
 
    books = [B4A, B4B, B4C]
 
    assert subset_match(statement, books) == (
 
        [([S4], [B4A, B4B, B4C], [])],
 
        [],  # No remaining statement trans.
 
        [],  # No remaining books trans.
 
    )
 

	
 
def test_subset_passes_through_all_non_matches():
 
    """This was used to locate a bug where some of the non-matches had
 
    gone missing due to mutation of books_trans."""
 
    statement_trans = [
 
        S1,  # No match
 
        S4,  # Match
 
    ]
 
    books_trans = [
 
        B2,  # No match
 
        B4A, B4B, B4C, # Match
 
        B3_next_day, B3_next_week,  # No match
 
    ]
 
    assert subset_match(statement_trans, books_trans) == (
 
        [([S4], [B4A, B4B, B4C], [])],  # Matched
 
        [S1],  # No match: preserved intact
 
        [B2, B3_next_day, B3_next_week]  # No match: preserved intact
 
    )
0 comments (0 inline, 0 general)