Changeset - 405dd553cb00
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 2 years ago 2022-02-23 06:25:02
ben@sturm.com.au
reconcile: Implement "subset sum" feature matching multiple books entries to a single statement entry.
2 files changed with 171 insertions and 65 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -51,2 +51,3 @@ import decimal
 
import io
 
import itertools
 
import logging
...
 
@@ -59,2 +60,3 @@ from beancount import loader
 
from beancount.query.query import run_query
 
from colorama import Fore, Style
 

	
...
 
@@ -166,16 +168,21 @@ def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
 

	
 
def format_record(record: dict) -> str:
    """Format a single transaction record as one fixed-width report cell.

    The result is ``"<ISO date>: <amount> <detail>"`` left-justified (space
    padded) to at least 59 characters; longer text is not truncated.

    The detail part depends on which identifying fields are populated:

    * payee and check id: payee cut to 25 characters, then ``#<check_id>``
    * payee only: payee cut to 35 characters
    * otherwise: ``#<check_id>`` alone

    Args:
        record: Mapping with ``date`` (anything offering ``isoformat()``),
            ``amount`` (a number accepted by the ``11,.2f`` format spec),
            ``payee`` and ``check_id``.

    Returns:
        The formatted, padded line.
    """
    payee = record['payee']
    check_id = record['check_id']
    prefix = f"{record['date'].isoformat()}: {record['amount']:11,.2f}"
    if payee and check_id:
        detail = f"{payee[:25]} #{check_id}"
    elif payee:
        detail = payee[:35]
    else:
        # No payee: fall back to the check id. Previously a stray
        # ``raise NotImplementedError`` made this formatting unreachable.
        detail = f"#{check_id}"
    return f"{prefix} {detail}".ljust(59)
 

	
 

	
 
def format_multirecord(r1s, r2s, note):
    """Build report rows for one statement entry matched to several books entries.

    The first row shows the statement entry beside the first books entry.
    Each further books entry gets a continuation row in which the statement
    column holds only the date and a ``↳`` marker.

    Args:
        r1s: List holding exactly one statement record.
        r2s: List holding two or more books records.
        note: Match annotation, interpolated verbatim after ``✓ Matched``.

    Returns:
        A list of ``[key, text]`` pairs. The key is built from the statement
        date, an index for continuation rows, and the payee — presumably a
        sort key for the report; confirm against the caller.

    Raises:
        AssertionError: If ``r1s`` does not hold exactly one record or
            ``r2s`` holds fewer than two.
    """
    assert len(r1s) == 1
    assert len(r2s) > 1
    r1 = r1s[0]
    date = r1['date'].isoformat()
    payee = r1['payee']
    # The ' ' in the head row's key sorts before the digits used by the
    # continuation rows, keeping the head row first when sorted by key.
    match_output = [
        [date + ' ' + payee, f'{format_record(r1)}  →  {format_record(r2s[0])}  ✓ Matched{note}'],
    ]
    for i, r2 in enumerate(r2s[1:]):
        match_output.append([date + str(i) + payee, f'{date}:             ↳                                    →  {format_record(r2)}  ✓ Matched{note}'])
    return match_output
 

	
...
 
@@ -255,9 +262,3 @@ def match_statement_and_books(statement_trans: list, books_trans: list):
 
    matches = []
 
    # We need a realised list and should be a copy so we can safely delete
 
    # items.
 
    books_trans = list(books_trans)
 

	
 
    # We can delete the matched books trans, but seems not a good idea to delete
 
    # while iterating through statement_trans. Instead pushing onto a separate
 
    # list.
 
    remaining_books_trans = []
 
    remaining_statement_trans = []
...
 
@@ -280,8 +281,9 @@ def match_statement_and_books(statement_trans: list, books_trans: list):
 
            matches.append(([r1], [books_trans[best_match_index]], best_match_note))
 
            # Don't try to make a second match against this books entry.
 
            del books_trans[best_match_index]
 
        else:
 
            matches.append(([r1], [], ['no match']))
 
            remaining_statement_trans.append(r1)
 
    for r2 in books_trans:
 
        matches.append(([], [r2], ['no match']))
 
    return matches
 
        remaining_books_trans.append(r2)
 
    return matches, remaining_statement_trans, remaining_books_trans
 

	
...
 
@@ -294,8 +296,11 @@ def format_matches(matches, csv_statement: str, show_reconciled_matches):
 
        if r1 and r2:
 
            if show_reconciled_matches:
 
                match_output.append([r1[0]['date'], f'{format_record(r1)}  →  {format_record(r2)}  ✓ Matched{note}'])
 
            if show_reconciled_matches and all(x['bank_statement'] for x in r2):
 
                if len(r2) == 1:
 
                    match_output.append([r1[0]['date'].isoformat() + r1[0]['payee'], f'{format_record(r1[0])}  →  {format_record(r2[0])}  ✓ Matched{note}'])
 
                else:
 
                    match_output.extend(format_multirecord(r1, r2, note))
 
        elif r1:
 
            match_output.append([r1[0]['date'], f'{format_record(r1)}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})'])
 
            match_output.append([r1[0]['date'].isoformat() + r1[0]['payee'], Fore.RED + Style.BRIGHT + f'{format_record(r1[0])}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})' + Style.RESET_ALL])
 
        else:
 
            match_output.append([r2[0]['date'], f'{" ":^59}  →  {format_record(r2)}  ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})'])
 
            match_output.append([r2[0]['date'].isoformat() + r2[0]['payee'], Fore.RED + Style.BRIGHT + f'{" ":^59}  →  {format_record(r2[0])}  ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})' + Style.RESET_ALL])
 
    return match_output
...
 
@@ -402,2 +407,47 @@ def totals(matches):
 

	
 

	
 
def subset_match(statement_trans, books_trans):
    """Match single statement entries against groups of books entries.

    Books entries sharing the same ``(date, payee)`` are grouped and treated
    as one candidate whose amount is the sum of the group. Each candidate is
    scored against every still-unmatched statement entry with
    ``records_match``; a group is accepted when it is the only decent
    candidate (score above 0.5, no check-id mismatch) or the score is above
    0.8.

    Neither input list is modified; all results are carried by the return
    value.

    Args:
        statement_trans: Statement records still unmatched.
        books_trans: Books records still unmatched; each needs ``date``,
            ``payee`` and ``amount``.

    Returns:
        A tuple ``(matches, remaining_statement_trans, remaining_books_trans)``
        where ``matches`` is a list of
        ``([statement_record], [books_record, ...], notes)`` tuples and the
        remaining lists hold the original, unaltered records that found no
        match.
    """
    matches = []
    remaining_books_trans = []
    # Work on a copy so matched entries can be dropped without touching the
    # caller's list.
    remaining_statement_trans = list(statement_trans)

    # Group with a dict instead of itertools.groupby: groupby only merges
    # *adjacent* items, and it iterates lazily, which goes wrong if the
    # underlying list is edited mid-loop. Dicts keep first-seen order.
    groups = {}
    for entry in books_trans:
        groups.setdefault((entry['date'], entry['payee']), []).append(entry)

    for group_items in groups.values():
        best_match_score = 0
        best_match_index = None
        best_match_note = []
        matches_found = 0

        # Synthetic record standing in for the whole group: first entry's
        # fields with the group's summed amount.
        r2 = copy.copy(group_items[0])
        r2['amount'] = sum(x['amount'] for x in group_items)
        for i, r1 in enumerate(remaining_statement_trans):
            # records_match is assumed to return (score, list of notes).
            score, note = records_match(r1, r2)
            if score >= 0.5 and score >= best_match_score:
                matches_found += 1
                best_match_score = score
                best_match_index = i
                best_match_note = note

        only_decent_match = (
            best_match_score > 0.5
            and matches_found == 1
            and 'check-id mismatch' not in best_match_note
        )
        if only_decent_match or best_match_score > 0.8:
            if best_match_score <= 0.8:
                best_match_note.append('only one decent match')
            matches.append(([remaining_statement_trans[best_match_index]], group_items, best_match_note))
            # Don't try to make a second match against this statement entry.
            del remaining_statement_trans[best_match_index]
        else:
            # Hand back the real entries, not the summed stand-in, so each
            # unmatched books entry is still reported individually.
            remaining_books_trans.extend(group_items)
    return matches, remaining_statement_trans, remaining_books_trans
 

	
 
def process_unmatched(statement_trans, books_trans):
    """Wrap leftover records in ``no match`` entries.

    Statement records come first, each as ``([record], [], ['no match'])``,
    followed by books records, each as ``([], [record], ['no match'])``.
    Every entry gets its own fresh note list.
    """
    statement_only = [([entry], [], ['no match']) for entry in statement_trans]
    books_only = [([], [entry], ['no match']) for entry in books_trans]
    return statement_only + books_only
 

	
 
def main(args):
...
 
@@ -445,6 +495,9 @@ def main(args):
 

	
 
    matches = match_statement_and_books(statement_trans, books_trans)
 
    match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
 
    matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
 
    subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans)
 
    matches.extend(subset_matches)
 
    unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
 
    matches.extend(unmatched)
 

	
 
    # assert books_balance == books_balance_reconciled + total_matched + total_missing_from_statement
 
    match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
 

	
tests/test_reconcile.py
Show inline comments
...
 
@@ -15,2 +15,3 @@ from conservancy_beancount.reconcile.statement_reconciler import (
 
    totals,
 
    subset_match,
 
)
...
 
@@ -42,2 +43,9 @@ S3 = {
 
}
 
# Statement entry for the subset-sum test: its amount (-2260.00) equals the
# combined amount of the books entries B4A, B4B and B4C on the same date.
S4 = {
 
    'date': datetime.date(2022, 8, 11),
 
    'amount': decimal.Decimal('-2260.00'),
 
    'payee': 'Trust 0000000362 210',
 
    'check_id': '',
 
    'line': 555,
 
}
 

	
...
 
@@ -116,2 +124,24 @@ B3_unmatched_check_id = {
 
}
 
# Books entries for the subset-sum test. All three share the same date and
# payee; their amounts (-250.00, -250.00, -1760.00) add up to -2260.00, the
# amount of statement entry S4. B4A and B4B are identical in every field.
B4A = {
 
    'date': datetime.date(2022, 8, 11),
 
    'amount': decimal.Decimal('-250.00'),
 
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
 
    'check_id': '',
 
    'line': 1000,
 
}
 
B4B = {
 
    'date': datetime.date(2022, 8, 11),
 
    'amount': decimal.Decimal('-250.00'),
 
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
 
    'check_id': '',
 
    'line': 1000,
 
}
 
B4C = {
 
    'date': datetime.date(2022, 8, 11),
 
    'amount': decimal.Decimal('-1760.00'),
 
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
 
    'check_id': '',
 
    'line': 1000,
 
}
 

	
 

	
...
 
@@ -121,3 +151,3 @@ def test_one_exact_match():
 
    books = [B1]
 
    assert match_statement_and_books(statement, books) == [
 
    assert match_statement_and_books(statement, books) == (
 
        # Match, match, notes.
...
 
@@ -127,4 +157,6 @@ def test_one_exact_match():
 
        # transaction.
 
        ([S1], [B1], []),
 
    ]
 
        [([S1], [B1], [])],
 
        [],
 
        [],
 
    )
 

	
...
 
@@ -133,6 +165,7 @@ def test_multiple_exact_matches():
 
    books = [B1, B2]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S1], [B1], []),
 
        ([S2], [B2], []),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [([S1], [B1], []), ([S2], [B2], [])],
 
        [],
 
        [],
 
    )
 

	
...
 
@@ -141,5 +174,7 @@ def test_one_mismatch():
 
    books = []
 
    assert match_statement_and_books(statement, books) == [
 
        ([S1], [], ['no match']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [],
 
        [S1],
 
        [],
 
    )
 

	
...
 
@@ -148,6 +183,7 @@ def test_multiple_mismatches():
 
    books = [B2]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S1], [], ['no match']),
 
        ([], [B2], ['no match']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [],
 
        [S1],
 
        [B2],
 
    )
 

	
...
 
@@ -156,5 +192,7 @@ def test_next_day_matches():
 
    books = [B3_next_day]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S3], [B3_next_day], ['+/- 1 days']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [([S3], [B3_next_day], ['+/- 1 days'])],
 
        [],
 
        [],
 
    )
 

	
...
 
@@ -163,5 +201,7 @@ def test_next_week_matches():
 
    books = [B3_next_week]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S3], [B3_next_week], ['+/- 7 days']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [([S3], [B3_next_week], ['+/- 7 days'])],
 
        [],
 
        [],
 
    )
 

	
...
 
@@ -170,6 +210,7 @@ def test_incorrect_amount_does_not_match():
 
    books = [B3_mismatch_amount]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S3], [], ['no match']),
 
        ([], [B3_mismatch_amount], ['no match']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [],
 
        [S3],
 
        [B3_mismatch_amount],
 
    )
 

	
...
 
@@ -178,5 +219,7 @@ def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
 
    books = [B3_payee_mismatch_1]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S3], [B3_payee_mismatch_1], ['payee mismatch', 'only one decent match']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [([S3], [B3_payee_mismatch_1], ['payee mismatch', 'only one decent match'])],
 
        [],
 
        [],
 
    )
 

	
...
 
@@ -186,7 +229,7 @@ def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
 
    match = match_statement_and_books(statement, books)
 
    assert match == [
 
        ([S3], [], ['no match']),
 
        ([], [B3_payee_mismatch_1], ['no match']),
 
        ([], [B3_payee_mismatch_2], ['no match']),
 
    ]
 
    assert match == (
 
        [],
 
        [S3],
 
        [B3_payee_mismatch_1, B3_payee_mismatch_2],
 
    )
 

	
...
 
@@ -253,5 +296,15 @@ def test_payee_not_considered_if_check_id_present():
 
    books = [B3_unmatched_check_id]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S3], [], ['no match']),
 
        ([], [B3_unmatched_check_id], ['no match']),
 
    ]
 
    assert match_statement_and_books(statement, books) == (
 
        [],
 
        [S3],
 
        [B3_unmatched_check_id],
 
    )
 

	
 
def test_subset_sum_match():
    """Three books entries summing to one statement entry match as a group."""
    statement_entries = [S4]
    books_entries = [B4A, B4B, B4C]
    expected = (
        # The single statement entry is paired with all three books entries.
        [([S4], [B4A, B4B, B4C], [])],
        [],  # No remaining statement trans.
        [],  # No remaining books trans.
    )
    result = subset_match(statement_entries, books_entries)
    assert result == expected
0 comments (0 inline, 0 general)