Changeset - 517d4027b462
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 2 years ago 2022-02-21 11:43:22
ben@sturm.com.au
reconcile: Explicitly avoid payee matching when check-id provided in books.
2 files changed with 21 insertions and 2 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/prototype_amex_reconciler.py
Show inline comments
...
 
@@ -202,93 +202,93 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
 
    """Do these records represent the same transaction?"""
 

	
 
    date_score = date_proximity(r1['date'], r2['date'])
 
    if r1['date'] == r2['date']:
 
        date_message = ''
 
    elif date_score > 0.0:
 
        diff = abs((r1['date'] - r2['date']).days)
 
        date_message = f'+/- {diff} days'
 
    else:
 
        date_message = 'date mismatch'
 

	
 
    if r1['amount'] == r2['amount']:
 
        amount_score, amount_message = 2.0, ''
 
    else:
 
        amount_score, amount_message = 0.0, 'amount mismatch'
 

	
 
    # We never consider payee if there's a check_id in the books.
 
    check_message = ''
 
    payee_message = ''
 
    if r2['check_id']:
 
        payee_score = 0.0
 
        if r1['check_id'] and r2['check_id'] and r1['check_id'] == r2['check_id']:
 
            check_score = 1.0
 
        else:
 
            check_message = 'Check id not matched'
 
            check_message = 'check-id mismatch'
 
            check_score = 0.0
 
    else:
 
        check_score = 0.0
 
        payee_score = payee_match(r1['payee'], r2['payee'])
 
        if payee_score > 0.8:
 
            payee_message = ''
 
        elif payee_score > 0.4:
 
            payee_message = 'partial payee match'
 
        else:
 
            payee_message = 'payee mismatch'
 

	
 
    overall_score = (date_score + amount_score + check_score + payee_score) / 4
 
    overall_message = [m for m in [date_message, amount_message, check_message, payee_message] if m]
 
    return overall_score, overall_message
 

	
 

	
 
def match_statement_and_books(statement_trans: list, books_trans: list):
 
    """
 

	
 

	
 
    Runs through all the statement transactions to find a matching transaction
 
    in the books. If found, the books transaction is marked off so that it can
 
    only be matched once. Some transactions will be matched, some will be on the
 
    statement but not the books and some on the books but not the statement.
 

	
 
    """
 
    matches = []
 
    # We need a realised list and should be a copy so we can safely delete
 
    # items.
 
    books_trans = list(books_trans)
 

	
 
    for r1 in statement_trans:
 
        best_match_score = 0
 
        best_match_index = None
 
        best_match_note = ''
 
        matches_found = 0
 
        for i, r2 in enumerate(books_trans):
 
            score, note = records_match(r1, r2)
 
            if score >= 0.5 and score >= best_match_score:
 
                matches_found += 1
 
                best_match_score = score
 
                best_match_index = i
 
                best_match_note = note
 
        if best_match_score > 0.5 and matches_found == 1 and 'payee_mismatch' not in best_match_note or best_match_score > 0.8:
 
        if best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note or best_match_score > 0.8:
 
            if best_match_score <= 0.8:
 
                best_match_note.append('only one decent match')
 
            matches.append(([r1], [books_trans[best_match_index]], best_match_note))
 
            del books_trans[best_match_index]
 
        else:
 
            matches.append(([r1], [], ['no match']))
 
    for r2 in books_trans:
 
        matches.append(([], [r2], ['no match']))
 
    return matches
 

	
 

	
 
def format_matches(matches, csv_statement: str, show_reconciled_matches):
 
    match_output = []
 
    for r1, r2, note in matches:
 
        note = ', '.join(note)
 
        note = ': ' + note if note else note
 
        if r1 and r2:
 
            if show_reconciled_matches:
 
                match_output.append([r1[0]['date'], f'{format_record(r1)}  →  {format_record(r2)}  ✓ Matched{note}'])
 
        elif r1:
 
            match_output.append([r1[0]['date'], f'{format_record(r1)}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})'])
 
        else:
 
            match_output.append([r2[0]['date'], f'{" ":^59}  →  {format_record(r2)}  ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})'])
 
    return match_output
tests/test_reconcile.py
Show inline comments
...
 
@@ -79,48 +79,57 @@ B3_mismatch_amount = {
 
    'payee': 'USPS',
 
    'check_id': '',
 
    'filename': '2022/main.beancount',
 
    'line': 999,
 
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
 
}
 
B3_payee_mismatch_1 = {
 
    'date': datetime.date(2022, 1, 3),
 
    'amount': decimal.Decimal('30.00'),
 
    'payee': 'Credit X',
 
    'check_id': '',
 
    'filename': '2022/main.beancount',
 
    'line': 999,
 
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
 
}
 
B3_payee_mismatch_2 = {
 
    'date': datetime.date(2022, 1, 3),
 
    'amount': decimal.Decimal('30.00'),
 
    'payee': 'Credit Y',
 
    'check_id': '',
 
    'filename': '2022/main.beancount',
 
    'line': 999,
 
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
 
}
 
B3_unmatched_check_id = {
 
    'date': datetime.date(2022, 1, 3),
 
    'amount': decimal.Decimal('30.00'),
 
    'payee': 'USPS',
 
    'check_id': '1234',
 
    'filename': '2022/main.beancount',
 
    'line': 999,
 
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
 
}
 

	
 

	
 
def test_one_exact_match():
 
    statement = [S1]
 
    books = [B1]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S1], [B1], []),
 
    ]
 

	
 
def test_multiple_exact_matches():
 
    statement = [S1, S2]
 
    books = [B1, B2]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S1], [B1], []),
 
        ([S2], [B2], []),
 
    ]
 

	
 
def test_one_mismatch():
 
    statement = [S1]
 
    books = []
 
    assert match_statement_and_books(statement, books) == [
 
        ([S1], [], ['no match']),
 
    ]
 

	
...
 
@@ -209,24 +218,34 @@ def test_write_to_books():
 
    books = textwrap.dedent("""\
 
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
 
          Liabilities:CreditCard:AMEX            -15.50 USD
 
          Expenses:Hosting                        15.50 USD""")
 
    f = tempfile.NamedTemporaryFile('w', delete=False)
 
    f.write(books)
 
    f.close()
 
    metadata = [(f.name, 2, '    bank-statement: statement.pdf')]
 
    write_metadata_to_books(metadata)
 
    with open(f.name) as f:
 
        output = f.read()
 
    assert output == textwrap.dedent("""\
 
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
 
          Liabilities:CreditCard:AMEX            -15.50 USD
 
            bank-statement: statement.pdf
 
          Expenses:Hosting                        15.50 USD""")
 
    os.remove(f.name)
 

	
 
def test_totals():
 
    assert totals([
 
        ([S1], [B1], []),
 
        ([S2], [], []),
 
        ([], [B3_next_day], []),
 
    ]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30'))
 

	
 

	
 
def test_payee_not_considered_if_check_id_present():
 
    # These records match aside from check-id.
 
    statement = [S3]
 
    books = [B3_unmatched_check_id]
 
    assert match_statement_and_books(statement, books) == [
 
        ([S3], [], ['no match']),
 
        ([], [B3_unmatched_check_id], ['no match']),
 
    ]
0 comments (0 inline, 0 general)