From 3792d46bcc32a911c909e2f5a899f7184cfcdcc4 2022-02-08 11:06:01 From: Ben Sturmfels Date: 2022-02-08 11:06:01 Subject: [PATCH] reconcile: Prototype writing statement metadata back to books. --- diff --git a/conservancy_beancount/reconcile/prototype_amex_reconciler.py b/conservancy_beancount/reconcile/prototype_amex_reconciler.py index 982874de9476836add6a726565c4708131dec186..0285c0234175584e768c693b00f3b6fea78e5a9e 100644 --- a/conservancy_beancount/reconcile/prototype_amex_reconciler.py +++ b/conservancy_beancount/reconcile/prototype_amex_reconciler.py @@ -1,13 +1,24 @@ -"""A prototype AMEX statement reconciler. +"""Reconcile an AMEX CSV statement against the books and print differences. Run like this: $ python3 -m pip install thefuzz -$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py --beancount-file=$HOME/conservancy/beancount/books/2021.beancount --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv -""" +$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py \ + --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \ + --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv + +Conservancy currently enter data by hand rather than using Beancount importers. +This tool is still somewhat like an importer in that it needs to extract +transaction details from a third-party statement. Instead of creating +directives, it just checks to see that similar directives are already present. + +TODO/ISSUES: + - AMEX statement doesn't provide bank balance or running total +""" import argparse import csv +import collections import datetime import decimal from typing import Dict, List, Tuple @@ -19,6 +30,7 @@ from thefuzz import fuzz # type: ignore # NOTE: Statement doesn't seem to give us a running balance or a final total. def standardize_amex_record(row: Dict) -> Dict: + """Turn an AMEX CSV row into a standard dict format representing a transaction.""" return { 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), 'amount': -1 * decimal.Decimal(row['Amount']), @@ -27,10 +39,14 @@ def standardize_amex_record(row: Dict) -> Dict: def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def] + """Turn a Beancount query result row into a standard dict representing a transaction.""" return { 'date': row.date, 'amount': row.number_cost_position, 'payee': row.payee if row.payee else row.narration, + 'filename': row.filename, + 'line': row.posting_line, + 'statement': row.posting_statement, } @@ -49,9 +65,9 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]: payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee']) payee_matches = payee_match_quality > 50 if date_matches and amount_matches and payee_matches: - return True, '' + return True, 'Matched' elif date_matches and amount_matches: - return True, f'Low payee match ({payee_match_quality}%)' + return True, f'Matched with low-payee match ({payee_match_quality}%)' else: return False, '' @@ -61,6 +77,7 @@ parser.add_argument('--beancount-file', required=True) parser.add_argument('--amex-csv', required=True) parser.add_argument('--grep-output-filename') # parser.add_argument('--report-group-regex') +parser.add_argument('--show-reconciled-matches', action='store_true') args = parser.parse_args() with open(args.amex_csv) as f: @@ -71,12 +88,21 @@ begin_date = statement_trans[0]['date'] end_date = statement_trans[-1]['date'] # Do we traverse and filter the in-memory entries list and filter that, or do we -# use Beancount Query Language to get a list of transactions? +# use Beancount Query Language (BQL) to get a list of transactions? Currently +# using BQL. +# +# beancount.query.query_compile.compile() and +# beancount.query.query_execute.filter_entries() look useful in this respect, +# but I'm not clear on how to use compile(). An example would help. entries, _, options = loader.load_file(args.beancount_file) + +# String concatenation looks bad, but there's no SQL injection possible here +# because BQL can't write back to the Beancount files. I hope! +query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}" result_types, result_rows = run_query( entries, options, - f"SELECT filename, lineno, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}", + query, ) books_trans = sort_records([standardize_beancount_record(row) for row in result_rows]) @@ -85,21 +111,48 @@ num_books_trans = len(books_trans) statement_index = 0 books_index = 0 matches = [] +metadata_to_apply = [] + +# Run through all the statement transactions to find a matching transaction in +# the books. If found, the books transaction is marked off so that it can only +# be matched once. Some transactions will be matched, some will be on the +# statement but not the books and some on the books but not the statement. for r1 in statement_trans: for r2 in books_trans: match, note = records_match(r1, r2) if match: - matches.append([r2['date'], f'{format_record(r1)} --> {format_record(r2)} {note.upper()}']) + if not r2['statement'] or args.show_reconciled_matches: + matches.append([r2['date'], f'{format_record(r1)} --> {format_record(r2)} ✓ {note}']) + if not r2['statement']: + metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{args.amex_csv}"\n')) books_trans.remove(r2) break else: - matches.append([r1['date'], f'{format_record(r1)} --> {" ":^41} NOT IN BOOKS']) + matches.append([r1['date'], f'{format_record(r1)} --> {" ":^41} ✗ Not in books']) for r2 in books_trans: - matches.append([r2['date'], f'{" ":^41} --> {format_record(r2)} NOT ON STATEMENT']) + matches.append([r2['date'], f'{" ":^41} --> {format_record(r2)} ✗ Not on statement']) +print(f'-----------------------------------------------------------------------------------------------------------------') print(f'{"STATEMENT":<40} {"BOOKS":<40} NOTES') +print(f'-----------------------------------------------------------------------------------------------------------------') for _, output in sorted(matches): print(output) +print(f'-----------------------------------------------------------------------------------------------------------------') + +if metadata_to_apply: + print('Mark matched transactions as reconciled in the books? (y/N) ', end='') + if input().lower() == 'y': + files = {} + for filename, line, metadata in metadata_to_apply: + if filename not in files: + with open(filename, 'r') as f: + files[filename] = [0, f.readlines()] + files[filename][1].insert(line + files[filename][0], metadata) + files[filename][0] += 1 + for filename in files: + with open(filename, 'w') as f: + f.writelines(files[filename][1]) + print(f'Wrote {filename}.') # Local Variables: # python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"