NPO-Accounting/conservancy_beancount Changeset - 3792d46bcc32

Changeset - 3792d46bcc32

Parent rev.

Child rev.

[Not reviewed]

0 1 0

Ben Sturmfels (bsturmfels) - 2 years ago 2022-02-08 11:06:01
ben@sturm.com.au

reconcile: Prototype writing statement metadata back to books.

1 file changed with 63 insertions and 10 deletions:

conservancy_beancount/reconcile/prototype_amex_reconciler.py

0 comments (0 inline, 0 general)

conservancy_beancount/reconcile/prototype_amex_reconciler.py

➞

Show inline comments

-"""A prototype AMEX statement reconciler.
+"""Reconcile an AMEX CSV statement against the books and print differences.
 Run like this:
 $ python3 -m pip install thefuzz
 $ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py --beancount-file=$HOME/conservancy/beancount/books/2021.beancount --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
 """
 $ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py \
   --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
   --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
 Conservancy currently enter data by hand rather than using Beancount importers.
 This tool is still somewhat like an importer in that it needs to extract
 transaction details from a third-party statement. Instead of creating
 directives, it just checks to see that similar directives are already present.
 TODO/ISSUES:
  - AMEX statement doesn't provide bank balance or running total
 """
 import argparse
 import csv
 import collections
 import datetime
 import decimal
 from typing import Dict, List, Tuple
 from beancount import loader
 from beancount.query.query import run_query
 from thefuzz import fuzz  # type: ignore
 # NOTE: Statement doesn't seem to give us a running balance or a final total.
 def standardize_amex_record(row: Dict) -> Dict:
     """Turn an AMEX CSV row into a standard dict format representing a transaction."""
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
         'amount': -1 * decimal.Decimal(row['Amount']),
         'payee': row['Description'],
+    }
 def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
     """Turn a Beancount query result row into a standard dict representing a transaction."""
     return {
         'date': row.date,
         'amount': row.number_cost_position,
         'payee': row.payee if row.payee else row.narration,
         'filename': row.filename,
         'line': row.posting_line,
         'statement': row.posting_statement,
+    }
 def format_record(record: Dict) -> str:
     return f"{record['date'].isoformat()}: {record['amount']:>8} {record['payee'][:20]:<20}"
@@ ... / @@ -46,61 +62,98 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]: @@
     """Do these records represent the same transaction?"""
     date_matches = r1['date'] >= r2['date'] - datetime.timedelta(days=1) and r1['date'] <= r2['date'] + datetime.timedelta(days=1)
     amount_matches = r1['amount'] == r2['amount']
     payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
     payee_matches = payee_match_quality > 50
     if date_matches and amount_matches and payee_matches:
         return True, ''
+        return True, 'Matched'
     elif date_matches and amount_matches:
-        return True, f'Low payee match ({payee_match_quality}%)'
+        return True, f'Matched with low-payee match ({payee_match_quality}%)'
     else:
         return False, ''
 parser = argparse.ArgumentParser(description='Reconciliation helper')
 parser.add_argument('--beancount-file', required=True)
 parser.add_argument('--amex-csv', required=True)
 parser.add_argument('--grep-output-filename')
 # parser.add_argument('--report-group-regex')
 parser.add_argument('--show-reconciled-matches', action='store_true')
 args = parser.parse_args()
 with open(args.amex_csv) as f:
     reader = csv.DictReader(f)
     statement_trans = sort_records([standardize_amex_record(row) for row in reader])
 begin_date = statement_trans[0]['date']
 end_date = statement_trans[-1]['date']
 # Do we traverse and filter the in-memory entries list and filter that, or do we
 # use Beancount Query Language to get a list of transactions?
 # use Beancount Query Language (BQL) to get a list of transactions? Currently
 # using BQL.
+#
 # beancount.query.query_compile.compile() and
 # beancount.query.query_execute.filter_entries() look useful in this respect,
 # but I'm not clear on how to use compile(). An example would help.
 entries, _, options = loader.load_file(args.beancount_file)
 # String concatenation looks bad, but there's no SQL injection possible here
 # because BQL can't write back to the Beancount files. I hope!
 query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}"
 result_types, result_rows = run_query(
     entries,
     options,
-    f"SELECT filename, lineno, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}",
+    query,
+)
 books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
 num_statement_records = len(statement_trans)
 num_books_trans = len(books_trans)
 statement_index = 0
 books_index = 0
 matches = []
 metadata_to_apply = []
 # Run through all the statement transactions to find a matching transaction in
 # the books. If found, the books transaction is marked off so that it can only
 # be matched once. Some transactions will be matched, some will be on the
 # statement but not the books and some on the books but not the statement.
 for r1 in statement_trans:
     for r2 in books_trans:
         match, note = records_match(r1, r2)
         if match:
             matches.append([r2['date'], f'{format_record(r1)}    -->    {format_record(r2)}  {note.upper()}'])
             if not r2['statement'] or args.show_reconciled_matches:
                     matches.append([r2['date'], f'{format_record(r1)}    -->    {format_record(r2)}  ✓ {note}'])
             if not r2['statement']:
                 metadata_to_apply.append((r2['filename'], r2['line'], f'    bank-statement: "{args.amex_csv}"\n'))
             books_trans.remove(r2)
             break
     else:
-        matches.append([r1['date'], f'{format_record(r1)}    -->    {" ":^41}  NOT IN BOOKS'])
+        matches.append([r1['date'], f'{format_record(r1)}    -->    {" ":^41}  ✗ Not in books'])
 for r2 in books_trans:
-    matches.append([r2['date'], f'{" ":^41}    -->    {format_record(r2)}  NOT ON STATEMENT'])
+    matches.append([r2['date'], f'{" ":^41}    -->    {format_record(r2)}  ✗ Not on statement'])
 print(f'-----------------------------------------------------------------------------------------------------------------')
 print(f'{"STATEMENT":<40}            {"BOOKS":<40}   NOTES')
 print(f'-----------------------------------------------------------------------------------------------------------------')
 for _, output in sorted(matches):
     print(output)
 print(f'-----------------------------------------------------------------------------------------------------------------')
 if metadata_to_apply:
     print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
     if input().lower() == 'y':
         files = {}
         for filename, line, metadata in metadata_to_apply:
             if filename not in files:
                 with open(filename, 'r') as f:
                     files[filename] = [0, f.readlines()]
             files[filename][1].insert(line + files[filename][0], metadata)
             files[filename][0] += 1
         for filename in files:
             with open(filename, 'w') as f:
                 f.writelines(files[filename][1])
                 print(f'Wrote {filename}.')
 # Local Variables:
 # python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
 # End:

0 comments (0 inline, 0 general)