From 3792d46bcc32a911c909e2f5a899f7184cfcdcc4 2022-02-08 11:06:01
From: Ben Sturmfels <ben@sturm.com.au>
Date: 2022-02-08 11:06:01
Subject: [PATCH] reconcile: Prototype writing statement metadata back to books.

---

diff --git a/conservancy_beancount/reconcile/prototype_amex_reconciler.py b/conservancy_beancount/reconcile/prototype_amex_reconciler.py
index 982874de9476836add6a726565c4708131dec186..0285c0234175584e768c693b00f3b6fea78e5a9e 100644
--- a/conservancy_beancount/reconcile/prototype_amex_reconciler.py
+++ b/conservancy_beancount/reconcile/prototype_amex_reconciler.py
@@ -1,13 +1,24 @@
-"""A prototype AMEX statement reconciler.
+"""Reconcile an AMEX CSV statement against the books and print differences.
 
 Run like this:
 
 $ python3 -m pip install thefuzz
-$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py --beancount-file=$HOME/conservancy/beancount/books/2021.beancount --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
-"""
+$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py \
+  --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
+  --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
+
+Conservancy currently enter data by hand rather than using Beancount importers.
+This tool is still somewhat like an importer in that it needs to extract
+transaction details from a third-party statement. Instead of creating
+directives, it just checks to see that similar directives are already present.
+
+TODO/ISSUES:
+ - AMEX statement doesn't provide bank balance or running total
 
+"""
 import argparse
 import csv
+import collections
 import datetime
 import decimal
 from typing import Dict, List, Tuple
@@ -19,6 +30,7 @@ from thefuzz import fuzz  # type: ignore
 # NOTE: Statement doesn't seem to give us a running balance or a final total.
 
 def standardize_amex_record(row: Dict) -> Dict:
+    """Turn an AMEX CSV row into a standard dict format representing a transaction."""
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
         'amount': -1 * decimal.Decimal(row['Amount']),
@@ -27,10 +39,14 @@ def standardize_amex_record(row: Dict) -> Dict:
 
 
 def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
+    """Turn a Beancount query result row into a standard dict representing a transaction."""
     return {
         'date': row.date,
         'amount': row.number_cost_position,
         'payee': row.payee if row.payee else row.narration,
+        'filename': row.filename,
+        'line': row.posting_line,
+        'statement': row.posting_statement,
     }
 
 
@@ -49,9 +65,9 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
     payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
     payee_matches = payee_match_quality > 50
     if date_matches and amount_matches and payee_matches:
-        return True, ''
+        return True, 'Matched'
     elif date_matches and amount_matches:
-        return True, f'Low payee match ({payee_match_quality}%)'
+        return True, f'Matched with low-payee match ({payee_match_quality}%)'
     else:
         return False, ''
 
@@ -61,6 +77,7 @@ parser.add_argument('--beancount-file', required=True)
 parser.add_argument('--amex-csv', required=True)
 parser.add_argument('--grep-output-filename')
 # parser.add_argument('--report-group-regex')
+parser.add_argument('--show-reconciled-matches', action='store_true')
 args = parser.parse_args()
 
 with open(args.amex_csv) as f:
@@ -71,12 +88,21 @@ begin_date = statement_trans[0]['date']
 end_date = statement_trans[-1]['date']
 
 # Do we traverse and filter the in-memory entries list and filter that, or do we
-# use Beancount Query Language to get a list of transactions?
+# use Beancount Query Language (BQL) to get a list of transactions? Currently
+# using BQL.
+#
+# beancount.query.query_compile.compile() and
+# beancount.query.query_execute.filter_entries() look useful in this respect,
+# but I'm not clear on how to use compile(). An example would help.
 entries, _, options = loader.load_file(args.beancount_file)
+
+# String concatenation looks bad, but there's no SQL injection possible here
+# because BQL can't write back to the Beancount files. I hope!
+query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}"
 result_types, result_rows = run_query(
     entries,
     options,
-    f"SELECT filename, lineno, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}",
+    query,
 )
 books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
 
@@ -85,21 +111,48 @@ num_books_trans = len(books_trans)
 statement_index = 0
 books_index = 0
 matches = []
+metadata_to_apply = []
+
+# Run through all the statement transactions to find a matching transaction in
+# the books. If found, the books transaction is marked off so that it can only
+# be matched once. Some transactions will be matched, some will be on the
+# statement but not the books and some on the books but not the statement.
 for r1 in statement_trans:
     for r2 in books_trans:
         match, note = records_match(r1, r2)
         if match:
-            matches.append([r2['date'], f'{format_record(r1)}    -->    {format_record(r2)}  {note.upper()}'])
+            if not r2['statement'] or args.show_reconciled_matches:
+                    matches.append([r2['date'], f'{format_record(r1)}    -->    {format_record(r2)}  ✓ {note}'])
+            if not r2['statement']:
+                metadata_to_apply.append((r2['filename'], r2['line'], f'    bank-statement: "{args.amex_csv}"\n'))
             books_trans.remove(r2)
             break
     else:
-        matches.append([r1['date'], f'{format_record(r1)}    -->    {" ":^41}  NOT IN BOOKS'])
+        matches.append([r1['date'], f'{format_record(r1)}    -->    {" ":^41}  ✗ Not in books'])
 for r2 in books_trans:
-    matches.append([r2['date'], f'{" ":^41}    -->    {format_record(r2)}  NOT ON STATEMENT'])
+    matches.append([r2['date'], f'{" ":^41}    -->    {format_record(r2)}  ✗ Not on statement'])
 
+print(f'-----------------------------------------------------------------------------------------------------------------')
 print(f'{"STATEMENT":<40}            {"BOOKS":<40}   NOTES')
+print(f'-----------------------------------------------------------------------------------------------------------------')
 for _, output in sorted(matches):
     print(output)
+print(f'-----------------------------------------------------------------------------------------------------------------')
+
+if metadata_to_apply:
+    print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
+    if input().lower() == 'y':
+        files = {}
+        for filename, line, metadata in metadata_to_apply:
+            if filename not in files:
+                with open(filename, 'r') as f:
+                    files[filename] = [0, f.readlines()]
+            files[filename][1].insert(line + files[filename][0], metadata)
+            files[filename][0] += 1
+        for filename in files:
+            with open(filename, 'w') as f:
+                f.writelines(files[filename][1])
+                print(f'Wrote {filename}.')
 
 # Local Variables:
 # python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"