Files
@ bc4d897c9293
Branch filter:
Location: NPO-Accounting/conservancy_beancount/conservancy_beancount/reconcile/prototype_amex_reconciler.py
bc4d897c9293
8.7 KiB
text/x-python
reconcile: Tweak output.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 | """Reconcile an AMEX CSV statement against the books and print differences.
Run like this:
$ python3 -m pip install thefuzz
$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py \
--beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
--csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
--account=Liabilities:CreditCard:AMEX
Conservancy currently enter data by hand rather than using Beancount importers.
This tool is still somewhat like an importer in that it needs to extract
transaction details from a third-party statement. Instead of creating
directives, it just checks to see that similar directives are already present.
Problems this attempts to address:
- errors in the books take hours to find during reconciliation ("you're entering a world of pain")
- adding statement/reconciliation metadata to books is manual and prone to mistakes
- paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
- balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
- creating reconciliation reports
- jumping to an individual transaction in the books isn't trivial - Emacs grep mode is helpful
- normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing)
Q. How are reconciliation reports created currently? How do you read them?
TODO/ISSUES:
- AMEX statement doesn't provide bank balance or running total
"""
import argparse
import csv
import datetime
import decimal
import os
from typing import Dict, List, Tuple
from beancount import loader
from beancount.query.query import run_query
from thefuzz import fuzz # type: ignore
# NOTE: Statement doesn't seem to give us a running balance or a final total.
def standardize_amex_record(row: Dict, line: int) -> Dict:
"""Turn an AMEX CSV row into a standard dict format representing a transaction."""
return {
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
'amount': -1 * decimal.Decimal(row['Amount']),
'line': line,
'payee': row['Description'] or '',
}
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
"""Turn a Beancount query result row into a standard dict representing a transaction."""
return {
'date': row.date,
'amount': row.number_cost_position,
'payee': row.payee if row.payee else row.narration,
'filename': row.filename,
'line': row.posting_line,
'statement': row.posting_statement,
}
def standardize_fr_record(row: Dict, line: int) -> Dict:
return {
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
'amount': decimal.Decimal(row['Amount']),
'payee': row['Detail'] or row['Description'],
'line': line,
}
def format_record(record: Dict) -> str:
return f"{record['date'].isoformat()}: {record['amount']:12,.2f} {record['payee'][:20]:<20}"
def sort_records(records: List) -> List:
return sorted(records, key=lambda x: (x['date'], x['amount']))
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
"""Do these records represent the same transaction?"""
date_matches_exactly = r1['date'] == r2['date']
date_matches_loosly = r1['date'] >= r2['date'] - datetime.timedelta(days=3) and r1['date'] <= r2['date'] + datetime.timedelta(days=3)
amount_matches = r1['amount'] == r2['amount']
payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
payee_matches = payee_match_quality > 50
if date_matches_exactly and amount_matches and payee_matches:
return True, 'Matched'
elif date_matches_loosly and amount_matches and payee_matches:
return True, 'Matched +/- 3 days'
elif date_matches_exactly and amount_matches:
return True, f'Matched ignoring payee'
elif date_matches_loosly and amount_matches:
return True, f'Matched +/- 3 days, ignoring payee'
else:
return False, ''
parser = argparse.ArgumentParser(description='Reconciliation helper')
parser.add_argument('--beancount-file', required=True)
parser.add_argument('--csv-statement', required=True)
parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
parser.add_argument('--grep-output-filename')
# parser.add_argument('--report-group-regex')
parser.add_argument('--show-reconciled-matches', action='store_true')
args = parser.parse_args()
# TODO: Should put in a sanity check to make sure the statement you're feeding
# in matches the account you've provided.
if 'AMEX' in args.account:
standardize_statement_record = standardize_amex_record
else:
standardize_statement_record = standardize_fr_record
with open(args.csv_statement) as f:
reader = csv.DictReader(f)
statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
begin_date = statement_trans[0]['date']
end_date = statement_trans[-1]['date']
# Do we traverse and filter the in-memory entries list and filter that, or do we
# use Beancount Query Language (BQL) to get a list of transactions? Currently
# using BQL.
#
# beancount.query.query_compile.compile() and
# beancount.query.query_execute.filter_entries() look useful in this respect,
# but I'm not clear on how to use compile(). An example would help.
entries, _, options = loader.load_file(args.beancount_file)
# String concatenation looks bad, but there's no SQL injection possible here
# because BQL can't write back to the Beancount files. I hope!
query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = '{args.account}' and date >= {begin_date} and date <= {end_date}"
result_types, result_rows = run_query(
entries,
options,
query,
)
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
num_statement_records = len(statement_trans)
num_books_trans = len(books_trans)
statement_index = 0
books_index = 0
matches = []
metadata_to_apply = []
# Run through all the statement transactions to find a matching transaction in
# the books. If found, the books transaction is marked off so that it can only
# be matched once. Some transactions will be matched, some will be on the
# statement but not the books and some on the books but not the statement.
#
# Doesn't currently find exact matches when there are a bunch of transactions
# for the same amount on the same day. Probably ought to do a first pass through
# the books transactions to find an closely matching payee, then do another pass
# disregarding payee.
for r1 in statement_trans:
for r2 in books_trans:
match, note = records_match(r1, r2)
if match:
if not r2['statement'] or args.show_reconciled_matches:
matches.append([r2['date'], f'{format_record(r1)} → {format_record(r2)} ✓ {note}'])
if not r2['statement']:
metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{os.path.basename(args.csv_statement)}:{r2["line"]}"\n'))
books_trans.remove(r2)
break
else:
matches.append([r1['date'], f'{format_record(r1)} → {" ":^45} ✗ Not in books ({os.path.basename(args.csv_statement)}:{r1["line"]})'])
for r2 in books_trans:
matches.append([r2['date'], f'{" ":^45} → {format_record(r2)} ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
print('-' * 155)
print(f'{"Statement transaction":<38} {"Books transaction":<44} Notes')
print('-' * 155)
for _, output in sorted(matches):
print(output)
print('-' * 155)
# Write statement metadata back to books
if metadata_to_apply:
print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
if input().lower() == 'y':
files = {}
# Query results aren't necessarily sequential in a file, so need to sort
# so that our line number offsets work.
for filename, line, metadata in sorted(metadata_to_apply):
if filename not in files:
with open(filename, 'r') as f:
# print(f'Opening {filename}.')
files[filename] = [0, f.readlines()] # Offset and contents
files[filename][1].insert(line + files[filename][0], metadata)
files[filename][0] += 1
# print(f'File {filename} offset {files[filename][0]}')
for filename in files:
with open(filename, 'w') as f:
f.writelines(files[filename][1])
print(f'Wrote {filename}.')
# Local Variables:
# python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
# End:
|