Changeset - 4bb6177e45bc
[Not reviewed]
0 1 0
Ben Sturmfels (bsturmfels) - 2 years ago 2022-02-09 23:23:00
ben@sturm.com.au
reconcile: Update cleared query.
1 file changed with 5 insertions and 2 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/prototype_amex_reconciler.py
Show inline comments
...
 
@@ -8,48 +8,50 @@ $ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py \
 
  --csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
 
  --account=Liabilities:CreditCard:AMEX
 

	
 
Conservancy currently enter data by hand rather than using Beancount importers.
 
This tool is still somewhat like an importer in that it needs to extract
 
transaction details from a third-party statement. Instead of creating
 
directives, it just checks to see that similar directives are already present.
 

	
 
Problems in scope:
 
 - errors in the books take hours to find during reconciliation ("you're entering a world of pain")
 
 - adding statement/reconciliation metadata to books is manual and prone to mistakes
 
 - Beancount doesn't provide any infrastructure for programmatically updating the books, only appending
 
 - after updates to the books files, beancount must be restarted to reflect updates
 
 - updates also invalidate the cache meaning restart takes several minutes
 
 - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
 
 - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
 
 - jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option
 
 - Pam and other staff don't use Emacs
 
 - auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either
 
 - transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases
 

	
 
Q. How are reconciliation reports created currently? How do you read them?
 
 - by hand from copying and pasting from the helper tool output
 

	
 
Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant.
 

	
 
TODO/ISSUES:
 
 - AMEX statement doesn't provide bank balance or running total
 

	
 
"""
 
import argparse
 
import csv
 
import datetime
 
import decimal
 
import os
 
from typing import Dict, List, Tuple
 

	
 
from beancount import loader
 
from beancount.query.query import run_query
 
from thefuzz import fuzz  # type: ignore
 

	
 
# NOTE: Statement doesn't seem to give us a running balance or a final total.
 

	
 

	
 
def standardize_amex_record(row: Dict, line: int) -> Dict:
 
    """Turn an AMEX CSV row into a standard dict format representing a transaction."""
 
    return {
 
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
 
        'amount': -1 * decimal.Decimal(row['Amount']),
 
        'line': line,
...
 
@@ -118,56 +120,57 @@ statement_total = decimal.Decimal(args.statement_total)
 

	
 
# TODO: Should put in a sanity check to make sure the statement you're feeding
 
# in matches the account you've provided.
 
if 'AMEX' in args.account:
 
    standardize_statement_record = standardize_amex_record
 
else:
 
    standardize_statement_record = standardize_fr_record
 

	
 
with open(args.csv_statement) as f:
 
    reader = csv.DictReader(f)
 
    statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
 

	
 
begin_date = statement_trans[0]['date']
 
end_date = statement_trans[-1]['date']
 

	
 
# Do we traverse and filter the in-memory entries list and filter that, or do we
 
# use Beancount Query Language (BQL) to get a list of transactions? Currently
 
# using BQL.
 
#
 
# beancount.query.query_compile.compile() and
 
# beancount.query.query_execute.filter_entries() look useful in this respect,
 
# but I'm not clear on how to use compile(). An example would help.
 
entries, _, options = loader.load_file(args.beancount_file)
 

	
 
total_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
 
cleared_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
 
    AND date <= {end_date.isoformat()} AND META('bank-statement') != NULL"""
 
result_types, result_rows = run_query(
 
    entries,
 
    options,
 
    total_query,
 
    cleared_query,
 
    numberify=True,
 
)
 

	
 
cleared_total = result_rows[0][0]
 

	
 
# String concatenation looks bad, but there's no SQL injection possible here
 
# because BQL can't write back to the Beancount files. I hope!
 
query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = '{args.account}' and date >= {begin_date} and date <= {end_date}"
 
result_types, result_rows = run_query(
 
    entries,
 
    options,
 
    query,
 
)
 

	
 
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
 

	
 
num_statement_records = len(statement_trans)
 
num_books_trans = len(books_trans)
 
statement_index = 0
 
books_index = 0
 
matches = []
 
metadata_to_apply = []
 

	
 
# Run through all the statement transactions to find a matching transaction in
 
# the books. If found, the books transaction is marked off so that it can only
 
# be matched once. Some transactions will be matched, some will be on the
 
# statement but not the books and some on the books but not the statement.
0 comments (0 inline, 0 general)