Changeset - fb5d0a57f3c0
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 2 years ago 2022-03-01 23:05:07
ben@sturm.com.au
reconcile: CLI entrypoint, improve docs.
2 files changed with 103 insertions and 43 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/helper.py
Show inline comments
...
 
@@ -228,6 +228,7 @@ for desc, query in QUERIES.items():
 

	
 
uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
 
report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
 
# TODO: Make the directory if it doesn't exist.
 
with open(report_path, 'w') as f:
 
    f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
 
print(f'Wrote reconciliation report: {report_path}.')
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
 
"""Reconcile an AMEX/FR CSV statement against the books and print differences.
 
"""Compare a bank CSV statement with the books.
 

	
 
This tool takes an AMEX or First Republic CSV statement file and
 
compares it line-by-line with the Beancount books to make sure that
 
everything matches. This is designed for situations where transactions
 
are entered into the books directly, rather than being imported from a
 
statement after the fact.
 

	
 
The reconciler will attempt to match transactions based on date,
 
amount, check number and payee, but is forgiving to differences in
 
dates, the absensce of check number and inexact matches on
 
payee. Matches are ranked, so where there is only one decent match for
 
an amount/date this is accepted, but if there are multiple similar
 
candidates it will refuse to guess.
 

	
 
The reconciler will also attempt to identify where a single statement
 
entry has been split out into multiple Beancount postings, such as a
 
single bank transfer representing health insurance for multiple
 
employees.
 

	
 
Run it like this:
 

	
 
$ statement_reconciler \
 
  --beancount-file=2021.beancount \
 
  --account=Liabilities:CreditCard:AMEX \
 
  --csv-statement=2021-09-10_AMEX_activity.csv \
 
  --bank-statement=2021-09-10_AMEX_activity.csv \
 
  --statement-balance=1000
 

	
 
Background:
 

	
 
Beancount users often write importers to create bookkeeping entries
 
direct from a bank statement or similar. That approach automates data
 
entry and reconciliation in one step. In some cases though, it's
 
useful to manually enter transactions and reconcile them later
 
on. This workflow helpful in cases like writing a paper check when
 
there's a time lag between committing to making a payment and the
 
funds being debited. That's the workflow we're using here.
 

	
 
Conservancy currently enter data by hand rather than using Beancount
 
importers.  This tool is still somewhat like an importer in that it
 
needs to extract transaction details from a third-party
 
statement. Instead of creating directives, it just checks to see that
 
similar directives are already present. This is a bit like diff-ing a
 
statement with the books (though we're only interested in the presence
 
of lines, not so much their order).
 

	
 
Beancount users often write importers to create bookkeeping entries direct from
 
a bank statement or similar. That approach automates data entry and
 
reconciliation in one step. In some cases though, it's useful to manually enter
 
transactions and reconcile them later on. This workflow helpful in cases like
 
writing a paper check when there's a time lag between committing to making a
 
payment and the funds being debited. That's the workflow we're using here.
 
Problems in scope:
 

	
 
Run like this:
 
 - errors in the books take hours to find during reconciliation,
 
   requiring manually comparing statemnts and the books and are
 
   succeptible to mistakes, such as not noticing when there are two
 
   payments for the same amount on the statement, but not in the books
 
   ("you're entering a world of pain")
 

	
 
$ python3 -m pip install thefuzz
 
$ python3 conservancy_beancount/reconcile/statement_reconciler.py \
 
  --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
 
  --csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
 
  --account=Liabilities:CreditCard:AMEX
 
 - adding statement/reconciliation metadata to books is/was manual and
 
   prone to mistakes
 

	
 
Conservancy currently enter data by hand rather than using Beancount importers.
 
This tool is still somewhat like an importer in that it needs to extract
 
transaction details from a third-party statement. Instead of creating
 
directives, it just checks to see that similar directives are already present.
 
 - Beancount doesn't provide any infrastructure for programmatically
 
   updating the books, only appending in the case of importers
 

	
 
 - paper checks are entered in the books when written, but may not be
 
   cashed until months later (reconcile errors)
 

	
 
 - jumping to an individual transaction in a large ledger isn't
 
   trivial - Emacs grep mode is the current best option
 

	
 
Problems in scope:
 
 - errors in the books take hours to find during reconciliation ("you're entering a world of pain")
 
 - adding statement/reconciliation metadata to books is manual and prone to mistakes
 
 - Beancount doesn't provide any infrastructure for programmatically updating the books, only appending
 
 - after updates to the books files, beancount must be restarted to reflect updates
 
 - updates also invalidate the cache meaning restart takes several minutes
 
 - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
 
 - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
 
 - jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option
 
 - Pam and other staff don't use Emacs
 
 - auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either
 
 - transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases
 

	
 
Q. How are reconciliation reports created currently? How do you read them?
 
 - by hand from copying and pasting from the helper tool output
 
 - auditors would prefer Bradley didn't perform reconciliation,
 
   ideally not Rosanne either
 

	
 
 - reconciliation reports are created by hand when there are mismatches
 

	
 
Other related problems we're not dealing with here:
 

	
 
 - after updates to the books files, beancount must be restarted to
 
   reflect updates
 

	
 
 - updates also invalidate the cache meaning restart takes several
 
   minutes
 

	
 
Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant.
 
 - balance checks are manually updated in
 
   svn/Financial/Ledger/sanity-check-balances.yaml
 

	
 
 - transactions are entered manually and reconciled after the fact,
 
   but importing from statements may be useful in some cases
 

	
 
"""
 

	
 
# TODO:
 
#  - extract the magic numbers
 
#  - consider merging in helper.py
 

	
 
import argparse
 
import collections
 
import copy
...
 
@@ -65,7 +118,7 @@ if not sys.warnoptions:
 
from thefuzz import fuzz  # type: ignore
 

	
 
logger = logging.getLogger()
 
logger.setLevel(logging.DEBUG)
 
logger.setLevel(logging.INFO)
 

	
 
# Console logging.
 
logger.addHandler(logging.StreamHandler())
...
 
@@ -105,6 +158,14 @@ def remove_duplicate_words(text: str) -> str:
 

	
 

	
 
def remove_payee_junk(payee: str) -> str:
 
    """Clean up payee field to improve quality of fuzzy matching.
 

	
 
    It turns out that bank statement "description" fields are
 
    difficult to fuzzy match on because they're long and
 
    noisey. Truncating them (see standardize_XXX_record fns) and
 
    removing the common junk helps significantly.
 

	
 
    """
 
    for r in JUNK_WORDS_RES:
 
        payee = r.sub('', payee)
 
    payee = ZERO_RE.sub('', payee)
...
 
@@ -129,8 +190,11 @@ def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable
 
    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 

	
 

	
 
# CSV reconciliation report.
 
# Merge helper script?
 
def validate_amex_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 

	
 

	
 
def standardize_amex_record(row: Dict, line: int) -> Dict:
...
 
@@ -147,13 +211,6 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
 
    }
 

	
 

	
 
def validate_amex_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 

	
 

	
 
def validate_fr_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
 
    reader = csv.DictReader(io.StringIO(sample))
...
 
@@ -346,8 +403,6 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str,
 
    return metadata
 

	
 

	
 
# TODO: Is there a way to pull the side-effecting code out of this function?
 

	
 
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
 
    """Insert reconciliation metadata in the books files.
 

	
...
 
@@ -555,3 +610,7 @@ def main(args: argparse.Namespace) -> None:
 
if __name__ == '__main__':
 
    args = parse_args(sys.argv)
 
    main(args)
 

	
 
def entry_point():
 
    args = parse_args(sys.argv)
 
    main(args)
0 comments (0 inline, 0 general)