diff --git a/conservancy_beancount/reconcile/helper.py b/conservancy_beancount/reconcile/helper.py index ea821938b07d682405d342ecda4db6ccd89856ba..3ca11212abd44963ba17682703644a5a233df1e6 100644 --- a/conservancy_beancount/reconcile/helper.py +++ b/conservancy_beancount/reconcile/helper.py @@ -228,6 +228,7 @@ for desc, query in QUERIES.items(): uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows] report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod)) +# TODO: Make the directory if it doesn't exist. with open(report_path, 'w') as f: f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, [])) print(f'Wrote reconciliation report: {report_path}.') diff --git a/conservancy_beancount/reconcile/statement_reconciler.py b/conservancy_beancount/reconcile/statement_reconciler.py index b9071182b9ff4410cc73eed7c6362e9e2604af5a..6d3c9641200e752e9ab2c23029eadb3a6bf63d60 100644 --- a/conservancy_beancount/reconcile/statement_reconciler.py +++ b/conservancy_beancount/reconcile/statement_reconciler.py @@ -1,44 +1,97 @@ -"""Reconcile an AMEX/FR CSV statement against the books and print differences. +"""Compare a bank CSV statement with the books. + +This tool takes an AMEX or First Republic CSV statement file and +compares it line-by-line with the Beancount books to make sure that +everything matches. This is designed for situations where transactions +are entered into the books directly, rather than being imported from a +statement after the fact. + +The reconciler will attempt to match transactions based on date, +amount, check number and payee, but is forgiving to differences in +dates, the absensce of check number and inexact matches on +payee. Matches are ranked, so where there is only one decent match for +an amount/date this is accepted, but if there are multiple similar +candidates it will refuse to guess. + +The reconciler will also attempt to identify where a single statement +entry has been split out into multiple Beancount postings, such as a +single bank transfer representing health insurance for multiple +employees. + +Run it like this: + +$ statement_reconciler \ + --beancount-file=2021.beancount \ + --account=Liabilities:CreditCard:AMEX \ + --csv-statement=2021-09-10_AMEX_activity.csv \ + --bank-statement=2021-09-10_AMEX_activity.csv \ + --statement-balance=1000 + +Background: + +Beancount users often write importers to create bookkeeping entries +direct from a bank statement or similar. That approach automates data +entry and reconciliation in one step. In some cases though, it's +useful to manually enter transactions and reconcile them later +on. This workflow helpful in cases like writing a paper check when +there's a time lag between committing to making a payment and the +funds being debited. That's the workflow we're using here. + +Conservancy currently enter data by hand rather than using Beancount +importers. This tool is still somewhat like an importer in that it +needs to extract transaction details from a third-party +statement. Instead of creating directives, it just checks to see that +similar directives are already present. This is a bit like diff-ing a +statement with the books (though we're only interested in the presence +of lines, not so much their order). -Beancount users often write importers to create bookkeeping entries direct from -a bank statement or similar. That approach automates data entry and -reconciliation in one step. In some cases though, it's useful to manually enter -transactions and reconcile them later on. This workflow helpful in cases like -writing a paper check when there's a time lag between committing to making a -payment and the funds being debited. That's the workflow we're using here. +Problems in scope: -Run like this: + - errors in the books take hours to find during reconciliation, + requiring manually comparing statemnts and the books and are + succeptible to mistakes, such as not noticing when there are two + payments for the same amount on the statement, but not in the books + ("you're entering a world of pain") -$ python3 -m pip install thefuzz -$ python3 conservancy_beancount/reconcile/statement_reconciler.py \ - --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \ - --csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \ - --account=Liabilities:CreditCard:AMEX + - adding statement/reconciliation metadata to books is/was manual and + prone to mistakes -Conservancy currently enter data by hand rather than using Beancount importers. -This tool is still somewhat like an importer in that it needs to extract -transaction details from a third-party statement. Instead of creating -directives, it just checks to see that similar directives are already present. + - Beancount doesn't provide any infrastructure for programmatically + updating the books, only appending in the case of importers + + - paper checks are entered in the books when written, but may not be + cashed until months later (reconcile errors) + + - jumping to an individual transaction in a large ledger isn't + trivial - Emacs grep mode is the current best option -Problems in scope: - - errors in the books take hours to find during reconciliation ("you're entering a world of pain") - - adding statement/reconciliation metadata to books is manual and prone to mistakes - - Beancount doesn't provide any infrastructure for programmatically updating the books, only appending - - after updates to the books files, beancount must be restarted to reflect updates - - updates also invalidate the cache meaning restart takes several minutes - - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors) - - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml - - jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option - Pam and other staff don't use Emacs - - auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either - - transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases -Q. How are reconciliation reports created currently? How do you read them? - - by hand from copying and pasting from the helper tool output + - auditors would prefer Bradley didn't perform reconciliation, + ideally not Rosanne either + + - reconciliation reports are created by hand when there are mismatches + +Other related problems we're not dealing with here: + + - after updates to the books files, beancount must be restarted to + reflect updates + + - updates also invalidate the cache meaning restart takes several + minutes -Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant. + - balance checks are manually updated in + svn/Financial/Ledger/sanity-check-balances.yaml + + - transactions are entered manually and reconciled after the fact, + but importing from statements may be useful in some cases """ + +# TODO: +# - extract the magic numbers +# - consider merging in helper.py + import argparse import collections import copy @@ -65,7 +118,7 @@ if not sys.warnoptions: from thefuzz import fuzz # type: ignore logger = logging.getLogger() -logger.setLevel(logging.DEBUG) +logger.setLevel(logging.INFO) # Console logging. logger.addHandler(logging.StreamHandler()) @@ -105,6 +158,14 @@ def remove_duplicate_words(text: str) -> str: def remove_payee_junk(payee: str) -> str: + """Clean up payee field to improve quality of fuzzy matching. + + It turns out that bank statement "description" fields are + difficult to fuzzy match on because they're long and + noisey. Truncating them (see standardize_XXX_record fns) and + removing the common junk helps significantly. + + """ for r in JUNK_WORDS_RES: payee = r.sub('', payee) payee = ZERO_RE.sub('', payee) @@ -129,8 +190,11 @@ def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)]) -# CSV reconciliation report. -# Merge helper script? +def validate_amex_csv(sample: str, account: str) -> None: + required_cols = {'Date', 'Amount', 'Description', 'Card Member'} + reader = csv.DictReader(io.StringIO(sample)) + if reader.fieldnames and not required_cols.issubset(reader.fieldnames): + sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}") def standardize_amex_record(row: Dict, line: int) -> Dict: @@ -147,13 +211,6 @@ def standardize_amex_record(row: Dict, line: int) -> Dict: } -def validate_amex_csv(sample: str, account: str) -> None: - required_cols = {'Date', 'Amount', 'Description', 'Card Member'} - reader = csv.DictReader(io.StringIO(sample)) - if reader.fieldnames and not required_cols.issubset(reader.fieldnames): - sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}") - - def validate_fr_csv(sample: str, account: str) -> None: required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'} reader = csv.DictReader(io.StringIO(sample)) @@ -346,8 +403,6 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, return metadata -# TODO: Is there a way to pull the side-effecting code out of this function? - def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None: """Insert reconciliation metadata in the books files. @@ -555,3 +610,7 @@ def main(args: argparse.Namespace) -> None: if __name__ == '__main__': args = parse_args(sys.argv) main(args) + +def entry_point(): + args = parse_args(sys.argv) + main(args)