Changeset - da5caa3e6cd1
[Not reviewed]
0 1 0
Ben Sturmfels (bsturmfels) - 2 years ago 2022-03-02 07:26:19
ben@sturm.com.au
reconcile: Remove totals, highlight payee mismatches orange.
1 file changed with 18 insertions and 19 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
 
"""Compare a bank CSV statement with the books.
 

	
 
This tool takes an AMEX or First Republic CSV statement file and
 
compares it line-by-line with the Beancount books to make sure that
 
everything matches. This is designed for situations where transactions
 
are entered into the books directly, rather than being imported from a
 
statement after the fact.
 

	
 
The reconciler will attempt to match transactions based on date,
 
amount, check number and payee, but is forgiving to differences in
 
dates, the absensce of check number and inexact matches on
 
payee. Matches are ranked, so where there is only one decent match for
 
an amount/date this is accepted, but if there are multiple similar
 
candidates it will refuse to guess.
 

	
 
The reconciler will also attempt to identify where a single statement
 
entry has been split out into multiple Beancount postings, such as a
 
single bank transfer representing health insurance for multiple
 
employees.
 

	
 
Run it like this:
 

	
 
$ statement_reconciler \
 
  --beancount-file=2021.beancount \
 
  --account=Liabilities:CreditCard:AMEX \
 
  --csv-statement=2021-09-10_AMEX_activity.csv \
 
  --bank-statement=2021-09-10_AMEX_activity.csv \
 
  --statement-balance=1000
 
  --beancount-file 2021.beancount \
 
  --account Liabilities:CreditCard:AMEX \
 
  --csv-statement ~/svn/2021-09-10_AMEX_activity.csv \
 
  --bank-statement ~/svn/2021-09-10_AMEX_activity.pdf
 

	
 
Background:
 

	
 
Beancount users often write importers to create bookkeeping entries
 
direct from a bank statement or similar. That approach automates data
 
entry and reconciliation in one step. In some cases though, it's
 
useful to manually enter transactions and reconcile them later
 
on. This workflow helpful in cases like writing a paper check when
 
there's a time lag between committing to making a payment and the
 
funds being debited. That's the workflow we're using here.
 

	
 
Conservancy currently enter data by hand rather than using Beancount
 
importers.  This tool is still somewhat like an importer in that it
 
needs to extract transaction details from a third-party
 
statement. Instead of creating directives, it just checks to see that
 
similar directives are already present. This is a bit like diff-ing a
 
statement with the books (though we're only interested in the presence
 
of lines, not so much their order).
 

	
 
Problems in scope:
 

	
 
 - errors in the books take hours to find during reconciliation,
 
   requiring manually comparing statemnts and the books and are
 
   succeptible to mistakes, such as not noticing when there are two
...
 
@@ -353,49 +352,52 @@ def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dic
 
                best_match_index = i
 
                best_match_note = note
 
        if best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note or best_match_score > 0.8:
 
            matches.append(([r1], [books_trans[best_match_index]], best_match_note))
 
            # Don't try to make a second match against this books entry.
 
            if best_match_index is not None:
 
                del books_trans[best_match_index]
 
        else:
 
            remaining_statement_trans.append(r1)
 
    for r2 in books_trans:
 
        remaining_books_trans.append(r2)
 
    return matches, remaining_statement_trans, remaining_books_trans
 

	
 

	
 
# TODO: Return list of tuples (instead of list of lists).
 

	
 
def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]:
 
    match_output = []
 
    for r1s, r2s, note in matches:
 
        note = ', '.join(note)
 
        note = ': ' + note if note else note
 
        if r1s and r2s:
 
            if show_reconciled_matches or not all(x['bank_statement'] for x in r2s):
 
                if len(r2s) == 1:
 
                    match_output.append([r1s[0]['date'], f'{format_record(r1s[0])}  →  {format_record(r2s[0])}  ✓ Matched{note}'])
 
                    entry = [r1s[0]['date'], f'{format_record(r1s[0])}  →  {format_record(r2s[0])}  ✓ Matched{note}']
 
                    if 'payee mismatch' in note:
 
                        entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL
 
                    match_output.append(entry)
 
                else:
 
                    match_output.extend(format_multirecord(r1s, r2s, note))
 
        elif r1s:
 
            match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL])
 
        else:
 
            match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59}  →  {format_record(r2s[0])}  ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL])
 
    return match_output
 

	
 

	
 
def date_proximity(d1: datetime.date, d2: datetime.date) -> float:
 
    diff = abs(int((d1 - d2).days))
 
    if diff > 60:
 
        return 0.0
 
    else:
 
        return 1.0 - (diff / 60.0)
 

	
 

	
 
def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]:
 
    # Can we really ever have multiple statement entries? Probably not.
 
    statement_filename = get_repo_relative_path(statement_filename)
 
    csv_filename = get_repo_relative_path(csv_filename)
 
    metadata = []
 
    statement_entries, books_entries, _ = match
 
    for books_entry in books_entries:
...
 
@@ -453,53 +455,51 @@ def parse_repo_relative_path(path: str) -> str:
 
    if not os.path.exists(path):
 
        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
 
    repo = os.getenv('CONSERVANCY_REPOSITORY')
 
    if not repo:
 
        raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.')
 
    if not path.startswith(repo):
 
        raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
 
    return path
 

	
 

	
 
def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
 
    """decimal.Decimal can't parse numbers with thousands separator."""
 
    number_text = number_text.replace(',', '')
 
    return decimal.Decimal(number_text)
 

	
 

	
 
def parse_arguments(argv: List[str]) -> argparse.Namespace:
 
    parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
 
    cliutil.add_version_argument(parser)
 
    cliutil.add_loglevel_argument(parser)
 
    parser.add_argument('--beancount-file', required=True, type=parse_path)
 
    parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
 
    parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
 
    parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
 
    parser.add_argument('--grep-output-filename')
 
    # parser.add_argument('--report-group-regex')
 
    parser.add_argument('--show-reconciled-matches', action='store_true')
 
    parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
 
    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
 
    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")    # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
 
    args = parser.parse_args(args=argv)
 
    return args
 

	
 

	
 
def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
 
    total_matched = decimal.Decimal(0)
 
    total_missing_from_books = decimal.Decimal(0)
 
    total_missing_from_statement = decimal.Decimal(0)
 
    for statement_entries, books_entries, _ in matches:
 
        if statement_entries and books_entries:
 
            total_matched += sum(c['amount'] for c in statement_entries)
 
        elif statement_entries:
 
            total_missing_from_books += sum(c['amount'] for c in statement_entries)
 
        else:
 
            total_missing_from_statement += sum(c['amount'] for c in books_entries)
 
    return total_matched, total_missing_from_books, total_missing_from_statement
 

	
 

	
 
def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
 
    matches = []
 
    remaining_books_trans = []
 
    remaining_statement_trans = []
 

	
 
    groups = itertools.groupby(books_trans, key=lambda x: (x['date'], x['payee']))
...
 
@@ -560,73 +560,72 @@ def main(arglist: Optional[Sequence[str]] = None,
 
        validate_csv = validate_amex_csv
 
        standardize_statement_record = standardize_amex_record
 
    else:
 
        validate_csv = validate_fr_csv
 
        standardize_statement_record = standardize_fr_record
 

	
 
    with open(args.csv_statement) as f:
 
        sample = f.read(200)
 
        validate_csv(sample, args.account)
 
        f.seek(0)
 
        statement_trans = read_transactions_from_csv(f, standardize_statement_record)
 

	
 
    begin_date = statement_trans[0]['date']
 
    end_date = statement_trans[-1]['date']
 

	
 
    # Do we traverse and filter the in-memory entries list and filter that, or do we
 
    # use Beancount Query Language (BQL) to get a list of transactions? Currently
 
    # using BQL.
 
    #
 
    # beancount.query.query_compile.compile() and
 
    # beancount.query.query_execute.filter_entries() look useful in this respect,
 
    # but I'm not clear on how to use compile(). An example would help.
 
    entries, _, options = loader.load_file(args.beancount_file)
 

	
 
    books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
 
        AND date <= {end_date.isoformat()}"""
 
    _, result_rows = run_query(entries, options, books_balance_query, numberify=True)
 
    books_balance = result_rows[0][0] if result_rows else 0
 
    # books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
 
    #     AND date <= {end_date.isoformat()}"""
 
    # _, result_rows = run_query(entries, options, books_balance_query, numberify=True)
 
    # books_balance = result_rows[0][0] if result_rows else 0
 

	
 
    # String concatenation looks bad, but there's no SQL injection possible here
 
    # because BQL can't write back to the Beancount files. I hope!
 
    query = f'SELECT filename, META("lineno") AS line, META("bank-statement") AS bank_statement, date, number(cost(position)), payee, ENTRY_META("entity") as entity, ANY_META("check-id") as check_id, narration where account = "{args.account}" and date >= {begin_date} and date <= {end_date}'
 
    _, result_rows = run_query(entries, options, query)
 

	
 
    books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
 

	
 
    matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
 
    subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans)
 
    matches.extend(subset_matches)
 
    unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
 
    matches.extend(unmatched)
 

	
 
    match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
 

	
 
    _, total_missing_from_books, total_missing_from_statement = totals(matches)
 

	
 
    print('-' * 155)
 
    print(f'{"Statement transaction":<52}            {"Books transaction":<58}   Notes')
 
    statement_heading = f'Statement transactions {begin_date} to {end_date}'
 
    print(f'{statement_heading:<52}            {"Books transactions":<58}   Notes')
 
    print('-' * 155)
 
    for _, output in sorted(match_output, key=lambda x: x[0]):
 
        print(output)
 
    print('-' * 155)
 
    print(f'Statement period {begin_date} to {end_date}')
 
    print(f'Statement/cleared balance:    {args.statement_balance:12,.2f}    (as provided by you)')
 
    print(f'Books balance:                {books_balance:12,.2f}    (all transactions, includes unreconciled)')
 
    print(f'Total not on statement:       {total_missing_from_statement:12,.2f}')
 
    print(f'Total not on books:           {total_missing_from_books:12,.2f}')
 
    print(f'Sub-total not on statement: {total_missing_from_statement:12,.2f}')
 
    print(f'Sub-total not in books:     {total_missing_from_books:12,.2f}')
 
    print(f'Total:                      {total_missing_from_statement + total_missing_from_books:12,.2f}')
 
    print('-' * 155)
 

	
 
    # Write statement metadata back to books
 
    metadata_to_apply = []
 
    for match in matches:
 
        metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
 
    if metadata_to_apply and not args.non_interactive:
 
        print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
 
        if input().lower() == 'y':
 
            write_metadata_to_books(metadata_to_apply)
 

	
 

	
 
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
 

	
 
if __name__ == '__main__':
 
    exit(entry_point())
0 comments (0 inline, 0 general)