Changeset - e764f3d0ef05
[Not reviewed]
Ben Sturmfels (bsturmfels) - 2023-02-11 04:23:15
ben@sturm.com.au
reconcile: Factor out the output formatting
2 files changed with 28 insertions and 13 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
...
 
@@ -500,220 +500,226 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str,
    statement_filename = get_repo_relative_path(statement_filename)
    csv_filename = get_repo_relative_path(csv_filename)
    metadata = []
    statement_entries, books_entries, _ = match
    for books_entry in books_entries:
        for statement_entry in statement_entries:
            if not books_entry['bank_statement']:
                metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement: "{statement_filename}"'))
                metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"'))
    return metadata


def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
    """Insert reconciliation metadata in the books files.

    Takes a list of edits to make as tuples of form (filename, lineno, metadata):

    [
        ('2021/main.beancount', 4245, '    bank-statement: statement.pdf'),
        ('2021/main.beancount', 1057, '    bank-statement: statement.pdf'),
        ('2021/payroll.beancount', 257, '    bank-statement: statement.pdf'),
        ...,
    ]

    Beancount doesn't provide any infrastructure for programmatically
    updating the books, only appending in the case of importers. So
    we're on our own here.
    """
    file_contents: dict[str, list] = {}
    file_offsets: dict[str, int] = collections.defaultdict(int)
    # Load each books file into memory and insert the relevant metadata lines.
    # Line numbers change as we do this, so we keep track of the offset for each
    # file. Changes must be sorted by line number first or else the offsets will
    # break because we're jumping around making edits.
    for filename, line, metadata in sorted(metadata_to_apply):
        if filename not in file_contents:
            with open(filename, 'r') as f:
                file_contents[filename] = f.readlines()
        # Insert is inefficient, but fast enough for now in practice.
        file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n')
        file_offsets[filename] += 1
    # Write each updated file back to disk.
    for filename, contents in file_contents.items():
        with open(filename, 'w') as f:
            f.writelines(contents)
            print(f'Wrote {filename}.')
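
The per-file offset above is the subtle part: each insertion pushes every later line in that file down by one, so edits must be applied in ascending line order while tracking how many lines have already been added. A minimal sketch of the same idea, using hypothetical data:

    # Two hypothetical edits into one in-memory file, sorted by line number.
    lines = ['alpha\n', 'beta\n', 'gamma\n']
    edits = [(1, 'after-alpha\n'), (2, 'after-beta\n')]
    offset = 0
    for lineno, text in edits:
        # Without the running offset, the second insert would land one line early.
        lines.insert(lineno + offset, text)
        offset += 1
    assert lines == ['alpha\n', 'after-alpha\n', 'beta\n', 'after-beta\n', 'gamma\n']
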
def get_repo_relative_path(path: str) -> str:
    """Chop off the unique per-person CONSERVANCY_REPOSITORY.

    CSV and PDF statement metadata should be relative to
    CONSERVANCY_REPOSITORY, i.e. without regard to exactly where on
    your computer all the files live.
    """
    return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY'))
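
For example, os.path.relpath strips the shared repository prefix (hypothetical paths):

    # Assuming CONSERVANCY_REPOSITORY points at /home/alice/conservancy.
    repo = '/home/alice/conservancy'
    assert os.path.relpath(f'{repo}/2022/main.beancount', start=repo) == '2022/main.beancount'
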
def parse_path(path: str) -> str:
    """Validate that a file exists for use in argparse."""
    if not os.path.exists(path):
        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
    return path


def parse_repo_relative_path(path: str) -> str:
    """Validate that a file exists and is within $CONSERVANCY_REPOSITORY.

    For use with argparse.
    """
    if not os.path.exists(path):
        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
    repo = os.getenv('CONSERVANCY_REPOSITORY')
    if not repo:
        raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.')
    if not path.startswith(repo):
        raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
    return path
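
These validators work because argparse calls the type= callable on the raw string and turns any ArgumentTypeError into a clean usage error rather than a traceback. A sketch with a standalone parser (not part of this changeset):

    parser = argparse.ArgumentParser()
    parser.add_argument('--csv-statement', type=parse_repo_relative_path)
    # Given a missing file, parsing exits with:
    #   error: argument --csv-statement: File nope.csv does not exist.
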
def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
    """decimal.Decimal can't parse numbers with a thousands separator."""
    number_text = number_text.replace(',', '')
    return decimal.Decimal(number_text)
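
For instance, decimal.Decimal('1,234.56') raises decimal.InvalidOperation, whereas:

    assert parse_decimal_with_separator('1,234.56') == decimal.Decimal('1234.56')
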
def parse_arguments(argv: List[str]) -> argparse.Namespace:
    parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
    cliutil.add_version_argument(parser)
    cliutil.add_loglevel_argument(parser)
    parser.add_argument('--beancount-file', required=True, type=parse_path)
    parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
    parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
    parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
    # parser.add_argument('--report-group-regex')
    parser.add_argument('--show-reconciled-matches', action='store_true')
    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
    # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
    args = parser.parse_args(args=argv)
    return args
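
A typical invocation, for reference (hypothetical file paths):

    args = parse_arguments([
        '--beancount-file', '2022/main.beancount',
        '--csv-statement', 'Financial/Bank-Statements/AMEX/2022-01-12_AMEX.csv',
        '--bank-statement', 'Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf',
        '--account', 'Liabilities:CreditCard:AMEX',
    ])
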
def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
    """Calculate the totals of transactions matched/not-matched."""
    total_matched = decimal.Decimal(0)
    total_missing_from_books = decimal.Decimal(0)
    total_missing_from_statement = decimal.Decimal(0)
    for statement_entries, books_entries, _ in matches:
        if statement_entries and books_entries:
            total_matched += sum(c['amount'] for c in statement_entries)
        elif statement_entries:
            total_missing_from_books += sum(c['amount'] for c in statement_entries)
        else:
            total_missing_from_statement += sum(c['amount'] for c in books_entries)
    return total_matched, total_missing_from_books, total_missing_from_statement


def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]:
    """Format the remaining unmatched transactions to be added to one single list of matches."""
    matches: List[Tuple[List, List, List]] = []
    for r1 in statement_trans:
        matches.append(([r1], [], ['no match']))
    for r2 in books_trans:
        matches.append(([], [r2], ['no match']))
    return matches


def format_output(matches, begin_date, end_date, csv_statement, show_reconciled_matches) -> str:
    with io.StringIO() as out:
        match_output = format_matches(matches, csv_statement, show_reconciled_matches)
        _, total_missing_from_books, total_missing_from_statement = totals(matches)
        print('-' * 155, file=out)
        statement_heading = f'Statement transactions {begin_date} to {end_date}'
        print(f'{statement_heading:<52}            {"Books transactions":<58}   Notes', file=out)
        print('-' * 155, file=out)
        for _, output in sorted(match_output, key=lambda x: x[0]):
            print(output, file=out)
        print('-' * 155, file=out)
        print(f'Sub-total not on statement: {total_missing_from_statement:12,.2f}', file=out)
        print(f'Sub-total not in books:     {total_missing_from_books:12,.2f}', file=out)
        print(f'Total:                      {total_missing_from_statement + total_missing_from_books:12,.2f}', file=out)
        print('-' * 155, file=out)
        return out.getvalue()
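
Building the report into io.StringIO and returning a string, rather than printing directly, is the point of this changeset: callers and tests can now capture the report and decide what to do with it. A sketch of the intended use (hypothetical dates):

    report = format_output(matches, datetime.date(2022, 1, 1),
                           datetime.date(2022, 1, 31), 'statement.csv', True)
    print(report)                 # the CLI path
    assert 'Total:' in report     # the test path
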
def main(arglist: Optional[Sequence[str]] = None,
         stdout: TextIO = sys.stdout,
         stderr: TextIO = sys.stderr,
         config: Optional[configmod.Config] = None,
         ) -> int:
    args = parse_arguments(arglist)
    cliutil.set_loglevel(logger, args.loglevel)
    if config is None:
        config = configmod.Config()
        config.load_file()

    # Validate and normalise the statement into our standard
    # transaction data structure.
    if 'AMEX' in args.account:
        validate_csv = validate_amex_csv
        read_csv = read_amex_csv
    else:
        validate_csv = validate_fr_csv
        read_csv = read_fr_csv

    with open(args.csv_statement) as f:
        sample = f.read(200)
        # Validate should return true/false and a message.
        validate_csv(sample)
        f.seek(0)
        # TODO: Needs a custom read_transactions_from_csv for each of AMEX and
        # FR since AMEX has a header row and FR doesn't.
        statement_trans = read_csv(f)

    # Dates are taken from the beginning/end of the statement.
    begin_date = statement_trans[0]['date']
    end_date = statement_trans[-1]['date']

    # Query the Beancount books data for the above period.
    #
    # There are pros and cons both to using Beancount's in-memory
    # entries list directly and to using Beancount Query Language
    # (BQL) to get a list of transactions. Using BQL because it's
    # convenient, but we don't have access to the full transaction
    # entry objects. Feels a bit strange that these approaches are so
    # disconnected.
    #
    # beancount.query.query_compile.compile() and
    # beancount.query.query_execute.filter_entries() look useful in this respect,
    # but I'm not clear on how to use compile(). An example would help.
    entries, _, options = loader.load_file(args.beancount_file)
    # String concatenation looks bad, but there's no SQL injection possible here
    # because BQL can't write back to the Beancount files. I hope!
    query = f'SELECT filename, META("lineno") AS line, META("bank-statement") AS bank_statement, date, number(cost(position)), payee, ENTRY_META("entity") as entity, ANY_META("check-id") as check_id, narration where account = "{args.account}" and date >= {begin_date} and date <= {end_date}'
    _, result_rows = run_query(entries, options, query)
    books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])

    # Apply two passes of matching, one for standard matches and one
    # for subset matches.
    matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
    subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(
        remaining_statement_trans, remaining_books_trans)
    matches.extend(subset_matches)

    # Add the remaining unmatched to make one big list of matches, successful or not.
    unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
    matches.extend(unmatched)

    # Print out results of our matching.
    match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
    _, total_missing_from_books, total_missing_from_statement = totals(matches)
    print('-' * 155)
    statement_heading = f'Statement transactions {begin_date} to {end_date}'
    print(f'{statement_heading:<52}            {"Books transactions":<58}   Notes')
    print('-' * 155)
    for _, output in sorted(match_output, key=lambda x: x[0]):
        print(output)
    print('-' * 155)
    print(f'Sub-total not on statement: {total_missing_from_statement:12,.2f}')
    print(f'Sub-total not in books:     {total_missing_from_books:12,.2f}')
    print(f'Total:                      {total_missing_from_statement + total_missing_from_books:12,.2f}')
    print('-' * 155)
    print(format_output(matches, begin_date, end_date, args.csv_statement, args.show_reconciled_matches))

    # Write statement metadata back to the books.
    metadata_to_apply = []
    for match in matches:
        metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
    if metadata_to_apply and not args.non_interactive:
        print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
        if input().lower() == 'y':
            write_metadata_to_books(metadata_to_apply)


entry_point = cliutil.make_entry_point(__name__, PROGNAME)

if __name__ == '__main__':
    exit(entry_point())

tests/test_reconcile.py
 
import datetime
import decimal
import io
import os
import tempfile
import textwrap

from conservancy_beancount.reconcile.statement_reconciler import (
    date_proximity,
    format_output,
    match_statement_and_books,
    metadata_for_match,
    payee_match,
    read_amex_csv,
    read_fr_csv,
    remove_duplicate_words,
    remove_payee_junk,
    subset_match,
    totals,
    write_metadata_to_books,
)

 
# These data structures represent individual transactions as taken from the
# statement ("S") or the books ("B").

# Statement transaction examples.
S1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon         / Patreon   / 123456/ ST-A1B2C3D4G5H6       /',
    'check_id': '',
    'line': 222,
}
S2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'BT*LINODE           PHILADELPHIA        P',
    'check_id': '',
    'line': 333,
}
S3 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS PO 4067540039 0PORTLAND            OR',
    'check_id': '',
    'line': 444,
}
S4 = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-2260.00'),
    'payee': 'Trust 0000000362 210',
    'check_id': '',
    'line': 555,
}

# Books transaction examples.
B1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon',
    'check_id': '',
    'filename': '2022/imports.beancount',
    'line': 777,
    'bank_statement': '',
}
B2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'Linode',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 888,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_next_day = {
    'date': datetime.date(2022, 1, 4),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_next_week = {
    'date': datetime.date(2022, 1, 10),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_mismatch_amount = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('31.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_1 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit X',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_2 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit Y',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_unmatched_check_id = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '1234',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B4A = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4B = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
...
 
@@ -254,128 +255,136 @@ def test_date_proximity():
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=30)) == 0.5
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=60)) == 0.0
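
date_proximity itself isn't shown in this hunk, but these assertions pin down a linear decay reaching zero at 60 days apart. A sketch consistent with them (not necessarily the project's exact code):

    def date_proximity_sketch(d1: datetime.date, d2: datetime.date) -> float:
        # 1.0 for the same day, falling linearly to 0.0 at 60 days apart.
        days_apart = abs((d1 - d2).days)
        return max(0.0, 1.0 - days_apart / 60)

    assert date_proximity_sketch(datetime.date(2021, 8, 23), datetime.date(2021, 7, 24)) == 0.5
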
def test_remove_duplicate_words():
    assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'


def test_payee_matches_when_first_word_matches():
    assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
    assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8


def test_metadata_for_match(monkeypatch):
    monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
    assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
        ('2022/imports.beancount', 777, '    bank-statement: "statement.pdf"'),
        ('2022/imports.beancount', 777, '    bank-statement-csv: "statement.csv:222"'),
    ]


def test_no_metadata_if_no_matches():
    assert metadata_for_match(([S1], [], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([], [B1], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([S1], [B2], ['no match']), 'statement.pdf', 'statement.csv') == []


def test_write_to_books():
    books = textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX            -15.50 USD
          Expenses:Hosting                        15.50 USD""")
    f = tempfile.NamedTemporaryFile('w', delete=False)
    f.write(books)
    f.close()
    metadata = [(f.name, 2, '    bank-statement: statement.pdf')]
    write_metadata_to_books(metadata)
    with open(f.name) as f:
        output = f.read()
    assert output == textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX            -15.50 USD
            bank-statement: statement.pdf
          Expenses:Hosting                        15.50 USD""")
    os.remove(f.name)


def test_totals():
    assert totals([
        ([S1], [B1], []),
        ([S2], [], []),
        ([], [B3_next_day], []),
    ]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30'))


def test_payee_not_considered_if_check_id_present():
    # These records match aside from check-id.
    statement = [S3]
    books = [B3_unmatched_check_id]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_unmatched_check_id],
    )


def test_subset_sum_match():
    statement = [S4]
    books = [B4A, B4B, B4C]
    assert subset_match(statement, books) == (
        [([S4], [B4A, B4B, B4C], [])],
        [],  # No remaining statement trans.
        [],  # No remaining books trans.
    )
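
subset_match itself isn't shown in this hunk. For intuition, one way to find a group of books transactions summing to a single statement amount is brute force over combinations; a sketch, not the project's implementation:

    import itertools

    def find_summing_subset(target, candidates):
        # Try ever-larger combinations until one sums to the target amount.
        # Fine for the handful of candidate transactions per statement line.
        for size in range(2, len(candidates) + 1):
            for combo in itertools.combinations(candidates, size):
                if sum(t['amount'] for t in combo) == target:
                    return list(combo)
        return None
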
def test_subset_passes_through_all_non_matches():
    """This was used to locate a bug where some of the non-matches had
    gone missing due to mutation of books_trans."""
    statement_trans = [
        S1,  # No match
        S4,  # Match
    ]
    books_trans = [
        B2,  # No match
        B4A, B4B, B4C,  # Match
        B3_next_day, B3_next_week,  # No match
    ]
    assert subset_match(statement_trans, books_trans) == (
        [([S4], [B4A, B4B, B4C], [])],  # Matched
        [S1],  # No match: preserved intact
        [B2, B3_next_day, B3_next_week]  # No match: preserved intact
    )


def test_handles_amex_csv():
    CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net           San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net           San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
    expected = [
        {
            'date': datetime.date(2021, 8, 19),
            'amount': decimal.Decimal('-28.15'),
            'payee': 'Gandi San Francisco',
            'check_id': '',
            'line': 2,
        },
    ]
    assert read_amex_csv(io.StringIO(CSV)) == expected


def test_handles_fr_csv():
    CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK  3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
    expected = [
        {
            'date': datetime.date(2022, 4, 1),
            'amount': decimal.Decimal('6.50'),
            'payee': 'GONDOR S.S. A1111111',
            'check_id': '',
            'line': 2,
        },
        {
            'date': datetime.date(2022, 4, 18),
            'amount': decimal.Decimal('-4.50'),
            'payee': '',
            'check_id': '3741',
            'line': 3,
        },
    ]
    assert read_fr_csv(io.StringIO(CSV)) == expected


def test_format_output():
    statement = [S1]
    books = [B1]
    matches, _, _ = match_statement_and_books(statement, books)
    output = format_output(matches, datetime.date(2022, 1, 1), datetime.date(2022, 2, 1), 'test.csv', True)
    assert '2022-01-01:       10.00 Patreon         / Patreon   / 12345  →  2022-01-01:       10.00 Patreon                              ✓ Matched' in output