import datetime import decimal import io import os import tempfile import textwrap import pytest from conservancy_beancount.reconcile.statement_reconciler import ( date_proximity, format_output, match_statement_and_books, metadata_for_match, payee_match, read_amex_csv, read_fr_csv, remove_duplicate_words, remove_payee_junk, round_to_month, subset_match, totals, write_metadata_to_books, ) # These data structures represent individual transactions as taken from the # statement ("S") or the books ("B"). # Statement transaction examples. S1 = { 'date': datetime.date(2022, 1, 1), 'amount': decimal.Decimal('10.00'), 'payee': 'Patreon / Patreon / 123456/ ST-A1B2C3D4G5H6 /', 'check_id': '', 'line': 222, } S2 = { 'date': datetime.date(2022, 1, 2), 'amount': decimal.Decimal('20.00'), 'payee': 'BT*LINODE PHILADELPHIA P', 'check_id': '', 'line': 333, } S3 = { 'date': datetime.date(2022, 1, 3), 'amount': decimal.Decimal('30.00'), 'payee': 'USPS PO 4067540039 0PORTLAND OR', 'check_id': '', 'line': 444, } S4 = { 'date': datetime.date(2022, 8, 11), 'amount': decimal.Decimal('-2260.00'), 'payee': 'Trust 0000000362 210', 'check_id': '', 'line': 555, } # Books transaction examples. B1 = { 'date': datetime.date(2022, 1, 1), 'amount': decimal.Decimal('10.00'), 'payee': 'Patreon', 'check_id': '', 'filename': '2022/imports.beancount', 'line': 777, 'bank_statement': '', } B2 = { 'date': datetime.date(2022, 1, 2), 'amount': decimal.Decimal('20.00'), 'payee': 'Linode', 'check_id': '', 'filename': '2022/main.beancount', 'line': 888, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B3_next_day = { 'date': datetime.date(2022, 1, 4), 'amount': decimal.Decimal('30.00'), 'payee': 'USPS', 'check_id': '', 'filename': '2022/main.beancount', 'line': 999, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B3_next_week = { 'date': datetime.date(2022, 1, 10), 'amount': decimal.Decimal('30.00'), 'payee': 'USPS', 'check_id': '', 'filename': '2022/main.beancount', 'line': 999, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B3_mismatch_amount = { 'date': datetime.date(2022, 1, 3), 'amount': decimal.Decimal('31.00'), 'payee': 'USPS', 'check_id': '', 'filename': '2022/main.beancount', 'line': 999, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B3_payee_mismatch_1 = { 'date': datetime.date(2022, 1, 3), 'amount': decimal.Decimal('30.00'), 'payee': 'Credit X', 'check_id': '', 'filename': '2022/main.beancount', 'line': 999, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B3_payee_mismatch_2 = { 'date': datetime.date(2022, 1, 3), 'amount': decimal.Decimal('30.00'), 'payee': 'Credit Y', 'check_id': '', 'filename': '2022/main.beancount', 'line': 999, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B3_unmatched_check_id = { 'date': datetime.date(2022, 1, 3), 'amount': decimal.Decimal('30.00'), 'payee': 'USPS', 'check_id': '1234', 'filename': '2022/main.beancount', 'line': 999, 'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" } B4A = { 'date': datetime.date(2022, 8, 11), 'amount': decimal.Decimal('-250.00'), 'payee': 'TRUST 0000000362 ACH Retirement Plan', 'check_id': '', 'line': 1000, } B4B = { 'date': datetime.date(2022, 8, 11), 'amount': decimal.Decimal('-250.00'), 'payee': 'TRUST 0000000362 ACH Retirement Plan', 'check_id': '', 'line': 1000, } B4C = { 'date': datetime.date(2022, 8, 11), 'amount': decimal.Decimal('-1760.00'), 'payee': 'TRUST 0000000362 ACH Retirement Plan', 'check_id': '', 'line': 1000, } def test_one_exact_match(): statement = [S1] books = [B1] assert match_statement_and_books(statement, books) == ( # Match, match, notes. # # The matches are a list so we can implement subset-sum matching where # multiple books transactions may match to a single statement # transaction. [([S1], [B1], [])], [], [], ) def test_multiple_exact_matches(): statement = [S1, S2] books = [B1, B2] assert match_statement_and_books(statement, books) == ( [([S1], [B1], []), ([S2], [B2], [])], [], [], ) def test_one_mismatch(): statement = [S1] books = [] assert match_statement_and_books(statement, books) == ( [], [S1], [], ) def test_multiple_mismatches(): statement = [S1] books = [B2] assert match_statement_and_books(statement, books) == ( [], [S1], [B2], ) def test_next_day_matches(): statement = [S3] books = [B3_next_day] assert match_statement_and_books(statement, books) == ( [([S3], [B3_next_day], ['+/- 1 days'])], [], [], ) def test_next_week_matches(): statement = [S3] books = [B3_next_week] assert match_statement_and_books(statement, books) == ( [([S3], [B3_next_week], ['+/- 7 days'])], [], [], ) def test_incorrect_amount_does_not_match(): statement = [S3] books = [B3_mismatch_amount] assert match_statement_and_books(statement, books) == ( [], [S3], [B3_mismatch_amount], ) def test_payee_mismatch_ok_when_only_one_that_amount_and_date(): statement = [S3] books = [B3_payee_mismatch_1] assert match_statement_and_books(statement, books) == ( [([S3], [B3_payee_mismatch_1], ['payee mismatch'])], [], [], ) def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date(): statement = [S3] books = [B3_payee_mismatch_1, B3_payee_mismatch_2] match = match_statement_and_books(statement, books) assert match == ( [], [S3], [B3_payee_mismatch_1, B3_payee_mismatch_2], ) def test_remove_payee_junk(): assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS' assert remove_payee_junk('0000010017') == '10017' def test_date_proximity(): assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23)) == 1.0 assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=30)) == 0.5 assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=60)) == 0.0 def test_remove_duplicate_words(): assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow' def test_payee_matches_when_first_word_matches(): assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0 assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8 def test_metadata_for_match(monkeypatch): monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.') assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [ ('2022/imports.beancount', 777, ' bank-statement: "statement.pdf"'), ('2022/imports.beancount', 777, ' bank-statement-csv: "statement.csv:222"'), ] def test_no_metadata_if_no_matches(): assert metadata_for_match(([S1], [], ['no match']), 'statement.pdf', 'statement.csv') == [] assert metadata_for_match(([], [B1], ['no match']), 'statement.pdf', 'statement.csv') == [] assert metadata_for_match(([S1], [B2], ['no match']), 'statement.pdf', 'statement.csv') == [] def test_write_to_books(): books = textwrap.dedent("""\ 2021-08-16 txn "Gandi" "transfer seleniumconf.us" Liabilities:CreditCard:AMEX -15.50 USD Expenses:Hosting 15.50 USD""") f = tempfile.NamedTemporaryFile('w', delete=False) f.write(books) f.close() metadata = [(f.name, 2, ' bank-statement: statement.pdf')] write_metadata_to_books(metadata) with open(f.name) as f: output = f.read() assert output == textwrap.dedent("""\ 2021-08-16 txn "Gandi" "transfer seleniumconf.us" Liabilities:CreditCard:AMEX -15.50 USD bank-statement: statement.pdf Expenses:Hosting 15.50 USD""") os.remove(f.name) def test_totals(): assert totals([ ([S1], [B1], []), ([S2], [], []), ([], [B3_next_day], []), ]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30')) def test_payee_not_considered_if_check_id_present(): # These records match aside from check-id. statement = [S3] books = [B3_unmatched_check_id] assert match_statement_and_books(statement, books) == ( [], [S3], [B3_unmatched_check_id], ) def test_subset_sum_match(): statement = [S4] books = [B4A, B4B, B4C] assert subset_match(statement, books) == ( [([S4], [B4A, B4B, B4C], [])], [], # No remaining statement trans. [], # No remaining books trans. ) def test_subset_passes_through_all_non_matches(): """This was used to locate a bug where some of the non-matches had gone missing due to mutation of books_trans.""" statement_trans = [ S1, # No match S4, # Match ] books_trans = [ B2, # No match B4A, B4B, B4C, # Match B3_next_day, B3_next_week, # No match ] assert subset_match(statement_trans, books_trans) == ( [([S4], [B4A, B4B, B4C], [])], # Matched [S1], # No match: preserved intact [B2, B3_next_day, B3_next_week] # No match: preserved intact ) def test_subset_passes_though_unmatched_transactions_with_same_payee(): # Tracy noticed that when multiple books payments had the same date and # payee and were unmatched, they were being displayed lumped together, when # they should have remained separate. B1a = { 'date': datetime.date(2022, 1, 1), 'amount': decimal.Decimal('100.00'), 'payee': 'Hannah', 'check_id': '', 'filename': '2022/imports.beancount', 'line': 777, 'bank_statement': '', } B1b = { 'date': datetime.date(2022, 1, 1), 'amount': decimal.Decimal('100.00'), 'payee': 'Hannah', 'check_id': '', 'filename': '2022/imports.beancount', 'line': 797, 'bank_statement': '', } assert subset_match([], [B1a, B1b]) == ( [], [], [B1a, B1b], # No match: two preserved intact ) def test_handles_amex_csv(): CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n""" expected = [ { 'date': datetime.date(2021, 8, 19), 'amount': decimal.Decimal('-28.15'), 'payee': 'Gandi San Francisco', 'check_id': '', 'line': 2, }, ] assert read_amex_csv(io.StringIO(CSV)) == expected def test_handles_fr_csv(): CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n""" expected = [ { 'date': datetime.date(2022, 4, 1), 'amount': decimal.Decimal('6.50'), 'payee': 'GONDOR S.S. A1111111', 'check_id': '', 'line': 2, }, { 'date': datetime.date(2022, 4, 18), 'amount': decimal.Decimal('-4.50'), 'payee': '', 'check_id': '3741', 'line': 3, }, ] assert read_fr_csv(io.StringIO(CSV)) == expected def test_format_output(): statement = [S1] books = [B1] matches, _, _ = match_statement_and_books(statement, books) output = format_output(matches, datetime.date(2022, 1, 1), datetime.date(2022, 2, 1), 'test.csv', True) assert '2022-01-01: 10.00 Patreon / Patreon / 12345 → 2022-01-01: 10.00 Patreon ✓ Matched' in output month_test_data = [ ((datetime.date(2022, 1, 2), datetime.date(2022, 1, 30)), (datetime.date(2022, 1, 1), datetime.date(2022, 1, 31))), ((datetime.date(2022, 4, 2), datetime.date(2022, 4, 29)), (datetime.date(2022, 4, 1), datetime.date(2022, 4, 30))), ((datetime.date(2022, 2, 2), datetime.date(2022, 2, 27)), (datetime.date(2022, 2, 1), datetime.date(2022, 2, 28))), ((datetime.date(2024, 2, 2), datetime.date(2024, 2, 27)), (datetime.date(2024, 2, 1), datetime.date(2024, 2, 29))), ] @pytest.mark.parametrize('input_dates,rounded_dates', month_test_data) def test_rounds_to_full_month(input_dates, rounded_dates): assert round_to_month(*input_dates) == rounded_dates