Files @ 8b08997fda07
Branch filter:

Location: NPO-Accounting/conservancy_beancount/tests/test_reconcile.py

bsturmfels
reconciler: Add --full-months option to round statement dates to month boundaries
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
import datetime
import decimal
import io
import os
import tempfile
import textwrap

import pytest

from conservancy_beancount.reconcile.statement_reconciler import (
    date_proximity,
    format_output,
    match_statement_and_books,
    metadata_for_match,
    payee_match,
    read_amex_csv,
    read_fr_csv,
    remove_duplicate_words,
    remove_payee_junk,
    round_to_month,
    subset_match,
    totals,
    write_metadata_to_books,
)

# These data structures represent individual transactions as taken from the
# statement ("S") or the books ("B").

# Statement transaction examples.
S1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon         / Patreon   / 123456/ ST-A1B2C3D4G5H6       /',
    'check_id': '',
    'line': 222,
}
S2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'BT*LINODE           PHILADELPHIA        P',
    'check_id': '',
    'line': 333,
}
S3 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS PO 4067540039 0PORTLAND            OR',
    'check_id': '',
    'line': 444,
}
S4 = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-2260.00'),
    'payee': 'Trust 0000000362 210',
    'check_id': '',
    'line': 555,
}

# Books transaction examples.
B1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon',
    'check_id': '',
    'filename': '2022/imports.beancount',
    'line': 777,
    'bank_statement': '',
}
B2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'Linode',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 888,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_next_day = {
    'date': datetime.date(2022, 1, 4),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_next_week = {
    'date': datetime.date(2022, 1, 10),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_mismatch_amount = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('31.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_1 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit X',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_2 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit Y',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_unmatched_check_id = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '1234',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B4A = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4B = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4C = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-1760.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}


def test_one_exact_match():
    statement = [S1]
    books = [B1]
    assert match_statement_and_books(statement, books) == (
        # Match, match, notes.
        #
        # The matches are a list so we can implement subset-sum matching where
        # multiple books transactions may match to a single statement
        # transaction.
        [([S1], [B1], [])],
        [],
        [],
    )


def test_multiple_exact_matches():
    statement = [S1, S2]
    books = [B1, B2]
    assert match_statement_and_books(statement, books) == (
        [([S1], [B1], []), ([S2], [B2], [])],
        [],
        [],
    )


def test_one_mismatch():
    statement = [S1]
    books = []
    assert match_statement_and_books(statement, books) == (
        [],
        [S1],
        [],
    )


def test_multiple_mismatches():
    statement = [S1]
    books = [B2]
    assert match_statement_and_books(statement, books) == (
        [],
        [S1],
        [B2],
    )


def test_next_day_matches():
    statement = [S3]
    books = [B3_next_day]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_next_day], ['+/- 1 days'])],
        [],
        [],
    )


def test_next_week_matches():
    statement = [S3]
    books = [B3_next_week]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_next_week], ['+/- 7 days'])],
        [],
        [],
    )


def test_incorrect_amount_does_not_match():
    statement = [S3]
    books = [B3_mismatch_amount]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_mismatch_amount],
    )


def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
    statement = [S3]
    books = [B3_payee_mismatch_1]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_payee_mismatch_1], ['payee mismatch'])],
        [],
        [],
    )


def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
    statement = [S3]
    books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
    match = match_statement_and_books(statement, books)
    assert match == (
        [],
        [S3],
        [B3_payee_mismatch_1, B3_payee_mismatch_2],
    )


def test_remove_payee_junk():
    assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS'
    assert remove_payee_junk('0000010017') == '10017'


def test_date_proximity():
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23)) == 1.0
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=30)) == 0.5
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=60)) == 0.0


def test_remove_duplicate_words():
    assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'


def test_payee_matches_when_first_word_matches():
    assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
    assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8


def test_metadata_for_match(monkeypatch):
    monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
    assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
        ('2022/imports.beancount', 777, '    bank-statement: "statement.pdf"'),
        ('2022/imports.beancount', 777, '    bank-statement-csv: "statement.csv:222"'),
    ]


def test_no_metadata_if_no_matches():
    assert metadata_for_match(([S1], [], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([], [B1], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([S1], [B2], ['no match']), 'statement.pdf', 'statement.csv') == []


def test_write_to_books():
    books = textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX            -15.50 USD
          Expenses:Hosting                        15.50 USD""")
    f = tempfile.NamedTemporaryFile('w', delete=False)
    f.write(books)
    f.close()
    metadata = [(f.name, 2, '    bank-statement: statement.pdf')]
    write_metadata_to_books(metadata)
    with open(f.name) as f:
        output = f.read()
    assert output == textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX            -15.50 USD
            bank-statement: statement.pdf
          Expenses:Hosting                        15.50 USD""")
    os.remove(f.name)


def test_totals():
    assert totals([
        ([S1], [B1], []),
        ([S2], [], []),
        ([], [B3_next_day], []),
    ]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30'))


def test_payee_not_considered_if_check_id_present():
    # These records match aside from check-id.
    statement = [S3]
    books = [B3_unmatched_check_id]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_unmatched_check_id],
    )


def test_subset_sum_match():
    statement = [S4]
    books = [B4A, B4B, B4C]
    assert subset_match(statement, books) == (
        [([S4], [B4A, B4B, B4C], [])],
        [],  # No remaining statement trans.
        [],  # No remaining books trans.
    )


def test_subset_passes_through_all_non_matches():
    """This was used to locate a bug where some of the non-matches had
    gone missing due to mutation of books_trans."""
    statement_trans = [
        S1,  # No match
        S4,  # Match
    ]
    books_trans = [
        B2,  # No match
        B4A, B4B, B4C,  # Match
        B3_next_day, B3_next_week,  # No match
    ]
    assert subset_match(statement_trans, books_trans) == (
        [([S4], [B4A, B4B, B4C], [])],  # Matched
        [S1],  # No match: preserved intact
        [B2, B3_next_day, B3_next_week]  # No match: preserved intact
    )


def test_handles_amex_csv():
    CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net           San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net           San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
    expected = [
        {
            'date': datetime.date(2021, 8, 19),
            'amount': decimal.Decimal('-28.15'),
            'payee': 'Gandi San Francisco',
            'check_id': '',
            'line': 2,
        },
    ]
    assert read_amex_csv(io.StringIO(CSV)) == expected


def test_handles_fr_csv():
    CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK  3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
    expected = [
        {
            'date': datetime.date(2022, 4, 1),
            'amount': decimal.Decimal('6.50'),
            'payee': 'GONDOR S.S. A1111111',
            'check_id': '',
            'line': 2,
        },
        {
            'date': datetime.date(2022, 4, 18),
            'amount': decimal.Decimal('-4.50'),
            'payee': '',
            'check_id': '3741',
            'line': 3,
        },
    ]
    assert read_fr_csv(io.StringIO(CSV)) == expected


def test_format_output():
    statement = [S1]
    books = [B1]
    matches, _, _ = match_statement_and_books(statement, books)
    output = format_output(matches, datetime.date(2022, 1, 1), datetime.date(2022, 2, 1), 'test.csv', True)
    assert '2022-01-01:       10.00 Patreon         / Patreon   / 12345  →  2022-01-01:       10.00 Patreon                              ✓ Matched' in output


month_test_data = [
    ((datetime.date(2022, 1, 2), datetime.date(2022, 1, 30)),
     (datetime.date(2022, 1, 1), datetime.date(2022, 1, 31))),
    ((datetime.date(2022, 4, 2), datetime.date(2022, 4, 29)),
     (datetime.date(2022, 4, 1), datetime.date(2022, 4, 30))),
    ((datetime.date(2022, 2, 2), datetime.date(2022, 2, 27)),
     (datetime.date(2022, 2, 1), datetime.date(2022, 2, 28))),
    ((datetime.date(2024, 2, 2), datetime.date(2024, 2, 27)),
     (datetime.date(2024, 2, 1), datetime.date(2024, 2, 29))),
]


@pytest.mark.parametrize('input_dates,rounded_dates', month_test_data)
def test_rounds_to_full_month(input_dates, rounded_dates):
    assert round_to_month(*input_dates) == rounded_dates