Changeset - 8b08997fda07
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 16 months ago 2023-02-11 05:00:21
ben@sturm.com.au
reconciler: Add --full-months option to round statement dates to month boundaries
2 files changed with 38 insertions and 0 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -286,24 +286,38 @@ def format_record(record: dict) -> str:
 

	
 
def format_multirecord(r1s: List[dict], r2s: List[dict], note: str) -> List[list]:
    """Build the output rows for one statement entry matched to several books entries.

    The first row pairs the statement record with the first books record;
    every further books record gets a continuation row marked with "↳".
    Each row is a two-item list of ``[statement date, formatted text]``.
    """
    assert len(r1s) == 1
    assert len(r2s) > 1
    statement_record = r1s[0]
    match_date = statement_record['date']
    # Leading row: the statement record alongside the first books record.
    leading_row = [
        match_date,
        f'{format_record(statement_record)}  →  {format_record(r2s[0])}  ✓ Matched{note}',
    ]
    # Continuation rows: one per remaining books record, keyed by the same date.
    continuation_rows = [
        [match_date, f'{match_date.isoformat()}:             ↳                                    →  {format_record(books_record)}  ✓ Matched{note}']
        for books_record in r2s[1:]
    ]
    return [leading_row] + continuation_rows
 

	
 

	
 
def _start_of_month(time, offset_months=0):
    """Return ``time`` moved to day 1 of its month, advanced by ``offset_months`` months.

    An offset of zero or less leaves the month unchanged.
    """
    shifted = time
    remaining = offset_months
    while remaining > 0:
        # Day 28 exists in every month, and adding 4 days to it always lands
        # within the first few days of the following month.
        shifted = shifted.replace(day=28) + datetime.timedelta(days=4)
        remaining -= 1
    return shifted.replace(day=1)


def round_to_month(begin_date, end_date):
    """Widen a date range so it starts on the first and ends on the last day of a month.

    Returns a ``(begin, end)`` tuple: the first day of ``begin_date``'s month
    and the last day of ``end_date``'s month.
    """
    first_day = _start_of_month(begin_date)
    # The last day of a month is the day before the first day of the next one.
    start_of_following_month = _start_of_month(end_date, offset_months=1)
    last_day = start_of_following_month - datetime.timedelta(days=1)
    return (first_day, last_day)
 

	
 

	
 
def sort_records(records: List) -> List:
    """Return a new list of the records ordered by date, then by amount."""
    def _date_then_amount(record):
        return (record['date'], record['amount'])

    return sorted(records, key=_date_then_amount)
 

	
 

	
 
def first_word_exact_match(a: str, b: str) -> float:
 
    """Score a payee match based first word.
 

	
 
    We get a whole lot of good matches this way. Helps in the
 
    situation where the first word or two of a transaction description
 
    is useful and the rest is garbage.
 

	
 
    """
...
 
@@ -587,24 +601,25 @@ def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
 

	
 
def parse_arguments(argv: List[str]) -> argparse.Namespace:
    """Parse the reconciler's command-line arguments.

    Args:
        argv: The argument strings to parse, passed straight to
            ``ArgumentParser.parse_args``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
    cliutil.add_version_argument(parser)
    cliutil.add_loglevel_argument(parser)
    parser.add_argument('--beancount-file', required=True, type=parse_path)
    parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
    parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
    parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
    # parser.add_argument('--report-group-regex')
    parser.add_argument('--show-reconciled-matches', action='store_true')
    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
    # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
    parser.add_argument('--full-months', action='store_true', help='Match payments over the full month, rather than just between the beginning and end dates of the CSV statement')
    return parser.parse_args(args=argv)
 

	
 

	
 
def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
 
    """Calculate the totals of transactions matched/not-matched."""
 
    total_matched = decimal.Decimal(0)
 
    total_missing_from_books = decimal.Decimal(0)
 
    total_missing_from_statement = decimal.Decimal(0)
 
    for statement_entries, books_entries, _ in matches:
 
        if statement_entries and books_entries:
 
            total_matched += sum(c['amount'] for c in statement_entries)
...
 
@@ -667,24 +682,27 @@ def main(arglist: Optional[Sequence[str]] = None,
 
        sample = f.read(200)
 
        # Validate should return true/false and a message.
 
        validate_csv(sample)
 
        f.seek(0)
 
        # TODO: Needs a custom read_transactions_from_csv for each of AMEX and
 
        # FR since AMEX has a header row and FR doesn't.
 
        statement_trans = read_csv(f)
 

	
 
    # Dates are taken from the beginning/end of the statement.
 
    begin_date = statement_trans[0]['date']
 
    end_date = statement_trans[-1]['date']
 

	
 
    if args.full_months:
 
        begin_date, end_date = round_to_month(begin_date, end_date)
 

	
 
    # Query for the Beancount books data for the above period.
 
    #
 
    # There are pros and cons for using Beancount's in-memory entries
 
    # list directly and also for using Beancount Query Language (BQL)
 
    # to get a list of transactions. Using BQL because it's
 
    # convenient, but we don't have access to the full transaction
 
    # entry objects. Feels a bit strange that these approaches are so
 
    # disconnected.
 
    #
 
    # beancount.query.query_compile.compile() and
 
    # beancount.query.query_execute.filter_entries() look useful in this respect,
 
    # but I'm not clear on how to use compile(). An example would help.
tests/test_reconcile.py
Show inline comments
 
import datetime
 
import decimal
 
import io
 
import os
 
import tempfile
 
import textwrap
 

	
 
import pytest
 

	
 
from conservancy_beancount.reconcile.statement_reconciler import (
 
    date_proximity,
 
    format_output,
 
    match_statement_and_books,
 
    metadata_for_match,
 
    payee_match,
 
    read_amex_csv,
 
    read_fr_csv,
 
    remove_duplicate_words,
 
    remove_payee_junk,
 
    round_to_month,
 
    subset_match,
 
    totals,
 
    write_metadata_to_books,
 
)
 

	
 
# These data structures represent individual transactions as taken from the
 
# statement ("S") or the books ("B").
 

	
 
# Statement transaction examples.
 
S1 = {
 
    'date': datetime.date(2022, 1, 1),
 
    'amount': decimal.Decimal('10.00'),
...
 
@@ -379,12 +382,29 @@ def test_handles_fr_csv():
 
            'line': 3,
 
        },
 
    ]
 
    assert read_fr_csv(io.StringIO(CSV)) == expected
 

	
 

	
 
def test_format_output():
    """Check that the formatted report contains the expected line for the S1/B1 match."""
    matches, _, _ = match_statement_and_books([S1], [B1])
    period_begin = datetime.date(2022, 1, 1)
    period_end = datetime.date(2022, 2, 1)
    output = format_output(matches, period_begin, period_end, 'test.csv', True)
    expected_line = '2022-01-01:       10.00 Patreon         / Patreon   / 12345  →  2022-01-01:       10.00 Patreon                              ✓ Matched'
    assert expected_line in output
 

	
 

	
 
# Cases for round_to_month(): each entry is
# ((begin_date, end_date), (expected_begin, expected_end)).
month_test_data = [
    # 31-day month.
    ((datetime.date(2022, 1, 2), datetime.date(2022, 1, 30)),
     (datetime.date(2022, 1, 1), datetime.date(2022, 1, 31))),
    # 30-day month.
    ((datetime.date(2022, 4, 2), datetime.date(2022, 4, 29)),
     (datetime.date(2022, 4, 1), datetime.date(2022, 4, 30))),
    # February in a non-leap year (28 days).
    ((datetime.date(2022, 2, 2), datetime.date(2022, 2, 27)),
     (datetime.date(2022, 2, 1), datetime.date(2022, 2, 28))),
    # February in a leap year (29 days).
    ((datetime.date(2024, 2, 2), datetime.date(2024, 2, 27)),
     (datetime.date(2024, 2, 1), datetime.date(2024, 2, 29))),
]
 

	
 

	
 
@pytest.mark.parametrize('input_dates,rounded_dates', month_test_data)
def test_rounds_to_full_month(input_dates, rounded_dates):
    """round_to_month() should expand each date pair to whole-month boundaries."""
    begin, end = input_dates
    assert round_to_month(begin, end) == rounded_dates
0 comments (0 inline, 0 general)