Changeset - 0968f7f051b8
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 17 months ago 2023-01-13 02:58:36
ben@sturm.com.au
reconciler: Handle comma separators in some FR statements
2 files changed with 54 insertions and 8 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -173,80 +173,85 @@ def remove_payee_junk(payee: str) -> str:
 
    for r in JUNK_WORDS_RES:
 
        payee = r.sub('', payee)
 
    payee = ZERO_RE.sub('', payee)
 
    payee = payee.replace(' - ', ' ')
 
    payee = re.sub(r'\.0\.\d+', ' ', payee)
 
    payee = payee.replace('.0', ' ')
 
    payee = payee.replace('/', ' ')
 
    payee = re.sub(re.escape('.com'), ' ', payee, flags=re.IGNORECASE)
 
    payee = re.sub(re.escape('.net'), ' ', payee, flags=re.IGNORECASE)
 
    payee = payee.replace('*', ' ')
 
    payee = ' '.join([i for i in payee.split(' ') if len(i) > 2])
 
    payee = payee.replace('-', ' ')
 
    payee = remove_duplicate_words(payee)
 
    payee.strip()
 
    return payee
 

	
 

	
 
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
 
    reader = csv.DictReader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 

	
 

	
 
def parse_amount(amount: str) -> decimal.Decimal:
 
    """Parse amounts and handle comma separators as seen in some FR statements."""
 
    return decimal.Decimal(amount.replace(',', ''))
 

	
 

	
 
def validate_amex_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 

	
 

	
 
def standardize_amex_record(row: Dict, line: int) -> Dict:
 
    """Turn an AMEX CSV row into a standard dict format representing a transaction."""
 
    # NOTE: Statement doesn't seem to give us a running balance or a final total.
 
    return {
 
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
 
        'amount': -1 * decimal.Decimal(row['Amount']),
 
        'amount': -1 * parse_amount(row['Amount']),
 
        # Descriptions have too much noise, so taking just the start
 
        # significantly assists the fuzzy matching.
 
        'payee': remove_payee_junk(row['Description'] or '')[:20],
 
        'check_id': '',
 
        'line': line,
 
    }
 

	
 

	
 
def validate_fr_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 

	
 

	
 
def standardize_fr_record(row: Dict, line: int) -> Dict:
 
    return {
 
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
 
        'amount': decimal.Decimal(row['Amount']),
 
        'amount': parse_amount(row['Amount']),
 
        'payee': remove_payee_junk(row['Detail'] or '')[:20],
 
        'check_id': row['Serial Num'].lstrip('0'),
 
        'line': line,
 
    }
 

	
 

	
 
def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
 
    """Turn a Beancount query result row into a standard dict representing a transaction."""
 
    return {
 
        'date': row.date,
 
        'amount': row.number_cost_position,
 
        'payee': remove_payee_junk(f'{row.payee or ""} {row.entity or ""} {row.narration or ""}'),
 
        'check_id': str(row.check_id or ''),
 
        'filename': row.filename,
 
        'line': row.line,
 
        'bank_statement': row.bank_statement,
 
    }
 

	
 

	
 
def format_record(record: dict) -> str:
 
    """Generate output lines for a standard 1:1 match."""
 
    if record['payee'] and record['check_id']:
 
        output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
 
    elif record['payee']:
tests/test_reconcile.py
Show inline comments
 
import datetime
 
import decimal
 
import os
 
import tempfile
 
import textwrap
 

	
 
from conservancy_beancount.reconcile.statement_reconciler import (
 
    match_statement_and_books,
 
    remove_payee_junk,
 
    date_proximity,
 
    remove_duplicate_words,
 
    payee_match,
 
    match_statement_and_books,
 
    metadata_for_match,
 
    write_metadata_to_books,
 
    totals,
 
    payee_match,
 
    remove_duplicate_words,
 
    remove_payee_junk,
 
    standardize_amex_record,
 
    standardize_fr_record,
 
    subset_match,
 
    totals,
 
    write_metadata_to_books,
 
)
 

	
 
# These data structures represent individual transactions as taken from the
 
# statement ("S") or the books ("B").
 

	
 
# Statement transaction examples.
 
S1 = {
 
    'date': datetime.date(2022, 1, 1),
 
    'amount': decimal.Decimal('10.00'),
 
    'payee': 'Patreon         / Patreon   / 123456/ ST-A1B2C3D4G5H6       /',
 
    'check_id': '',
 
    'line': 222,
 
}
 
S2 = {
 
    'date': datetime.date(2022, 1, 2),
 
    'amount': decimal.Decimal('20.00'),
 
    'payee': 'BT*LINODE           PHILADELPHIA        P',
 
    'check_id': '',
 
    'line': 333,
 
}
 
S3 = {
 
    'date': datetime.date(2022, 1, 3),
 
    'amount': decimal.Decimal('30.00'),
 
    'payee': 'USPS PO 4067540039 0PORTLAND            OR',
...
 
@@ -320,24 +322,63 @@ def test_subset_sum_match():
 
    assert subset_match(statement, books) == (
 
        [([S4], [B4A, B4B, B4C], [])],
 
        [],  # No remaining statement trans.
 
        [],  # No remaining books trans.
 
    )
 

	
 

	
 
def test_subset_passes_through_all_non_matches():
 
    """This was used to locate a bug where some of the non-matches had
 
    gone missing due to mutation of books_trans."""
 
    statement_trans = [
 
        S1,  # No match
 
        S4,  # Match
 
    ]
 
    books_trans = [
 
        B2,  # No match
 
        B4A, B4B, B4C,  # Match
 
        B3_next_day, B3_next_week,  # No match
 
    ]
 
    assert subset_match(statement_trans, books_trans) == (
 
        [([S4], [B4A, B4B, B4C], [])],  # Matched
 
        [S1],  # No match: preserved intact
 
        [B2, B3_next_day, B3_next_week]  # No match: preserved intact
 
    )
 

	
 

	
 
def test_handles_fr_record_with_comma_separators():
 
    # CSV would look something like:
 
    #
 
    # "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
 
    # "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
 
    input_row = {
 
        'Date': '02/07/2022',
 
        'Amount': '10,000.00',
 
        'Detail': 'XXXX',
 
        'Serial Num': '',
 
    }
 
    expected = {
 
        'date': datetime.date(2022, 2, 7),
 
        'amount': decimal.Decimal('10000'),
 
        'payee': 'XXXX',
 
        'check_id': '',
 
        'line': 1,
 
    }
 
    assert standardize_fr_record(input_row, line=1) == expected
 

	
 

	
 
def test_handles_amex_record_with_comma_separators():
 
    # This insn't typically a problem with AMEX, but adding for completeness.
 
    input_row = {
 
        'Date': '02/07/2022',
 
        'Amount': '-10,000.00',  # Amounts are from Bank's perspective/negated.
 
        'Description': 'XXXX',
 
        'Serial Num': '',
 
    }
 
    expected = {
 
        'date': datetime.date(2022, 2, 7),
 
        'amount': decimal.Decimal('10000'),
 
        'payee': 'XXXX',
 
        'check_id': '',
 
        'line': 1,
 
    }
 
    assert standardize_amex_record(input_row, line=1) == expected
0 comments (0 inline, 0 general)