Changeset - 3acc097d32c7
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 17 months ago 2023-01-30 12:21:42
ben@sturm.com.au
reconcile: Update to support standard FR CSV format

It seems as though we were previously using a transaction search export CSV,
rather than the official statement CSV.
2 files changed with 87 insertions and 67 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -90,6 +90,7 @@ Other related problems we're not dealing with here:
 
"""
 

	
 
# TODO:
 
#  - entry_point seems to swallow errors
 
#  - extract the magic numbers
 
#  - consider merging in helper.py
 

	
...
 
@@ -105,7 +106,7 @@ import logging
 
import os
 
import re
 
import sys
 
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO
 
from typing import Dict, List, Optional, Sequence, Tuple, TextIO
 

	
 
from beancount import loader
 
from beancount.query.query import run_query
...
 
@@ -187,23 +188,16 @@ def remove_payee_junk(payee: str) -> str:
 
    return payee
 

	
 

	
 
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
 
    reader = csv.DictReader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 

	
 

	
 
def parse_amount(amount: str) -> decimal.Decimal:
 
    """Parse amounts and handle comma separators as seen in some FR statements."""
 
    return decimal.Decimal(amount.replace(',', ''))
 
    return decimal.Decimal(amount.replace('$', '').replace(',', ''))
 

	
 

	
 
def validate_amex_csv(sample: str, account: str) -> None:
 
def validate_amex_csv(sample: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 
        sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.")
 

	
 

	
 
def standardize_amex_record(row: Dict, line: int) -> Dict:
...
 
@@ -220,21 +214,47 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
 
    }
 

	
 

	
 
def validate_fr_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 

	
 

	
 
def standardize_fr_record(row: Dict, line: int) -> Dict:
 
    return {
 
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
 
        'amount': parse_amount(row['Amount']),
 
        'payee': remove_payee_junk(row['Detail'] or '')[:20],
 
        'check_id': row['Serial Num'].lstrip('0'),
 
def read_amex_csv(f: TextIO) -> list:
 
    reader = csv.DictReader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)])
 

	
 

	
 
def validate_fr_csv(sample: str) -> None:
 
    # No column headers in FR statements
 
    reader = csv.reader(io.StringIO(sample))
 
    next(reader)  # First row is previous statement ending balance
 
    row = next(reader)
 
    date = None
 
    try:
 
        date = datetime.datetime.strptime(row[1], '%m/%d/%Y')
 
    except ValueError:
 
        pass
 
    amount_found = '$' in row[4] and '$' in row[5]
 
    if len(row) != 6 or not date or not amount_found:
 
        sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.")
 

	
 

	
 
def standardize_fr_record(line, row):
 
    record = {
 
        'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(),
 
        'amount': parse_amount(row[4]),
 
        'payee': remove_payee_junk(row[3] or '')[:20],
 
        'check_id': row[2].replace('CHECK  ', '') if 'CHECK  ' in row[2] else '',
 
        'line': line,
 
    }
 
    return record
 

	
 

	
 
def read_fr_csv(f: TextIO) -> list:
 
    reader = csv.reader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records(
 
        standardize_fr_record(i, row) for i, row in enumerate(reader, 1)
 
        if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'}
 
    )
 

	
 

	
 
def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
...
 
@@ -617,16 +637,19 @@ def main(arglist: Optional[Sequence[str]] = None,
 
    # transaction data structure.
 
    if 'AMEX' in args.account:
 
        validate_csv = validate_amex_csv
 
        standardize_statement_record = standardize_amex_record
 
        read_csv = read_amex_csv
 
    else:
 
        validate_csv = validate_fr_csv
 
        standardize_statement_record = standardize_fr_record
 
        read_csv = read_fr_csv
 

	
 
    with open(args.csv_statement) as f:
 
        sample = f.read(200)
 
        validate_csv(sample, args.account)
 
        # Validate should return true/false and a message.
 
        validate_csv(sample)
 
        f.seek(0)
 
        statement_trans = read_transactions_from_csv(f, standardize_statement_record)
 
        # TODO: Needs a custom read_transactions_from_csv for each of AMEX and
 
        # FR since AMEX has a header row and FR doesn't.
 
        statement_trans = read_csv(f)
 

	
 
    # Dates are taken from the beginning/end of the statement.
 
    begin_date = statement_trans[0]['date']
tests/test_reconcile.py
Show inline comments
 
import datetime
 
import decimal
 
import io
 
import os
 
import tempfile
 
import textwrap
...
 
@@ -9,10 +10,10 @@ from conservancy_beancount.reconcile.statement_reconciler import (
 
    match_statement_and_books,
 
    metadata_for_match,
 
    payee_match,
 
    read_amex_csv,
 
    read_fr_csv,
 
    remove_duplicate_words,
 
    remove_payee_junk,
 
    standardize_amex_record,
 
    standardize_fr_record,
 
    subset_match,
 
    totals,
 
    write_metadata_to_books,
...
 
@@ -345,40 +346,36 @@ def test_subset_passes_through_all_non_matches():
 
    )
 

	
 

	
 
def test_handles_fr_record_with_comma_separators():
 
    # CSV would look something like:
 
    #
 
    # "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
 
    # "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
 
    input_row = {
 
        'Date': '02/07/2022',
 
        'Amount': '10,000.00',
 
        'Detail': 'XXXX',
 
        'Serial Num': '',
 
    }
 
    expected = {
 
        'date': datetime.date(2022, 2, 7),
 
        'amount': decimal.Decimal('10000'),
 
        'payee': 'XXXX',
 
        'check_id': '',
 
        'line': 1,
 
    }
 
    assert standardize_fr_record(input_row, line=1) == expected
 

	
 

	
 
def test_handles_amex_record_with_comma_separators():
 
    # This insn't typically a problem with AMEX, but adding for completeness.
 
    input_row = {
 
        'Date': '02/07/2022',
 
        'Amount': '-10,000.00',  # Amounts are from Bank's perspective/negated.
 
        'Description': 'XXXX',
 
        'Serial Num': '',
 
    }
 
    expected = {
 
        'date': datetime.date(2022, 2, 7),
 
        'amount': decimal.Decimal('10000'),
 
        'payee': 'XXXX',
 
        'check_id': '',
 
        'line': 1,
 
    }
 
    assert standardize_amex_record(input_row, line=1) == expected
 
def test_handles_amex_csv():
 
    CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net           San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net           San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
 
    expected = [
 
        {
 
            'date': datetime.date(2021, 8, 19),
 
            'amount': decimal.Decimal('-28.15'),
 
            'payee': 'Gandi San Francisco',
 
            'check_id': '',
 
            'line': 2,
 
        },
 
    ]
 
    assert read_amex_csv(io.StringIO(CSV)) == expected
 

	
 

	
 
def test_handles_fr_csv():
 
    CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK  3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
 
    expected = [
 
        {
 
            'date': datetime.date(2022, 4, 1),
 
            'amount': decimal.Decimal('6.50'),
 
            'payee': 'GONDOR S.S. A1111111',
 
            'check_id': '',
 
            'line': 2,
 
        },
 
        {
 
            'date': datetime.date(2022, 4, 18),
 
            'amount': decimal.Decimal('-4.50'),
 
            'payee': '',
 
            'check_id': '3741',
 
            'line': 3,
 
        },
 
    ]
 
    assert read_fr_csv(io.StringIO(CSV)) == expected
0 comments (0 inline, 0 general)