Changeset - 3acc097d32c7
[Not reviewed]
0 2 0
Ben Sturmfels (bsturmfels) - 17 months ago 2023-01-30 12:21:42
ben@sturm.com.au
reconcile: Update to support standard FR CSV format

It seems as though we were previously using a transaction search export CSV,
rather than the official statement CSV.
2 files changed with 87 insertions and 67 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -92,2 +92,3 @@ Other related problems we're not dealing with here:
 
# TODO:
 
#  - entry_point seems to swallow errors
 
#  - extract the magic numbers
...
 
@@ -107,3 +108,3 @@ import re
 
import sys
 
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO
 
from typing import Dict, List, Optional, Sequence, Tuple, TextIO
 

	
...
 
@@ -189,15 +190,8 @@ def remove_payee_junk(payee: str) -> str:
 

	
 
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
 
    reader = csv.DictReader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 

	
 

	
 
def parse_amount(amount: str) -> decimal.Decimal:
 
    """Parse amounts and handle comma separators as seen in some FR statements."""
 
    return decimal.Decimal(amount.replace(',', ''))
 
    return decimal.Decimal(amount.replace('$', '').replace(',', ''))
 

	
 

	
 
def validate_amex_csv(sample: str, account: str) -> None:
 
def validate_amex_csv(sample: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
...
 
@@ -205,3 +199,3 @@ def validate_amex_csv(sample: str, account: str) -> None:
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 
        sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.")
 

	
...
 
@@ -222,17 +216,43 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
 

	
 
def validate_fr_csv(sample: str, account: str) -> None:
 
    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
 
    reader = csv.DictReader(io.StringIO(sample))
 
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 

	
 

	
 
def standardize_fr_record(row: Dict, line: int) -> Dict:
 
    return {
 
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
 
        'amount': parse_amount(row['Amount']),
 
        'payee': remove_payee_junk(row['Detail'] or '')[:20],
 
        'check_id': row['Serial Num'].lstrip('0'),
 
def read_amex_csv(f: TextIO) -> list:
 
    reader = csv.DictReader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)])
 

	
 

	
 
def validate_fr_csv(sample: str) -> None:
 
    # No column headers in FR statements
 
    reader = csv.reader(io.StringIO(sample))
 
    next(reader)  # First row is previous statement ending balance
 
    row = next(reader)
 
    date = None
 
    try:
 
        date = datetime.datetime.strptime(row[1], '%m/%d/%Y')
 
    except ValueError:
 
        pass
 
    amount_found = '$' in row[4] and '$' in row[5]
 
    if len(row) != 6 or not date or not amount_found:
 
        sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.")
 

	
 

	
 
def standardize_fr_record(line, row):
 
    record = {
 
        'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(),
 
        'amount': parse_amount(row[4]),
 
        'payee': remove_payee_junk(row[3] or '')[:20],
 
        'check_id': row[2].replace('CHECK  ', '') if 'CHECK  ' in row[2] else '',
 
        'line': line,
 
    }
 
    return record
 

	
 

	
 
def read_fr_csv(f: TextIO) -> list:
 
    reader = csv.reader(f)
 
    # The reader.line_num is the source line number, not the spreadsheet row
 
    # number due to multi-line records.
 
    return sort_records(
 
        standardize_fr_record(i, row) for i, row in enumerate(reader, 1)
 
        if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'}
 
    )
 

	
...
 
@@ -619,6 +639,6 @@ def main(arglist: Optional[Sequence[str]] = None,
 
        validate_csv = validate_amex_csv
 
        standardize_statement_record = standardize_amex_record
 
        read_csv = read_amex_csv
 
    else:
 
        validate_csv = validate_fr_csv
 
        standardize_statement_record = standardize_fr_record
 
        read_csv = read_fr_csv
 

	
...
 
@@ -626,5 +646,8 @@ def main(arglist: Optional[Sequence[str]] = None,
 
        sample = f.read(200)
 
        validate_csv(sample, args.account)
 
        # Validate should return true/false and a message.
 
        validate_csv(sample)
 
        f.seek(0)
 
        statement_trans = read_transactions_from_csv(f, standardize_statement_record)
 
        # TODO: Needs a custom read_transactions_from_csv for each of AMEX and
 
        # FR since AMEX has a header row and FR doesn't.
 
        statement_trans = read_csv(f)
 

	
tests/test_reconcile.py
Show inline comments
...
 
@@ -2,2 +2,3 @@ import datetime
 
import decimal
 
import io
 
import os
...
 
@@ -11,6 +12,6 @@ from conservancy_beancount.reconcile.statement_reconciler import (
 
    payee_match,
 
    read_amex_csv,
 
    read_fr_csv,
 
    remove_duplicate_words,
 
    remove_payee_junk,
 
    standardize_amex_record,
 
    standardize_fr_record,
 
    subset_match,
...
 
@@ -347,38 +348,34 @@ def test_subset_passes_through_all_non_matches():
 

	
 
def test_handles_fr_record_with_comma_separators():
 
    # CSV would look something like:
 
    #
 
    # "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
 
    # "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
 
    input_row = {
 
        'Date': '02/07/2022',
 
        'Amount': '10,000.00',
 
        'Detail': 'XXXX',
 
        'Serial Num': '',
 
    }
 
    expected = {
 
        'date': datetime.date(2022, 2, 7),
 
        'amount': decimal.Decimal('10000'),
 
        'payee': 'XXXX',
 
        'check_id': '',
 
        'line': 1,
 
    }
 
    assert standardize_fr_record(input_row, line=1) == expected
 

	
 

	
 
def test_handles_amex_record_with_comma_separators():
 
    # This insn't typically a problem with AMEX, but adding for completeness.
 
    input_row = {
 
        'Date': '02/07/2022',
 
        'Amount': '-10,000.00',  # Amounts are from Bank's perspective/negated.
 
        'Description': 'XXXX',
 
        'Serial Num': '',
 
    }
 
    expected = {
 
        'date': datetime.date(2022, 2, 7),
 
        'amount': decimal.Decimal('10000'),
 
        'payee': 'XXXX',
 
        'check_id': '',
 
        'line': 1,
 
    }
 
    assert standardize_amex_record(input_row, line=1) == expected
 
def test_handles_amex_csv():
 
    CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net           San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net           San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
 
    expected = [
 
        {
 
            'date': datetime.date(2021, 8, 19),
 
            'amount': decimal.Decimal('-28.15'),
 
            'payee': 'Gandi San Francisco',
 
            'check_id': '',
 
            'line': 2,
 
        },
 
    ]
 
    assert read_amex_csv(io.StringIO(CSV)) == expected
 

	
 

	
 
def test_handles_fr_csv():
 
    CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK  3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
 
    expected = [
 
        {
 
            'date': datetime.date(2022, 4, 1),
 
            'amount': decimal.Decimal('6.50'),
 
            'payee': 'GONDOR S.S. A1111111',
 
            'check_id': '',
 
            'line': 2,
 
        },
 
        {
 
            'date': datetime.date(2022, 4, 18),
 
            'amount': decimal.Decimal('-4.50'),
 
            'payee': '',
 
            'check_id': '3741',
 
            'line': 3,
 
        },
 
    ]
 
    assert read_fr_csv(io.StringIO(CSV)) == expected
0 comments (0 inline, 0 general)