Changeset - 8a4fb5758cb8
[Not reviewed]
main
0 2 0
Ben Sturmfels (bsturmfels) - 30 days ago 2024-08-20 13:32:51
ben@sturm.com.au
statement_reconciler: Fix TypeError in Chase reconciler

There was an incorrect call to replace().
2 files changed with 2 insertions and 2 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -197,193 +197,193 @@ def remove_payee_junk(payee: str) -> str:
 
def parse_amount(amount: str) -> decimal.Decimal:
    """Convert a statement amount string into a ``Decimal``.

    Dollar signs and comma separators (as seen in some FR statements) are
    removed before the remaining text is parsed.
    """
    without_currency = amount.replace('$', '')
    digits = without_currency.replace(',', '')
    return decimal.Decimal(digits)
 

	
 

	
 
def validate_amex_csv(sample: str) -> None:
    """Exit with an explanatory message if ``sample`` lacks the expected AMEX columns.

    ``sample`` is CSV text whose first row is read as the header. An empty
    sample (no header row at all) passes silently.

    Raises:
        SystemExit: if a header row is present but any required column is
            missing from it.
    """
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
    reader = csv.DictReader(io.StringIO(sample))
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
        # Sort the names so the message is identical from run to run;
        # joining the set directly lists them in arbitrary order.
        sys.exit(
            f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(sorted(required_cols))}. Please use an unmodified statement direct from the institution."
        )
 

	
 

	
 
def standardize_amex_record(row: Dict, line: int) -> Dict:
    """Build the standard transaction dict for one AMEX CSV row.

    The statement amount is negated, the check id is always empty, and
    ``line`` is stored unchanged.
    """
    # NOTE: Statement doesn't seem to give us a running balance or a final total.
    posted_on = datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date()
    signed_amount = -1 * parse_amount(row['Amount'])
    # Descriptions have too much noise, so taking just the start
    # significantly assists the fuzzy matching.
    short_payee = remove_payee_junk(row['Description'] or '')[:20]
    return {
        'date': posted_on,
        'amount': signed_amount,
        'payee': short_payee,
        'check_id': '',
        'line': line,
    }
 

	
 

	
 
def read_amex_csv(f: TextIO) -> list:
    """Read an AMEX CSV from ``f`` and return its standardized records, sorted."""
    # Rows are counted from 2 instead of using reader.line_num, which is the
    # source line number, not the spreadsheet row number, due to multi-line
    # records.
    numbered_rows = enumerate(csv.DictReader(f), 2)
    records = [
        standardize_amex_record(entry, number) for number, entry in numbered_rows
    ]
    return sort_records(records)
 

	
 

	
 
def validate_fr_csv(sample: str) -> None:
    """Exit with an explanatory message unless ``sample`` looks like a First Republic CSV.

    The second row of ``sample`` must have exactly 6 columns, a
    ``%m/%d/%Y`` date in column 2 and a ``$`` in both columns 5 and 6.

    Raises:
        SystemExit: if the sample has fewer than two rows, or the second
            row does not have that shape.
    """
    # No column headers in FR statements
    reader = csv.reader(io.StringIO(sample))
    next(reader, None)  # First row is previous statement ending balance
    row = next(reader, [])
    # Check the column count before indexing, so that a short or missing row
    # produces the message below rather than an IndexError/StopIteration.
    looks_valid = len(row) == 6 and '$' in row[4] and '$' in row[5]
    if looks_valid:
        try:
            datetime.datetime.strptime(row[1], '%m/%d/%Y')
        except ValueError:
            looks_valid = False
    if not looks_valid:
        sys.exit(
            "This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution."
        )
 

	
 

	
 
def standardize_fr_record(line: int, row: List[str]) -> Dict:
    """Turn a First Republic CSV row into a standard dict format representing a transaction.

    Args:
        line: position of the row, stored unchanged under ``'line'``.
        row: the CSV fields. Index 1 holds the date (``%m/%d/%Y``), index 2
            the transaction label, index 3 the description and index 4 the
            amount.

    Returns:
        A dict with ``date``, ``amount``, ``payee``, ``check_id`` and
        ``line`` keys.
    """
    record = {
        'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(),
        'amount': parse_amount(row[4]),
        # Only the first 20 characters of the cleaned description are kept.
        'payee': remove_payee_junk(row[3] or '')[:20],
        # A label containing 'CHECK  ' yields the label with that text
        # removed; any other label yields no check id.
        'check_id': row[2].replace('CHECK  ', '') if 'CHECK  ' in row[2] else '',
        'line': line,
    }
    return record
 

	
 

	
 
def read_fr_csv(f: TextIO) -> list:
    """Read a First Republic CSV from ``f`` and return its standardized records, sorted.

    Rows that do not have exactly 6 columns, and rows whose third column is
    'LAST STATEMENT' or 'THIS STATEMENT', are skipped.
    """
    reader = csv.reader(f)
    # The reader.line_num is the source line number, not the spreadsheet row
    # number due to multi-line records.
    # Build a real list rather than a generator: sort_records is annotated
    # as taking a List.
    return sort_records(
        [
            standardize_fr_record(i, row)
            for i, row in enumerate(reader, 1)
            if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'}
        ]
    )
 

	
 

	
 
def validate_chase_csv(sample: str) -> None:
    """Exit with an explanatory message if ``sample`` lacks the expected Chase columns.

    ``sample`` is CSV text whose first row is read as the header. An empty
    sample (no header row at all) passes silently.

    Raises:
        SystemExit: if a header row is present but any required column is
            missing from it.
    """
    required_cols = {'Date', 'Description', 'Account', 'Transaction Type', 'Amount'}
    reader = csv.DictReader(io.StringIO(sample))
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
        # Sort the names so the message is identical from run to run;
        # joining the set directly lists them in arbitrary order.
        sys.exit(
            f"This Chase CSV doesn't seem to have the columns we're expecting, including: {', '.join(sorted(required_cols))}. Please use an unmodified statement direct from the institution."
        )
 

	
 

	
 
def standardize_chase_record(row: Dict, line: int) -> Dict:
    """Turn a Chase CSV row into a standard dict format representing a transaction.

    The date is parsed as ``%m/%d/%y`` (two-digit year), the amount is
    negated, the check id is always empty, and ``line`` is stored unchanged.
    """
    return {
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%y').date(),
        'amount': -1 * parse_amount(row['Amount']),
        # Descriptions have quite a lot of information, but the format is a little
        # idiosyncratic. We'll need to see more examples before coming up with any ways
        # to handle it in code. Others have used regular expressions to match the
        # various transaction types:
        # https://github.com/mtlynch/beancount-chase-bank/blob/master/beancount_chase/checking.py
        # See also: https://awesome-beancount.com/
        # str.replace() needs both the old and the new text; calling it with
        # only the old text raises TypeError.
        'payee': (row['Description'] or '').replace('ORIG CO NAME:', '')[:20],
        'check_id': '',
        'line': line,
    }
 

	
 

	
 
def read_chase_csv(f: TextIO) -> list:
    """Read a Chase CSV from ``f`` and return its standardized records, sorted."""
    # Rows are counted from 2 instead of using reader.line_num, which is the
    # source line number, not the spreadsheet row number, due to multi-line
    # records.
    numbered_rows = enumerate(csv.DictReader(f), 2)
    records = [
        standardize_chase_record(entry, number) for number, entry in numbered_rows
    ]
    return sort_records(records)
 

	
 

	
 
def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
    """Build the standard transaction dict from one Beancount query result row."""
    record = {
        'date': row.date,
        'amount': row.number_cost_position,
    }
    # Payee, entity and narration are joined with single spaces (missing
    # values become empty strings) and cleaned as one string.
    combined_names = f'{row.payee or ""} {row.entity or ""} {row.narration or ""}'
    record['payee'] = remove_payee_junk(combined_names)
    record['check_id'] = str(row.check_id or '')
    record['filename'] = row.filename
    record['line'] = row.line
    record['bank_statement'] = row.bank_statement
    return record
 

	
 

	
 
def format_record(record: dict) -> str:
    """Render one record as a single output cell, padded to 59 characters.

    The cell always starts with the ISO date and the amount. It then shows
    the payee and check id when both are set (payee cut to 25 characters),
    the payee alone when there is no check id (cut to 35 characters), or the
    check id alone when there is no payee.
    """
    payee = record['payee']
    if not payee:
        return f"{record['date'].isoformat()}: {record['amount']:11,.2f} #{record['check_id']}".ljust(59)
    if record['check_id']:
        return f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
    return f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:35]}".ljust(59)
 

	
 

	
 
def format_multirecord(r1s: List[dict], r2s: List[dict], note: str) -> List[list]:
    """Build output rows for one statement transaction matched to several books transactions.

    Each row is a ``[date, text]`` pair dated with the statement
    transaction's date. The first row pairs the statement transaction with
    the first books transaction; every further books transaction gets a
    continuation row.
    """
    assert len(r1s) == 1
    assert len(r2s) > 1
    statement = r1s[0]
    first_book, *other_books = r2s
    rows = [
        [
            statement['date'],
            f'{format_record(statement)}  →  {format_record(first_book)}  ✓ Matched{note}',
        ]
    ]
    rows.extend(
        [
            statement['date'],
            f'{statement["date"].isoformat()}:             ↳                                    →  {format_record(book)}  ✓ Matched{note}',
        ]
        for book in other_books
    )
    return rows
 

	
 

	
 
def _start_of_month(time, offset_months=0):
    """Return ``time`` moved to day 1 of its month, ``offset_months`` months later.

    Offsets of zero or less leave the month unchanged.
    """
    shifted = time
    remaining = offset_months
    while remaining > 0:
        # Day 28 exists in every month, and 4 days past it always lands in
        # the following month.
        shifted = shifted.replace(day=28) + datetime.timedelta(days=4)
        remaining -= 1
    return shifted.replace(day=1)
 

	
 

	
 
def round_to_month(begin_date, end_date):
    """Widen a date range to whole months.

    Returns a ``(first, last)`` tuple: the first day of ``begin_date``'s
    month and the last day of ``end_date``'s month.
    """
    first_day = _start_of_month(begin_date)
    # The last day of a month is one day before the start of the next month.
    following_month = _start_of_month(end_date, offset_months=1)
    last_day = following_month - datetime.timedelta(days=1)
    return (first_day, last_day)
 

	
 

	
 
def sort_records(records: List) -> List:
    """Return a new list of ``records`` ordered by date, then by amount."""
    ordered = list(records)
    ordered.sort(key=lambda record: (record['date'], record['amount']))
    return ordered
 

	
 

	
 
def first_word_exact_match(a: str, b: str) -> float:
 
    """Score a payee match based first word.
 

	
 
    We get a whole lot of good matches this way. Helps in the
 
    situation where the first word or two of a transaction description
 
    is useful and the rest is garbage.
 

	
 
    """
 
    if len(a) == 0 or len(b) == 0:
 
        return 0.0
 
    first_a = a.split()[0].strip()
 
    first_b = b.split()[0].strip()
 
    if first_a.casefold() == first_b.casefold():
 
        return min(1.0, 0.2 * len(first_a))
setup.cfg
Show inline comments
 
[metadata]
 
name = conservancy_beancount
 
version = 1.20.1
 
author = Software Freedom Conservancy
 
author_email = info@sfconservancy.org
 
description = Plugin, library, and reports for reading Conservancy’s books
 
license = AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 
license_files =
 
  LICENSE.txt
 
  AGPLv3.txt
 
long_description = file: README.rst
 
long_description_content_type = text/x-rst; charset=UTF-8
 
project_urls =
 
  Source = %(url)s
 
url = https://k.sfconservancy.org/NPO-Accounting/conservancy_beancount
 

	
 
[bdist_wheel]
 
universal = 1
 

	
 
[mypy]
 
disallow_any_unimported = False
 
disallow_untyped_calls = False
 
disallow_untyped_defs = True
 
show_error_codes = True
 
strict_equality = True
 
warn_redundant_casts = True
 
warn_return_any = True
 
warn_unreachable = True
 
warn_unused_configs = True
 

	
 
[options]
 
include_package_data = True
 
install_requires =
 
  babel>=2.6
 
  beancount>=2.2,<3.0.0
 
  colorama
 
  GitPython>=2.0
 
  odfpy>=1.4.0,!=1.4.1
 
  pdfminer.six>=20200101
 
  python-dateutil>=2.7
 
  PyYAML>=3.0
 
  regex
 
  rt>=2.0,<3.0
 
  thefuzz
 
packages = find:
 
python_requires = >=3.6
 

	
 
[options.extras_require]
 
test =
 
  mypy>=0.770
 
  pytest>=3.0
 
  pytest-mypy
 
  types-requests
 
  types-python-dateutil
 
  types-setuptools
 
  types-PyYAML
 

	
 
[options.entry_points]
 
console_scripts =
 
  accrual-report = conservancy_beancount.reports.accrual:entry_point
 
  assemble-audit-reports = conservancy_beancount.tools.audit_report:entry_point
 
  balance-sheet-report = conservancy_beancount.reports.balance_sheet:entry_point
 
  budget-report = conservancy_beancount.reports.budget:entry_point
 
  bean-sort = conservancy_beancount.tools.sort_entries:entry_point
 
  extract-odf-links = conservancy_beancount.tools.extract_odf_links:entry_point
 
  fund-report = conservancy_beancount.reports.fund:entry_point
 
  ledger-report = conservancy_beancount.reports.ledger:entry_point
 
  opening-balances = conservancy_beancount.tools.opening_balances:entry_point
 
  pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point
 
  pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point
 
  pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point
 
  query-report = conservancy_beancount.reports.query:entry_point
 
  reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point
 
  reconcile-statement = conservancy_beancount.reconcile.statement:entry_point
 
  split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point
 
  statement-reconciler = conservancy_beancount.reconcile.statement_reconciler:entry_point
 
  reconcile-helper = conservancy_beancount.reconcile.helper:entry_point
 

	
 
[options.package_data]
 
* = py.typed
 

	
 
[options.packages.find]
 
exclude =
 
  tests
 

	
 
[testenv]
 
deps =
 
  mypy>=0.770
 
  pytest>=3.0
 
  pytest-mypy
 
  types-requests
 
  types-python-dateutil
 
  types-setuptools
 
  types-PyYAML
 

	
 
# Beancount includes type declarations but not the `py.typed` flag file mypy
 
# is looking for to know that. Create it ourselves.
 
commands_pre = python -c 'import beancount, pathlib; pathlib.Path(beancount.__file__).with_name("py.typed").touch()'
 

	
0 comments (0 inline, 0 general)