NPO-Accounting/conservancy_beancount Changeset - 5a8da108b983

Changeset - 5a8da108b983

Parent rev.

Child rev.

[Not reviewed]

0 2 0

Ben Sturmfels (bsturmfels) - 2 months ago 2024-07-19 05:57:07
ben@sturm.com.au

statement_reconciler: Add initial Chase bank CSV statement matching

We currently don't have many examples to work with, so haven't done any
significant testing of the matching accuracy between statement and books.

2 files changed with 42 insertions and 2 deletions:

conservancy_beancount/reconcile/statement_reconciler.py

setup.cfg

0 comments (0 inline, 0 general)

conservancy_beancount/reconcile/statement_reconciler.py

➞

Show inline comments

@@ ... @@
 def read_amex_csv(f: TextIO) -> list:
     reader = csv.DictReader(f)
     # The reader.line_num is the source line number, not the spreadsheet row
     # number due to multi-line records.
     return sort_records(
         [standardize_amex_record(row, i) for i, row in enumerate(reader, 2)]
+    )
 def validate_fr_csv(sample: str) -> None:
     # No column headers in FR statements
     reader = csv.reader(io.StringIO(sample))
     next(reader)  # First row is previous statement ending balance
     row = next(reader)
     date = None
     try:
         date = datetime.datetime.strptime(row[1], '%m/%d/%Y')
     except ValueError:
         pass
     amount_found = '$' in row[4] and '$' in row[5]
     if len(row) != 6 or not date or not amount_found:
         sys.exit(
             "This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution."
+        )
 def standardize_fr_record(line, row):
     record = {
         'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(),
         'amount': parse_amount(row[4]),
         'payee': remove_payee_junk(row[3] or '')[:20],
         'check_id': row[2].replace('CHECK  ', '') if 'CHECK  ' in row[2] else '',
         'line': line,
+    }
     return record
 def read_fr_csv(f: TextIO) -> list:
     reader = csv.reader(f)
     # The reader.line_num is the source line number, not the spreadsheet row
     # number due to multi-line records.
     return sort_records(
         standardize_fr_record(i, row)
         for i, row in enumerate(reader, 1)
         if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'}
+    )
 def validate_chase_csv(sample: str) -> None:
     required_cols = {'Date', 'Description', 'Account', 'Transaction Type', 'Amount'}
     reader = csv.DictReader(io.StringIO(sample))
     if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
         sys.exit(
             f"This Chase CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution."
+        )
 def standardize_chase_record(row: Dict, line: int) -> Dict:
     """Turn an Chase CSV row into a standard dict format representing a transaction."""
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%y').date(),
         'amount': -1 * parse_amount(row['Amount']),
         # Descriptions have quite a lot of information, but the format is a little
         # idiosyncratic. We'll need to see more examples before coming up with any ways
         # to handle it in code. Others have used regular expressions to match the
         # various transaction types:
         # https://github.com/mtlynch/beancount-chase-bank/blob/master/beancount_chase/checking.py
         # See also: https://awesome-beancount.com/
         'payee': (row['Description'] or '').replace('ORIG CO NAME:')[:20],
         'check_id': '',
         'line': line,
+    }
 def read_chase_csv(f: TextIO) -> list:
     reader = csv.DictReader(f)
     # The reader.line_num is the source line number, not the spreadsheet row
     # number due to multi-line records.
     return sort_records(
         [standardize_chase_record(row, i) for i, row in enumerate(reader, 2)]
+    )
 def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
     """Turn a Beancount query result row into a standard dict representing a transaction."""
     return {
         'date': row.date,
         'amount': row.number_cost_position,
         'payee': remove_payee_junk(
             f'{row.payee or ""} {row.entity or ""} {row.narration or ""}'
         ),
         'check_id': str(row.check_id or ''),
         'filename': row.filename,
         'line': row.line,
         'bank_statement': row.bank_statement,
+    }
 def format_record(record: dict) -> str:
     """Generate output lines for a standard 1:1 match."""
     if record['payee'] and record['check_id']:
         output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
     elif record['payee']:
         output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:35]}".ljust(59)
     else:
         output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} #{record['check_id']}".ljust(59)
     return output
 def format_multirecord(r1s: List[dict], r2s: List[dict], note: str) -> List[list]:
     """Generates output lines for one statement:multiple books transaction match."""
     assert len(r1s) == 1
     assert len(r2s) > 1
     match_output = []
     match_output.append(
+        [
             r1s[0]['date'],
             f'{format_record(r1s[0])}  →  {format_record(r2s[0])}  ✓ Matched{note}',
+        ]
+    )
     for r2 in r2s[1:]:
         match_output.append(
+            [
                 r1s[0]['date'],
                 f'{r1s[0]["date"].isoformat()}:             ↳                                    →  {format_record(r2)}  ✓ Matched{note}',
+            ]
+        )
     return match_output
 def _start_of_month(time, offset_months=0):
@@ ... / @@ -739,99 +774,104 @@ def process_unmatched( @@
 def format_output(
     matches, begin_date, end_date, csv_statement, show_reconciled_matches
 ) -> str:
     with io.StringIO() as out:
         match_output = format_matches(matches, csv_statement, show_reconciled_matches)
         _, total_missing_from_books, total_missing_from_statement = totals(matches)
         print('-' * 155, file=out)
         statement_heading = f'Statement transactions {begin_date} to {end_date}'
         print(
             f'{statement_heading:<52}            {"Books transactions":<58}   Notes',
             file=out,
+        )
         print('-' * 155, file=out)
         for _, output in sorted(match_output, key=lambda x: x[0]):
             print(output, file=out)
         print('-' * 155, file=out)
         print(
             f'Sub-total not on statement: {total_missing_from_statement:12,.2f}',
             file=out,
+        )
         print(f'Sub-total not in books:     {total_missing_from_books:12,.2f}', file=out)
         print(
             f'Total:                      {total_missing_from_statement + total_missing_from_books:12,.2f}',
             file=out,
+        )
         print('-' * 155, file=out)
         return out.getvalue()
 def main(
     arglist: Optional[Sequence[str]] = None,
     stdout: TextIO = sys.stdout,
     stderr: TextIO = sys.stderr,
     config: Optional[configmod.Config] = None,
 ) -> int:
     args = parse_arguments(arglist)
     cliutil.set_loglevel(logger, args.loglevel)
     if config is None:
         config = configmod.Config()
         config.load_file()
     # Validate and normalise the statement into our standard
     # transaction data structure.
     if 'AMEX' in args.account:
         validate_csv = validate_amex_csv
         read_csv = read_amex_csv
-    else:
+    elif 'FR' in args.account:
         validate_csv = validate_fr_csv
         read_csv = read_fr_csv
     elif 'Chase' in args.account:
         validate_csv = validate_chase_csv
         read_csv = read_chase_csv
     else:
         sys.exit("This account provided doesn't match one of AMEX, FR or Chase.")
     with open(args.csv_statement) as f:
         sample = f.read(200)
         # Validate should return true/false and a message.
         validate_csv(sample)
         f.seek(0)
         # TODO: Needs a custom read_transactions_from_csv for each of AMEX and
         # FR since AMEX has a header row and FR doesn't.
         statement_trans = read_csv(f)
     # Dates are taken from the beginning/end of the statement.
     # TODO: FR statements include the last day of previous statement and the
     # last day of this statement in the first/last rows.
     begin_date = statement_trans[0]['date']
     end_date = statement_trans[-1]['date']
     if args.full_months:
         begin_date, end_date = round_to_month(begin_date, end_date)
     # Query for the Beancount books data for this above period.
+    #
     # There are pros and cons for using Beancount's in-memory entries
     # list directly and also for using Beancount Query Language (BQL)
     # to get a list of transactions? Using BQL because it's
     # convenient, but we don't have access to the full transaction
     # entry objects. Feels a bit strange that these approaches are so
     # disconnected.
+    #
     # beancount.query.query_compile.compile() and
     # beancount.query.query_execute.filter_entries() look useful in this respect,
     # but I'm not clear on how to use compile(). An example would help.
     entries, _, options = loader.load_file(args.beancount_file)
     # String concatenation looks bad, but there's no SQL injection possible here
     # because BQL can't write back to the Beancount files. I hope!
     query = f"""
         SELECT filename,
         META("lineno") AS line,
         META("bank-statement") AS bank_statement,
         date,
         number(cost(position)),
         payee,
         ENTRY_META("entity") as entity,
         ANY_META("check-id") as check_id,
         narration
         WHERE account = "{args.account}"
             AND date >= {begin_date}
             AND date <= {end_date}"""
     _, result_rows = run_query(entries, options, query)

setup.cfg

➞

Show inline comments

 [metadata]
 name = conservancy_beancount
-version = 1.19.8
+version = 1.20.0
 author = Software Freedom Conservancy
 author_email = info@sfconservancy.org
 description = Plugin, library, and reports for reading Conservancy’s books
 license = AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
 license_files =
   LICENSE.txt
   AGPLv3.txt
 long_description = file: README.rst
 long_description_content_type = text/x-rst; charset=UTF-8
 project_urls =
   Source = %(url)s
 url = https://k.sfconservancy.org/NPO-Accounting/conservancy_beancount
 [bdist_wheel]
 universal = 1
 [mypy]
 disallow_any_unimported = False
 disallow_untyped_calls = False
 disallow_untyped_defs = True
 show_error_codes = True
 strict_equality = True
 warn_redundant_casts = True
 warn_return_any = True
 warn_unreachable = True
 warn_unused_configs = True
 [options]
 include_package_data = True
 install_requires =
   babel>=2.6
   beancount>=2.2,<3.0.0
   colorama
   GitPython>=2.0
   odfpy>=1.4.0,!=1.4.1
   pdfminer.six>=20200101
   python-dateutil>=2.7
   PyYAML>=3.0
   regex
   rt>=2.0,<3.0
   thefuzz
 packages = find:
 python_requires = >=3.6
 [options.extras_require]
 test =
   mypy>=0.770
   pytest>=3.0

0 comments (0 inline, 0 general)