From 5a8da108b98363e902ccd1452be8966e5ffc3c34 2024-07-19 05:57:07
From: Ben Sturmfels <ben@sturm.com.au>
Date: 2024-07-19 05:57:07
Subject: [PATCH] statement_reconciler: Add initial Chase bank CSV statement matching

We currently don't have many examples to work with, so we haven't done any
significant testing of the matching accuracy between statement and books.

---

diff --git a/conservancy_beancount/reconcile/statement_reconciler.py b/conservancy_beancount/reconcile/statement_reconciler.py
index 0aba6b16c44ab13a5a3a7d7ed82d497ef04b8b5d..87865cc1741f0bb3284f8dbf9d9d2025d6e064bc 100644
--- a/conservancy_beancount/reconcile/statement_reconciler.py
+++ b/conservancy_beancount/reconcile/statement_reconciler.py
@@ -270,6 +270,41 @@ def read_fr_csv(f: TextIO) -> list:
     )
 
 
+def validate_chase_csv(sample: str) -> None:
+    """Exit with an error message if the sample's header lacks the Chase columns."""
+    required_cols = {'Date', 'Description', 'Account', 'Transaction Type', 'Amount'}
+    header = csv.DictReader(io.StringIO(sample)).fieldnames
+    if header and not required_cols.issubset(header):
+        message = f"This Chase CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution."
+        sys.exit(message)
+
+
+def standardize_chase_record(row: Dict, line: int) -> Dict:
+    """Turn a Chase CSV row into a standard dict format representing a transaction."""
+    return {
+        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%y').date(),
+        'amount': -1 * parse_amount(row['Amount']),
+        # Descriptions carry a lot of information in an idiosyncratic format. For now
+        # we only strip the 'ORIG CO NAME:' label and keep the first 20 characters;
+        # we need more examples before parsing further. Others have used regular
+        # expressions to match the various transaction types:
+        # https://github.com/mtlynch/beancount-chase-bank/blob/master/beancount_chase/checking.py
+        # See also: https://awesome-beancount.com/
+        'payee': (row['Description'] or '').replace('ORIG CO NAME:', '')[:20],
+        'check_id': '',
+        'line': line,
+    }
+
+
+def read_chase_csv(f: TextIO) -> list:
+    """Read a Chase CSV statement into standardized records, via sort_records."""
+    # Number rows ourselves, starting at 2 to follow the header row: reader.line_num
+    # counts source lines, which differs from the row number for multi-line records.
+    numbered_rows = enumerate(csv.DictReader(f), 2)
+    records = [standardize_chase_record(row, i) for i, row in numbered_rows]
+    return sort_records(records)
+
+
 def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
     """Turn a Beancount query result row into a standard dict representing a transaction."""
     return {
@@ -784,9 +819,14 @@ def main(
     if 'AMEX' in args.account:
         validate_csv = validate_amex_csv
         read_csv = read_amex_csv
-    else:
+    elif 'FR' in args.account:
         validate_csv = validate_fr_csv
         read_csv = read_fr_csv
+    elif 'Chase' in args.account:
+        validate_csv = validate_chase_csv
+        read_csv = read_chase_csv
+    else:
+        sys.exit("This account provided doesn't match one of AMEX, FR or Chase.")
 
     with open(args.csv_statement) as f:
         sample = f.read(200)
diff --git a/setup.cfg b/setup.cfg
index af601f1bc447d2eabe2c9997fcaa877aec3e2b53..4005042aafe5197e917d95f9ecea09f44fb1ec3e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = conservancy_beancount
-version = 1.19.8
+version = 1.20.0
 author = Software Freedom Conservancy
 author_email = info@sfconservancy.org
 description = Plugin, library, and reports for reading Conservancy’s books