NPO-Accounting/conservancy_beancount Changeset - 0968f7f051b8

Changeset - 0968f7f051b8

Parent rev.

Child rev.

[Not reviewed]

0 2 0

Ben Sturmfels (bsturmfels) - 17 months ago 2023-01-13 02:58:36
ben@sturm.com.au

reconciler: Handle comma separators in some FR statements

2 files changed with 54 insertions and 8 deletions:

conservancy_beancount/reconcile/statement_reconciler.py

tests/test_reconcile.py

0 comments (0 inline, 0 general)

conservancy_beancount/reconcile/statement_reconciler.py

➞

Show inline comments

@@ ... / @@ -173,80 +173,85 @@ def remove_payee_junk(payee: str) -> str: @@
     for r in JUNK_WORDS_RES:
         payee = r.sub('', payee)
     payee = ZERO_RE.sub('', payee)
     payee = payee.replace(' - ', ' ')
     payee = re.sub(r'\.0\.\d+', ' ', payee)
     payee = payee.replace('.0', ' ')
     payee = payee.replace('/', ' ')
     payee = re.sub(re.escape('.com'), ' ', payee, flags=re.IGNORECASE)
     payee = re.sub(re.escape('.net'), ' ', payee, flags=re.IGNORECASE)
     payee = payee.replace('*', ' ')
     payee = ' '.join([i for i in payee.split(' ') if len(i) > 2])
     payee = payee.replace('-', ' ')
     payee = remove_duplicate_words(payee)
     payee.strip()
     return payee
 def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
     reader = csv.DictReader(f)
     # The reader.line_num is the source line number, not the spreadsheet row
     # number due to multi-line records.
     return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 def parse_amount(amount: str) -> decimal.Decimal:
     """Parse amounts and handle comma separators as seen in some FR statements."""
     return decimal.Decimal(amount.replace(',', ''))
 def validate_amex_csv(sample: str, account: str) -> None:
     required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
     reader = csv.DictReader(io.StringIO(sample))
     if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
         sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 def standardize_amex_record(row: Dict, line: int) -> Dict:
     """Turn an AMEX CSV row into a standard dict format representing a transaction."""
     # NOTE: Statement doesn't seem to give us a running balance or a final total.
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
-        'amount': -1 * decimal.Decimal(row['Amount']),
+        'amount': -1 * parse_amount(row['Amount']),
         # Descriptions have too much noise, so taking just the start
         # significantly assists the fuzzy matching.
         'payee': remove_payee_junk(row['Description'] or '')[:20],
         'check_id': '',
         'line': line,
+    }
 def validate_fr_csv(sample: str, account: str) -> None:
     required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
     reader = csv.DictReader(io.StringIO(sample))
     if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
         sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 def standardize_fr_record(row: Dict, line: int) -> Dict:
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
-        'amount': decimal.Decimal(row['Amount']),
+        'amount': parse_amount(row['Amount']),
         'payee': remove_payee_junk(row['Detail'] or '')[:20],
         'check_id': row['Serial Num'].lstrip('0'),
         'line': line,
+    }
 def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
     """Turn a Beancount query result row into a standard dict representing a transaction."""
     return {
         'date': row.date,
         'amount': row.number_cost_position,
         'payee': remove_payee_junk(f'{row.payee or ""} {row.entity or ""} {row.narration or ""}'),
         'check_id': str(row.check_id or ''),
         'filename': row.filename,
         'line': row.line,
         'bank_statement': row.bank_statement,
+    }
 def format_record(record: dict) -> str:
     """Generate output lines for a standard 1:1 match."""
     if record['payee'] and record['check_id']:
         output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
     elif record['payee']:

tests/test_reconcile.py

➞

Show inline comments

 import datetime
 import decimal
 import os
 import tempfile
 import textwrap
 from conservancy_beancount.reconcile.statement_reconciler import (
     match_statement_and_books,
     remove_payee_junk,
     date_proximity,
     remove_duplicate_words,
     payee_match,
     match_statement_and_books,
     metadata_for_match,
     write_metadata_to_books,
     totals,
     payee_match,
     remove_duplicate_words,
     remove_payee_junk,
     standardize_amex_record,
     standardize_fr_record,
     subset_match,
     totals,
     write_metadata_to_books,
+)
 # These data structures represent individual transactions as taken from the
 # statement ("S") or the books ("B").
 # Statement transaction examples.
 S1 = {
     'date': datetime.date(2022, 1, 1),
     'amount': decimal.Decimal('10.00'),
     'payee': 'Patreon         / Patreon   / 123456/ ST-A1B2C3D4G5H6       /',
     'check_id': '',
     'line': 222,
+}
 S2 = {
     'date': datetime.date(2022, 1, 2),
     'amount': decimal.Decimal('20.00'),
     'payee': 'BT*LINODE           PHILADELPHIA        P',
     'check_id': '',
     'line': 333,
+}
 S3 = {
     'date': datetime.date(2022, 1, 3),
     'amount': decimal.Decimal('30.00'),
     'payee': 'USPS PO 4067540039 0PORTLAND            OR',
@@ ... / @@ -320,24 +322,63 @@ def test_subset_sum_match(): @@
     assert subset_match(statement, books) == (
         [([S4], [B4A, B4B, B4C], [])],
         [],  # No remaining statement trans.
         [],  # No remaining books trans.
+    )
 def test_subset_passes_through_all_non_matches():
     """This was used to locate a bug where some of the non-matches had
     gone missing due to mutation of books_trans."""
     statement_trans = [
         S1,  # No match
         S4,  # Match
+    ]
     books_trans = [
         B2,  # No match
         B4A, B4B, B4C,  # Match
         B3_next_day, B3_next_week,  # No match
+    ]
     assert subset_match(statement_trans, books_trans) == (
         [([S4], [B4A, B4B, B4C], [])],  # Matched
         [S1],  # No match: preserved intact
         [B2, B3_next_day, B3_next_week]  # No match: preserved intact
+    )
 def test_handles_fr_record_with_comma_separators():
     # CSV would look something like:
+    #
     # "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
     # "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
     input_row = {
         'Date': '02/07/2022',
         'Amount': '10,000.00',
         'Detail': 'XXXX',
         'Serial Num': '',
+    }
     expected = {
         'date': datetime.date(2022, 2, 7),
         'amount': decimal.Decimal('10000'),
         'payee': 'XXXX',
         'check_id': '',
         'line': 1,
+    }
     assert standardize_fr_record(input_row, line=1) == expected
 def test_handles_amex_record_with_comma_separators():
     # This insn't typically a problem with AMEX, but adding for completeness.
     input_row = {
         'Date': '02/07/2022',
         'Amount': '-10,000.00',  # Amounts are from Bank's perspective/negated.
         'Description': 'XXXX',
         'Serial Num': '',
+    }
     expected = {
         'date': datetime.date(2022, 2, 7),
         'amount': decimal.Decimal('10000'),
         'payee': 'XXXX',
         'check_id': '',
         'line': 1,
+    }
     assert standardize_amex_record(input_row, line=1) == expected

0 comments (0 inline, 0 general)