NPO-Accounting/conservancy_beancount Changeset - a4bba120eb37

Changeset - a4bba120eb37

Parent rev.

Child rev.

[Not reviewed]

0 1 0

Ben Sturmfels (bsturmfels) - 16 months ago 2023-02-11 03:53:30
ben@sturm.com.au

reconciler: Move match thresholds to top of module

1 file changed with 9 insertions and 6 deletions:

conservancy_beancount/reconcile/statement_reconciler.py

0 comments (0 inline, 0 general)

conservancy_beancount/reconcile/statement_reconciler.py

➞

Show inline comments

@@ ... / @@ -70,39 +70,40 @@ Other related problems we're not dealing with here: @@
  - after updates to the books files, beancount must be restarted to reflect
    updates
  - updates also invalidate the cache meaning restart takes several minutes
  - balance checks are manually updated in
    svn/Financial/Ledger/sanity-check-balances.yaml
  - transactions are entered manually and reconciled after the fact, but
    importing from statements may be useful in some cases
 Current issue:
  - entry_point seems to swallow errors, meaning you get a fairly unhelpful
    message if there's an unhandled error
 Future possibilities:
  - allow the reconciler to respect manually-applied metadata - not clear how
    this would work exactly
  - allow interactive matching where the user can specify a match
  - consider combining this with helper.py into one more complete tool that both
    reconciles and summarises the unreconciled transactions
 """
 # TODO:
 #  - entry_point seems to swallow errors
 #  - extract the magic numbers
 import argparse
 import collections
 import copy
 import csv
 import datetime
 import decimal
 import io
 import itertools
 import logging
 import os
 import re
 import sys
@@ ... / @@ -141,24 +142,26 @@ JUNK_WORDS = [ @@
     'wire',
     'credit',
     "int'l",
     "in.l",
     'llc',
     'online',
     'donation',
     'usd',
     'inc',
+]
 # Each entry of JUNK_WORDS is compiled as a case-insensitive regular
 # expression (so entries are treated as patterns, not literal strings).
 JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS]
 # Matches a run of one or more zeros at the start of a string.
 ZERO_RE = re.compile('^0+')
 # A payee score strictly above this value produces no payee message.
 FULL_MATCH_THRESHOLD = 0.8
 # A payee score strictly above this value (but not above
 # FULL_MATCH_THRESHOLD) is reported as 'partial payee match'; anything at or
 # below it is reported as 'payee mismatch'.
 PARTIAL_MATCH_THRESHOLD = 0.4
 def remove_duplicate_words(text: str) -> str:
     """Drop repeated words from text, keeping each word's first occurrence.

     Words are the whitespace-separated tokens of ``text``. Two tokens count
     as the same word when their lower-cased forms are equal; the spelling of
     the first occurrence is the one retained. The surviving words are
     returned joined by single spaces, in their original order.
     """
     seen = set()
     kept = []
     for token in text.split():
         folded = token.lower()
         if folded in seen:
             # Already emitted this word (ignoring case); skip the repeat.
             continue
         seen.add(folded)
         kept.append(token)
     return ' '.join(kept)
@@ ... @@
     # Sometimes we get unrelated numbers in the statement column with check-ids,
     # so we can't match based on the existence of a statement check-id.
     if r2['check_id']:
         payee_score = 0.0
         if r1['check_id'] and r2['check_id'] and r1['check_id'] == r2['check_id']:
             check_score = 1.0
         else:
             check_message = 'check-id mismatch'
             check_score = 0.0
     else:
         check_score = 0.0
         payee_score = payee_match(r1['payee'], r2['payee'])
-        if payee_score > 0.8:
+        if payee_score > FULL_MATCH_THRESHOLD:
             payee_message = ''
-        elif payee_score > 0.4:
+        elif payee_score > PARTIAL_MATCH_THRESHOLD:
             payee_message = 'partial payee match'
         else:
             payee_message = 'payee mismatch'
     overall_score = (date_score + amount_score + check_score + payee_score) / 4
     overall_message = [m for m in [date_message, amount_message, check_message, payee_message] if m]
     return overall_score, overall_message
 def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
     """Match transactions between the statement and books.

0 comments (0 inline, 0 general)