From 86f4232df19e779d2a49bb38afd410b319d5fc87 2023-02-11 07:30:22 From: Ben Sturmfels Date: 2023-02-11 07:30:22 Subject: [PATCH] reconciler: Move other score thresholds to constants --- diff --git a/conservancy_beancount/reconcile/statement_reconciler.py b/conservancy_beancount/reconcile/statement_reconciler.py index c874d4f12ebbaa42b6acd8c086d0ccc00ace46ed..bf83e56b68407a1f1921d4f94d77da327dc14968 100644 --- a/conservancy_beancount/reconcile/statement_reconciler.py +++ b/conservancy_beancount/reconcile/statement_reconciler.py @@ -152,8 +152,10 @@ JUNK_WORDS = [ ] JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS] ZERO_RE = re.compile('^0+') -FULL_MATCH_THRESHOLD = 0.8 -PARTIAL_MATCH_THRESHOLD = 0.4 +PAYEE_FULL_MATCH_THRESHOLD = 0.8 +PAYEE_PARTIAL_MATCH_THRESHOLD = 0.4 +OVERALL_EXCELLENT_MATCH_THRESHOLD = 0.8 # Clear winner +OVERALL_ACCEPTABLE_MATCH_THRESHOLD = 0.5 # Acceptable if only one match found def remove_duplicate_words(text: str) -> str: @@ -392,9 +394,9 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]: else: check_score = 0.0 payee_score = payee_match(r1['payee'], r2['payee']) - if payee_score > FULL_MATCH_THRESHOLD: + if payee_score > PAYEE_FULL_MATCH_THRESHOLD: payee_message = '' - elif payee_score > PARTIAL_MATCH_THRESHOLD: + elif payee_score > PAYEE_PARTIAL_MATCH_THRESHOLD: payee_message = 'partial payee match' else: payee_message = 'payee mismatch' @@ -435,16 +437,16 @@ def match_statement_and_books( matches_found = 0 for i, r2 in enumerate(books_trans): score, note = records_match(r1, r2) - if score >= 0.5 and score >= best_match_score: + if score >= OVERALL_ACCEPTABLE_MATCH_THRESHOLD and score >= best_match_score: matches_found += 1 best_match_score = score best_match_index = i best_match_note = note if ( - best_match_score > 0.5 + best_match_score > OVERALL_ACCEPTABLE_MATCH_THRESHOLD and matches_found == 1 and 'check-id mismatch' not in best_match_note - or best_match_score > 0.8 + or best_match_score > OVERALL_EXCELLENT_MATCH_THRESHOLD ): matches.append(([r1], [books_trans[best_match_index]], best_match_note)) # Don't try to make a second match against this books entry. @@ -484,16 +486,16 @@ def subset_match( r2['amount'] = total for i, r1 in enumerate(statement_trans): score, note = records_match(r1, r2) - if score >= 0.5 and score >= best_match_score: + if score >= OVERALL_ACCEPTABLE_MATCH_THRESHOLD and score >= best_match_score: matches_found += 1 best_match_score = score best_match_index = i best_match_note = note if ( - best_match_score > 0.5 + best_match_score > OVERALL_ACCEPTABLE_MATCH_THRESHOLD and matches_found == 1 and 'check-id mismatch' not in best_match_note - or best_match_score > 0.8 + or best_match_score > OVERALL_EXCELLENT_MATCH_THRESHOLD ): matches.append( ([statement_trans[best_match_index]], group_items, best_match_note) @@ -795,6 +797,8 @@ def main( statement_trans = read_csv(f) # Dates are taken from the beginning/end of the statement. + # TODO: FR statements include the last day of previous statement and the + # last day of this statement in the first/last rows. begin_date = statement_trans[0]['date'] end_date = statement_trans[-1]['date']