Changeset - 86f4232df19e
[Not reviewed]
0 1 0
Ben Sturmfels (bsturmfels) - 16 months ago 2023-02-11 07:30:22
ben@sturm.com.au
reconciler: Move other score thresholds to constants
1 file changed with 14 insertions and 10 deletions:
0 comments (0 inline, 0 general)
conservancy_beancount/reconcile/statement_reconciler.py
Show inline comments
...
 
@@ -152,8 +152,10 @@ JUNK_WORDS = [
 
]
 
JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS]
 
ZERO_RE = re.compile('^0+')
 
-FULL_MATCH_THRESHOLD = 0.8
 
-PARTIAL_MATCH_THRESHOLD = 0.4
 
+PAYEE_FULL_MATCH_THRESHOLD = 0.8
 
+PAYEE_PARTIAL_MATCH_THRESHOLD = 0.4
 
+OVERALL_EXCELLENT_MATCH_THRESHOLD = 0.8  # Clear winner
 
+OVERALL_ACCEPTABLE_MATCH_THRESHOLD = 0.5  # Acceptable if only one match found
 

	
 

	
 
def remove_duplicate_words(text: str) -> str:
...
 
@@ -392,9 +394,9 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]:
 
    else:
 
        check_score = 0.0
 
        payee_score = payee_match(r1['payee'], r2['payee'])
 
-        if payee_score > FULL_MATCH_THRESHOLD:
 
+        if payee_score > PAYEE_FULL_MATCH_THRESHOLD:
 
            payee_message = ''
 
-        elif payee_score > PARTIAL_MATCH_THRESHOLD:
 
+        elif payee_score > PAYEE_PARTIAL_MATCH_THRESHOLD:
 
            payee_message = 'partial payee match'
 
        else:
 
            payee_message = 'payee mismatch'
...
 
@@ -435,16 +437,16 @@ def match_statement_and_books(
 
        matches_found = 0
 
        for i, r2 in enumerate(books_trans):
 
            score, note = records_match(r1, r2)
 
-            if score >= 0.5 and score >= best_match_score:
 
+            if score >= OVERALL_ACCEPTABLE_MATCH_THRESHOLD and score >= best_match_score:
 
                matches_found += 1
 
                best_match_score = score
 
                best_match_index = i
 
                best_match_note = note
 
        if (
 
-            best_match_score > 0.5
 
+            best_match_score > OVERALL_ACCEPTABLE_MATCH_THRESHOLD
 
            and matches_found == 1
 
            and 'check-id mismatch' not in best_match_note
 
-            or best_match_score > 0.8
 
+            or best_match_score > OVERALL_EXCELLENT_MATCH_THRESHOLD
 
        ):
 
            matches.append(([r1], [books_trans[best_match_index]], best_match_note))
 
            # Don't try to make a second match against this books entry.
...
 
@@ -484,16 +486,16 @@ def subset_match(
 
        r2['amount'] = total
 
        for i, r1 in enumerate(statement_trans):
 
            score, note = records_match(r1, r2)
 
-            if score >= 0.5 and score >= best_match_score:
 
+            if score >= OVERALL_ACCEPTABLE_MATCH_THRESHOLD and score >= best_match_score:
 
                matches_found += 1
 
                best_match_score = score
 
                best_match_index = i
 
                best_match_note = note
 
        if (
 
-            best_match_score > 0.5
 
+            best_match_score > OVERALL_ACCEPTABLE_MATCH_THRESHOLD
 
            and matches_found == 1
 
            and 'check-id mismatch' not in best_match_note
 
-            or best_match_score > 0.8
 
+            or best_match_score > OVERALL_EXCELLENT_MATCH_THRESHOLD
 
        ):
 
            matches.append(
 
                ([statement_trans[best_match_index]], group_items, best_match_note)
...
 
@@ -795,6 +797,8 @@ def main(
 
        statement_trans = read_csv(f)
 

	
 
    # Dates are taken from the beginning/end of the statement.
 
    # TODO: FR statements include the last day of previous statement and the
 
    # last day of this statement in the first/last rows.
 
    begin_date = statement_trans[0]['date']
 
    end_date = statement_trans[-1]['date']
 

	
0 comments (0 inline, 0 general)