NPO-Accounting/import2ledger Changeset - 87f32091019e

Changeset - 87f32091019e

Parent rev.

Child rev.

[Not reviewed]

0 1 0

Brett Smith - 6 years ago 2017-12-19 17:43:59
brettcsmith@brettcsmith.org

strparse: Improve extra text parsing in currency_decimal.

This allows a symbol and a currency code to be in different parts of the
string, as long as there's at most one of each.

1 file changed with 19 insertions and 18 deletions:

import2ledger/strparse.py

0 comments (0 inline, 0 general)

import2ledger/strparse.py

➞

Show inline comments

@@ ... / @@ -6,8 +6,14 @@ import unicodedata @@
 import babel.numbers
+CURRENCY_SPEC_PATTERN = r'^{space}(?:|{symbol}{space}{code}|{code}{space}{symbol}){space}$'.format(
+    code=r'[A-Za-z]{,3}',
+    space=r'\s*',
+    symbol=r'(\W?)',
+)
 @functools.lru_cache()
-def _currency_pattern(locale):
+def _currency_amount_pattern(locale):
     minus = babel.numbers.get_minus_sign_symbol(locale)
     plus = babel.numbers.get_plus_sign_symbol(locale)
     dec_sym = babel.numbers.get_decimal_symbol(locale)
@@ ... / @@ -21,27 +27,22 @@ def _currency_pattern(locale): @@
 def currency_decimal(s, locale='en_US_POSIX'):
     try:
-        match = re.search(_currency_pattern(locale), s)
+        match = re.search(_currency_amount_pattern(locale), s)
     except TypeError:
         return decimal.Decimal(s)
     if not match:
         raise ValueError("no decimal found in {!r}".format(s))
-    # There may be extra symbols/text before the number, after the number,
-    # or between the number and its sign—but only in one of those places.
-    extra = None
-    for extra_s in [s[:match.start()], match.group(2), s[match.end():]]:
-        extra_s = extra_s.strip()
-        if extra and extra_s:
-            raise ValueError("too much extraneous text in {!r}".format(s))
-        extra = extra_s
-    # The only extra text allowed is currency specifiers like plain symbols,
-    # 'A$', 'US$', 'CAD', 'USD $', etc.
-    # Trim any currency symbol.
-    if extra and unicodedata.category(extra[-1]) == 'Sc':
-        extra = extra[:-1].strip()
-    # Anything remaining should look like currency specifier text.
-    if extra and ((len(extra) > 3) or (not extra.isalpha())):
-        raise ValueError("non-currency text in {!r}: {!r}".format(s, extra))
+    extra_s = ''.join([s[:match.start()], match.group(2), s[match.end():]])
+    # The only extra text allowed is currency specifiers:
+    # '€', 'A$', 'US$', 'CAD', '$USD', etc.
+    extra_match = re.match(CURRENCY_SPEC_PATTERN, extra_s)
+    if not extra_match:
+        extra_ok = False
+    else:
+        symbol = extra_match.group(1) or extra_match.group(2)
+        extra_ok = (not symbol) or (unicodedata.category(symbol) == 'Sc')
+    if not extra_ok:
+        raise ValueError("non-currency text in {!r}: {!r}".format(s, extra_s))
     return babel.numbers.parse_decimal(match.group(1) + match.group(3), locale)
 def date(date_s, date_fmt):

0 comments (0 inline, 0 general)