NPO-Accounting/import2ledger Changeset - b33c83af0a0f

Changeset - b33c83af0a0f

Parent rev.

Child rev.

[Not reviewed]

0 2 0

Brett Smith - 6 years ago 2018-06-20 19:59:56
brettcsmith@brettcsmith.org

importers: Catch and handle CSV parse errors in can_import.

import2ledger bailed on one of these exceptions when trying to import
a recent XLS file that had a null byte on a line.

2 files changed with 7 insertions and 3 deletions:

import2ledger/importers/_csv.py

setup.py

0 comments (0 inline, 0 general)

import2ledger/importers/_csv.py

➞

Show inline comments

@@ ... / @@ -18,65 +18,69 @@ class CSVImporterBase: @@
       will be copied directly to the entry data dict before _read_row is called.
       Fields named here must exist in the CSV for it to be imported.
     * _read_header(cls, input_file): Some CSVs include "headers" with smaller
       rows before they get to the "real" data.  This classmethod is expected to
       read those rows and return two values: a dict of entry data read from
       the headers, and a list of column names for the real data.  The method
       is expected to leave input_file at the position where the real data
       starts, so callers can run ``csv.DictReader(input_file, column_names)``
       after.
       The default implementation reads rows until it finds one long enough to
       include all of the columns required by NEEDED_FIELDS and COPIED_FIELDS,
       then returns ({}, that_row).
     * _read_header_row(cls, row): A classmethod that returns either a dict,
       or None.  The default implementation of _read_header calls this method
       on each row.  If it returns a dict, those keys and values will be
       included in the entry data returned by _read_header.  If it returns
       None, _read_header expects this is the row with column names for the
       real data, and uses it in its return value.
     * Reader: A class that accepts the input source and iterates over rows of
       formatted data.  Default csv.reader.
     * DictReader: A class that accepts the input source and iterates over rows
       of data organized into dictionaries.  Default csv.DictReader.
     """
     ENTRY_SEED = {}
     COPIED_FIELDS = {}
     Reader = csv.reader
     DictReader = csv.DictReader
     @classmethod
     def _read_header_row(cls, row):
         return {} if len(row) < cls._HEADER_MAX_LEN else None
     @classmethod
     def _read_header(cls, input_file):
         cls._NEEDED_KEYS = cls.NEEDED_FIELDS.union(cls.COPIED_FIELDS)
         cls._HEADER_MAX_LEN = len(cls._NEEDED_KEYS)
         header = {}
         row = None
         for row in cls.Reader(input_file):
             row_data = cls._read_header_row(row)
             if row_data is None:
                 break
             else:
                 header.update(row_data)
         return header, row
     @classmethod
     def can_import(cls, input_file):
         _, fields = cls._read_header(input_file)
         return cls._NEEDED_KEYS.issubset(fields or ())
         try:
             _, fields = cls._read_header(input_file)
         except csv.Error:
             return False
         else:
             return cls._NEEDED_KEYS.issubset(fields or ())
     def __init__(self, input_file):
         self.entry_seed, fields = self._read_header(input_file)
         self.in_csv = self.DictReader(input_file, fields)
     def __iter__(self):
         for row in self.in_csv:
             row_data = self._read_row(row)
             if row_data is not None:
                 copied_fields = {
                     entry_key: row[row_key]
                     for row_key, entry_key in self.COPIED_FIELDS.items()
+                }
                 yield collections.ChainMap(
                     row_data, copied_fields, self.entry_seed, self.ENTRY_SEED)

setup.py

➞

Show inline comments

 #!/usr/bin/env python3
 # setuptools packaging script for import2ledger.
 import sys
 from setuptools import setup, find_packages
 # Dependency-related keyword arguments, passed through to setup() below.
 REQUIREMENTS = {
     'install_requires': [
         'babel',
         'enum34;python_version<"3.4"',
     ],
     'setup_requires': ['pytest-runner'],
     # Optional dependency groups, keyed by extra name.
     'extras_require': {
         'brightfunds': ['xlrd'],
         'nbpy2017': ['beautifulsoup4', 'html5lib'],
     },
 }
 # Flatten the requirements of every extra defined above.  This runs before
 # 'all_importers' is added, so the list never includes itself.
 all_extras_require = [
     req for reqlist in REQUIREMENTS['extras_require'].values() for req in reqlist
 ]
 REQUIREMENTS['extras_require']['all_importers'] = all_extras_require
 # The test requirements include every optional dependency.
 REQUIREMENTS['tests_require'] = [
     'pytest',
     'PyYAML',
     *all_extras_require,
 ]
 setup(
     name='import2ledger',
     description="Import different sources of financial data to Ledger",
     version='0.4',
     author='Brett Smith',
     author_email='brettcsmith@brettcsmith.org',
     license='GNU AGPLv3+',
     packages=find_packages(include=['import2ledger', 'import2ledger.*']),
     entry_points={
         'console_scripts': ['import2ledger = import2ledger.__main__:main'],
     },
     **REQUIREMENTS,
 )

0 comments (0 inline, 0 general)