From ab8559c75bdbbfc497c01a0fc34a4e93cb17999a 2019-08-28 14:22:10 From: Brett Smith Date: 2019-08-28 14:22:10 Subject: [PATCH] csv: Support importing squared CSV spreadsheets. See the test comment for more rationale. --- diff --git a/import2ledger/importers/_csv.py b/import2ledger/importers/_csv.py index 325dcf5e0d97a42a4f1633463d61982c16cc94fd..3e5955b01e2e50e2e4c6700f7c43fd7e6839812b 100644 --- a/import2ledger/importers/_csv.py +++ b/import2ledger/importers/_csv.py @@ -43,9 +43,17 @@ class CSVImporterBase: Reader = csv.reader DictReader = csv.DictReader + @classmethod + def _row_rindex(cls, row, default=None): + """Return the index of the last cell in the row that has a value.""" + for offset, value in enumerate(reversed(row), 1): + if value: + return len(row) - offset + return default + @classmethod def _read_header_row(cls, row): - return {} if len(row) < cls._HEADER_MAX_LEN else None + return {} if cls._row_rindex(row, -1) + 1 < cls._HEADER_MAX_LEN else None @classmethod def _read_header(cls, input_file): diff --git a/import2ledger/importers/benevity.py b/import2ledger/importers/benevity.py index ca7685073eebd8b69569989a2338846e59094624..b1cf0088756ef9339e5c6c52796e9f844fe1ff5a 100644 --- a/import2ledger/importers/benevity.py +++ b/import2ledger/importers/benevity.py @@ -11,10 +11,10 @@ class _DonationsImporterBase(_csv.CSVImporterBase): @classmethod def _read_header_row(cls, row): - row_len = len(row) - if row_len > 2: + row_rindex = cls._row_rindex(row, -1) + if row_rindex > 1: return None - elif row_len == 2 and row[0] in cls.HEADER_FIELDS: + elif row_rindex == 1 and row[0] in cls.HEADER_FIELDS: return {cls.HEADER_FIELDS[row[0]]: row[1]} else: return {} diff --git a/setup.py b/setup.py index 3b8f35e7d5daa896288101d4485efffc511a2c96..11a6e18690bd74c65d78a63fc6a9406a02b4346e 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ REQUIREMENTS['tests_require'] = [ setup( name='import2ledger', description="Import different sources of financial data to Ledger", - version='0.9.2', + version='0.9.3', author='Brett Smith', author_email='brettcsmith@brettcsmith.org', license='GNU AGPLv3+', diff --git a/tests/data/imports.yml b/tests/data/imports.yml index ce06981188aa655365c5b9b0b75c8b8c425d52c4..2ec351e33677f8b63f8f30b59f900a88cedff80e 100644 --- a/tests/data/imports.yml +++ b/tests/data/imports.yml @@ -264,6 +264,8 @@ - source: AmazonAffiliateEarnings.csv importer: amazon.EarningsImporter + header_rows: 1 + header_cols: 12 expect: - payee: Amazon date: !!python/object/apply:datetime.date [2016, 12, 20] @@ -276,6 +278,8 @@ - source: Benevity2018.csv importer: benevity.Donations2018Importer + header_rows: 11 + header_cols: 17 expect: - date: !!python/object/apply:datetime.date [2017, 10, 28] currency: USD @@ -366,6 +370,8 @@ - source: Benevity2019.csv importer: benevity.Donations2019Importer + header_rows: 11 + header_cols: 21 expect: - date: !!python/object/apply:datetime.date [2017, 10, 28] currency: USD diff --git a/tests/test_importers.py b/tests/test_importers.py index 5c63d71930ed4ff95f4666d8b7546d9be3df3952..bb8757c9c33b65b7a1d8a4d6acd77205465e8fd1 100644 --- a/tests/test_importers.py +++ b/tests/test_importers.py @@ -1,8 +1,11 @@ +import csv import datetime import decimal +import io import importlib import itertools import pathlib +import shutil import re import pytest @@ -28,6 +31,25 @@ class TestImporters: with source_path.open() as source_file: assert importer.can_import(source_file) + @pytest.mark.parametrize('source_path,importer,header_rows,header_cols', [ + (t['source'], t['importer'], t['header_rows'], t['header_cols']) + for t in test_data if t.get('header_rows') + ]) + def test_can_import_squared_csv(self, source_path, importer, header_rows, header_cols): + # Sometimes when we munge spreadsheets by hand (e.g., to filter by + # project) tools like LibreOffice Calc write a "squared" spreadsheet, + # where every row has the same length. This test ensures the results + # are still recognized for import. + with io.StringIO() as squared_file: + csv_writer = csv.writer(squared_file) + with source_path.open() as source_file: + for row in itertools.islice(csv.reader(source_file), header_rows): + padding = [None] * (header_cols - len(row)) + csv_writer.writerow(row + padding) + shutil.copyfileobj(source_file, squared_file) + squared_file.seek(0) + assert importer.can_import(squared_file) + @pytest.mark.parametrize('source_path,import_class,expect_results', [ (t['source'], t['importer'], t['expect']) for t in test_data ])