Changeset - ab8559c75bdb
[Not reviewed]
0 5 0
Brett Smith - 5 years ago 2019-08-28 14:22:10
brettcsmith@brettcsmith.org
csv: Support importing squared CSV spreadsheets.

See the test comment for more rationale.
5 files changed with 41 insertions and 5 deletions:
0 comments (0 inline, 0 general)
import2ledger/importers/_csv.py
Show inline comments
...
 
@@ -40,15 +40,23 @@ class CSVImporterBase:
 
    """
 
    ENTRY_SEED = {}
 
    COPIED_FIELDS = {}
 
    Reader = csv.reader
 
    DictReader = csv.DictReader
 

	
 
    @classmethod
 
    def _row_rindex(cls, row, default=None):
 
        """Return the index of the last cell in the row that has a value."""
 
        for offset, value in enumerate(reversed(row), 1):
 
            if value:
 
                return len(row) - offset
 
        return default
 

	
 
    @classmethod
 
    def _read_header_row(cls, row):
 
        return {} if len(row) < cls._HEADER_MAX_LEN else None
 
        return {} if cls._row_rindex(row, -1) + 1 < cls._HEADER_MAX_LEN else None
 

	
 
    @classmethod
 
    def _read_header(cls, input_file):
 
        cls._NEEDED_KEYS = cls.NEEDED_FIELDS.union(cls.COPIED_FIELDS)
 
        cls._HEADER_MAX_LEN = len(cls._NEEDED_KEYS)
 
        header = {}
import2ledger/importers/benevity.py
Show inline comments
...
 
@@ -8,16 +8,16 @@ ZERO_DECIMAL = decimal.Decimal(0)
 
class _DonationsImporterBase(_csv.CSVImporterBase):
 
    NAME_FIELDS = ['Donor First Name', 'Donor Last Name']
 
    NOT_SHARED = 'Not shared by donor'
 

	
 
    @classmethod
    def _read_header_row(cls, row):
        """Classify one row read from the top of a donations report.

        The row is measured up to its last non-empty cell, so trailing
        padding cells (as written for a "squared" spreadsheet) are ignored.

        Returns:
          * ``None`` when the row has a value beyond its second column;
          * a one-item dict mapping ``cls.HEADER_FIELDS[row[0]]`` to
            ``row[1]`` when the last value is in the second column and the
            first cell is a key of ``cls.HEADER_FIELDS``;
          * an empty dict for any other row.
        """
        # -1 stands for "no values at all", which falls through to {}.
        row_rindex = cls._row_rindex(row, -1)
        if row_rindex > 1:
            return None
        elif row_rindex == 1 and row[0] in cls.HEADER_FIELDS:
            return {cls.HEADER_FIELDS[row[0]]: row[1]}
        else:
            return {}
 

	
 
    def _read_row(self, row):
 
        date_s = row.get(self.DATE_FIELD)
setup.py
Show inline comments
...
 
@@ -27,13 +27,13 @@ REQUIREMENTS['tests_require'] = [
 
    *all_extras_require,
 
]
 

	
 
setup(
 
    name='import2ledger',
 
    description="Import different sources of financial data to Ledger",
 
    version='0.9.2',
 
    version='0.9.3',
 
    author='Brett Smith',
 
    author_email='brettcsmith@brettcsmith.org',
 
    license='GNU AGPLv3+',
 

	
 
    packages=find_packages(include=['import2ledger', 'import2ledger.*']),
 
    entry_points={
tests/data/imports.yml
Show inline comments
...
 
@@ -261,24 +261,28 @@
 
      payment_id: ch_daer0ahwoh9oDeiqu2eimoD7
 
      stripe_id: ch_daer0ahwoh9oDeiqu2eimoD7
 
      invoice_id: "11"
 

	
 
- source: AmazonAffiliateEarnings.csv
 
  importer: amazon.EarningsImporter
 
  header_rows: 1
 
  header_cols: 12
 
  expect:
 
    - payee: Amazon
 
      date: !!python/object/apply:datetime.date [2016, 12, 20]
 
      amount: !!python/object/apply:decimal.Decimal ["4.24"]
 
      currency: USD
 
    - payee: Amazon
 
      date: !!python/object/apply:datetime.date [2017, 1, 7]
 
      amount: !!python/object/apply:decimal.Decimal ["-.08"]
 
      currency: USD
 

	
 
- source: Benevity2018.csv
 
  importer: benevity.Donations2018Importer
 
  header_rows: 11
 
  header_cols: 17
 
  expect:
 
    - date: !!python/object/apply:datetime.date [2017, 10, 28]
 
      currency: USD
 
      disbursement_id: ABCDE12345
 
      amount: !!python/object/apply:decimal.Decimal [20]
 
      net_amount: !!python/object/apply:decimal.Decimal [20]
...
 
@@ -363,12 +367,14 @@
 
      frequency: Recurring
 
      transaction_id: 67890TYUIO
 
      ledger template: benevity donations ledger entry
 

	
 
- source: Benevity2019.csv
 
  importer: benevity.Donations2019Importer
 
  header_rows: 11
 
  header_cols: 21
 
  expect:
 
    - date: !!python/object/apply:datetime.date [2017, 10, 28]
 
      currency: USD
 
      disbursement_id: ABCDE12345
 
      amount: !!python/object/apply:decimal.Decimal [20]
 
      net_amount: !!python/object/apply:decimal.Decimal [20]
tests/test_importers.py
Show inline comments
 
import csv
 
import datetime
 
import decimal
 
import io
 
import importlib
 
import itertools
 
import pathlib
 
import shutil
 
import re
 

	
 
import pytest
 
import yaml
 
from import2ledger import importers, strparse
 

	
...
 
@@ -25,12 +28,31 @@ class TestImporters:
 
        (t['source'], t['importer']) for t in test_data
 
    ])
 
    def test_can_import(self, source_path, importer):
 
        with source_path.open() as source_file:
 
            assert importer.can_import(source_file)
 

	
 
    @pytest.mark.parametrize('source_path,importer,header_rows,header_cols', [
        (t['source'], t['importer'], t['header_rows'], t['header_cols'])
        for t in test_data if t.get('header_rows')
    ])
    def test_can_import_squared_csv(self, source_path, importer, header_rows, header_cols):
        """Check that a "squared" copy of the source is still importable.

        Editing a spreadsheet by hand (for example, filtering it by project)
        in a tool such as LibreOffice Calc can save it "squared": every row
        is padded out to the same length.  This test pads the first
        ``header_rows`` rows of the source to ``header_cols`` cells, copies
        the remainder of the file unchanged, and asserts the importer still
        recognizes the result.
        """
        with io.StringIO() as squared_file:
            with source_path.open() as source_file:
                leading_rows = itertools.islice(csv.reader(source_file), header_rows)
                # Pad each leading row with empty cells up to header_cols.
                csv.writer(squared_file).writerows(
                    cells + [None] * (header_cols - len(cells))
                    for cells in leading_rows
                )
                # Everything after the leading rows is copied verbatim.
                shutil.copyfileobj(source_file, squared_file)
            squared_file.seek(0)
            assert importer.can_import(squared_file)
 

	
 
    @pytest.mark.parametrize('source_path,import_class,expect_results', [
 
        (t['source'], t['importer'], t['expect']) for t in test_data
 
    ])
 
    def test_import(self, source_path, import_class, expect_results):
 
        with source_path.open() as source_file:
 
            importer = import_class(source_file)
0 comments (0 inline, 0 general)