import pathlib
import re

# Sibling entry named "data" in the same directory as this module file.
DATA_DIR = pathlib.Path(__file__).with_name('data')

# Runs of three or more literal spaces; exactly two spaces are left alone.
_LONG_SPACE_RUN = re.compile(r' {3,}')


def normalize_whitespace(s: str) -> str:
    """Replace tabs with spaces and collapse long runs of spaces.

    Each tab character is treated as a single space, and every run of three
    or more consecutive spaces (including spaces that came from tabs) is
    then reduced to one space. Runs of exactly two spaces are preserved,
    and other whitespace such as newlines is not touched.

    Tabs are converted *before* runs are collapsed so that mixed sequences
    such as ``" \\t "`` or ``"\\t\\t\\t"`` are recognised as one run. This
    makes the function idempotent: normalizing an already-normalized string
    returns it unchanged.

    Args:
        s: The text to normalize.

    Returns:
        A copy of ``s`` with no tab characters and no run of three or more
        consecutive spaces.
    """
    # Convert first, collapse second: doing both in one regex pass would
    # leave 3+ space runs behind wherever tabs sat next to other whitespace.
    without_tabs = s.replace('\t', ' ')
    return _LONG_SPACE_RUN.sub(' ', without_tabs)