diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 345c0a1d1..270ae0a39 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,33 @@ Change Log ---------- +7.12.0 +====== + +* New module ``sheet_utils`` for loading workbooks. + + * Important things of interest: + + * Class ``ItemManager`` for loading Item-style data + from any ``.xlsx``, ``.csv`` or ``.tsv`` files. + + * Function ``load_items`` that does the same as ``ItemManager.load``. + + * Various lower-level implementation classes such as: + + * Classes ``XlsxManager``, ``CsvManager`` and ``TsvManager`` for loading raw data + from ``.xlsx``, ``.csv``, and ``.tsv`` files, respectively. + + * Classes ``XlsxItemManager``, ``CsvItemManager``, and ``TsvItemManager`` for loading Item-style data + from ``.xlsx``, ``.csv``, and ``.tsv`` files, respectively. + +* New functionality in ``misc_utils``: + + * New function ``is_uuid`` (migrated from Fourfront) + * New function ``pad_to`` + * New class ``JsonLinesReader`` + + 7.11.0 ====== @@ -16,6 +43,7 @@ Change Log * Fix in ``get_schema`` and ``get_schemas`` for the ``portal_vapp`` returning webtest.response.TestResponse which has a ``json`` object property rather than a function. + 7.10.0 ====== diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index cc18f4b19..8ebd991a4 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -9,6 +9,7 @@ import inspect import math import io +import json import os import logging import pytz @@ -191,7 +192,11 @@ class _VirtualAppHelper(webtest.TestApp): pass -class VirtualApp: +class AbstractVirtualApp: + pass + + +class VirtualApp(AbstractVirtualApp): """ Wrapper class for TestApp, to allow custom control over submitting Encoded requests, simulating a number of conditions, including permissions. @@ -1352,6 +1357,25 @@ def capitalize1(s): return s[:1].upper() + s[1:] +""" +Python's UUID ignores all dashes, whereas Postgres is more strict +http://www.postgresql.org/docs/9.2/static/datatype-uuid.html +See also http://www.postgresql.org/docs/9.2/static/datatype-uuid.html +And, anyway, this pattern is what our portals have been doing +for quite a while, so it's the most stable choice for us now. +""" + +uuid_re = re.compile(r'(?i)[{]?(?:[0-9a-f]{4}-?){8}[}]?') + + +def is_uuid(instance): + """ + Predicate returns true for any group of 32 hex characters with optional hyphens every four characters. + We insist on lowercase to make matching faster. See other notes on this design choice above. + """ + return bool(uuid_re.match(instance)) + + def string_list(s): """ Turns a comma-separated list into an actual list, trimming whitespace and ignoring nulls. @@ -2313,3 +2337,73 @@ def parse_in_radix(text: str, *, radix: int): except Exception: pass raise ValueError(f"Unable to parse: {text!r}") + + +def pad_to(target_size: int, data: list, *, padding=None): + """ + This will pad to a given target size, a list of a potentially different actual size, using given padding. + e.g., pad_to(3, [1, 2]) will return [1, 2, None] + """ + actual_size = len(data) + if actual_size < target_size: + data = data + [padding] * (target_size - actual_size) + return data + + +class JsonLinesReader: + + def __init__(self, fp, padded=False, padding=None): + """ + Given an fp (the conventional name for a "file pointer", the thing a call to io.open returns, + this creates an object that can be used to iterate across the lines in the JSON lines file + that the fp is reading from. + + There are two possible formats that this will return. 
+ + For files that contain a series of dictionaries, such as: + {"something": 1, "else": "a"} + {"something": 2, "else": "b"} + ...etc + this will just return thos those dictionaries one-by-one when iterated over. + + The same set of dictionaries will also be yielded by a file containing: + ["something", "else"] + [1, "a"] + [2, "b"] + ...etc + this will just return thos those dictionaries one-by-one when iterated over. + + NOTES: + + * In the second case, shorter lists on subsequent lines return only partial dictionaries. + * In the second case, longer lists on subsequent lines will quietly drop any extra elements. + """ + + self.fp = fp + self.padded: bool = padded + self.padding = padding + self.headers = None # Might change after we see first line + + def __iter__(self): + first_line = True + n_headers = 0 + for raw_line in self.fp: + line = json.loads(raw_line) + if first_line: + first_line = False + if isinstance(line, list): + self.headers = line + n_headers = len(line) + continue + # If length of line is more than we expect, ignore it. Let user put comments beyond our table + # But if length of line is less than we expect, extend the line with None + if self.headers: + if not isinstance(line, list): + raise Exception("If the first line is a list, all lines must be.") + if self.padded and len(line) < n_headers: + line = pad_to(n_headers, line, padding=self.padding) + yield dict(zip(self.headers, line)) + elif isinstance(line, dict): + yield line + else: + raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}") diff --git a/dcicutils/sheet_utils.py b/dcicutils/sheet_utils.py new file mode 100644 index 000000000..5a311f7c0 --- /dev/null +++ b/dcicutils/sheet_utils.py @@ -0,0 +1,1131 @@ +import chardet +import contextlib +import copy +import csv +import glob +import io +import json +import openpyxl +import os +import re +import subprocess +import uuid +import yaml + +from openpyxl.worksheet.worksheet import Worksheet +from openpyxl.workbook.workbook import Workbook +from tempfile import TemporaryFile, TemporaryDirectory +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union +from .common import AnyJsonData +from .env_utils import public_env_name, EnvUtils +from .ff_utils import get_schema +from .lang_utils import conjoined_list, disjoined_list, maybe_pluralize, there_are +from .misc_utils import ignored, PRINT, pad_to, JsonLinesReader, AbstractVirtualApp, remove_suffix +from .task_utils import pmap + + +Header = str +Headers = List[str] +ParsedHeader = List[Union[str, int]] +ParsedHeaders = List[ParsedHeader] +SheetCellValue = Union[int, float, str] +SheetRow = List[SheetCellValue] +CsvReader = type(csv.reader(TemporaryFile())) +SheetData = List[dict] +TabbedSheetData = Dict[str, SheetData] +Regexp = type(re.compile("sample")) + + +class LoadFailure(Exception): + """ + In general, we'd prefer to load up the spreadsheet with clumsy data that can then be validated in detail, + but some errors are so confusing or so problematic that we need to just fail the load right away. + """ + pass + + +class LoadArgumentsError(LoadFailure): + """ + Errors of this class represent situations where we can't get started because + there's a problem with the given arguments. + """ + pass + + +class LoadTableError(LoadFailure): + """ + Errors of this class represent situations where we can't get started because + there's a problem with some table's syntax, for example headers that don't make sense. 
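    One concrete case, raised later by ItemTools.compute_patch_prototype, is a header that begins
    with a numeric reference (e.g., "#0.name"), which cannot be mapped onto an item prototype.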
+ """ + pass + + +@contextlib.contextmanager +def deferred_problems(): + problems = [] + + def note_problems(problem): + problems.append(problem) + + yield note_problems + + if problems: + for problem in problems: + PRINT(f"Problem: {problem}") + raise Exception(there_are(problems, kind='problem while compiling hints', tense='past', show=False)) + + +def unwanted_kwargs(*, context, kwargs, context_plural=False, detailed=False): + if kwargs: + unwanted = [f"{argname}={value!r}" if detailed else argname + for argname, value in kwargs.items() + if value is not None] + if unwanted: + does_not = "don't" if context_plural else "doesn't" + raise LoadArgumentsError(f"{context} {does_not} use" + f" {maybe_pluralize(unwanted, 'keyword argument')} {conjoined_list(unwanted)}.") + + +def prefer_number(value: SheetCellValue): + if isinstance(value, str): # the given value might be an int or float, in which case just fall through + if not value: + return None + value = value + ch0 = value[0] + if ch0 == '+' or ch0 == '-' or ch0.isdigit(): + try: + return int(value) + except Exception: + pass + try: + return float(value) + except Exception: + pass + # If we couldn't parse it as an int or float, fall through to returning the original value + pass + return value + + +def expand_string_escape_sequences(text: str) -> str: + s = io.StringIO() + escaping = False + for ch in text: + if escaping: + if ch == 'r': + s.write('\r') + elif ch == 't': + s.write('\t') + elif ch == 'n': + s.write('\n') + elif ch == '\\': + s.write('\\') + else: + # Rather than err, just leave other sequences as-is. + s.write(f"\\{ch}") + escaping = False + elif ch == '\\': + escaping = True + else: + s.write(ch) + return s.getvalue() + + +def open_unicode_text_input_file_respecting_byte_order_mark(filename): + """ + Opens a file for text input, respecting a byte-order mark (BOM). + """ + with io.open(filename, 'rb') as fp: + leading_bytes = fp.read(4 * 8) # 4 bytes is all we need + bom_info = chardet.detect(leading_bytes, should_rename_legacy=True) + detected_encoding = bom_info and bom_info.get('encoding') # tread lightly + use_encoding = 'utf-8' if detected_encoding == 'ascii' else detected_encoding + return io.open(filename, 'r', encoding=use_encoding) + + +class TypeHint: + def apply_hint(self, value): + return value + + def __str__(self): + return f"<{self.__class__.__name__}>" + + def __repr__(self): + return self.__str__() + + +class BoolHint(TypeHint): + + def apply_hint(self, value): + if isinstance(value, str) and value: + if 'true'.startswith(value.lower()): + return True + elif 'false'.startswith(value.lower()): + return False + return super().apply_hint(value) + + +class EnumHint(TypeHint): + + def __str__(self): + return f"" + + def __init__(self, value_map): + self.value_map = value_map + + def apply_hint(self, value): + if isinstance(value, str): + if value in self.value_map: + result = self.value_map[value] + return result + else: + lvalue = value.lower() + found = [] + for lkey, key in self.value_map.items(): + if lkey.startswith(lvalue): + found.append(lkey) + if len(found) == 1: + [only_found] = found + result = self.value_map[only_found] + return result + return super().apply_hint(value) + + +OptionalTypeHints = List[Optional[TypeHint]] + + +class ItemTools: + """ + Implements operations on table-related data without pre-supposing the specific representation of the table. 
+ It is assumed this can be used for data that was obtained from .json, .csv, .tsv, and .xlsx files because + it does not presuppose the source of the data nor where it will be written to. + + For the purpose of this class: + + * a 'header' is a string representing the top of a column. + + * a 'parsed header' is a list of strings and/or ints, after splitting at uses of '#' or '.', so that + "a.b.c" is represented as ["a", "b", "c"], and "x.y#0" is represented as ["x", "y", 0], and representing + each numeric token as an int instead of a string. + + * a 'headers' object is just a list of strings, each of which is a 'header'. + + * a 'parsed headers' object is a non-empty list of lists, each of which is a 'parsed header'. + e..g., the headers ["a.b.c", "x.y#0"] is represented as parsed hearders [["a", "b", "c"], ["x", "y", 0]]. + + """ + + @classmethod + def parse_sheet_header(cls, header: Header) -> ParsedHeader: + result = [] + token = "" + for i in range(len(header)): + ch = header[i] + if ch == '.' or ch == '#': + if token: + result.append(int(token) if token.isdigit() else token) + token = "" + else: + token += ch + if token: + result.append(int(token) if token.isdigit() else token) + return result + + @classmethod + def parse_sheet_headers(cls, headers: Headers): + return [cls.parse_sheet_header(header) + for header in headers] + + @classmethod + def compute_patch_prototype(cls, parsed_headers: ParsedHeaders): + prototype = {} + for parsed_header in parsed_headers: + parsed_header0 = parsed_header[0] + if isinstance(parsed_header0, int): + raise LoadTableError(f"A header cannot begin with a numeric ref: {parsed_header0}") + cls.assure_patch_prototype_shape(parent=prototype, keys=parsed_header) + return prototype + + @classmethod + def assure_patch_prototype_shape(cls, *, parent: Union[Dict, List], keys: ParsedHeader): + [key0, *more_keys] = keys + key1 = more_keys[0] if more_keys else None + if isinstance(key1, int): + placeholder = [] + elif isinstance(key1, str): + placeholder = {} + else: + placeholder = None + if isinstance(key0, int): + n = len(parent) + if key0 == n: + parent.append(placeholder) + elif key0 > n: + raise LoadTableError("Numeric items must occur sequentially.") + elif isinstance(key0, str): + if key0 not in parent: + parent[key0] = placeholder + if key1 is not None: + cls.assure_patch_prototype_shape(parent=parent[key0], keys=more_keys) + return parent + + INSTAGUIDS_ENABLED = False # Experimental feature not enabled by default + + @classmethod + def parse_item_value(cls, value: SheetCellValue, context=None) -> AnyJsonData: + # TODO: Remodularize this for easier testing and more Schema-driven effect + # Doug asks that this be broken up into different mechanisms, more modular and separately testable. + # I pretty much agree with that. I'm just waiting for suggestions on what kinds of features are desired. 
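        # Illustrative expectations, following the branches below (the inputs are hypothetical):
        #   parse_item_value("true")  -> True
        #   parse_item_value("")      -> None
        #   parse_item_value("|")     -> []
        #   parse_item_value("a|b")   -> ["a", "b"]
        #   parse_item_value("3")     -> 3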
+ if isinstance(value, str): + lvalue = value.lower() + # TODO: We could consult a schema to make this less heuristic, but this may do for now + if lvalue == 'true': + return True + elif lvalue == 'false': + return False + elif lvalue == 'null' or lvalue == '': + return None + elif '|' in value: + if value == '|': # Use '|' for [] + return [] + else: + if value.endswith("|"): # Use 'foo|' for ['foo'] + value = value[:-1] + return [cls.parse_item_value(subvalue, context=context) for subvalue in value.split('|')] + elif cls.INSTAGUIDS_ENABLED and context is not None and value.startswith('#'): + # Note that this clause MUST follow '|' clause above so '#foo|#bar' isn't seen as instaguid + return cls.get_instaguid(value, context=context) + else: + # Doug points out that the schema might not agree, might want a string representation of a number. + # At this semantic layer, this might be a bad choice. + return prefer_number(value) + else: # presumably a number (int or float) + return value + + @classmethod + def get_instaguid(cls, guid_placeholder: str, *, context: Optional[Dict] = None): + if context is None: + return guid_placeholder + else: + referent = context.get(guid_placeholder) + if not referent: + context[guid_placeholder] = referent = str(uuid.uuid4()) + return referent + + @classmethod + def set_path_value(cls, datum: Union[List, Dict], path: ParsedHeader, value: Any, force: bool = False): + if (value is None or value == '') and not force: + return + [key, *more_path] = path + if not more_path: + datum[key] = value + else: + cls.set_path_value(datum[key], more_path, value) + + @classmethod + def find_type_hint(cls, parsed_header: Optional[ParsedHeader], schema: Any): + + def finder(subheader, subschema): + if not parsed_header: + return None + else: + [key1, *other_headers] = subheader + if isinstance(key1, str) and isinstance(subschema, dict): + if subschema.get('type') == 'object': + def1 = subschema.get('properties', {}).get(key1) + if not other_headers: + if def1 is not None: + t = def1.get('type') + if t == 'string': + enum = def1.get('enum') + if enum: + mapping = {e.lower(): e for e in enum} + return EnumHint(mapping) + elif t == 'boolean': + return BoolHint() + else: + pass # fall through to asking super() + else: + pass # fall through to asking super() + else: + return finder(subheader=other_headers, subschema=def1) + + return finder(subheader=parsed_header, subschema=schema) + + @classmethod + def infer_tab_name(cls, filename): + return os.path.basename(filename).split('.')[0] + + +# TODO: Consider whether this might want to be an abstract base class. Some change might be needed. +# +# Doug thinks we might want (metaclass=ABCMeta) here to make this an abstract base class. +# I am less certain but open to discussion. Among other things, as implemented now, +# the __init__ method here needs to run and the documentation says that ABC's won't appear +# in the method resolution order. -kmp 17-Aug-2023 +# See also discussion at https://github.com/4dn-dcic/utils/pull/276#discussion_r1297775535 +class AbstractTableSetManager: + """ + The TableSetManager is the spanning class of anything that wants to be able to load a table set, + regardless of what it wants to load it from. 
To do this, it must support a load method + that takes a filename and returns the file content in the form: + { + "Sheet1": [ + {...representation of row1 as some kind of dict...}, + {...representation of row2 as some kind of dict...} + ], + "Sheet2": [...], + ..., + } + It also needs some implementation of the .tab_names property. + Note that at this level of abstraction, we take no position on what form of representation is used + for the rows, as long as it is JSON data of some kind. It might be + {"col1": "val1", "col2": "val2", ...} + or it might be something more structured like + {"something": "val1", {"something_else": ["val2"]}} + Additionally, the values stored might be altered as well. In particular, the most likely alteration + is to turn "123" to 123 or "" to None, though the specifics of whether and how such transformations + happen is not constrained by this class. + """ + + ALLOWED_FILE_EXTENSIONS: List[str] = [] + + def __init__(self, filename: str, **kwargs): + self.filename: str = filename + unwanted_kwargs(context=self.__class__.__name__, kwargs=kwargs) + + # TODO: Consider whether this should be an abstractmethod (but first see detailed design note at top of class.) + @classmethod + def load(cls, filename: str, **kwargs) -> TabbedSheetData: + """ + Reads a filename and returns a dictionary that maps sheet names to rows of dictionary data. + For more information, see documentation of AbstractTableSetManager. + """ + raise NotImplementedError(f".load(...) is not implemented for {cls.__name__}.") # noQA + + @property + def tab_names(self) -> List[str]: + raise NotImplementedError(f".tab_names is not implemented for {self.__class__.__name__}..") # noQA + + def load_content(self) -> Any: + raise NotImplementedError(f".load_content() is not implemented for {self.__class__.__name__}.") # noQA + + +class BasicTableSetManager(AbstractTableSetManager): + """ + A BasicTableManager provides some structure that most kinds of parsers will need. + In particular, everything will likely need some way of storing headers and some way of storing content + of each sheet. Even a csv file, which doesn't have multiple tabs can be seen as the degenerate case + of this where there's only one set of headers and only one block of content. + """ + + def __init__(self, filename: str, **kwargs): + super().__init__(filename=filename, **kwargs) + self.headers_by_tab_name: Dict[str, Headers] = {} + self.content_by_tab_name: Dict[str, SheetData] = {} + self.reader_agent: Any = self._get_reader_agent() + + def tab_headers(self, tab_name: str) -> Headers: + return self.headers_by_tab_name[tab_name] + + def tab_content(self, tab_name: str) -> List[AnyJsonData]: + return self.content_by_tab_name[tab_name] + + @classmethod + def _create_tab_processor_state(cls, tab_name: str) -> Any: + """ + This method provides for the possibility that some parsers will want auxiliary state, + (such as parsed headers or a line count or a table of temporary names for objects to cross-link + or some other such feature) that it carries with it as it moves from line to line parsing things. + Subclasses might therefore want to make this do something more interesting. 
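        For instance, ItemManagerMixin (later in this module) returns a per-tab SheetState holding
        the parsed headers and type hints, and that object is passed back to each _process_row call.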
+ """ + ignored(tab_name) # subclasses might need this, but we don't + return None + + def _get_reader_agent(self) -> Any: + """This function is responsible for opening the workbook and returning a workbook object.""" + raise NotImplementedError(f"._get_reader_agent() is not implemented for {self.__class__.__name__}.") # noQA + + +class SemanticTableSetManager(BasicTableSetManager): + """ + This is the base class for all workbook-like data sources, i.e., that may need to apply semantic processing. + Those may be: + * Excel workbook readers (.xlsx) + * Comma-separated file readers (.csv) + * Tab-separarated file readers (.tsv in most of the world, but Microsoft stupidly calls this .txt, outright + refusing to write a .tsv file, so many people seem to compromise and call this .tsv.txt) + There are two levels to each of these: a class that is not semantically interpreted, + and a class that is semantically interpreted as an "item". + + This is NOT a parent class of these kinds of files, which we always take literally as if semantic processing + were already done (in part so that they can be used to test the results of other formats): + * Json files + * Yaml files + * Inserts directories + * JsonLines files + """ + + @classmethod + def load(cls, filename: str, **kwargs) -> AnyJsonData: + if cls.ALLOWED_FILE_EXTENSIONS: + if not any(filename.lower().endswith(suffix) for suffix in cls.ALLOWED_FILE_EXTENSIONS): + raise LoadArgumentsError(f"The TableSetManager subclass {cls.__name__} expects only" + f" {disjoined_list(cls.ALLOWED_FILE_EXTENSIONS)} filenames: {filename}") + + table_set_manager: SemanticTableSetManager = cls(filename=filename, **kwargs) + return table_set_manager.load_content() + + def __init__(self, filename: str, **kwargs): + super().__init__(filename=filename, **kwargs) + + def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]: + """ + Given a tab_name and a state (returned by _sheet_loader_state), return a generator for a set of row values. + """ + raise NotImplementedError(f"._rows_for_tab_name(...) is not implemented for {self.__class__.__name__}.") # noQA + + def _process_row(self, tab_name: str, state: Any, row: List[SheetCellValue]) -> AnyJsonData: + """ + This needs to take a state and whatever represents a row and + must return a list of objects representing column values. + What constitutes a processed up to the class, but other than that the result must be a JSON dictionary. + """ + raise NotImplementedError(f"._process_row(...) 
is not implemented for {self.__class__.__name__}.") # noQA + + def load_content(self) -> AnyJsonData: + for tab_name in self.tab_names: + sheet_content = [] + state = self._create_tab_processor_state(tab_name) + for row_data in self._raw_row_generator_for_tab_name(tab_name): + processed_row_data: AnyJsonData = self._process_row(tab_name, state, row_data) + sheet_content.append(processed_row_data) + self.content_by_tab_name[tab_name] = sheet_content + return self.content_by_tab_name + + @classmethod + def parse_cell_value(cls, value: SheetCellValue) -> AnyJsonData: + return prefer_number(value) + + +class AbstractItemManager(AbstractTableSetManager): + + pass + + +class TableSetManagerRegistry: + + def __init__(self): + self.manager_table: Dict[str, Type[AbstractTableSetManager]] = {} + self.regexp_mappings: List[Tuple[Regexp, Type[AbstractTableSetManager]]] = [] + + def register(self, regexp: Optional[str] = None): + def _wrapped_register(class_to_register: Type[AbstractTableSetManager]): + if regexp: + self.regexp_mappings.append((re.compile(regexp), class_to_register)) + for ext in class_to_register.ALLOWED_FILE_EXTENSIONS: + existing = self.manager_table.get(ext) + if existing: + raise Exception(f"Tried to define {class_to_register} to extension {ext}," + f" but {existing} already claimed that.") + self.manager_table[ext] = class_to_register + return class_to_register + return _wrapped_register + + register1 = register + + def manager_for_filename(self, filename: str) -> Type[AbstractTableSetManager]: + base: str = os.path.basename(filename) + suffix_parts = base.split('.')[1:] + if suffix_parts: + for i in range(0, len(suffix_parts)): + suffix = f".{'.'.join(suffix_parts[i:])}" + found: Optional[Type[AbstractTableSetManager]] = self.manager_table.get(suffix) + if found: + return found + else: + special_case: Optional[Type[AbstractItemManager]] = self.manager_for_special_filename(filename) + if special_case: + return special_case + raise LoadArgumentsError(f"Unknown file type: {filename}") + + def manager_for_special_filename(self, filename: str) -> Optional[Type[AbstractTableSetManager]]: + for pattern, manager_class in self.regexp_mappings: + if pattern.match(filename): + return manager_class + return None + + +TABLE_SET_MANAGER_REGISTRY = TableSetManagerRegistry() +ITEM_MANAGER_REGISTRY = TableSetManagerRegistry() + + +@TABLE_SET_MANAGER_REGISTRY.register() +class XlsxManager(SemanticTableSetManager): + """ + This implements the mechanism to get a series of rows out of the sheets in an XLSX file. 
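
    A rough usage sketch (the workbook name, sheet names, and values here are illustrative
    assumptions, not anything fixed by this class):

        XlsxManager.load("workbook.xlsx")
        # => {"Sheet1": [{"name": "foo", "size": 10}, ...], "Sheet2": [...]}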
+ """ + + ALLOWED_FILE_EXTENSIONS = ['.xlsx'] + + @classmethod + def _all_rows(cls, sheet: Worksheet): + row_max = sheet.max_row + for row in range(2, row_max + 1): + yield row + + @classmethod + def _all_cols(cls, sheet: Worksheet): + col_max = sheet.max_column + for col in range(1, col_max + 1): + yield col + + @property + def tab_names(self) -> List[str]: + return self.reader_agent.sheetnames + + def _get_reader_agent(self) -> Workbook: + return openpyxl.load_workbook(self.filename) + + def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]: + sheet = self.reader_agent[tab_name] + return (self._get_raw_row_content_tuple(sheet, row) + for row in self._all_rows(sheet)) + + def _get_raw_row_content_tuple(self, sheet: Worksheet, row: int) -> SheetRow: + return [sheet.cell(row=row, column=col).value + for col in self._all_cols(sheet)] + + def _create_tab_processor_state(self, tab_name: str) -> Headers: + sheet = self.reader_agent[tab_name] + headers: Headers = [str(sheet.cell(row=1, column=col).value) + for col in self._all_cols(sheet)] + self.headers_by_tab_name[sheet.title] = headers + return headers + + def _process_row(self, tab_name: str, headers: Headers, row_data: SheetRow) -> AnyJsonData: + ignored(tab_name) + return {headers[i]: self.parse_cell_value(row_datum) + for i, row_datum in enumerate(row_data)} + + +class SchemaAutoloadMixin(AbstractTableSetManager): + + SCHEMA_CACHE = {} # Shared cache. Do not override. Use .clear_schema_cache() to clear it. + CACHE_SCHEMAS = True # Controls whether we're doing caching at all + AUTOLOAD_SCHEMAS_DEFAULT = True + + def __init__(self, filename: str, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None, + portal_vapp: Optional[AbstractVirtualApp] = None, **kwargs): + # This setup must be in place before the class initialization is done (via the super call). + self.autoload_schemas: bool = self.AUTOLOAD_SCHEMAS_DEFAULT if autoload_schemas is None else autoload_schemas + if self.autoload_schemas: # If autoload_schemas is False, we don't care about doing this defaulting. + if portal_env is None and portal_vapp is None: + portal_env = public_env_name(EnvUtils.PRD_ENV_NAME) + PRINT(f"The portal_env was not explicitly supplied. 
Schemas will come from portal_env={portal_env!r}.") + self.portal_env: Optional[str] = portal_env + self.portal_vapp: Optional[AbstractVirtualApp] = portal_vapp + super().__init__(filename=filename, **kwargs) + + def fetch_relevant_schemas(self, schema_names: List[str]): + # The schema_names argument is not normally given, but it is there for easier testing + def fetch_schema(schema_name): + schema = self.fetch_schema(schema_name, portal_env=self.portal_env, portal_vapp=self.portal_vapp) + return schema_name, schema + if self.autoload_schemas and (self.portal_env or self.portal_vapp): + autoloaded = {tab_name: schema + for tab_name, schema in pmap(fetch_schema, schema_names)} + return autoloaded + else: + return {} + + @classmethod + def fetch_schema(cls, schema_name: str, *, portal_env: Optional[str] = None, + portal_vapp: Optional[AbstractVirtualApp] = None): + def just_fetch_it(): + return get_schema(schema_name, portal_env=portal_env, portal_vapp=portal_vapp) + if cls.CACHE_SCHEMAS: + schema: Optional[AnyJsonData] = cls.SCHEMA_CACHE.get(schema_name) + if schema is None: + cls.SCHEMA_CACHE[schema_name] = schema = just_fetch_it() + return schema + else: + return just_fetch_it() + + @classmethod + def clear_schema_cache(cls): + for key in list(cls.SCHEMA_CACHE.keys()): # important to get the list of keys as a separate object first + cls.SCHEMA_CACHE.pop(key, None) + + +class ItemManagerMixin(SchemaAutoloadMixin, AbstractItemManager, BasicTableSetManager): + """ + This can add functionality to a reader such as an XlsxManager or a CsvManager in order to make its rows + get handled like Items instead of just flat table rows. + """ + + def __init__(self, filename: str, schemas: Optional[Dict[str, AnyJsonData]] = None, **kwargs): + super().__init__(filename=filename, **kwargs) + self.patch_prototypes_by_tab_name: Dict[str, Dict] = {} + self.parsed_headers_by_tab_name: Dict[str, ParsedHeaders] = {} + self.type_hints_by_tab_name: Dict[str, OptionalTypeHints] = {} + self._schemas = schemas + self._instaguid_context_table: Dict[str, str] = {} + + @property + def schemas(self): + schemas = self._schemas + if schemas is None: + self._schemas = schemas = self.fetch_relevant_schemas(self.tab_names) + return schemas + + def sheet_patch_prototype(self, tab_name: str) -> Dict: + return self.patch_prototypes_by_tab_name[tab_name] + + def sheet_parsed_headers(self, tab_name: str) -> ParsedHeaders: + return self.parsed_headers_by_tab_name[tab_name] + + def sheet_type_hints(self, tab_name: str) -> OptionalTypeHints: + return self.type_hints_by_tab_name[tab_name] + + class SheetState: + + def __init__(self, parsed_headers: ParsedHeaders, type_hints: OptionalTypeHints): + self.parsed_headers = parsed_headers + self.type_hints = type_hints + + def _compile_type_hints(self, tab_name: str): + parsed_headers = self.sheet_parsed_headers(tab_name) + schema = self.schemas.get(tab_name) + with deferred_problems() as note_problem: + for required_header in self._schema_required_headers(schema): + if required_header not in parsed_headers: + note_problem("Missing required header") + type_hints = [ItemTools.find_type_hint(parsed_header, schema) if schema else None + for parsed_header in parsed_headers] + self.type_hints_by_tab_name[tab_name] = type_hints + + @classmethod + def _schema_required_headers(cls, schema): + ignored(schema) + return [] # TODO: Make this compute a list of required headers (in parsed header form) + + def _compile_sheet_headers(self, tab_name: str): + headers = self.headers_by_tab_name[tab_name] + 
parsed_headers = ItemTools.parse_sheet_headers(headers) + self.parsed_headers_by_tab_name[tab_name] = parsed_headers + prototype = ItemTools.compute_patch_prototype(parsed_headers) + self.patch_prototypes_by_tab_name[tab_name] = prototype + + def _create_tab_processor_state(self, tab_name: str) -> SheetState: + super()._create_tab_processor_state(tab_name) + # This will create state that allows us to efficiently assign values in the right place on each row + # by setting up a prototype we can copy and then drop values into. + self._compile_sheet_headers(tab_name) + self._compile_type_hints(tab_name) + return self.SheetState(parsed_headers=self.sheet_parsed_headers(tab_name), + type_hints=self.sheet_type_hints(tab_name)) + + def _process_row(self, tab_name: str, state: SheetState, row_data: SheetRow) -> AnyJsonData: + parsed_headers = state.parsed_headers + type_hints = state.type_hints + patch_item = copy.deepcopy(self.sheet_patch_prototype(tab_name)) + for i, value in enumerate(row_data): + parsed_value = self.parse_cell_value(value) + type_hint = type_hints[i] + if type_hint: + parsed_value = type_hint.apply_hint(parsed_value) + ItemTools.set_path_value(patch_item, parsed_headers[i], parsed_value) + return patch_item + + def parse_cell_value(self, value: SheetCellValue) -> AnyJsonData: + return ItemTools.parse_item_value(value, context=self._instaguid_context_table) + + +@ITEM_MANAGER_REGISTRY.register() +class XlsxItemManager(ItemManagerMixin, XlsxManager): + """ + This layers item-style row processing functionality on an XLSX file. + """ + pass + + +class SingleTableMixin(AbstractTableSetManager): + + def __init__(self, filename: str, tab_name: Optional[str] = None, **kwargs): + self._tab_name = tab_name or ItemTools.infer_tab_name(filename) + super().__init__(filename=filename, **kwargs) + + @property + def tab_names(self) -> List[str]: + return [self._tab_name] + + +class InsertsManager(BasicTableSetManager): # ItemManagerMixin isn't really appropriate here + + ALLOWED_FILE_EXTENSIONS = [] + + def _parse_inserts_data(self, filename: str) -> AnyJsonData: + raise NotImplementedError(f"._parse_inserts_dataa(...) 
is not implemented for {self.__class__.__name__}.") # noQA + + def _load_inserts_data(self, filename: str) -> TabbedSheetData: + data: AnyJsonData = self._parse_inserts_data(filename) + tabbed_inserts: AnyJsonData = self._wrap_inserts_data(filename, data) + if (not isinstance(tabbed_inserts, dict) + or not all(isinstance(tab_name, str) for tab_name in tabbed_inserts.keys()) + or not all(isinstance(content, list) and all(isinstance(item, dict) for item in content) + for content in tabbed_inserts.values())): + raise ValueError(f"Data in {filename} is not of type TabbedSheetData (Dict[str, List[dict]]).") + tabbed_inserts: TabbedSheetData # we've just checked that + return tabbed_inserts + + @classmethod + def _wrap_inserts_data(cls, filename: str, data: AnyJsonData) -> AnyJsonData: + ignored(filename) + return data + + @property + def tab_names(self) -> List[str]: + return list(self.content_by_tab_name.keys()) + + def _get_reader_agent(self) -> Any: + return self + + def load_content(self) -> Dict[str, AnyJsonData]: + data = self._load_inserts_data(self.filename) + for tab_name, tab_content in data.items(): + self.content_by_tab_name[tab_name] = tab_content + if not tab_content: + self.headers_by_tab_name[tab_name] = [] + else: + self.headers_by_tab_name[tab_name] = list(tab_content[0].keys()) + return self.content_by_tab_name + + +class SimpleInsertsMixin(SingleTableMixin): + + def _wrap_inserts_data(self, filename: str, data: AnyJsonData) -> TabbedSheetData: + if (not isinstance(data, list) + or not all(isinstance(item, dict) for item in data)): + raise ValueError(f"Data in {filename} is not of type SheetData (List[dict]).") + return {self._tab_name: data} + + +class JsonInsertsMixin: + + @classmethod + def _parse_inserts_data(cls, filename: str) -> AnyJsonData: + return json.load(open_unicode_text_input_file_respecting_byte_order_mark(filename)) + + +@TABLE_SET_MANAGER_REGISTRY.register() +class TabbedJsonInsertsManager(JsonInsertsMixin, InsertsManager): + + ALLOWED_FILE_EXTENSIONS = [".tabs.json"] # If you want them all in one family, use this extension + + +@TABLE_SET_MANAGER_REGISTRY.register() +class SimpleJsonInsertsManager(SimpleInsertsMixin, JsonInsertsMixin, InsertsManager): + + ALLOWED_FILE_EXTENSIONS = [".json"] + + +class YamlInsertsMixin: + + def _parse_inserts_data(self, filename) -> AnyJsonData: + return yaml.safe_load(open_unicode_text_input_file_respecting_byte_order_mark(filename)) + + +@TABLE_SET_MANAGER_REGISTRY.register() +class TabbedYamlInsertsManager(YamlInsertsMixin, InsertsManager): + + ALLOWED_FILE_EXTENSIONS = [".tabs.yaml"] + + def _parse_inserts_data(self, filename) -> AnyJsonData: + return yaml.safe_load(open_unicode_text_input_file_respecting_byte_order_mark(filename)) + + +@TABLE_SET_MANAGER_REGISTRY.register() +class SimpleYamlInsertsManager(SimpleInsertsMixin, YamlInsertsMixin, InsertsManager): + + ALLOWED_FILE_EXTENSIONS = [".yaml"] + + +class InsertsItemMixin(AbstractItemManager): # ItemManagerMixin isn't really appropriate here + """ + This class is used for inserts directories and other JSON-like data that will be literally used as an Item + without semantic pre-processing. In other words, these classes will not be pre-checked for semantic correctness + but instead assumed to have been checked by other means. + """ + + AUTOLOAD_SCHEMAS_DEFAULT = False # Has no effect, but someone might inspect the value. 
+ + def __init__(self, filename: str, *, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None, + portal_vapp: Optional[AbstractVirtualApp] = None, schemas: Optional[Dict[str, AnyJsonData]] = None, + **kwargs): + ignored(portal_env, portal_vapp) # Would only be used if autoload_schemas was true, and we don't allow that. + if schemas not in [None, {}]: + raise ValueError(f"{self.__class__.__name__} does not allow schemas={schemas!r}.") + if autoload_schemas not in [None, False]: + raise ValueError(f"{self.__class__.__name__} does not allow autoload_schemas={autoload_schemas!r}.") + super().__init__(filename=filename, **kwargs) + + +@ITEM_MANAGER_REGISTRY.register() +class TabbedJsonInsertsItemManager(InsertsItemMixin, TabbedJsonInsertsManager): + pass + + +@ITEM_MANAGER_REGISTRY.register() +class SimpleJsonInsertsItemManager(InsertsItemMixin, SimpleJsonInsertsManager): + pass + + +@ITEM_MANAGER_REGISTRY.register() +class TabbedYamlInsertsItemManager(InsertsItemMixin, TabbedYamlInsertsManager): + pass + + +@ITEM_MANAGER_REGISTRY.register() +class SimpleYamlInsertsItemManager(InsertsItemMixin, SimpleYamlInsertsManager): + pass + + +@TABLE_SET_MANAGER_REGISTRY.register() +class SimpleJsonLinesInsertsManager(SimpleInsertsMixin, InsertsManager): + + ALLOWED_FILE_EXTENSIONS = [".jsonl"] + + def _parse_inserts_data(self, filename: str) -> AnyJsonData: + return [line for line in JsonLinesReader(open_unicode_text_input_file_respecting_byte_order_mark(filename))] + + +@ITEM_MANAGER_REGISTRY.register() +class SimpleJsonLinesInsertsItemManager(InsertsItemMixin, SimpleJsonLinesInsertsManager): + pass + + +@TABLE_SET_MANAGER_REGISTRY.register(regexp="^(.*/)?(|[^/]*[-_])inserts/?$") +class InsertsDirectoryManager(InsertsManager): + + ALLOWED_FILE_EXTENSIONS = [] + + def _parse_inserts_data(self, filename: str) -> AnyJsonData: + if not os.path.isdir(filename): + raise LoadArgumentsError(f"{filename} is not the name of an inserts directory.") + tab_files = glob.glob(os.path.join(filename, "*.json")) + data = {} + for tab_file in tab_files: + tab_content = json.load(open_unicode_text_input_file_respecting_byte_order_mark(tab_file)) + # Here we don't use os.path.splitext because we want to split on the first dot. + # e.g., for foo.bar.baz, return just foo + # this allows names like ExperimentSet.tab.json that might need to use multi-dot suffixes + # for things unrelated to the tab name. + tab_name = os.path.basename(tab_file).split('.')[0] + data[tab_name] = tab_content + return data + + +@ITEM_MANAGER_REGISTRY.register(regexp="^(.*/)?(|[^/]*[-_])inserts/?$") +class InsertsDirectoryItemManager(InsertsItemMixin, InsertsDirectoryManager): + pass + + +@TABLE_SET_MANAGER_REGISTRY.register() +class CsvManager(SingleTableMixin, SemanticTableSetManager): + """ + This implements the mechanism to get a series of rows out of the sheet in a csv file, + returning a result that still looks like there could have been multiple tabs. 
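
    A rough usage sketch, using one of the sample files added under test/data_files/
    (the single tab name is inferred from the filename):

        CsvManager.load("test/data_files/sample_items2.csv")
        # => {"sample_items2": [{"name": "john", "sex": "M", "member": "false"}, ...]}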
+ """ + + ALLOWED_FILE_EXTENSIONS = ['.csv'] + + def __init__(self, filename: str, escaping: Optional[bool] = None, **kwargs): + super().__init__(filename=filename, **kwargs) + self.escaping: bool = escaping or False + + def _get_reader_agent(self) -> CsvReader: + return self._get_reader_agent_for_filename(self.filename) + + @classmethod + def _get_reader_agent_for_filename(cls, filename) -> CsvReader: + return csv.reader(open_unicode_text_input_file_respecting_byte_order_mark(filename)) + + PAD_TRAILING_TABS = True + + def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]: + headers = self.tab_headers(tab_name) + n_headers = len(headers) + for row_data in self.reader_agent: + if self.PAD_TRAILING_TABS: + row_data = pad_to(n_headers, row_data, padding='') + yield row_data + + def _create_tab_processor_state(self, tab_name: str) -> Headers: + headers: Optional[Headers] = self.headers_by_tab_name.get(tab_name) + if headers is None: + self.headers_by_tab_name[tab_name] = headers = self.reader_agent.__next__() + return headers + + @classmethod + def _escape_cell_text(cls, cell_text): + if '\\' in cell_text: + return expand_string_escape_sequences(cell_text) + else: + return cell_text + + def _process_row(self, tab_name: str, headers: Headers, row_data: SheetRow) -> AnyJsonData: + ignored(tab_name) + if self.escaping: + return {headers[i]: self.parse_cell_value(self._escape_cell_text(cell_text)) + for i, cell_text in enumerate(row_data)} + else: + return {headers[i]: self.parse_cell_value(cell_text) + for i, cell_text in enumerate(row_data)} + + +@ITEM_MANAGER_REGISTRY.register() +class CsvItemManager(ItemManagerMixin, CsvManager): + """ + This layers item-style row processing functionality on a CSV file. + """ + pass + + +@TABLE_SET_MANAGER_REGISTRY.register() +class TsvManager(CsvManager): + """ + TSV files are just CSV files with tabs instead of commas as separators. + (We do not presently handle any escaping of strange characters. May need to add handling for backslash escaping.) + """ + ALLOWED_FILE_EXTENSIONS = ['.tsv', '.tsv.txt'] + + @classmethod + def _get_reader_agent_for_filename(cls, filename) -> CsvReader: + return csv.reader(open_unicode_text_input_file_respecting_byte_order_mark(filename), delimiter='\t') + + +@ITEM_MANAGER_REGISTRY.register() +class TsvItemManager(ItemManagerMixin, TsvManager): + """ + This layers item-style row processing functionality on a TSV file. + """ + pass + + +def _do_shell_command(command, cwd=None): + # This might need to be more elaborate, but hopefully it will do for now. -kmp 11-Sep-2023 + subprocess.check_output(command, cwd=cwd) + + +@contextlib.contextmanager +def maybe_unpack(filename): # Maybe move to another module + """ + If necessary, unpack a file that is zipped and/or tarred, yielding the name of the file (unpacked or not). + """ + unpackables = ['.tar.gz', '.tar', '.tgz', '.gz', '.zip'] + ext = None + for unpackable in unpackables: + if filename.endswith(unpackable): + ext = unpackable + break + if not ext: + yield filename + return + if not os.path.exists(filename): + # We don't bother to raise this error if we're not planning to do any unpacking. + # The caller can decide if/when such errors are needed in that case. + # But if we are going to have to move bits around, they'll need to actually be there. 
+ # -kmp 12-Sep-2023 + raise ValueError(f"The file {filename!r} does not exist.") + target_base_part = remove_suffix(ext, os.path.basename(filename), required=True) + target_ext = '.tar.gz' if ext == '.tgz' else ext + with TemporaryDirectory() as temp_dir: + temp_base = os.path.join(temp_dir, target_base_part) + temp_filename = temp_base + target_ext + _do_shell_command(['cp', filename, temp_filename]) + if temp_filename.endswith('.gz'): + _do_shell_command(['gunzip', temp_filename], cwd=temp_dir) + temp_filename = remove_suffix('.gz', temp_filename) + elif temp_filename.endswith(".zip"): + _do_shell_command(['unzip', temp_filename], cwd=temp_dir) + temp_filename = remove_suffix('.zip', temp_filename) + if temp_filename.endswith(".tar"): + _do_shell_command(['tar', '-xf', temp_filename], cwd=temp_dir) + tar_file = temp_filename + temp_filename = remove_suffix(".tar", temp_filename, required=True) + if not os.path.isdir(temp_filename): + raise Exception(f"{tar_file} didn't unpack to a dir: {temp_filename}") + # print(f"Unpacked {filename} to {temp_filename}") + yield temp_filename + + +class TableSetManager(AbstractTableSetManager): + """ + This class will open a .xlsx or .csv file and load its content in our standard format. + (See more detailed description in AbstractTableManager.) + """ + + @classmethod + def create_implementation_manager(cls, filename: str, **kwargs) -> AbstractTableSetManager: + reader_agent_class = TABLE_SET_MANAGER_REGISTRY.manager_for_filename(filename) + if issubclass(reader_agent_class, AbstractItemManager): + raise ValueError(f"TableSetManager unexpectedly found reader agent class {reader_agent_class}.") + reader_agent = reader_agent_class(filename=filename, **kwargs) + return reader_agent + + @classmethod + def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None, + **kwargs) -> TabbedSheetData: + """ + Given a filename and various options + """ + with maybe_unpack(filename) as filename: + manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping, + **kwargs) + return manager.load_content() + + +class ItemManager(AbstractTableSetManager): + """ + This class will open a .xlsx or .csv file and load its content in our standard format. + (See more detailed description in AbstractTableManager.) + """ + + @classmethod + def create_implementation_manager(cls, filename: str, **kwargs) -> AbstractItemManager: + reader_agent_class: Type[AbstractTableSetManager] = ITEM_MANAGER_REGISTRY.manager_for_filename(filename) + if not issubclass(reader_agent_class, AbstractItemManager): + raise ValueError(f"ItemManager unexpectedly found reader agent class {reader_agent_class}.") + reader_agent_class: Type[AbstractItemManager] + reader_agent = reader_agent_class(filename=filename, **kwargs) + return reader_agent + + @classmethod + def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None, + schemas: Optional[Dict] = None, autoload_schemas: Optional[bool] = None, + portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None, + **kwargs) -> TabbedSheetData: + """ + Given a filename and various options, loads the items associated with that filename. + + :param filename: The name of the file to load. + :param tab_name: For files that lack multiple tabs (such as .csv or .tsv), + the tab name to associate with the data. + :param escaping: Whether to perform escape processing on backslashes. 
+ :param schemas: A set of schemas to use instead of trying to load them. + :param autoload_schemas: Whether to try autoloading schemas. + :param portal_env: A portal to consult to find schemas (usually if calling from the outside of a portal). + :param portal_vapp: A vapp to use (usually if calling from within a portal). + """ + + with maybe_unpack(filename) as filename: + + manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping, + schemas=schemas, autoload_schemas=autoload_schemas, + portal_env=portal_env, portal_vapp=portal_vapp, + **kwargs) + return manager.load_content() + + +load_table_set = TableSetManager.load +load_items = ItemManager.load diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index f15307d0e..8481da6a7 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -281,6 +281,13 @@ secrets_utils :members: +sheet_utils +^^^^^^^^^^^ + +.. automodule:: dcicutils.sheet_utils + :members: + + snapshot_utils ^^^^^^^^^^^^^^ diff --git a/poetry.lock b/poetry.lock index d7e77523c..95670b506 100644 --- a/poetry.lock +++ b/poetry.lock @@ -489,6 +489,18 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "chardet" +version = "5.2.0" +description = "Universal encoding detector for Python 3" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + [[package]] name = "charset-normalizer" version = "3.2.0" @@ -752,6 +764,18 @@ develop = ["black", "coverage", "jinja2", "mock", "pytest", "pytest-cov", "pyyam docs = ["sphinx (<1.7)", "sphinx-rtd-theme"] requests = ["requests (>=2.4.0,<3.0.0)"] +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + [[package]] name = "exceptiongroup" version = "1.1.2" @@ -911,6 +935,21 @@ files = [ [package.dependencies] psutil = {version = ">=4.0.0", markers = "sys_platform != \"cygwin\""} +[[package]] +name = "openpyxl" +version = "3.1.2" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, + {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "opensearch-py" version = "2.3.0" @@ -1594,4 +1633,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.7,<3.10" -content-hash = "b8d6612bb28cfb9da79306a82b2ac35a20678e1f62ef86c93b8af3c3d1ed798e" +content-hash = "eb629a04469e24b917d9525dd06dac72f2014cc9ede879946909929f5c09b9fd" diff --git a/pyproject.toml b/pyproject.toml index 65dba0353..846624504 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ 
[tool.poetry] name = "dcicutils" -version = "7.11.0" +version = "7.11.0.1b9" # to become "7.12.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -37,28 +37,31 @@ classifiers = [ [tool.poetry.dependencies] python = ">=3.7,<3.10" + boto3 = "^1.17.39" botocore = "^1.20.39" # The DCIC portals (cgap-portal and fourfront) are very particular about which ElasticSearch version. # This value is intentionally pinned and must not be changed casually. elasticsearch = "7.13.4" aws-requests-auth = ">=0.4.2,<1" +chardet = "^5.2.0" docker = "^4.4.4" gitpython = "^3.1.2" +openpyxl = "^3.1.2" +opensearch-py = "^2.0.1" +pyOpenSSL = "^23.1.1" +PyJWT = "^2.6.0" pytz = ">=2020.4" PyYAML = ">=5.1,<5.5" +redis = "^4.5.1" requests = "^2.21.0" rfc3986 = "^1.4.0" structlog = "^19.2.0" toml = ">=0.10.1,<1" +tqdm = "^4.65.0" typing-extensions = ">=3.8" # Fourfront uses 3.8 urllib3 = "^1.26.6" webtest = "^2.0.34" -opensearch-py = "^2.0.1" -redis = "^4.5.1" -pyOpenSSL = "^23.1.1" -PyJWT = "^2.6.0" -tqdm = "^4.65.0" [tool.poetry.dev-dependencies] diff --git a/test/data_files/escaping-false.json b/test/data_files/escaping-false.json new file mode 100644 index 000000000..84ab06993 --- /dev/null +++ b/test/data_files/escaping-false.json @@ -0,0 +1,67 @@ +{ + "escaping": [ + { + "name": "backslash", + "unquoted": "\\\\", + "doublequoted": "\\\\", + "singlequoted": "'\\\\'", + "overflow": null + }, + { + "name": "formfeed", + "unquoted": "\\f", + "doublequoted": "\\f", + "singlequoted": "'\\f'", + "overflow": null + }, + { + "name": "newline", + "unquoted": "\\n", + "doublequoted": "\\n", + "singlequoted": "'\\n'", + "overflow": null + }, + { + "name": "return", + "unquoted": "\\r", + "doublequoted": "\\r", + "singlequoted": "'\\r'", + "overflow": null + }, + { + "name": "tab", + "unquoted": "\\t", + "doublequoted": "\\t", + "singlequoted": "'\\t'", + "overflow": null + }, + { + "name": "misc", + "unquoted": "\\m", + "doublequoted": "\\m", + "singlequoted": "'\\m'", + "overflow": null + }, + { + "name": "quote1", + "unquoted": "N/A", + "doublequoted": "x,,z", + "singlequoted": "N/A", + "overflow": null + }, + { + "name": "quotelong", + "unquoted": "N/A", + "doublequoted": "x,,z,N/A\nquotlongcontinued,", + "singlequoted": "N/A", + "overflow": null + }, + { + "name": "comma", + "unquoted": "N/A", + "doublequoted": ",", + "singlequoted": "'", + "overflow": "'" + } + ] +} diff --git a/test/data_files/escaping-true.json b/test/data_files/escaping-true.json new file mode 100644 index 000000000..5d6c837a6 --- /dev/null +++ b/test/data_files/escaping-true.json @@ -0,0 +1,67 @@ +{ + "escaping": [ + { + "name": "backslash", + "unquoted": "\\", + "doublequoted": "\\", + "singlequoted": "'\\'", + "overflow": null + }, + { + "name": "formfeed", + "unquoted": "\\f", + "doublequoted": "\\f", + "singlequoted": "'\\f'", + "overflow": null + }, + { + "name": "newline", + "unquoted": "\n", + "doublequoted": "\n", + "singlequoted": "'\n'", + "overflow": null + }, + { + "name": "return", + "unquoted": "\r", + "doublequoted": "\r", + "singlequoted": "'\r'", + "overflow": null + }, + { + "name": "tab", + "unquoted": "\t", + "doublequoted": "\t", + "singlequoted": "'\t'", + "overflow": null + }, + { + "name": "misc", + "unquoted": "\\m", + "doublequoted": "\\m", + "singlequoted": "'\\m'", + "overflow": null + }, + { + "name": "quote1", + "unquoted": "N/A", + "doublequoted": "x,,z", + "singlequoted": "N/A", + "overflow": null + }, + { + "name": 
"quotelong", + "unquoted": "N/A", + "doublequoted": "x,,z,N/A\nquotlongcontinued,", + "singlequoted": "N/A", + "overflow": null + }, + { + "name": "comma", + "unquoted": "N/A", + "doublequoted": ",", + "singlequoted": "'", + "overflow": "'" + } + ] +} diff --git a/test/data_files/escaping.csv b/test/data_files/escaping.csv new file mode 100644 index 000000000..ec04defbd --- /dev/null +++ b/test/data_files/escaping.csv @@ -0,0 +1,11 @@ +name,unquoted,doublequoted,singlequoted,overflow +backslash,\\,"\\",'\\' +formfeed,\f,"\f",'\f' +newline,\n,"\n",'\n' +return,\r,"\r",'\r' +tab,\t,"\t",'\t' +misc,\m,"\m",'\m' +quote1,N/A,"x,,z",N/A +quotelong,N/A,"x,,z,N/A +quotlongcontinued,",N/A +comma,N/A,",",',' diff --git a/test/data_files/sample_items.tabs.json b/test/data_files/sample_items.tabs.json new file mode 100644 index 000000000..f972245f0 --- /dev/null +++ b/test/data_files/sample_items.tabs.json @@ -0,0 +1,74 @@ +{ + "Sheet1": [ + { + "x": 1, + "y": { + "a": 1, + "z": 1 + } + }, + { + "x": 1, + "y": { + "a": 2, + "z": 3 + } + }, + { + "x": "alpha", + "y": { + "a": "beta", + "z": [ + "gamma", + "delta" + ] + } + } + ], + "Sheet2": [ + { + "name": "bill", + "age": 23, + "mother": { + "name": "mary", + "age": 58 + }, + "father": { + "name": "fred", + "age": 63 + }, + "friends": [ + { + "name": "sam", + "age": 22 + }, + { + "name": "arthur", + "age": 19 + } + ] + }, + { + "name": "joe", + "age": 9, + "mother": { + "name": "estrella", + "age": 35 + }, + "father": { + "name": "anthony", + "age": 34 + }, + "friends": [ + { + "name": "anders", + "age": 9 + }, + { + "name": null, + "age": null + } + ] + } + ] +} diff --git a/test/data_files/sample_items.tabs.yaml b/test/data_files/sample_items.tabs.yaml new file mode 100644 index 000000000..f98d9259b --- /dev/null +++ b/test/data_files/sample_items.tabs.yaml @@ -0,0 +1,42 @@ +Sheet1: +- x: 1 + y: + a: 1 + z: 1 +- x: 1 + y: + a: 2 + z: 3 +- x: alpha + y: + a: beta + z: + - gamma + - delta +Sheet2: +- age: 23 + father: + age: 63 + name: fred + friends: + - age: 22 + name: sam + - age: 19 + name: arthur + mother: + age: 58 + name: mary + name: bill +- age: 9 + father: + age: 34 + name: anthony + friends: + - age: 9 + name: anders + - age: null + name: null + mother: + age: 35 + name: estrella + name: joe diff --git a/test/data_files/sample_items.xlsx b/test/data_files/sample_items.xlsx new file mode 100644 index 000000000..19ca2acc8 Binary files /dev/null and b/test/data_files/sample_items.xlsx differ diff --git a/test/data_files/sample_items2.csv b/test/data_files/sample_items2.csv new file mode 100644 index 000000000..2e32bf426 --- /dev/null +++ b/test/data_files/sample_items2.csv @@ -0,0 +1,5 @@ +name,sex,member +john,M,false +juan,male,true +igor,unknown, +mary,Female,t diff --git a/test/data_files/sample_items2.json b/test/data_files/sample_items2.json new file mode 100644 index 000000000..7e084f908 --- /dev/null +++ b/test/data_files/sample_items2.json @@ -0,0 +1,6 @@ +[ + {"name": "john", "sex": "Male", "member": false}, + {"name": "juan", "sex": "Male", "member": true}, + {"name": "igor", "sex": "unknown", "member": null}, + {"name": "mary", "sex": "Female", "member": true} +] diff --git a/test/data_files/sample_items3.csv b/test/data_files/sample_items3.csv new file mode 100644 index 000000000..ee2d61b61 --- /dev/null +++ b/test/data_files/sample_items3.csv @@ -0,0 +1,5 @@ +name,sex,uuid,father,mother,parents,children +John,Male,#john,#igor,#mary,, +Juan,Male,#juan,,,#igor|#mary, +Igor,Male,#igor,,,,#john| +Mary,Female,#mary,,,,#john| diff 
--git a/test/data_files/sample_items_for_real_schemas.csv b/test/data_files/sample_items_for_real_schemas.csv new file mode 100644 index 000000000..29af47792 --- /dev/null +++ b/test/data_files/sample_items_for_real_schemas.csv @@ -0,0 +1,3 @@ +accession,fragment_size_selection_method +foo,spri +bar,blue diff --git a/test/data_files/sample_items_sheet2.csv b/test/data_files/sample_items_sheet2.csv new file mode 100644 index 000000000..95567c42a --- /dev/null +++ b/test/data_files/sample_items_sheet2.csv @@ -0,0 +1,3 @@ +name,age,mother.name,mother.age,father.name,father.age,friends#0.name,friends#0.age,friends#1.name,friends#1.age +bill,23,mary,58,fred,63,sam,22,arthur,19 +joe,9,estrella,35,anthony,34,anders,9,, diff --git a/test/data_files/sample_items_sheet2.tsv b/test/data_files/sample_items_sheet2.tsv new file mode 100644 index 000000000..e862bf36d --- /dev/null +++ b/test/data_files/sample_items_sheet2.tsv @@ -0,0 +1,3 @@ +name age mother.name mother.age father.name father.age friends#0.name friends#0.age friends#1.name friends#1.age +bill 23 mary 58 fred 63 sam 22 arthur 19 +joe 9 estrella 35 anthony 34 anders 9 diff --git a/test/data_files/sample_items_sheet2a.jsonl b/test/data_files/sample_items_sheet2a.jsonl new file mode 100644 index 000000000..a0e96e83e --- /dev/null +++ b/test/data_files/sample_items_sheet2a.jsonl @@ -0,0 +1,3 @@ +["name", "age", "mother.name", "mother.age", "father.name", "father.age", "friends#0.name", "friends#0.age", "friends#1.name", "friends#1.age"] +["bill", 23, "mary", 58, "fred", 63, "sam", 22, "arthur", 19] +["joe", 9, "estrella", 35, "anthony", 34, "anders", 9] diff --git a/test/data_files/sample_items_sheet2b.jsonl b/test/data_files/sample_items_sheet2b.jsonl new file mode 100644 index 000000000..c044bfe18 --- /dev/null +++ b/test/data_files/sample_items_sheet2b.jsonl @@ -0,0 +1,2 @@ +{"name": "bill", "age": 23, "mother.name": "mary", "mother.age": 58, "father.name": "fred", "father.age": 63, "friends#0.name": "sam", "friends#0.age": 22, "friends#1.name": "arthur", "friends#1.age": 19} +{"name": "joe", "age": 9, "mother.name": "estrella", "mother.age": 35, "father.name": "anthony", "father.age": 34, "friends#0.name": "anders", "friends#0.age": 9} diff --git a/test/data_files/sample_items_sheet2b1.jsonl b/test/data_files/sample_items_sheet2b1.jsonl new file mode 100644 index 000000000..8f5c3345b --- /dev/null +++ b/test/data_files/sample_items_sheet2b1.jsonl @@ -0,0 +1,3 @@ +["name", "age", "mother", "father", "friends"] +["bill", 23, {"name": "mary", "age": 58}, {"name": "fred", "age": 63}, [{"name": "sam", "age": 22}, {"name": "arthur", "age": 19}]] +["joe", 9, {"name": "estrella", "age": 35}, {"name": "anthony", "age": 34}, [{"name": "anders", "age": 9}]] diff --git a/test/data_files/sample_items_sheet2b2.jsonl b/test/data_files/sample_items_sheet2b2.jsonl new file mode 100644 index 000000000..1ef8d9f11 --- /dev/null +++ b/test/data_files/sample_items_sheet2b2.jsonl @@ -0,0 +1,2 @@ +{"name": "bill", "age": 23, "mother": {"name": "mary", "age": 58}, "father": {"name": "fred", "age": 63}, "friends": [{"name": "sam", "age": 22}, {"name": "arthur", "age": 19}]} +{"name": "joe", "age": 9, "mother": {"name": "estrella", "age": 35}, "father": {"name": "anthony", "age": 34}, "friends": [{"name": "anders", "age": 9}]} \ No newline at end of file diff --git a/test/test_misc_utils.py b/test/test_misc_utils.py index a07c6d234..0017bd16e 100644 --- a/test/test_misc_utils.py +++ b/test/test_misc_utils.py @@ -17,7 +17,7 @@ from dcicutils.misc_utils import ( 
PRINT, ignored, ignorable, filtered_warnings, get_setting_from_context, TestApp, VirtualApp, VirtualAppError, _VirtualAppHelper, # noqa - yes, this is a protected member, but we still want to test it - Retry, apply_dict_overrides, utc_today_str, RateManager, environ_bool, str_to_bool, + Retry, apply_dict_overrides, utc_today_str, RateManager, environ_bool, str_to_bool, is_uuid, LockoutManager, check_true, remove_prefix, remove_suffix, full_class_name, full_object_name, constantly, keyword_as_title, file_contents, CachedField, camel_case_to_snake_case, snake_case_to_camel_case, make_counter, CustomizableProperty, UncustomizedInstance, getattr_customized, copy_json, url_path_join, @@ -30,12 +30,13 @@ classproperty, classproperty_cached, classproperty_cached_each_subclass, Singleton, NamedObject, obsolete, ObsoleteError, CycleError, TopologicalSorter, keys_and_values_to_dict, dict_to_keys_and_values, is_c4_arn, deduplicate_list, chunked, parse_in_radix, format_in_radix, managed_property, future_datetime, - MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked, to_camel_case, + MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked, to_camel_case, pad_to, JsonLinesReader, ) from dcicutils.qa_utils import ( Occasionally, ControlledTime, override_environ as qa_override_environ, MockFileSystem, printed_output, raises_regexp, MockId, MockLog, input_series, ) +from typing import Any, Dict, List from unittest import mock @@ -1094,7 +1095,7 @@ def test_lockout_manager(): protected_action = "simulated action" - # The function now() will get us the time. This assure us that binding datetime.datetime + # The function now() will get us the time. This assures us that binding datetime.datetime # will not be affecting us. now = datetime_module.datetime.now @@ -1197,7 +1198,7 @@ def test_rate_manager(): # PyCharm thinks this is not used. -kmp 26-Jul-2020 # r = RateManager(interval_seconds=60, safety_seconds=1, allowed_attempts=4) - # The function now() will get us the time. This assure us that binding datetime.datetime + # The function now() will get us the time. This assures us that binding datetime.datetime # will not be affecting us. now = datetime_module.datetime.now @@ -1885,7 +1886,7 @@ def test_cached_field_mocked(self): assert field.get() == val5 assert field.get() == val5 - dt.sleep(self.DEFAULT_TIMEOUT) # Fast forward to where we're going to refill again + dt.sleep(self.DEFAULT_TIMEOUT) # Fast-forward to where we're going to refill again val6 = field.get() assert val6 != val5 @@ -2007,6 +2008,33 @@ def test_capitalize1(token, expected): assert capitalize1(token) == expected +def test_is_uuid(): + + good_uuid = str(uuid.uuid4()) + bad_uuid = '123-456-789' + + assert not is_uuid("12345678abcd678123456781234") # wrong length. 
expecting 32 digits + assert not is_uuid("12-3456781234abcd1234567812345678") # hyphens only allowed at multiple of four boundaries + assert not is_uuid("12-3456781234abcd1234567-812345678") # ditto + + assert is_uuid("123456781234abcd1234567812345678") + assert is_uuid("12345678abcd56781234ABCD12345678") + assert is_uuid("1234-5678abcd56781234ABCD12345678") + assert is_uuid("12345678abcd-56781234ABCD1234-5678") + assert is_uuid("1234-5678-abcd56781234ABCD-12345678") + assert is_uuid("1234-5678-abcd-56781234ABCD12345678") + assert is_uuid("1234-5678-abcd-5678-1234-ABCD-1234-5678") + assert is_uuid("1234-5678-abcd-5678-1234-ABCD-1234-5678-") # we don't really want this, but we tolerate it + + assert is_uuid("{12345678abcd56781234ABCD12345678}") # braces are optionally allowed + assert is_uuid("{1234-5678-abcd5678-1234-ABCD-1234-5678}") # ditto + assert is_uuid("1234-5678-abcd5678-1234-ABCD-1234-5678}") # ditto + assert is_uuid("{1234-5678-abcd5678-1234-ABCD-1234-5678-}") # balanced braces trailing hyphen tolerated + + assert is_uuid(good_uuid) is True + assert is_uuid(bad_uuid) is False + + def test_string_list(): assert string_list('') == [] @@ -2050,7 +2078,7 @@ def test_copy_json(obj): def test_copy_json_side_effects(): - obj = {'foo': [1, 2, 3], 'bar': [{'x': 4, 'y': 5}, {'x': 2, 'y': 7}]} + obj: Dict[str, Any] = {'foo': [1, 2, 3], 'bar': [{'x': 4, 'y': 5}, {'x': 2, 'y': 7}]} obj_copy = copy_json(obj) obj['foo'][1] = 20 obj['bar'][0]['y'] = 500 # NoQA - PyCharm wrongly fears there are type errors in this line, that it will fail. @@ -2904,7 +2932,7 @@ class SubClock(Clock): assert str(exc.value) == ("The subclasses= argument to classproperty_cached.reset must not be False" " because classproperty_cached does not use per-subclass caches.") - # This will clear SubClock cache, bu that's shared with the Clock cache, so both will clear. + # This will clear SubClock cache, but that's shared with the Clock cache, so both will clear. 
assert classproperty_cached.reset(instance_class=SubClock, attribute_name='sample') is True c_t5 = Clock.sample # This should recompute Clock.sample cache, which is shared by SubCLock @@ -3258,7 +3286,7 @@ def test_deduplicate_list(): xlen = len(x) assert sorted(deduplicate_list(x)) == ['a', 'b', 'c'] - assert len(x) == xlen # make sure there was no side-effect to the original list + assert len(x) == xlen # make sure there was no side effect to the original list y = ['a'] y0 = deduplicate_list(y) @@ -3468,3 +3496,85 @@ def test_map_chunked(): res = map_chunked(lambda x: ''.join(x), "abcdefghij", chunk_size=4, reduce=lambda x: '.'.join(x)) assert res == 'abcd.efgh.ij' + + +def test_pad_to(): + + assert pad_to(5, []) == [None, None, None, None, None] + assert pad_to(5, [], padding='foo') == ['foo', 'foo', 'foo', 'foo', 'foo'] + + assert pad_to(5, ['x']) == ['x', None, None, None, None] + assert pad_to(5, ['x'], padding='foo') == ['x', 'foo', 'foo', 'foo', 'foo'] + + six_elements = ['a', 'b', 'c', 'd', 'e', 'f'] + + assert pad_to(5, six_elements) == six_elements + assert pad_to(5, six_elements, padding='foo') + + +def test_json_lines_reader_dicts(): + + print() # start on a fresh line + + mfs = MockFileSystem() + + with mfs.mock_exists_open_remove(): + + item1 = {"foo": 1, "bar": 2} + item2 = {"foo": 3, "bar": 4} + + item1_str = json.dumps(item1) + item2_str = json.dumps(item2) + + sample_lines = [item1_str, item2_str] + + sample_filename = "somefile.jsonl" + + with io.open(sample_filename, 'w') as fp: + for line in sample_lines: + print(line, file=fp) + + for file, content in mfs.files.items(): + print("=" * 20, file, "=" * 20) + print(content.decode('utf-8')) + print("=" * 80) + + with io.open(sample_filename) as fp: + assert [line for line in JsonLinesReader(fp)] == [item1, item2] + + +def test_json_lines_reader_lists(): + + print() # start on a fresh line + + mfs = MockFileSystem() + + with mfs.mock_exists_open_remove(): + + item1 = {"foo": 1, "bar": 2} + item2 = {"foo": 3, "bar": 4} + + headers: List[str] = list(item1.keys()) + + item1_str = json.dumps([item1[header] for header in headers]) + item2_str = json.dumps([item2[header] for header in headers]) + + sample_lines = [item1_str, item2_str] + + sample_filename = "somefile.jsonl" + + with io.open(sample_filename, 'w') as fp: + + print(json.dumps(headers), file=fp) + for line in sample_lines: + print(line, file=fp) + + for file, content in mfs.files.items(): + print("=" * 20, file, "=" * 20) + print(content.decode('utf-8')) + print("=" * 80) + + with io.open(sample_filename) as fp: + parsed = [line for line in JsonLinesReader(fp)] + expected = [item1, item2] + assert parsed == expected diff --git a/test/test_sheet_utils.py b/test/test_sheet_utils.py new file mode 100644 index 000000000..ed312bf21 --- /dev/null +++ b/test/test_sheet_utils.py @@ -0,0 +1,935 @@ +import contextlib +import json +import os +import pytest + +from collections import namedtuple +from dcicutils import sheet_utils as sheet_utils_module, ff_utils as ff_utils_module +from dcicutils.common import AnyJsonData +from dcicutils.env_utils import EnvUtils, public_env_name +from dcicutils.misc_utils import is_uuid, local_attrs, NamedObject, AbstractVirtualApp +from dcicutils.qa_utils import printed_output, mock_not_called, MockResponse +from dcicutils.sheet_utils import ( + # High-level interfaces + ItemManager, load_items, TABLE_SET_MANAGER_REGISTRY, ITEM_MANAGER_REGISTRY, + # Low-level implementation + BasicTableSetManager, SchemaAutoloadMixin, + ItemTools, XlsxManager, 
XlsxItemManager, + CsvManager, CsvItemManager, TsvManager, TsvItemManager, + # TypeHint, EnumHint, + BoolHint, + # Error handling + LoadFailure, LoadArgumentsError, LoadTableError, + # Utilities + prefer_number, unwanted_kwargs, expand_string_escape_sequences, +) +from typing import Dict, Optional +from unittest import mock +from .conftest_settings import TEST_DIR +from .helpers import using_fresh_ff_state_for_testing + + +TEST_SHEET_1 = 'Sheet1' + + +def test_load_failure(): + + sample_message = "This is a test." + + load_failure_object = LoadFailure(sample_message) + assert isinstance(load_failure_object, LoadFailure) + assert str(load_failure_object) == sample_message + + +def test_load_argument_error(): + + sample_message = "This is a test." + + load_failure_object = LoadArgumentsError(sample_message) + assert isinstance(load_failure_object, LoadArgumentsError) + assert str(load_failure_object) == sample_message + + +def test_load_table_error(): + + sample_message = "This is a test." + + load_failure_object = LoadTableError(sample_message) + assert isinstance(load_failure_object, LoadTableError) + assert str(load_failure_object) == sample_message + + +def test_prefer_number(): + + assert prefer_number('') is None + assert prefer_number('123') == 123 + assert prefer_number('3.14') == 3.14 + assert prefer_number('abc') == 'abc' + assert prefer_number('123i') == '123i' + assert prefer_number('123e') == '123e' + assert prefer_number('123e0') == 123.0 + assert prefer_number('123e1') == 1230.0 + assert prefer_number('123e+1') == 1230.0 + assert prefer_number('123e-1') == 12.3 + + +def test_expand_string_escape_sequences(): + + assert expand_string_escape_sequences("foo") == "foo" + assert expand_string_escape_sequences("foo\\tbar") == "foo\tbar" + assert expand_string_escape_sequences("\\r\\t\\n\\\\") == "\r\t\n\\" + assert expand_string_escape_sequences("foo\\fbar") == "foo\\fbar" + + +def test_unwanted_kwargs_without_error(): + unwanted_kwargs(context="Function foo", kwargs={}) + unwanted_kwargs(context="Function foo", kwargs={}, context_plural=True, detailed=True) + + +tst_args = "context,context_plural,detailed,kwargs,message" + +TstArgs = namedtuple("TstArgs1", tst_args, defaults=(None,) * len(tst_args.split(','))) + + +@pytest.mark.parametrize(tst_args, [ + TstArgs(context="Function foo", context_plural=False, detailed=False, kwargs={'a': 1}, + message="Function foo doesn't use keyword argument a."), + TstArgs(context="Function foo", context_plural=False, detailed=False, kwargs={'a': 1, 'b': 2}, + message="Function foo doesn't use keyword arguments a and b."), + TstArgs(context="Functions like foo", context_plural=True, detailed=False, kwargs={'a': 1}, + message="Functions like foo don't use keyword argument a."), + TstArgs(context="Functions like foo", context_plural=True, detailed=False, kwargs={'a': 1, 'b': 2}, + message="Functions like foo don't use keyword arguments a and b."), + # Don't need to do all the cases again + TstArgs(context="Function foo", kwargs={'a': 1, 'b': 2}, + message="Function foo doesn't use keyword arguments a and b."), # noQA - PyCharm can't see defaults + TstArgs(context="Function foo", detailed=True, kwargs={'a': 1, 'b': 2}, + message="Function foo doesn't use keyword arguments a=1 and b=2."), # noQA PyCharm can't see defaults +]) +def test_unwanted_kwargs_with_error(context, context_plural, detailed, kwargs, message): + + with pytest.raises(LoadArgumentsError) as exc: + unwanted_kwargs(context=context, kwargs=kwargs, context_plural=context_plural, 
detailed=detailed) + assert str(exc.value) == message + + +def test_back_table_set_create_state(): + + assert BasicTableSetManager._create_tab_processor_state('some-tab') is None + + +def test_item_tools_parse_sheet_header(): + assert ItemTools.parse_sheet_header('.a') == ['a'] + assert ItemTools.parse_sheet_header('a') == ['a'] + assert ItemTools.parse_sheet_header('#0') == [0] + assert ItemTools.parse_sheet_header('0') == [0] + assert ItemTools.parse_sheet_header('foo.bar') == ['foo', 'bar'] + assert ItemTools.parse_sheet_header('a.b#0') == ['a', 'b', 0] + assert ItemTools.parse_sheet_header('x.xx#17#8.z') == ['x', 'xx', 17, 8, 'z'] + + # We don't error-check this, but it shouldn't matter + assert ItemTools.parse_sheet_header('#abc') == ['abc'] + assert ItemTools.parse_sheet_header('.123') == [123] + assert ItemTools.parse_sheet_header('#abc.123#456.def') == ['abc', 123, 456, 'def'] + + +def test_item_tools_parse_sheet_headers(): + input = ['a.b', 'a.c', 'a.d#1', 'a.d#2'] + expected = [['a', 'b'], ['a', 'c'], ['a', 'd', 1], ['a', 'd', 2]] + assert ItemTools.parse_sheet_headers(input) == expected + + +def test_item_tools_infer_tab_name(): + + assert ItemTools.infer_tab_name('some/dir/some') == 'some' + assert ItemTools.infer_tab_name('some/dir/some.file') == 'some' + assert ItemTools.infer_tab_name('some/dir/some.file.name') == 'some' + + +@pytest.mark.parametrize('parsed_headers,expected_prototype', [ + (['a'], + {'a': None}), + (['a', 'b'], + {'a': None, 'b': None}), + (['a.b', 'a.c', 'a.d#0', 'a.d#1'], + {'a': {'b': None, 'c': None, 'd': [None, None]}}), + (['a.b', 'a.c', 'a.d#0.foo', 'a.d#0.bar'], + {'a': {'b': None, 'c': None, 'd': [{'foo': None, 'bar': None}]}}), + (['a.b', 'a.c', 'a.d#0.foo', 'a.d#0.bar', 'a.d#1.foo', 'a.d#1.bar'], + {'a': {'b': None, 'c': None, 'd': [{'foo': None, 'bar': None}, {'foo': None, 'bar': None}]}}), +]) +def test_item_tools_compute_patch_prototype(parsed_headers, expected_prototype): + parsed_headers = ItemTools.parse_sheet_headers(parsed_headers) + assert ItemTools.compute_patch_prototype(parsed_headers) == expected_prototype + + +@pytest.mark.parametrize('headers', [['0'], ['x', '0.y']]) +def test_item_tools_compute_patch_prototype_errors(headers): + + parsed_headers = ItemTools.parse_sheet_headers(headers) + with pytest.raises(LoadTableError) as exc: + ItemTools.compute_patch_prototype(parsed_headers) + assert str(exc.value) == "A header cannot begin with a numeric ref: 0" + + +def test_item_tools_parse_item_value_basic(): + + for x in [37, 19.3, True, False, None, 'simple text']: + assert ItemTools.parse_item_value(x) == x + + assert ItemTools.parse_item_value('3') == 3 + assert ItemTools.parse_item_value('+3') == 3 + assert ItemTools.parse_item_value('-3') == -3 + + assert ItemTools.parse_item_value('3.5') == 3.5 + assert ItemTools.parse_item_value('+3.5') == 3.5 + assert ItemTools.parse_item_value('-3.5') == -3.5 + + assert ItemTools.parse_item_value('3.5e1') == 35.0 + assert ItemTools.parse_item_value('+3.5e1') == 35.0 + assert ItemTools.parse_item_value('-3.5e1') == -35.0 + + assert ItemTools.parse_item_value('') is None + + assert ItemTools.parse_item_value('null') is None + assert ItemTools.parse_item_value('Null') is None + assert ItemTools.parse_item_value('NULL') is None + + assert ItemTools.parse_item_value('true') is True + assert ItemTools.parse_item_value('True') is True + assert ItemTools.parse_item_value('TRUE') is True + + assert ItemTools.parse_item_value('false') is False + assert ItemTools.parse_item_value('False') is False + 
assert ItemTools.parse_item_value('FALSE') is False + + assert ItemTools.parse_item_value('|') == [] # special case: lone '|' means empty + assert ItemTools.parse_item_value('alpha|') == ['alpha'] # special case: trailing '|' means singleton + assert ItemTools.parse_item_value('|alpha|') == [None, 'alpha'] + assert ItemTools.parse_item_value('|alpha') == [None, 'alpha'] + assert ItemTools.parse_item_value('alpha|beta|gamma') == ['alpha', 'beta', 'gamma'] + assert ItemTools.parse_item_value('alpha|true|false|null||7|1.5') == ['alpha', True, False, None, None, 7, 1.5] + + +@pytest.mark.parametrize('instaguids_enabled', [True, False]) +def test_item_tools_parse_item_value_guids(instaguids_enabled): + + with local_attrs(ItemTools, INSTAGUIDS_ENABLED=instaguids_enabled): + + sample_simple_field_input = "#foo" + + parsed = ItemTools.parse_item_value(sample_simple_field_input) + assert parsed == sample_simple_field_input + + context = {} + parsed = ItemTools.parse_item_value(sample_simple_field_input, context=context) + if instaguids_enabled: + assert is_uuid(parsed) + assert parsed == context[sample_simple_field_input] + else: + assert parsed == sample_simple_field_input + assert context == {} + + sample_compound_field_input = '#foo|#bar' + sample_compound_field_list = ['#foo', '#bar'] + + parsed = ItemTools.parse_item_value(sample_compound_field_input) + assert parsed == sample_compound_field_list + + context = {} + parsed = ItemTools.parse_item_value(sample_compound_field_input, context=context) + assert isinstance(parsed, list) + if instaguids_enabled: + assert all(is_uuid(x) for x in parsed) + assert '#foo' in context and '#bar' in context + else: + assert parsed == sample_compound_field_list + assert context == {} + + +def test_item_tools_set_path_value(): + + x = {'foo': 1, 'bar': 2} + ItemTools.set_path_value(x, ['foo'], 3) + assert x == {'foo': 3, 'bar': 2} + + x = {'foo': [11, 22, 33], 'bar': {'x': 'xx', 'y': 'yy'}} + ItemTools.set_path_value(x, ['foo', 1], 17) + assert x == {'foo': [11, 17, 33], 'bar': {'x': 'xx', 'y': 'yy'}} + + x = {'foo': [11, 22, 33], 'bar': {'x': 'xx', 'y': 'yy'}} + ItemTools.set_path_value(x, ['bar', 'x'], 'something') + assert x == {'foo': [11, 22, 33], 'bar': {'x': 'something', 'y': 'yy'}} + + +def test_item_tools_find_type_hint(): + + assert ItemTools.find_type_hint(None, 'anything') is None + + assert ItemTools.find_type_hint(['foo', 'bar'], None) is None + assert ItemTools.find_type_hint(['foo', 'bar'], "something") is None + assert ItemTools.find_type_hint(['foo', 'bar'], {}) is None + + actual = ItemTools.find_type_hint(['foo', 'bar'], {"type": "object"}) + assert actual is None + + schema = { + "type": "object", + "properties": { + "foo": { + "type": "boolean" + } + } + } + actual = ItemTools.find_type_hint(['foo', 'bar'], schema) + assert actual is None + + actual = ItemTools.find_type_hint(['foo'], schema) + assert isinstance(actual, BoolHint) + + schema = { + "type": "object", + "properties": { + "foo": { + "type": "object", + "properties": { + "bar": { + "type": "boolean" + } + } + } + } + } + actual = ItemTools.find_type_hint(['foo', 'bar'], schema) + assert isinstance(actual, BoolHint) + + actual = ItemTools.find_type_hint(['foo'], schema) + assert actual is None + + +def test_table_set_manager_registry_manager_for_filename(): + + assert TABLE_SET_MANAGER_REGISTRY.manager_for_filename("xyz/foo.csv") == CsvManager + + with pytest.raises(Exception) as exc: + TABLE_SET_MANAGER_REGISTRY.manager_for_filename("xyz/foo.something.missing") + assert 
str(exc.value) == "Unknown file type: xyz/foo.something.missing" + + assert ITEM_MANAGER_REGISTRY.manager_for_filename("xyz/foo.csv") == CsvItemManager + + with pytest.raises(Exception) as exc: + ITEM_MANAGER_REGISTRY.manager_for_filename("xyz/foo.something.missing") + assert str(exc.value) == "Unknown file type: xyz/foo.something.missing" + + +SAMPLE_XLSX_FILE = os.path.join(TEST_DIR, 'data_files/sample_items.xlsx') + +SAMPLE_XLSX_FILE_RAW_CONTENT = { + "Sheet1": [ + {"x": 1, "y.a": 1, "y.z": 1}, + {"x": 1, "y.a": 2, "y.z": 3}, + {"x": "alpha", "y.a": "beta", "y.z": "gamma|delta"}, + ], + "Sheet2": [ + { + "name": "bill", "age": 23, + "mother.name": "mary", "mother.age": 58, + "father.name": "fred", "father.age": 63, + "friends#0.name": "sam", "friends#0.age": 22, + "friends#1.name": "arthur", "friends#1.age": 19, + }, + { + "name": "joe", "age": 9, + "mother.name": "estrella", "mother.age": 35, + "father.name": "anthony", "father.age": 34, + "friends#0.name": "anders", "friends#0.age": 9, + "friends#1.name": None, "friends#1.age": None, + }, + ] +} + +SAMPLE_XLSX_FILE_ITEM_CONTENT = { + "Sheet1": [ + {"x": 1, "y": {"a": 1, "z": 1}}, + {"x": 1, "y": {"a": 2, "z": 3}}, + {"x": "alpha", "y": {"a": "beta", "z": ["gamma", "delta"]}}, + ], + "Sheet2": [ + { + "name": "bill", "age": 23, + "mother": {"name": "mary", "age": 58}, + "father": {"name": "fred", "age": 63}, + "friends": [ + {"name": "sam", "age": 22}, + {"name": "arthur", "age": 19}, + ] + }, + { + "name": "joe", "age": 9, + "mother": {"name": "estrella", "age": 35}, + "father": {"name": "anthony", "age": 34}, + "friends": [ + {"name": "anders", "age": 9}, + {"name": None, "age": None} + ] + }, + ], +} + +SAMPLE_CSV_FILE = os.path.join(TEST_DIR, 'data_files/sample_items_sheet2.csv') + +SAMPLE_CSV_FILE_SHEET_NAME = ItemTools.infer_tab_name(SAMPLE_CSV_FILE) + +SAMPLE_CSV_FILE_RAW_CONTENT = {SAMPLE_CSV_FILE_SHEET_NAME: SAMPLE_XLSX_FILE_RAW_CONTENT['Sheet2']} + +SAMPLE_CSV_FILE_ITEM_CONTENT = {SAMPLE_CSV_FILE_SHEET_NAME: SAMPLE_XLSX_FILE_ITEM_CONTENT['Sheet2']} + +SAMPLE_TSV_FILE = os.path.join(TEST_DIR, 'data_files/sample_items_sheet2.tsv') + +SAMPLE_TSV_FILE_SHEET_NAME = ItemTools.infer_tab_name(SAMPLE_TSV_FILE) + +SAMPLE_TSV_FILE_RAW_CONTENT = {SAMPLE_TSV_FILE_SHEET_NAME: SAMPLE_XLSX_FILE_RAW_CONTENT['Sheet2']} + +SAMPLE_TSV_FILE_ITEM_CONTENT = {SAMPLE_TSV_FILE_SHEET_NAME: SAMPLE_XLSX_FILE_ITEM_CONTENT['Sheet2']} + +SAMPLE_JSON_TABS_FILE = os.path.join(TEST_DIR, 'data_files/sample_items.tabs.json') + +SAMPLE_JSON_TABS_FILE_ITEM_CONTENT = SAMPLE_XLSX_FILE_ITEM_CONTENT + +SAMPLE_YAML_TABS_FILE = os.path.join(TEST_DIR, 'data_files/sample_items.tabs.yaml') + +SAMPLE_YAML_TABS_FILE_ITEM_CONTENT = SAMPLE_XLSX_FILE_ITEM_CONTENT + + +def test_xlsx_manager_load_content(): + + wt = XlsxManager(SAMPLE_XLSX_FILE) + assert wt.load_content() == SAMPLE_XLSX_FILE_RAW_CONTENT + + +def test_xlsx_manager_load(): + + assert XlsxManager.load(SAMPLE_XLSX_FILE) == SAMPLE_XLSX_FILE_RAW_CONTENT + + +def test_xlsx_manager_load_csv(): + + with pytest.raises(LoadArgumentsError) as exc: + XlsxManager.load(SAMPLE_CSV_FILE) + assert str(exc.value).startswith('The TableSetManager subclass XlsxManager' + ' expects only .xlsx filenames:') + + +def test_xlsx_item_manager_load_content(): + + it = XlsxItemManager(SAMPLE_XLSX_FILE, autoload_schemas=False) + assert it.load_content() == SAMPLE_XLSX_FILE_ITEM_CONTENT + + +def test_xlsx_item_manager_load(): + + assert XlsxItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False) == SAMPLE_XLSX_FILE_ITEM_CONTENT + + +def 
test_xlsx_item_manager_load_csv(): + + with pytest.raises(LoadArgumentsError) as exc: + XlsxItemManager.load(SAMPLE_CSV_FILE) + assert str(exc.value).startswith('The TableSetManager subclass XlsxItemManager' + ' expects only .xlsx filenames:') + + +def test_csv_manager_load_content(): + + wt = CsvManager(SAMPLE_CSV_FILE) + assert wt.load_content() == SAMPLE_CSV_FILE_RAW_CONTENT + + +def test_csv_manager_load(): + + assert CsvManager.load(SAMPLE_CSV_FILE) == SAMPLE_CSV_FILE_RAW_CONTENT + + +def test_csv_manager_load_csv(): + + with pytest.raises(LoadArgumentsError) as exc: + CsvManager.load(SAMPLE_XLSX_FILE) + assert str(exc.value).startswith('The TableSetManager subclass CsvManager' + ' expects only .csv filenames:') + + +def test_csv_item_manager_load_content(): + + it = CsvItemManager(SAMPLE_CSV_FILE, autoload_schemas=False) + assert it.load_content() == SAMPLE_CSV_FILE_ITEM_CONTENT + + +def test_csv_item_manager_load(): + + assert CsvItemManager.load(SAMPLE_CSV_FILE, autoload_schemas=False) == SAMPLE_CSV_FILE_ITEM_CONTENT + + +def test_csv_item_manager_load_csv(): + + with pytest.raises(LoadArgumentsError) as exc: + CsvItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False) + assert str(exc.value).startswith('The TableSetManager subclass CsvItemManager' + ' expects only .csv filenames:') + + +def test_csv_escaping(): + + actual = CsvManager.load("test/data_files/escaping.csv", escaping=False) + expected = json.load(open("test/data_files/escaping-false.json")) + assert actual == expected + + actual = CsvManager.load("test/data_files/escaping.csv", escaping=True) + expected = json.load(open("test/data_files/escaping-true.json")) + assert actual == expected + + +def test_tsv_manager_load_content(): + + wt = TsvManager(SAMPLE_TSV_FILE) + assert wt.load_content() == SAMPLE_TSV_FILE_RAW_CONTENT + + +def test_tsv_manager_load(): + + assert TsvManager.load(SAMPLE_TSV_FILE) == SAMPLE_TSV_FILE_RAW_CONTENT + + +def test_tsv_manager_load_csv(): + + with pytest.raises(LoadArgumentsError) as exc: + TsvManager.load(SAMPLE_XLSX_FILE) + assert str(exc.value).startswith('The TableSetManager subclass TsvManager' + ' expects only .tsv or .tsv.txt filenames:') + + +def test_tsv_item_manager_load_content(): + + it = TsvItemManager(SAMPLE_TSV_FILE, autoload_schemas=False) + assert it.load_content() == SAMPLE_TSV_FILE_ITEM_CONTENT + + +def test_tsv_item_manager_load(): + + assert TsvItemManager.load(SAMPLE_TSV_FILE, autoload_schemas=False) == SAMPLE_TSV_FILE_ITEM_CONTENT + + +def test_tsv_item_manager_load_csv(): + + with pytest.raises(LoadArgumentsError) as exc: + TsvItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False) + assert str(exc.value).startswith('The TableSetManager subclass TsvItemManager' + ' expects only .tsv or .tsv.txt filenames:') + + +def test_item_manager_load(): + + assert ItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False) == SAMPLE_XLSX_FILE_ITEM_CONTENT + + assert ItemManager.load(SAMPLE_CSV_FILE, autoload_schemas=False) == SAMPLE_CSV_FILE_ITEM_CONTENT + + assert ItemManager.load(SAMPLE_TSV_FILE, autoload_schemas=False) == SAMPLE_TSV_FILE_ITEM_CONTENT + + loaded = ItemManager.load(SAMPLE_JSON_TABS_FILE, autoload_schemas=False) + print("loaded=", json.dumps(loaded, indent=2)) + expected = SAMPLE_JSON_TABS_FILE_ITEM_CONTENT + print("expected=", json.dumps(expected, indent=2)) + assert loaded == expected + + with pytest.raises(LoadArgumentsError) as exc: + ItemManager.load("something.else") + assert str(exc.value) == "Unknown file type: something.else" + + +def 
test_load_items(): + + assert load_items(SAMPLE_XLSX_FILE, autoload_schemas=False) == SAMPLE_XLSX_FILE_ITEM_CONTENT + + assert load_items(SAMPLE_CSV_FILE, autoload_schemas=False) == SAMPLE_CSV_FILE_ITEM_CONTENT + + with pytest.raises(LoadArgumentsError) as exc: + load_items("something.else") + assert str(exc.value) == "Unknown file type: something.else" + + +SAMPLE_CSV_FILE2 = os.path.join(TEST_DIR, 'data_files/sample_items2.csv') + +SAMPLE_CSV_FILE2_SHEET_NAME = ItemTools.infer_tab_name(SAMPLE_CSV_FILE2) + +SAMPLE_CSV_FILE2_SCHEMAS = { + "Person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "sex": {"type": "string", "enum": ["Male", "Female"]}, + "member": {"type": "boolean"} + } + } +} + +SAMPLE_CSV_FILE2_CONTENT = { + SAMPLE_CSV_FILE2_SHEET_NAME: [ + {"name": "john", "sex": "M", "member": "false"}, + {"name": "juan", "sex": "male", "member": "true"}, + {"name": "igor", "sex": "unknown", "member": None}, + {"name": "mary", "sex": "Female", "member": "t"} + ] +} + +SAMPLE_CSV_FILE2_ITEM_CONTENT = { + SAMPLE_CSV_FILE2_SHEET_NAME: [ + {"name": "john", "sex": "M", "member": False}, + {"name": "juan", "sex": "male", "member": True}, + {"name": "igor", "sex": "unknown", "member": None}, + {"name": "mary", "sex": "Female", "member": "t"} + ] +} + +SAMPLE_CSV_FILE2_PERSON_CONTENT_HINTED = { + "Person": [ + {"name": "john", "sex": "Male", "member": False}, + {"name": "juan", "sex": "Male", "member": True}, + {"name": "igor", "sex": "unknown", "member": None}, + {"name": "mary", "sex": "Female", "member": True} + ] +} + + +SAMPLE_JSON_FILE2 = os.path.join(TEST_DIR, 'data_files/sample_items2.json') + +SAMPLE_JSON_FILE2_SHEET_NAME = ItemTools.infer_tab_name(SAMPLE_JSON_FILE2) + + +SAMPLE_CSV_FILE3_SCHEMAS = { + "Person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "sex": {"type": "string", "enum": ["Male", "Female"]}, + "children": {"type": "array", "items": {"type": "string"}}, + "parents": {"type": "array", "items": {"type": "string"}}, + "mother": {"type": "string"}, + "father": {"type": "string"}, + } + } +} + +SAMPLE_CSV_FILE3_PERSON_CONTENT_HINTED = { + "Person": [ + { + "name": "John", + "uuid": "#john", + "sex": "Male", + "father": "#igor", + "mother": "#mary", + "parents": None, + "children": None, + }, + { + "name": "Juan", + "uuid": "#juan", + "sex": "Male", + "father": None, + "mother": None, + "parents": ["#igor", "#mary"], + "children": None, + }, + { + "name": "Igor", + "uuid": "#igor", + "sex": "Male", + "father": None, + "mother": None, + "parents": None, + "children": ["#john"], + }, + { + "name": "Mary", + "uuid": "#mary", + "sex": "Female", + "father": None, + "mother": None, + "parents": None, + "children": ["#john"], + }, + ] +} + +SAMPLE_CSV_FILE3 = os.path.join(TEST_DIR, 'data_files/sample_items3.csv') + + +def matches_template(json1: AnyJsonData, json2: AnyJsonData, *, previous_matches: Dict[str, str] = None) -> bool: + if previous_matches is None: + previous_matches = {} + if isinstance(json1, dict) and isinstance(json2, dict): + keys1 = set(json1.keys()) + keys2 = set(json2.keys()) + if keys1 != keys2: + print(f"Keys don't match: {keys1} vs {keys2}") + return False + return all(matches_template(json1[key], json2[key], previous_matches=previous_matches) for key in keys1) + elif isinstance(json1, list) and isinstance(json2, list): + n1 = len(json1) + n2 = len(json2) + if n1 != n2: + print(f"Length doesn't match: {n1} vs {n2}") + return False + return all(matches_template(json1[i], json2[i], 
previous_matches=previous_matches) for i in range(n1)) + elif isinstance(json1, str) and isinstance(json2, str) and is_uuid(json1) and json2.startswith("#"): + previously_matched = previous_matches.get(json2) + if previously_matched: + result = json1 == previously_matched + if not result: + print(f"Instaguid mismatch: {json1} vs {json2}") + return result + else: + # Remember the match + previous_matches[json2] = json1 + return True + else: # any other atomic items can be just directly compared + result = json1 == json2 + if not result: + print(f"Unequal: {json1} vs {json2}") + return result + + +def test_load_items_with_schema(): + + print("Case 1") + expected = SAMPLE_CSV_FILE2_CONTENT + actual = CsvManager.load(SAMPLE_CSV_FILE2) + assert actual == expected + + print("Case 2") + expected = SAMPLE_CSV_FILE2_ITEM_CONTENT + actual = load_items(SAMPLE_CSV_FILE2, schemas=SAMPLE_CSV_FILE2_SCHEMAS) + assert actual == expected + + print("Case 3") + expected = SAMPLE_CSV_FILE2_PERSON_CONTENT_HINTED + actual = load_items(SAMPLE_CSV_FILE2, schemas=SAMPLE_CSV_FILE2_SCHEMAS, tab_name='Person') + assert actual == expected + + +def test_sample_items_csv_vs_json(): + + csv_content = load_items(SAMPLE_CSV_FILE2, schemas=SAMPLE_CSV_FILE2_SCHEMAS, tab_name='Person') + + json_content = load_items(SAMPLE_JSON_FILE2, tab_name="Person") + + assert csv_content == json_content + + +def test_sample_items_json_vs_yaml(): + + tabs_data_from_json = load_items(SAMPLE_JSON_TABS_FILE) + tabs_data_from_yaml = load_items(SAMPLE_YAML_TABS_FILE) + assert tabs_data_from_json == tabs_data_from_yaml + + +@pytest.mark.parametrize('instaguids_enabled', [True, False]) +def test_load_items_with_schema_and_instaguids(instaguids_enabled): + + with local_attrs(ItemTools, INSTAGUIDS_ENABLED=instaguids_enabled): + + expected = SAMPLE_CSV_FILE3_PERSON_CONTENT_HINTED + print("expected=", json.dumps(expected, indent=2)) + actual = load_items(SAMPLE_CSV_FILE3, schemas=SAMPLE_CSV_FILE3_SCHEMAS, tab_name='Person') + print("actual=", json.dumps(actual, indent=2)) + if instaguids_enabled: + assert matches_template(actual, expected) + else: + assert actual == expected # no substitution performed + + +class SchemaAutoloaderForTesting(SchemaAutoloadMixin): + + def __init__(self, **kwargs): + super().__init__(filename='ignored.file.name', **kwargs) + + +@contextlib.contextmanager +def schema_autoloader_for_testing(**kwargs) -> SchemaAutoloadMixin: + autoloader: Optional[SchemaAutoloadMixin] = None + success = False + try: + autoloader: SchemaAutoloadMixin = SchemaAutoloaderForTesting(**kwargs) + assert autoloader.SCHEMA_CACHE == {}, "The schema cache is not clean." 
+ yield autoloader + success = True + finally: + if autoloader is not None: + autoloader.clear_schema_cache() + assert autoloader.SCHEMA_CACHE == SchemaAutoloadMixin.SCHEMA_CACHE == {} + if not success: + raise + + +@using_fresh_ff_state_for_testing() +@pytest.mark.integrated +@pytest.mark.parametrize('portal_env', [None, 'data']) +def test_schema_autoload_mixin_caching(portal_env): + + with schema_autoloader_for_testing(portal_env=portal_env) as autoloader: + + assert autoloader.portal_env == 'data' # it should have defaulted even if we didn't supply it + + assert autoloader.SCHEMA_CACHE == SchemaAutoloadMixin.SCHEMA_CACHE == {} + + sample_schema_name = 'foo' + sample_schema = {'mock_schema_for': 'foo'} + + with mock.patch.object(sheet_utils_module, "get_schema") as mock_get_schema: + mock_get_schema.return_value = sample_schema + assert autoloader.fetch_schema(sample_schema_name, portal_env=autoloader.portal_env) == sample_schema + + schema_cache_with_sample_schema = {sample_schema_name: sample_schema} + assert SchemaAutoloadMixin.SCHEMA_CACHE == schema_cache_with_sample_schema + assert autoloader.SCHEMA_CACHE == schema_cache_with_sample_schema + + +@using_fresh_ff_state_for_testing() +@pytest.mark.integrated +@pytest.mark.parametrize('portal_env', [None, 'data']) +def test_schema_autoload_mixin_fetch_schema(portal_env): + + with schema_autoloader_for_testing(portal_env=portal_env) as autoloader: + + assert autoloader.portal_env == 'data' + + user_schema = autoloader.fetch_schema('user', portal_env=autoloader.portal_env) + + assert user_schema['$id'] == '/profiles/user.json' + assert user_schema['title'] == 'User' + assert 'properties' in user_schema + + +@using_fresh_ff_state_for_testing() +@pytest.mark.integrated +@pytest.mark.parametrize('autoload_schemas', [True, False]) +@pytest.mark.parametrize('cache_schemas', [True, False]) +@pytest.mark.parametrize('portal_env', [None, 'data']) +def test_schema_autoload_mixin_fetch_relevant_schemas(autoload_schemas, cache_schemas, portal_env): + + with printed_output() as printed: + with local_attrs(SchemaAutoloadMixin, CACHE_SCHEMAS=cache_schemas): + with schema_autoloader_for_testing(portal_env=portal_env, autoload_schemas=autoload_schemas) as autoloader: + + assert autoloader.portal_env == ('data' if autoload_schemas or portal_env else None) + + if autoload_schemas: + + schemas = autoloader.fetch_relevant_schemas(['User', 'Lab']) + assert isinstance(schemas, dict) + assert len(schemas) == 2 + assert set(schemas.keys()) == {'User', 'Lab'} + + else: + + assert autoloader.fetch_relevant_schemas(['User', 'Lab']) == {} + + if portal_env == 'data' or not autoload_schemas: + assert printed.lines == [] + else: + assert printed.lines == [ + "The portal_env was not explicitly supplied. Schemas will come from portal_env='data'." 
+                ]
+
+
+SAMPLE_ITEMS_FOR_REAL_SCHEMAS_FILE = os.path.join(TEST_DIR, 'data_files/sample_items_for_real_schemas.csv')
+
+
+@using_fresh_ff_state_for_testing()
+@pytest.mark.integrated
+def test_workbook_with_schemas():
+
+    print()  # start on a fresh line
+
+    SchemaAutoloadMixin.clear_schema_cache()
+
+    actual_data = CsvManager(filename=SAMPLE_ITEMS_FOR_REAL_SCHEMAS_FILE, tab_name='ExperimentSeq').load_content()
+    expected_data = {
+        "ExperimentSeq": [
+            {
+                "accession": "foo",
+                "fragment_size_selection_method": "spri"
+            },
+            {
+                "accession": "bar",
+                "fragment_size_selection_method": "blue"
+            }
+        ]
+    }
+    assert actual_data == expected_data
+
+    actual_items = load_items(SAMPLE_ITEMS_FOR_REAL_SCHEMAS_FILE,
+                              tab_name='ExperimentSeq', autoload_schemas=True)
+    expected_items = {
+        "ExperimentSeq": [
+            {
+                "accession": "foo",
+                "fragment_size_selection_method": "SPRI beads"
+            },
+            {
+                "accession": "bar",
+                "fragment_size_selection_method": "BluePippin"
+            }
+        ]
+    }
+    assert actual_items == expected_items
+
+
+@using_fresh_ff_state_for_testing()
+@pytest.mark.integrated
+def test_workbook_with_schemas_and_portal_vapp():
+
+    print()  # start on a fresh line
+
+    SchemaAutoloadMixin.clear_schema_cache()
+
+    portal_env = public_env_name(EnvUtils.PRD_ENV_NAME)
+
+    experiment_seq_schema = ff_utils_module.get_schema('ExperimentSeq', portal_env=portal_env)
+
+    expected_items = {
+        "ExperimentSeq": [
+            {
+                "accession": "foo",
+                "fragment_size_selection_method": "SPRI beads"
+            },
+            {
+                "accession": "bar",
+                "fragment_size_selection_method": "BluePippin"
+            }
+        ]
+    }
+
+    class MockVapp(NamedObject, AbstractVirtualApp):
+
+        def __init__(self, name):
+            super().__init__(name=name)
+            self.call_count = 0
+
+        def get(self, path_url):
+            assert path_url.startswith('/profiles/ExperimentSeq.json?')
+            self.call_count += 1
+            response = MockResponse(200, json=experiment_seq_schema)
+            return response
+
+    portal_vapp = MockVapp(name=f'MockVapp[{portal_env}]')
+
+    old_count = portal_vapp.call_count
+
+    with mock.patch.object(ff_utils_module, "get_authentication_with_server",
+                           mock_not_called("get_authentication_with_server")):
+        with mock.patch.object(ff_utils_module, "get_metadata",
+                               mock_not_called("get_metadata")):
+            actual_items = load_items(SAMPLE_ITEMS_FOR_REAL_SCHEMAS_FILE,
+                                      tab_name='ExperimentSeq', autoload_schemas=True, portal_vapp=portal_vapp)
+
+    assert portal_vapp.call_count == old_count + 1
+    assert actual_items == expected_items