From 46a2c0946f4c9ba44fbbd8237a15a32cb77ae47d Mon Sep 17 00:00:00 2001
From: Kent Pitman
Date: Wed, 25 Oct 2023 05:54:37 -0400
Subject: [PATCH] Begin to address David's problems in C4-1111.

---
 dcicutils/bundle_utils.py     | 55 ++++++++++++++++++++++++------
 dcicutils/sheet_utils.py      | 22 +++++++++++-
 dcicutils/validation_utils.py | 55 ++++++++++++++++++------------
 pyproject.toml                |  2 +-
 test/test_bundle_utils.py     | 47 ++++++++++++++++++--------
 test/test_validation_utils.py | 63 ++++++++++++++++++++++++++++++++---
 6 files changed, 192 insertions(+), 52 deletions(-)

diff --git a/dcicutils/bundle_utils.py b/dcicutils/bundle_utils.py
index 5e37e8660..fce23f71b 100644
--- a/dcicutils/bundle_utils.py
+++ b/dcicutils/bundle_utils.py
@@ -9,9 +9,9 @@
 from .sheet_utils import (
     LoadTableError, prefer_number, TabbedJsonSchemas,
     Header, Headers, TabbedHeaders, ParsedHeader, ParsedHeaders, TabbedParsedHeaders, SheetCellValue, TabbedSheetData,
-    TableSetManagerRegistry, AbstractTableSetManager, InsertsManager, load_table_set,
+    TableSetManagerRegistry, AbstractTableSetManager, InsertsManager, TableSetManager, load_table_set,
 )
-from .validation_utils import SchemaManager
+from .validation_utils import SchemaManager, validate_data_against_schemas, summary_of_data_validation_errors
 
 PatchPrototype = Dict
 
@@ -378,10 +378,19 @@ def load_table_structures(filename: str, *, apply_heuristics: bool = True,
 
 class TableChecker(InflatableTabbedDataManager, TypeHintContext):
 
-    def __init__(self, tabbed_sheet_data: TabbedSheetData, schemas: Optional[TabbedJsonSchemas] = None,
+    def __init__(self, tabbed_sheet_data: TabbedSheetData, *, flattened: bool,
+                 override_schemas: Optional[TabbedJsonSchemas] = None,
                  portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None,
                  apply_heuristics: bool = False):
+        self.flattened = flattened
+        if not flattened:
+            # TODO: Need to implement something that depends on this flattened attribute.
+            #       Also, it's possible that we can default this once we see if the new strategy is general-purpose,
+            #       rather than it being a required argument. But for now let's require it be passed.
+            #       -kmp 25-Oct-2023
+            raise ValueError("Only flattened=True is supported by TableChecker for now.")
+
         if portal_env is None and portal_vapp is None:
             portal_env = public_env_name(EnvUtils.PRD_ENV_NAME)
         # InflatableTabbedDataManager supplies:
@@ -394,7 +403,7 @@ def __init__(self, tabbed_sheet_data: TabbedSheetData, schemas: Optional[TabbedJ
         self.portal_env = portal_env
         self.portal_vapp = portal_vapp
         self.schema_manager: SchemaManager = SchemaManager(portal_env=portal_env, portal_vapp=portal_vapp,
-                                                           schemas=schemas)
+                                                           override_schemas=override_schemas)
         self.schemas = self.schema_manager.fetch_relevant_schemas(self.tab_names)  # , schemas=schemas)
         self.lookup_tables_by_tab_name: Dict[str, Dict[str, Dict]] = {
             tab_name: self.build_lookup_table_for_tab(tab_name, rows=rows)
@@ -463,6 +472,7 @@ def validate_ref(self, item_type, item_ref):
         if self.contains_ref(item_type=item_type, item_ref=item_ref):
            return True
         try:
+            # TODO: This probably needs a cache
             info = get_metadata(f"/{to_camel_case(item_type)}/{item_ref}")
             # Basically return True if there's a value at all,
             # but still check it's not an error message that didn't get raised.
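[Note, not part of the patch: the hunks above make TableChecker's new arguments keyword-only, with flattened= required. A minimal sketch of the resulting calling convention (the tabbed data and portal_env below are illustrative placeholders, not values taken from this patch):

    from dcicutils.bundle_utils import TableChecker

    tabbed_sheet_data = {'user': [{'name': 'jdoe', 'age': '23'}]}  # spreadsheet-style rows; values still strings
    checker = TableChecker(tabbed_sheet_data,
                           flattened=True,         # required for now; flattened=False raises ValueError
                           override_schemas=None,  # optional per-tab schema overrides
                           portal_env='data')      # or pass portal_vapp= instead
    checked = checker.check_tabs()
]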
@@ -499,10 +509,13 @@ def check_row(self, row: Dict, *, tab_name: str, row_number: int, prototype: Dic
         return patch_item
 
     @classmethod
-    def check(cls, tabbed_sheet_data: TabbedSheetData, schemas: Optional[TabbedJsonSchemas] = None,
+    def check(cls, tabbed_sheet_data: TabbedSheetData, *,
+              flattened: bool,
+              override_schemas: Optional[TabbedJsonSchemas] = None,
               apply_heuristics: bool = False,
               portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None):
-        checker = cls(tabbed_sheet_data, schemas=schemas, apply_heuristics=apply_heuristics,
+        checker = cls(tabbed_sheet_data, flattened=flattened,
+                      override_schemas=override_schemas, apply_heuristics=apply_heuristics,
                       portal_env=portal_env, portal_vapp=portal_vapp)
         checked = checker.check_tabs()
         return checked
@@ -538,14 +551,34 @@ def create_tab_processor_state(self, tab_name: str) -> SheetState:
 
 def load_items(filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None,
-               schemas: Optional[TabbedJsonSchemas] = None, apply_heuristics: bool = False,
+               override_schemas: Optional[TabbedJsonSchemas] = None, apply_heuristics: bool = False,
                portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None,
-               validate: bool = False, **kwargs):
-    tabbed_rows = load_table_set(filename=filename, tab_name=tab_name, escaping=escaping, prefer_number=False,
+               # TODO: validate= is presently False (i.e., disabled) by default while being debugged,
+               #       but for production use maybe it should not be? -kmp 25-Oct-2023
+               validate: bool = False,
+               **kwargs):
+    annotated_data = TableSetManager.load_annotated(filename=filename, tab_name=tab_name, escaping=escaping, prefer_number=False,
                                  **kwargs)
-    checked_items = check(tabbed_rows, schemas=schemas, portal_env=portal_env, portal_vapp=portal_vapp,
-                          apply_heuristics=apply_heuristics)
+    tabbed_rows = annotated_data['content']
+    flattened = annotated_data['flattened']
+    if flattened:
+        checked_items = TableChecker.check(tabbed_rows, flattened=flattened,
+                                           override_schemas=override_schemas,
+                                           portal_env=portal_env, portal_vapp=portal_vapp,
+                                           apply_heuristics=apply_heuristics)
+    else:
+        # No fancy checking for things like .json, etc. for now. Only check things that came from
+        # spreadsheet-like data, where structural datatypes are forced into strings.
+        checked_items = tabbed_rows
+    if validate:
+        problems = validate_data_against_schemas(checked_items, portal_env=portal_env, portal_vapp=portal_vapp,
+                                                 override_schemas=override_schemas)
+        error_summary = summary_of_data_validation_errors(problems)
+        if error_summary:
+            for item in error_summary:
+                print(item)
+            raise Exception("Validation problems were seen.")
     # TODO: Maybe connect validation here. Although another option is to just call validation separately
     #       once this is successfully loaded. Needs thought. However, David's validation_utils can do
     #       the validation if we decide to do it, it would just need to be connected up.
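[Note, not part of the patch: load_items() above now keys its behavior off the 'flattened' entry in the dict returned by TableSetManager.load_annotated(), which the sheet_utils diff below introduces. A minimal sketch of that dict's shape, assuming a hypothetical spreadsheet-like input file sample.csv:

    from dcicutils.sheet_utils import TableSetManager

    annotated = TableSetManager.load_annotated('sample.csv')
    annotated['content']    # TabbedSheetData, exactly what TableSetManager.load() returns
    annotated['flattened']  # True for spreadsheet-like sources handled by a FlattenedTableSetManager
    annotated['singleton']  # True when the manager is a SingleTableMixin (single-tab sources)
    annotated['packed']     # True only if a tar or zip file had to be unpacked first
]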
diff --git a/dcicutils/sheet_utils.py b/dcicutils/sheet_utils.py
index 9242bb765..66ce523fa 100644
--- a/dcicutils/sheet_utils.py
+++ b/dcicutils/sheet_utils.py
@@ -682,10 +682,30 @@ def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[
         """
         Given a filename and various options
         """
+        annotated_content = cls.load_annotated(filename=filename, tab_name=tab_name, escaping=escaping, **kwargs)
+        content: TabbedSheetData = annotated_content['content']
+        return content
+
+    @classmethod
+    def load_annotated(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None,
+                       **kwargs) -> Dict:
+        """
+        Given a filename and various options, loads the content and returns it
+        in a dict together with metadata describing how it was loaded.
+        """
+        orig_filename = filename
         with maybe_unpack(filename) as filename:
             manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
                                                         **kwargs)
-            return manager.load_content()
+            content: TabbedSheetData = manager.load_content()
+            return {
+                'filename': filename,
+                'content': content,
+                'tab_name': tab_name,
+                'escaping': escaping,
+                'singleton': isinstance(manager, SingleTableMixin),
+                'flattened': isinstance(manager, FlattenedTableSetManager),
+                'packed': orig_filename != filename,  # tar or zip file that had to be unpacked somehow
+            }
 
 
 load_table_set = TableSetManager.load
diff --git a/dcicutils/validation_utils.py b/dcicutils/validation_utils.py
index 3e8169000..c4da5a856 100644
--- a/dcicutils/validation_utils.py
+++ b/dcicutils/validation_utils.py
@@ -8,33 +8,39 @@
 from .ff_utils import get_schema
 from .env_utils import EnvUtils, public_env_name
 from .lang_utils import there_are, maybe_pluralize, disjoined_list
-from .misc_utils import AbstractVirtualApp, PRINT
+from .misc_utils import AbstractVirtualApp, PRINT, to_snake_case
 from .sheet_utils import JsonSchema, TabbedJsonSchemas, SheetData, TabbedSheetData
 from .task_utils import pmap
 
 
 class SchemaManager:
 
-    SCHEMA_CACHE = {}  # Shared cache. Do not override. Use .clear_schema_cache() to clear it.
-
     @classmethod
     @contextlib.contextmanager
     def fresh_schema_manager_context_for_testing(cls):
-        old_schema_cache = cls.SCHEMA_CACHE
-        try:
-            cls.SCHEMA_CACHE = {}
-            yield
-        finally:
-            cls.SCHEMA_CACHE = old_schema_cache
+        # TODO: Remove references to this once reimplementation using an instance variable for SCHEMA_CACHE is working.
+        yield
+        # old_schema_cache = cls.SCHEMA_CACHE
+        # try:
+        #     cls.SCHEMA_CACHE = {}
+        #     yield
+        # finally:
+        #     cls.SCHEMA_CACHE = old_schema_cache
 
-    def __init__(self, schemas: Optional[TabbedJsonSchemas] = None,
+    def __init__(self, *, override_schemas: Optional[TabbedJsonSchemas] = None,
                  portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None):
+        self.SCHEMA_CACHE = {}  # Per-instance cache (no longer shared across instances).
         if portal_env is None and portal_vapp is None:
             portal_env = public_env_name(EnvUtils.PRD_ENV_NAME)
             PRINT(f"The portal_env was not explicitly supplied. Schemas will come from portal_env={portal_env!r}.")
         self.portal_env = portal_env
         self.portal_vapp = portal_vapp
-        self.schemas = {} if schemas is None else schemas.copy()
+        self.override_schemas = (
+            {}
+            if override_schemas is None
+            else {to_snake_case(key): value  # important to both canonicalize the case and copy the dict
+                  for key, value in override_schemas.items()}
+        )
 
     def fetch_relevant_schemas(self, schema_names: List[str]):  # , schemas: Optional[TabbedSchemas] = None):
         # if schemas is None:
@@ -51,7 +57,8 @@ def schema_exists(self, schema_name: str):
         return bool(self.fetch_schema(schema_name=schema_name))
 
     def fetch_schema(self, schema_name: str):
-        override_schema = self.schemas.get(schema_name)
+        schema_name = to_snake_case(schema_name)
+        override_schema = self.override_schemas.get(schema_name)
         if override_schema is not None:
             return override_schema
         schema: Optional[AnyJsonData] = self.SCHEMA_CACHE.get(schema_name)
@@ -60,10 +67,12 @@
             self.SCHEMA_CACHE[schema_name] = schema
         return schema
 
-    @classmethod
-    def clear_schema_cache(cls):
-        for key in list(cls.SCHEMA_CACHE.keys()):  # important to get the list of keys as a separate object first
-            cls.SCHEMA_CACHE.pop(key, None)
+    # Should not be needed given that SCHEMA_CACHE is an instance variable.
+    #
+    # @classmethod
+    # def clear_schema_cache(cls):
+    #     for key in list(cls.SCHEMA_CACHE.keys()):  # important to get the list of keys as a separate object first
+    #         cls.SCHEMA_CACHE.pop(key, None)
 
     def identifying_properties(self, schema: Optional[JsonSchema] = None, schema_name: Optional[str] = None,
                                among: Optional[List[str]] = None):
@@ -76,7 +85,7 @@ def identifying_properties(self, schema: Optional[JsonSchema] = None, schema_nam
                                            if prop in possible_identifying_properties))
         return identifying_properties
 
-    @classmethod
+    @classmethod  # This operation doesn't actually use the schemas, so is safe as a class method
     def identifying_value(cls, data_item: Dict[str, AnyJsonData], identifying_properties) -> AnyJsonData:
         if not identifying_properties:
             raise ValueError("No identifying properties were specified.")
@@ -89,9 +98,10 @@ def identifying_value(cls, data_item: Dict[str, AnyJsonData], identifying_proper
                          f' in {json.dumps(data_item)}.')
 
 
-def validate_data_against_schemas(data: TabbedSheetData,
+def validate_data_against_schemas(data: TabbedSheetData, *,
+                                  portal_env: Optional[str] = None,
                                   portal_vapp: Optional[AbstractVirtualApp] = None,
-                                  schemas: Optional[TabbedJsonSchemas] = None) -> Optional[Dict]:
+                                  override_schemas: Optional[TabbedJsonSchemas] = None) -> Optional[Dict]:
     """
     Validates the given data against the corresponding schema(s). The given data is assumed to be
     in a format as returned by sheet_utils, i.e. a dictionary of lists of objects where each
@@ -148,7 +158,7 @@
     the given data, which can be useful in identifying the object in the source data if it is unidentified.
""" - schema_manager = SchemaManager(portal_vapp=portal_vapp, schemas=schemas) + schema_manager = SchemaManager(portal_env=portal_env, portal_vapp=portal_vapp, override_schemas=override_schemas) errors = [] schemas = schema_manager.fetch_relevant_schemas(list(data.keys())) @@ -156,7 +166,8 @@ def validate_data_against_schemas(data: TabbedSheetData, for data_type in data: schema = schemas.get(data_type) if not schema: - errors.append({"error": f"No schema found for: {data_type}"}) + if schema is None: # if Schema is {}, we're deliberately suppressing schema checking (not an error) + errors.append({"error": f"No schema found for: {data_type}"}) continue data_errors = validate_data_items_against_schemas(data[data_type], data_type, schema) errors.extend(data_errors) diff --git a/pyproject.toml b/pyproject.toml index 7e1520b69..0d661cd71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.0.0.1-alpha.13" # to become "8.1.0" +version = "8.0.0.1-alpha.14" # to become "8.1.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_bundle_utils.py b/test/test_bundle_utils.py index ce716878f..8b5777404 100644 --- a/test/test_bundle_utils.py +++ b/test/test_bundle_utils.py @@ -218,10 +218,19 @@ def test_load_table_structures(): assert str(exc.value) == "Unknown file type: something.else" -def test_load_items(): +@contextlib.contextmanager +def no_schemas(): with mock.patch.object(validation_utils_module, "get_schema") as mock_get_schema: mock_get_schema.return_value = {} + yield + + +def test_load_items(): + + # with mock.patch.object(validation_utils_module, "get_schema") as mock_get_schema: + # mock_get_schema.return_value = {} + with no_schemas(): assert load_items(SAMPLE_XLSX_FILE, apply_heuristics=True) == SAMPLE_XLSX_FILE_ITEM_CONTENT assert load_items(SAMPLE_CSV_FILE, apply_heuristics=True) == SAMPLE_CSV_FILE_ITEM_CONTENT @@ -385,29 +394,35 @@ def test_load_items_with_schema(): print("Case 2") file_base_name = os.path.splitext(os.path.basename(SAMPLE_CSV_FILE2))[0] expected = SAMPLE_CSV_FILE2_ITEM_CONTENT - actual = load_items(SAMPLE_CSV_FILE2, schemas={file_base_name: {}}, apply_heuristics=True) + actual = load_items(SAMPLE_CSV_FILE2, override_schemas={file_base_name: {}}, apply_heuristics=True) assert actual == expected print("Case 3") expected = SAMPLE_CSV_FILE2_PERSON_CONTENT_HINTED - actual = load_items(SAMPLE_CSV_FILE2, schemas=SAMPLE_CSV_FILE2_SCHEMAS, tab_name='Person') + actual = load_items(SAMPLE_CSV_FILE2, override_schemas=SAMPLE_CSV_FILE2_SCHEMAS, tab_name='Person') assert actual == expected def test_sample_items_csv_vs_json(): - csv_content = load_items(SAMPLE_CSV_FILE2, tab_name='Person', schemas=SAMPLE_CSV_FILE2_SCHEMAS) + csv_content = load_items(SAMPLE_CSV_FILE2, tab_name='Person', override_schemas=SAMPLE_CSV_FILE2_SCHEMAS) - json_content = load_items(SAMPLE_JSON_FILE2, tab_name="Person", schemas=SAMPLE_CSV_FILE2_SCHEMAS) + json_content = load_items(SAMPLE_JSON_FILE2, tab_name="Person", override_schemas=SAMPLE_CSV_FILE2_SCHEMAS) assert csv_content == json_content def test_sample_items_json_vs_yaml(): - tabs_data_from_json = load_items(SAMPLE_JSON_TABS_FILE) - tabs_data_from_yaml = load_items(SAMPLE_YAML_TABS_FILE) - assert tabs_data_from_json == tabs_data_from_yaml + with SchemaManager.fresh_schema_manager_context_for_testing(): + + # with mock.patch.object(validation_utils_module, "get_schema") as 
mock_get_schema: + # mock_get_schema.return_value = {} # no schema checking + with no_schemas(): + + tabs_data_from_json = load_items(SAMPLE_JSON_TABS_FILE) + tabs_data_from_yaml = load_items(SAMPLE_YAML_TABS_FILE) + assert tabs_data_from_json == tabs_data_from_yaml @using_fresh_ff_state_for_testing() @@ -421,7 +436,7 @@ def test_schema_autoload_mixin_caching(portal_env): assert schema_manager.portal_env == 'data' # it should have defaulted even if we didn't supply it - assert SchemaManager.SCHEMA_CACHE == {} + assert schema_manager.SCHEMA_CACHE == {} sample_schema_name = 'foo' sample_schema = {'mock_schema_for': 'foo'} @@ -431,7 +446,7 @@ def test_schema_autoload_mixin_caching(portal_env): assert schema_manager.fetch_schema(sample_schema_name) == sample_schema schema_cache_with_sample_schema = {sample_schema_name: sample_schema} - assert SchemaManager.SCHEMA_CACHE == schema_cache_with_sample_schema + assert schema_manager.SCHEMA_CACHE == schema_cache_with_sample_schema @using_fresh_ff_state_for_testing() @@ -639,7 +654,9 @@ def test_table_checker(): with printed_output() as printed: with pytest.raises(Exception) as exc: - checker = TableChecker(SAMPLE_WORKBOOK_WITH_UNMATCHED_UUID_REFS, portal_env=mock_ff_env) + checker = TableChecker(SAMPLE_WORKBOOK_WITH_UNMATCHED_UUID_REFS, + flattened=True, + portal_env=mock_ff_env) checker.check_tabs() assert str(exc.value) == "There were 2 problems while compiling hints." assert printed.lines == [ @@ -648,8 +665,12 @@ def test_table_checker(): f" {SAMPLE_INSTITUTION_UUID!r}") ] - checker = TableChecker(SAMPLE_WORKBOOK_WITH_MATCHED_UUID_REFS, portal_env=mock_ff_env) + checker = TableChecker(SAMPLE_WORKBOOK_WITH_MATCHED_UUID_REFS, + flattened=True, + portal_env=mock_ff_env) checker.check_tabs() - checker = TableChecker(SAMPLE_WORKBOOK_WITH_NAME_REFS, portal_env=mock_ff_env) + checker = TableChecker(SAMPLE_WORKBOOK_WITH_NAME_REFS, + flattened=True, + portal_env=mock_ff_env) checker.check_tabs() diff --git a/test/test_validation_utils.py b/test/test_validation_utils.py index ce1730d1e..d0039a957 100644 --- a/test/test_validation_utils.py +++ b/test/test_validation_utils.py @@ -4,13 +4,68 @@ import re from dcicutils.bundle_utils import inflate -from dcicutils.misc_utils import AbstractVirtualApp, NamedObject, json_file_contents, to_snake_case -from dcicutils.qa_utils import MockResponse +from dcicutils.misc_utils import AbstractVirtualApp, NamedObject, json_file_contents, to_snake_case, to_camel_case +from dcicutils.qa_utils import MockResponse, printed_output from dcicutils.validation_utils import SchemaManager, validate_data_against_schemas, summary_of_data_validation_errors from .conftest_settings import TEST_DIR from .helpers_for_bundles import SAMPLE_WORKBOOK_WITH_NAME_REFS +def test_schema_manager_simple(): + + print() # start on a fresh line + + with printed_output() as printed: + + schema_manager_1 = SchemaManager() + + assert printed.lines == [ + "The portal_env was not explicitly supplied. Schemas will come from portal_env='data'." 
+        ]
+
+        assert schema_manager_1.SCHEMA_CACHE == {}
+
+        # Test that schema-lookup works, since that's kinda what these are about
+        user_schema = schema_manager_1.fetch_schema('user')
+        assert isinstance(user_schema, dict)
+        assert user_schema.get('title') == 'User'
+
+        assert schema_manager_1.override_schemas == {}
+
+
+@pytest.mark.parametrize('schema_id', ['user', 'User'])
+def test_schema_manager_with_schemas(schema_id):
+
+    print()  # start on a fresh line
+
+    with printed_output() as printed:
+
+        schemas = {schema_id: {}}
+        snake_id = to_snake_case(schema_id)
+        camel_id = to_camel_case(schema_id)
+
+        # Just to make sure to_snake_case and to_camel_case aren't doing something weird
+        assert schema_id == snake_id or schema_id == camel_id
+
+        schema_manager_2 = SchemaManager(override_schemas=schemas)
+
+        # whether 'User' or 'user' was an input, it will be canonicalized to snake case
+        assert schema_manager_2.override_schemas == {snake_id: {}}
+
+        assert printed.lines == [
+            "The portal_env was not explicitly supplied. Schemas will come from portal_env='data'."
+        ]
+
+        assert schema_manager_2.fetch_schema(snake_id) == {}
+        assert schema_manager_2.fetch_schema(camel_id) == {}
+
+        # even after using a camel case id, only the snake_id will be in the table
+        assert schema_manager_2.override_schemas == {snake_id: {}}
+
+        # this would only get updated if we fetched something remotely
+        assert schema_manager_2.SCHEMA_CACHE == {}
+
+
 def test_schema_manager_identifying_value():
 
     with pytest.raises(ValueError) as exc:
@@ -54,14 +109,14 @@ def get(cls, path_url):
 
     good_workbook = inflate(SAMPLE_WORKBOOK_WITH_NAME_REFS)
 
-    assert validate_data_against_schemas(good_workbook, portal_vapp) is None
+    assert validate_data_against_schemas(good_workbook, portal_vapp=portal_vapp) is None
 
     bogus_workbook = copy.deepcopy(good_workbook)  # modified immediately below
     user_items = bogus_workbook['User']
     user_item0 = user_items[0]
     user_item0['bogus'] = 'item'
 
-    assert validate_data_against_schemas(bogus_workbook, portal_vapp) == {
+    assert validate_data_against_schemas(bogus_workbook, portal_vapp=portal_vapp) == {
         'errors': [
             {
                 'extraneous_properties': ['bogus'],
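[Note, not part of the patch: as the new tests above exercise, SchemaManager now canonicalizes override_schemas keys to snake case on entry and keeps SCHEMA_CACHE as a per-instance dict, so overrides never populate the cache. A minimal sketch of that behavior (no portal access should occur for overridden names):

    from dcicutils.validation_utils import SchemaManager

    manager = SchemaManager(override_schemas={'User': {}})
    assert manager.override_schemas == {'user': {}}  # key canonicalized via to_snake_case
    assert manager.fetch_schema('User') == {}        # lookup canonicalizes too; override hit, no remote fetch
    assert manager.SCHEMA_CACHE == {}                # per-instance cache, untouched by overrides
]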