Release: Refactor Load Types (#2241)
Release: Refactor Load Types (#2241)
A very common source of annoyance and confusion is that `trimesh.load`
can return many different types depending on what kind of file was
passed (e.g. #2239). This refactor changes the return types of the
loading functions to:

- `trimesh.load_scene -> Scene`
  - Loads into a `Scene`, the most general container, which can hold any
    loadable type. Most people should probably use this to load geometry.
- `trimesh.load_mesh -> Trimesh`
  - Forces all mesh objects in a scene into a single `Trimesh` object. This
    potentially has to drop information and irreversibly concatenate
    multiple meshes.
  - The implementation of the concatenation logic is now in `Scene.to_mesh`
    rather than in load.
- `trimesh.load_path -> Path`
  - Loads into either a `Path2D` or a `Path3D`, both of which inherit from
    `Path`.
- `trimesh.load -> Geometry`
  - This was the original load entry point and is deprecated, but there are
    no current plans to remove it. It has been modified into a thin wrapper
    for `load_scene` that attempts to match the behavior of the previous
    loader for backwards compatibility. In my testing against the current
    `main` branch it was returning the same types [99.8% of the
    time](https://gist.github.com/mikedh/8de541e066ce842932b1f6cd97c214ca),
    although there may be other subtle differences.
  - `trimesh.load(..., force='mesh')` will emit a deprecation warning in
    favor of `load_mesh`.
  - `trimesh.load(..., force='scene')` will emit a deprecation warning in
    favor of `load_scene`.

Additional changes:
- Removes `Geometry.metadata['file_path']` in favor of
  `Geometry.source.file_path`. Everything that inherits from `Geometry`
  should now have a `.source` attribute which is a typed dataclass. This
  was something of a struggle, as `file_path` was populated into metadata
  on load, but we also try to make sure `metadata` is preserved through
  round-trips if at all possible, and so `load` inserted *different* keys
  into the metadata. Making it first-class information rather than a loose
  key seems like an improvement, but users will have to replace
  `mesh.metadata["file_name"]` with `mesh.source.file_name`.
- Moves all network fetching into `WebResolver` so it can be more easily
  gated by `allow_remote`.
- Removes code for the following deprecations:
  - January 2025: `trimesh.resources.get` in favor of the typed
    alternatives (`get_json`, `get_bytes`, etc.).
  - January 2025: `Scene.deduplicated` in favor of a very short list
    comprehension on `Scene.duplicate_nodes`.
  - March 2024: `trimesh.graph.smoothed` in favor of
    `trimesh.graph.smooth_shaded`.
- Adds the following new deprecations:
  - January 2026: `Path3D.to_planar` -> `Path3D.to_2D`, to be consistent
    with `Path2D.to_3D`.
- Fixes #2335 
- Fixes #2330 
- Fixes #2239
- Releases #2313 
- Releases #2327 
- Releases #2336
- Releases #2339
mikedh authored Jan 21, 2025
2 parents 2fcb2b2 + b15df31 commit aed7eee
Showing 60 changed files with 1,687 additions and 1,254 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
@@ -54,7 +54,7 @@ jobs:
 pypi:
   name: Release To PyPi
-  needs: [tests, containers]
+  needs: [tests, containers, corpus]
   runs-on: ubuntu-latest
   steps:
     - uses: actions/checkout@v4
@@ -118,13 +118,13 @@ jobs:
 - name: Install Trimesh
   run: pip install .[easy,test]
 - name: Run Corpus Check
-  run: python tests/corpus.py
+  run: python tests/corpus.py -run

 release:
   permissions:
     contents: write # for actions/create-release
   name: Create GitHub Release
-  needs: [tests, containers]
+  needs: [tests, containers, corpus]
   runs-on: ubuntu-latest
   steps:
     - name: Checkout code
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -68,5 +68,5 @@ jobs:
 - name: Install Trimesh
   run: pip install .[easy,test]
 - name: Run Corpus Check
-  run: python tests/corpus.py
+  run: python tests/corpus.py -run

18 changes: 9 additions & 9 deletions docs/requirements.txt
@@ -1,13 +1,13 @@
-pypandoc==1.13
+pypandoc==1.14
 recommonmark==0.7.1
-jupyter==1.0.0
+jupyter==1.1.1

 # get sphinx version range from furo install
-furo==2024.5.6
-myst-parser==3.0.1
-pyopenssl==24.1.0
-autodocsumm==0.2.12
-jinja2==3.1.4
-matplotlib==3.8.4
-nbconvert==7.16.4
+furo==2024.8.6
+myst-parser==4.0.0
+pyopenssl==24.3.0
+autodocsumm==0.2.14
+jinja2==3.1.5
+matplotlib==3.10.0
+nbconvert==7.16.5

2 changes: 1 addition & 1 deletion examples/nearest.ipynb
@@ -75,7 +75,7 @@
 "# create a scene containing the mesh and two sets of points\n",
 "scene = trimesh.Scene([mesh, cloud_original, cloud_close])\n",
 "\n",
-"# show the scene wusing\n",
+"# show the scene we are using\n",
 "scene.show()"
 ]
 }
}
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -5,7 +5,7 @@ requires = ["setuptools >= 61.0", "wheel"]
 [project]
 name = "trimesh"
 requires-python = ">=3.8"
-version = "4.5.3"
+version = "4.6.0"
 authors = [{name = "Michael Dawson-Haggerty", email = "mikedh@kerfed.com"}]
 license = {file = "LICENSE.md"}
 description = "Import, export, process, analyze and view triangular meshes."
@@ -120,6 +120,7 @@ test_more = [
 "matplotlib",
 "pymeshlab",
 "triangle",
+"ipython",
 ]

# interfaces.gmsh will be dropped Jan 2025
206 changes: 168 additions & 38 deletions tests/corpus.py
@@ -6,29 +6,85 @@
will download more than a gigabyte to your home directory!
"""

import json
import sys
import time
from dataclasses import asdict, dataclass

import numpy as np
from pyinstrument import Profiler
from pyinstrument.renderers.jsonrenderer import JSONRenderer

import trimesh
from trimesh.typed import List, Optional, Tuple
from trimesh.util import log, wrap_as_stream

# get a set with available extension
available = trimesh.available_formats()

# remove loaders that are thin wrappers
available.difference_update(
[
k
for k, v in trimesh.exchange.load.mesh_loaders.items()
if v in (trimesh.exchange.misc.load_meshio,)
]
)
# remove loaders we don't care about
available.difference_update({"json", "dae", "zae"})
available.update({"dxf", "svg"})
@dataclass
class LoadReport:
# i.e. 'hi.glb'
file_name: str

# i.e. 'glb'
file_type: str

# i.e. 'Scene'
type_load: Optional[str] = None

# what type was every geometry
type_geometry: Optional[Tuple[str]] = None

# what is the printed repr of the object, i.e. `<Trimesh ...>`
repr_load: Optional[str] = None

# if there was an exception save it here
exception: Optional[str] = None


@dataclass
class Report:
# what did we load
load: list[LoadReport]

# what version of trimesh was this produced on
version: str

# what was the profiler output for this run
# a pyinstrument.renderers.JSONRenderer output
profile: str

def compare(self, other: "Report"):
"""
Compare this load report to another.
"""
# what files were loaded by both versions
self_type = {o.file_name: o.type_load for o in self.load}
other_type = {n.file_name: n.type_load for n in other.load}

both = set(self_type.keys()).intersection(other_type.keys())
matches = np.array([self_type[k] == other_type[k] for k in both])
percent = matches.sum() / len(matches)

print(f"Comparing `{self.version}` against `{other.version}`")
print(f"Return types matched {percent * 100.0:0.3f}% of the time")
print(f"Loaded {len(self.load)} vs Loaded {len(other.load)}")

def on_repo(repo, commit):

def from_dict(data: dict) -> Report:
"""
Parse a `Report` which has been exported using `dataclasses.asdict`
into a Report object.
"""
return Report(
load=[LoadReport(**r) for r in data.get("load", [])],
version=data.get("version"),
profile=data.get("profile"),
)


def on_repo(
repo: str, commit: str, available: set, root: Optional[str] = None
) -> List[LoadReport]:
"""
Try loading all supported files in a Github repo.
@@ -38,6 +94,10 @@ def on_repo(repo, commit):
Github "slug" i.e. "assimp/assimp"
commit : str
Full hash of the commit to check.
available
Which `file_type` to check
root
If passed only consider files under this root directory.
"""

# get a resolver for the specific commit
Expand All @@ -47,32 +107,43 @@ def on_repo(repo, commit):
# list file names in the repo we can load
paths = [i for i in repo.keys() if i.lower().split(".")[-1] in available]

report = {}
if root is not None:
# clip off any file not under the root path
paths = [p for p in paths if p.startswith(root)]

report = []
for _i, path in enumerate(paths):
namespace, name = path.rsplit("/", 1)
# get a subresolver that has a root at
# the file we are trying to load
resolver = repo.namespaced(namespace)

check = path.lower()
broke = (
"malformed empty outofmemory "
+ "bad incorrect missing "
+ "failures pond.0.ply"
).split()
broke = "malformed outofmemory bad incorrect missing invalid failures".split()
should_raise = any(b in check for b in broke)
raised = False

# clip off the big old name from the archive
saveas = path[path.find(commit) + len(commit) :]
# start collecting data about the current load attempt
current = LoadReport(file_name=name, file_type=trimesh.util.split_extension(name))

print(f"Attempting: {name}")

try:
m = trimesh.load(
file_obj=wrap_as_stream(resolver.get(name)),
file_type=name,
resolver=resolver,
)
report[saveas] = str(m)

# save the load types
current.type_load = m.__class__.__name__
if isinstance(m, trimesh.Scene):
# save geometry types
current.type_geometry = tuple(
[g.__class__.__name__ for g in m.geometry.values()]
)
# save the <Trimesh ...> repr
current.repr_load = str(m)

# if our source was a GLTF we should be able to roundtrip without
# dropping
@@ -104,19 +175,19 @@ def on_repo(repo, commit):
# this is what unsupported formats
# like GLTF 1.0 should raise
log.debug(E)
report[saveas] = str(E)
current.exception = str(E)
except BaseException as E:
raised = True
# we got an error on a file that should have passed
if not should_raise:
log.debug(path, E)
raise E
report[saveas] = str(E)
current.exception = str(E)

# if it worked when it didn't have to add a label
if should_raise and not raised:
# raise ValueError(name)
report[saveas] += " SHOULD HAVE RAISED"
current.exception = "PROBABLY SHOULD HAVE RAISED BUT DIDN'T!"
report.append(current)

return report

@@ -165,33 +236,92 @@ def equal(a, b):
return a == b


if __name__ == "__main__":
trimesh.util.attach_to_log()
def run(save: bool = False):
"""
Try to load and export every mesh we can get our hands on.
Parameters
-----------
save
If passed, save a JSON dump of the load report.
"""
# get a set with available extension
available = trimesh.available_formats()

# remove meshio loaders because we're not testing meshio
available.difference_update(
[
k
for k, v in trimesh.exchange.load.mesh_loaders.items()
if v in (trimesh.exchange.misc.load_meshio,)
]
)

# TODO : waiting on a release containing pycollada/pycollada/147
available.difference_update({"dae"})

with Profiler() as P:
# check against the small trimesh corpus
loads = on_repo(
repo="mikedh/trimesh",
commit="2fcb2b2ea8085d253e692ecd4f71b8f450890d51",
available=available,
root="models",
)

# check the assimp corpus, about 50mb
report = on_repo(
repo="assimp/assimp", commit="c2967cf79acdc4cd48ecb0729e2733bf45b38a6f"
loads.extend(
on_repo(
repo="assimp/assimp",
commit="1e44036c363f64d57e9f799beb9f06d4d3389a87",
available=available,
root="test",
)
)
# check the gltf-sample-models, about 1gb
report.update(
loads.extend(
on_repo(
repo="KhronosGroup/glTF-Sample-Models",
commit="8e9a5a6ad1a2790e2333e3eb48a1ee39f9e0e31b",
available=available,
)
)

# add back collada for this repo
available.update(["dae", "zae"])
report.update(
# try on the universal robot models
loads.extend(
on_repo(
repo="ros-industrial/universal_robot",
commit="8f01aa1934079e5a2c859ccaa9dd6623d4cfa2fe",
available=available,
)
)

# show all profiler lines
log.info(P.output_text(show_all=True))

# print a formatted report of what we loaded
log.debug("\n".join(f"# {k}\n{v}\n" for k, v in report.items()))
# save the profile for comparison loader
profile = P.output(JSONRenderer())

# compose the overall report
report = Report(load=loads, version=trimesh.__version__, profile=profile)

if save:
with open(f"trimesh.{trimesh.__version__}.{int(time.time())}.json", "w") as F:
json.dump(asdict(report), F)

return report


if __name__ == "__main__":
trimesh.util.attach_to_log()

if "-run" in " ".join(sys.argv):
run()

if "-compare" in " ".join(sys.argv):
with open("trimesh.4.5.3.1737061410.json") as f:
old = from_dict(json.load(f))

with open("trimesh.4.6.0.1737060030.json") as f:
new = from_dict(json.load(f))

new.compare(old)
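The `Report`/`LoadReport` round-trip in corpus.py leans on `dataclasses.asdict`; a minimal standalone sketch of that pattern (with a simplified field set, not the real corpus.py classes):

```python
import json
from dataclasses import asdict, dataclass
from typing import List, Optional


@dataclass
class LoadReport:
    # simplified stand-in for the corpus.py LoadReport
    file_name: str
    file_type: str
    type_load: Optional[str] = None


@dataclass
class Report:
    load: List[LoadReport]
    version: str


def from_dict(data: dict) -> Report:
    # rebuild the nested dataclasses that asdict() flattened to dicts
    return Report(
        load=[LoadReport(**r) for r in data.get("load", [])],
        version=data.get("version"),
    )


report = Report(load=[LoadReport("hi.glb", "glb", "Scene")], version="4.6.0")

# asdict -> JSON -> dict -> from_dict round-trips losslessly, which is
# what lets two corpus runs on different versions be compared later
round_tripped = from_dict(json.loads(json.dumps(asdict(report))))
assert round_tripped == report
```

Dataclass equality is field-by-field, so the assertion checks the whole nested structure survived serialization.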
1 change: 0 additions & 1 deletion tests/generic.py
@@ -366,7 +366,6 @@ def check(item):
 batched.append(loaded)

 for mesh in batched:
-    mesh.metadata["file_name"] = file_name
     # only return our limit
     if returned[0] >= count:
         return
2 changes: 1 addition & 1 deletion tests/regression.py
@@ -12,7 +12,7 @@ def typical_application():
 meshes = g.get_meshes(raise_error=True)

 for mesh in meshes:
-    g.log.info("Testing %s", mesh.metadata["file_name"])
+    g.log.info("Testing %s", mesh.source.file_name)
     assert len(mesh.faces) > 0
     assert len(mesh.vertices) > 0
