Update to use pandas 2.x #838

Draft: wants to merge 18 commits into base: main

Changes from all commits
30 changes: 12 additions & 18 deletions .github/workflows/core_tests.yml

@@ -51,13 +51,12 @@ jobs:
       "psutil=5.9.5" \
       "pydantic=2.6.1" \
       "pypyr=5.8.0" \
-      "pytables=3.6.1" \
+      "pytables=3.9.2" \
       "pytest-cov" \
       "pytest-regressions=2.5.0" \
       "scikit-learn=1.2.2" \
-      "sharrow>=2.6.0" \
+      "sharrow>=2.7.0" \
       "simwrapper=1.8.5" \
-      "xarray=2023.2.0" \
       "zarr=2.14.2" \
       "zstandard=0.21.0"
     if: steps.cache.outputs.cache-hit != 'true'
@@ -151,13 +150,12 @@ jobs:
       "psutil=5.9.5" \
       "pydantic=2.6.1" \
       "pypyr=5.8.0" \
-      "pytables=3.6.1" \
+      "pytables=3.9.2" \
      "pytest-cov" \
       "pytest-regressions=2.5.0" \
       "scikit-learn=1.2.2" \
-      "sharrow>=2.6.0" \
+      "sharrow>=2.7.0" \
       "simwrapper=1.8.5" \
-      "xarray=2023.2.0" \
       "zarr=2.14.2" \
       "zstandard=0.21.0"
     if: steps.cache.outputs.cache-hit != 'true'
@@ -249,13 +247,12 @@ jobs:
       "psutil=5.9.5" \
       "pydantic=2.6.1" \
       "pypyr=5.8.0" \
-      "pytables=3.6.1" \
+      "pytables=3.9.2" \
       "pytest-cov" \
       "pytest-regressions=2.5.0" \
       "scikit-learn=1.2.2" \
-      "sharrow>=2.6.0" \
+      "sharrow>=2.7.0" \
       "simwrapper=1.8.5" \
-      "xarray=2023.2.0" \
       "zarr=2.14.2" \
       "zstandard=0.21.0"
     if: steps.cache.outputs.cache-hit != 'true'
@@ -346,13 +343,12 @@ jobs:
       "psutil=5.9.5" \
       "pydantic=2.6.1" \
       "pypyr=5.8.0" \
-      "pytables=3.6.1" \
+      "pytables=3.9.2" \
       "pytest-cov" \
       "pytest-regressions=2.5.0" \
       "scikit-learn=1.2.2" \
-      "sharrow>=2.6.0" \
+      "sharrow>=2.7.0" \
       "simwrapper=1.8.5" \
-      "xarray=2023.2.0" \
       "zarr=2.14.2" \
       "zstandard=0.21.0"
     if: steps.cache.outputs.cache-hit != 'true'
@@ -413,13 +409,12 @@ jobs:
       "psutil=5.9.5" \
       "pydantic=2.6.1" \
       "pypyr=5.8.0" \
-      "pytables=3.6.1" \
+      "pytables=3.9.2" \
       "pytest-cov" \
       "pytest-regressions=2.5.0" \
       "scikit-learn=1.2.2" \
-      "sharrow>=2.6.0" \
+      "sharrow>=2.7.0" \
       "simwrapper=1.8.5" \
-      "xarray=2023.2.0" \
       "zarr=2.14.2" \
       "zstandard=0.21.0"
     if: steps.cache.outputs.cache-hit != 'true'
@@ -479,13 +474,12 @@ jobs:
       "psutil=5.9.5" \
       "pydantic=2.6.1" \
       "pypyr=5.8.0" \
-      "pytables=3.6.1" \
+      "pytables=3.9.2" \
       "pytest-cov" \
       "pytest-regressions=2.5.0" \
       "scikit-learn=1.2.2" \
-      "sharrow>=2.6.0" \
+      "sharrow>=2.7.0" \
       "simwrapper=1.8.5" \
-      "xarray=2023.2.0" \
       "zarr=2.14.2" \
       "zstandard=0.21.0"
     if: steps.cache.outputs.cache-hit != 'true'
7 changes: 5 additions & 2 deletions activitysim/abm/models/disaggregate_accessibility.py

@@ -569,7 +569,7 @@ def expand_template_zones(self, tables):
         _expanded = pd.DataFrame(util.named_product(**index_params)).set_index("index")

         # Use result to join template onto expanded table of zones
-        ex_table = _expanded.join(master_template).reset_index()
+        ex_table = _expanded.join(master_template).sort_index().reset_index()

         # Concatenate a new unique set of ids
         cols = ["home_zone_id", "proto_household_id", "proto_person_id"]
@@ -642,7 +642,9 @@ def create_proto_pop(self):
             .set_index("index")
             .rename(columns={"hhid": hhid})
         )
-        persons = rep.join(persons).sort_values(hhid).reset_index(drop=True)
+        persons = (
+            rep.join(persons, sort=True).sort_values(hhid).reset_index(drop=True)
+        )
         persons[perid] = persons.index + 1

         # Assign persons to tours
@@ -718,6 +720,7 @@ def merge_persons(self):

         perid = self.params["proto_persons"]["index_col"]
         persons_merged.set_index(perid, inplace=True, drop=True)
+        persons_merged = persons_merged.sort_index()
         self.proto_pop["proto_persons_merged"] = persons_merged

         # Store in pipeline
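Background on these changes, as a minimal sketch (the frames below are made-up stand-ins for `_expanded` and `master_template`, not the model's real tables): `DataFrame.join` returns rows in the left frame's order and does not guarantee an index-sorted result, and the row order coming out of the join is what later becomes sequential proto-population IDs. Sorting explicitly keeps those IDs reproducible across pandas versions.

```python
import pandas as pd

# Hypothetical stand-ins for _expanded and master_template.
expanded = pd.DataFrame({"home_zone_id": [3, 1, 2]}, index=[30, 10, 20])
template = pd.DataFrame({"attr": ["c", "a", "b"]}, index=[30, 10, 20])

# join() preserves the left frame's (unsorted) order; without an explicit
# sort_index(), position-based IDs assigned afterwards could differ from a
# pandas-1.x run that happened to come back sorted.
ex_table = expanded.join(template).sort_index().reset_index()
ex_table["proto_household_id"] = ex_table.index + 1  # now deterministic
print(ex_table)
```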
60 changes: 34 additions & 26 deletions activitysim/abm/models/input_checker.py

@@ -300,34 +300,42 @@ def report_errors(state, input_checker_settings, v_warnings, v_errors):

             for warn in warns:
                 if "dataframe validator" in str(warn.message):
-                    file_logger.warning(
-                        "Failed dataframe validator: "
-                        + str(warn.message).split("\n")[-1]
-                    )
+                    try:
+                        file_logger.warning(
+                            "Failed dataframe validator: "
+                            + str(warn.message).split("\n")[-1]
+                        )
+                    except Exception:
+                        file_logger.warning(warn)
                 elif "element-wise validator" in str(warn.message):
-                    if "DataFrameSchema" in str(warn.message):
-                        file_logger.warning(
-                            "Failed element-wise validator: <"
-                            + str(warn.message).split("\n")[0].split(" ")[1]
-                            + table_name
-                            + ")>\n\t"
-                            + str(warn.message)
-                            .split("failure cases:\n")[0]
-                            .split("\n")[-2]
-                            + "\n\tfailure cases:\n\t"
-                            + "\n\t".join(
-                                str(warn.message)
-                                .split("failure cases:\n")[1]
-                                .split("\n")
-                            )
-                        )
-                    else:
-                        file_logger.warning(
-                            "Failed element-wise validator: <"
-                            + " ".join(str(warn.message).split("\n")[0].split(" ")[1:3])
-                            + "\n\t"
-                            + "\n\t".join(str(warn.message).split("\n")[1:])
-                        )
+                    try:
+                        if "DataFrameSchema" in str(warn.message):
+                            file_logger.warning(
+                                "Failed element-wise validator: <"
+                                + str(warn.message).split("\n")[0].split(" ")[1]
+                                + table_name
+                                + ")>\n\t"
+                                + str(warn.message)
+                                .split("failure cases:\n")[0]
+                                .split("\n")[-2]
+                                + "\n\tfailure cases:\n\t"
+                                + "\n\t".join(
+                                    str(warn.message)
+                                    .split("failure cases:\n")[1]
+                                    .split("\n")
+                                )
+                            )
+                        else:
+                            file_logger.warning(
+                                "Failed element-wise validator: <"
+                                + " ".join(
+                                    str(warn.message).split("\n")[0].split(" ")[1:3]
+                                )
+                                + "\n\t"
+                                + "\n\t".join(str(warn.message).split("\n")[1:])
+                            )
+                    except Exception:
+                        file_logger.warning(warn)
                 else:
                     file_logger.warning(warn)
                 file_logger.warning("\n")
2 changes: 1 addition & 1 deletion activitysim/abm/models/school_escorting.py

@@ -634,7 +634,7 @@ def school_escorting(
     state.add_table("tours", tours)
     state.get_rn_generator().drop_channel("tours")
     state.get_rn_generator().add_channel("tours", tours)
-    state.add_table("escort_bundles", escort_bundles)
+    state.add_table("escort_bundles", escort_bundles.reset_index(drop=True))
     # save school escorting tours and trips in pipeline so we can overwrite results from downstream models
     state.add_table("school_escort_tours", school_escort_tours)
     state.add_table("school_escort_trips", school_escort_trips)
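For context, a small sketch (with made-up data) of what `reset_index(drop=True)` buys here: `escort_bundles` can reach this point carrying a duplicated, non-sequential index left over from earlier concat/explode steps, and storing a clean `RangeIndex` in the pipeline avoids index-alignment surprises downstream.

```python
import pandas as pd

# made-up escort_bundles with a leftover duplicated index
escort_bundles = pd.DataFrame({"tour_id": [5, 5, 7]}, index=[2, 2, 9])
clean = escort_bundles.reset_index(drop=True)
print(clean.index)  # RangeIndex(start=0, stop=3, step=1)
```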
2 changes: 1 addition & 1 deletion activitysim/abm/models/trip_departure_choice.py

@@ -386,7 +386,7 @@ def choose_tour_leg_pattern(


 def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label):
-    if not trips.index.is_monotonic:
+    if not trips.index.is_monotonic_increasing:
         trips = trips.sort_index()

     # Assign the duration of the appropriate leg to the trip
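This one is a straight API rename: `Index.is_monotonic` was deprecated in pandas 1.5 and removed in 2.0 in favor of the explicit spelling. A one-liner to confirm the replacement:

```python
import pandas as pd

idx = pd.Index([10, 20, 30])
# pandas < 2.0: idx.is_monotonic was an alias for is_monotonic_increasing;
# only the explicit spelling survives in 2.x.
assert idx.is_monotonic_increasing
```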
4 changes: 2 additions & 2 deletions activitysim/abm/models/util/school_escort_tours_trips.py

@@ -100,7 +100,7 @@ def create_chauf_escort_trips(bundles):
             "outbound",
             "purpose",
         ]
-    ).reset_index()
+    ).reset_index(drop=True)

     # numbering trips such that outbound escorting trips must come first and inbound trips must come last
     outbound_trip_num = -1 * (
@@ -240,7 +240,7 @@ def create_escortee_trips(bundles):
     # create a new trip for each escortee destination
     escortee_trips = escortee_trips.explode(
         ["destination", "escort_participants", "school_escort_trip_num", "purpose"]
-    ).reset_index(drop=True)

     # numbering trips such that outbound escorting trips must come first and inbound trips must come last
     # this comes in handy when merging trips to others in the tour decided downstream
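Why `drop=True` matters after `explode()`, sketched with toy data: explode repeats the parent row's index label for each list element, so a plain `reset_index()` would materialize that duplicated label as a stray `index` column that can collide with later merges.

```python
import pandas as pd

trips = pd.DataFrame(
    {
        "destination": [[101, 102], [201]],
        "purpose": [["school", "school"], ["escort"]],
    }
)
# explode() repeats the parent index label for each list element ...
exploded = trips.explode(["destination", "purpose"])
print(exploded.index.tolist())  # [0, 0, 1]
# ... so reset_index(drop=True) discards it instead of adding an
# "index" column to the trips table.
escortee_trips = exploded.reset_index(drop=True)
```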
3 changes: 3 additions & 0 deletions activitysim/abm/models/vehicle_allocation.py

@@ -236,6 +236,8 @@ def vehicle_allocation(
         logger.info("Running for occupancy = %d", occup)
         # setting occup for access in spec expressions
         locals_dict.update({"occup": occup})
+        if model_settings.sharrow_skip:
+            locals_dict["disable_sharrow"] = True

Review comment (Contributor):
My memory might be sloppy. Why possibly opting out sharrow for vehicle allocation?

         choices = simulate.simple_simulate(
             state,
@@ -258,6 +260,7 @@
             choices.loc[choices["alt_choice"] == alt, "choice"] = choosers.loc[
                 choices["alt_choice"] == alt, alt
             ]
+        choices["choice"] = choices["choice"].astype(veh_choice_dtype)
         choices.loc[
             choices["alt_choice"] == alts_from_spec[-1], "choice"
         ] = alts_from_spec[-1]
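On the added `astype(veh_choice_dtype)` line: newer pandas releases warn (and are slated to eventually raise) when a `.loc` assignment would silently change a column's dtype, such as writing vehicle-name strings into a column that starts out numeric. A minimal sketch of the failure mode, with `object` as a hypothetical stand-in for `veh_choice_dtype`:

```python
import pandas as pd

choices = pd.DataFrame({"alt_choice": [1, 2], "choice": [0.0, 0.0]})
# Writing strings into the float64 "choice" column via .loc triggers an
# incompatible-dtype FutureWarning under recent pandas 2.x. Casting the
# column first, as the PR does with veh_choice_dtype, sidesteps this:
choices["choice"] = choices["choice"].astype("object")  # hypothetical dtype
choices.loc[choices["alt_choice"] == 1, "choice"] = "car_1"
print(choices)
```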
17 changes: 10 additions & 7 deletions activitysim/cli/create.py

@@ -2,6 +2,7 @@

 import glob
 import hashlib
+import importlib.resources
 import logging
 import os
 import shutil
@@ -21,14 +22,15 @@

 def _example_path(resource):
     resource = os.path.join(EXAMPLES_DIR, resource)
-    path = pkg_resources.resource_filename(PACKAGE, resource)
-
-    return path
+    return importlib.resources.as_file(
+        importlib.resources.files(PACKAGE).joinpath(resource)
+    )


 def _load_manifest():
-    with open(_example_path(MANIFEST), "r") as f:
-        manifest = yaml.safe_load(f.read())
+    with _example_path(MANIFEST) as f_pth:
+        with open(f_pth, "r") as f:
+            manifest = yaml.safe_load(f.read())

     assert manifest, f"error: could not load {MANIFEST}"
     return {example["name"]: example for example in manifest}
@@ -177,8 +179,9 @@ def get_example(
         )

     else:
-        for asset_path in glob.glob(_example_path(assets)):
-            copy_asset(asset_path, target_path, dirs_exist_ok=True)
+        with _example_path(assets) as pth:
+            for asset_path in glob.glob(str(pth)):
+                copy_asset(asset_path, target_path, dirs_exist_ok=True)

     print(f"copied! new project files are in {os.path.abspath(dest_path)}")
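The pattern behind this migration: `pkg_resources.resource_filename` is deprecated along with the rest of `pkg_resources`, and the stdlib replacement is `importlib.resources.files()` plus `as_file()`. The latter yields a real filesystem path only for the duration of a `with` block, which is why context managers are now threaded through `_load_manifest` and `get_example`. A self-contained sketch, using the stdlib `json` package as a stand-in for the real `PACKAGE` constant:

```python
import importlib.resources

# "json" stands in for PACKAGE; "__init__.py" for a bundled resource.
resource = importlib.resources.files("json").joinpath("__init__.py")
with importlib.resources.as_file(resource) as path:
    # `path` is a concrete Path inside the block, even if the package
    # lived in a zip; it may be a temporary extraction that goes away
    # once the block exits, hence the context-manager discipline.
    print(path.name, path.exists())
```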
31 changes: 30 additions & 1 deletion activitysim/core/assign.py

@@ -96,7 +96,36 @@ def read_assignment_spec(
     """

     try:
-        cfg = pd.read_csv(file_name, comment="#")
+        # we use an explicit list of na_values, these are the values that
+        # Pandas version 1.5 recognized as NaN by default. Notably absent is
+        # 'None' which is used in some spec files to be the object `None` not
+        # the float value NaN.
+        cfg = pd.read_csv(
+            file_name,
+            comment="#",
+            na_values=[
+                "",
+                "#N/A",
+                "#N/A N/A",
+                "#NA",
+                "-1.#IND",
+                "-1.#QNAN",
+                "-NaN",
+                "-nan",
+                "1.#IND",
+                "1.#QNAN",
+                "<NA>",
+                "N/A",
+                "NA",
+                "NULL",
+                "NaN",
+                "n/a",
+                "nan",
+                "null",
+            ],
+            keep_default_na=False,
+        )
+
     except Exception as e:
         logger.error(f"Error reading spec file: {file_name}")
         logger.error(str(e))
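What the explicit `na_values` list accomplishes, in miniature: pandas' default NA token list includes the string `None`, but ActivitySim spec files use `None` to mean the Python object, so the PR pins the pandas 1.5 default list minus that one token. The sketch below uses a shortened token list for brevity:

```python
import io
import pandas as pd

csv = io.StringIO("target,expression\nfallback,None\nmissing,\n")

# Default behavior: the literal string "None" is read as NaN.
print(pd.read_csv(io.StringIO(csv.getvalue()))["expression"].tolist())
# -> [nan, nan]

# With an explicit token list (sans "None") and keep_default_na=False,
# the string survives while blanks still read as NaN:
df = pd.read_csv(csv, na_values=["", "NA", "NaN"], keep_default_na=False)
print(df["expression"].tolist())
# -> ['None', nan]
```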
10 changes: 9 additions & 1 deletion activitysim/core/los.py

@@ -780,7 +780,15 @@ def get_mazpairs(self, omaz, dmaz, attribute):
                 self.maz_ceiling
             ) + np.asanyarray(dmaz, dtype=np.int64)
         else:
-            i = np.asanyarray(omaz) * self.maz_ceiling + np.asanyarray(dmaz)
+            # if we have less than a 32-bit index, it will
+            # overflow so we need to upgrade to at least 32 bit
+            omaz_as_array = np.asanyarray(omaz)
+            if omaz_as_array.dtype not in (np.int32, np.int64):
+                omaz_as_array = omaz_as_array.astype(np.int32)
+            dmaz_as_array = np.asanyarray(dmaz)
+            if dmaz_as_array.dtype not in (np.int32, np.int64):
+                dmaz_as_array = dmaz_as_array.astype(np.int32)
+            i = omaz_as_array * self.maz_ceiling + dmaz_as_array
         s = util.quick_loc_df(i, self.maz_to_maz_df, attribute)

         # FIXME - no point in returning series?
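The overflow this guards against, demonstrated with toy MAZ IDs: when `omaz`/`dmaz` arrive as a narrow integer dtype, NumPy keeps that dtype through the arithmetic, and the flattened origin-destination index silently wraps around.

```python
import numpy as np

maz_ceiling = 1_000
omaz = np.array([200, 201], dtype=np.int16)  # e.g. loaded from compact data
dmaz = np.array([7, 8], dtype=np.int16)

# int16 arithmetic wraps: 200 * 1_000 = 200_000 does not fit in int16
print(omaz * maz_ceiling + dmaz)  # wrong, overflowed values

# upcasting first, as the fix does, yields the intended flat indexes
print(omaz.astype(np.int32) * maz_ceiling + dmaz.astype(np.int32))
# -> [200007 201008]
```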
4 changes: 2 additions & 2 deletions activitysim/core/test/_tools.py

@@ -165,8 +165,8 @@ def progressive_checkpoint_test(
     if ref_target.exists():
         try:
             state.checkpoint.check_against(ref_target, checkpoint_name=step_name)
-        except Exception:
-            print(f"> {name} {step_name}: ERROR")
+        except Exception as e:
+            print(f"> {name} {step_name}: ERROR {e}")
             raise
         else:
             print(f"> {name} {step_name}: ok")
2 changes: 1 addition & 1 deletion activitysim/core/util.py

@@ -289,7 +289,7 @@ def quick_loc_series(loc_list, target_series):

     left_on = "left"

-    if isinstance(loc_list, pd.Int64Index):
+    if isinstance(loc_list, pd.Index):
         left_df = pd.DataFrame({left_on: loc_list.values})
     elif isinstance(loc_list, pd.Series):
         left_df = loc_list.to_frame(name=left_on)
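Context for this one-liner: `pd.Int64Index` (along with `UInt64Index` and `Float64Index`) was removed in pandas 2.0; a plain `pd.Index` now carries the dtype, so the `isinstance` check widens accordingly.

```python
import pandas as pd

idx = pd.Index([1, 2, 3])
print(type(idx).__name__, idx.dtype)  # Index int64
assert isinstance(idx, pd.Index)      # works on both pandas 1.x and 2.x
# pd.Int64Index raises AttributeError on pandas >= 2.0
```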
13 changes: 12 additions & 1 deletion activitysim/core/workflow/state.py

@@ -714,7 +714,18 @@ def get_pyarrow(
         if t is None:
             raise KeyError(tablename)
         if isinstance(t, pd.DataFrame):
-            t = pa.Table.from_pandas(t, preserve_index=True, columns=columns)
+            df = t
+            try:
+                t = pa.Table.from_pandas(df, preserve_index=True, columns=columns)
+            except (pa.ArrowTypeError, pa.ArrowInvalid):
+                # if there are object columns, try to convert them to categories
+                df = df.copy()
+                for k, dtype in df.dtypes.items():
+                    if dtype.kind == "O":
+                        df[k] = df[k].astype("str")
+                    elif dtype == "boolean":
+                        df[k] = df[k].astype("str")
+                t = pa.Table.from_pandas(df, preserve_index=True, columns=columns)
         if isinstance(t, pa.Table):
             if columns is not None:
                 t = t.select(columns)

Review comment (Contributor), on the df.copy() line:
I saw your latest comment about significantly longer run time with this PR. I noticed you are calling copy() here. In pandas 2.0 copy() defaults to a deep copy. I wonder if this contributed to the run time?

Reply (Member Author):
I don't think this is causing the problem. This code only executes in the write_tables step at the end of the model run.
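The fallback in context, as a runnable sketch: `pa.Table.from_pandas` can raise `ArrowTypeError`/`ArrowInvalid` on object columns holding mixed Python types, and coercing those columns to strings lets the conversion proceed. The table below is made up; the logic mirrors the PR's code.

```python
import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"mixed": ["a", 1, None]})  # object column, mixed types
try:
    table = pa.Table.from_pandas(df, preserve_index=True)
except (pa.ArrowTypeError, pa.ArrowInvalid):
    fixed = df.copy()
    for col, dtype in fixed.dtypes.items():
        if dtype.kind == "O":  # object columns become plain strings
            fixed[col] = fixed[col].astype("str")
    table = pa.Table.from_pandas(fixed, preserve_index=True)
print(table.schema)
```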