Performance tuning for building damage (#516)

* improve or_matched; add slc test * improve payload loop * remove keys() as not needed * remove keys to speed up * speed up using pre-filter on mapped/unmapped buildings * improve lens * Added cache to store fragility matches and the inventory attributes s… (#517) * Added cache to store fragility matches and the inventory attributes so like structures can pull from the cache * Add caching for both new and old rule sets * Updated to handle both old and new rule parsing when building the cache * Update CHANGELOG.md * Updated caching to consider the retrofit_key entry when matching the cache --------- Co-authored-by: Chris Navarro <cmnavarr@illinois.edu>
IN-CORE · Mar 8, 2024 · ae2c9cd · ae2c9cd
1 parent 870f2be
commit ae2c9cd
Show file tree

Hide file tree

Showing 5 changed files with 215 additions and 78 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Changed
 - Retrofitted Building Damage [#469](https://github.com/IN-CORE/pyincore/issues/469) 
+- Optimize building damage performance [#513](https://github.com/IN-CORE/pyincore/issues/513)
 
 
 ## [1.16.0] - 2024-02-07

diff --git a/pyincore/analyses/buildingdamage/buildingdamage.py b/pyincore/analyses/buildingdamage/buildingdamage.py
@@ -165,46 +165,46 @@ def building_damage_analysis_bulk_input(self, buildings, hazards, hazard_types,
 
             values_payload = []
             values_payload_liq = []  # for liquefaction, if used
-            unmapped_buildings = []
-            mapped_buildings = []
-            for b in buildings:
+
+            # Pre-filter buildings that are in fragility_sets to reduce the number of iterations
+            mapped_buildings = [b for b in buildings if b["id"] in fragility_sets]
+            unmapped_buildings = [b for b in buildings if b["id"] not in fragility_sets]
+
+            for b in mapped_buildings:
                 bldg_id = b["id"]
-                if bldg_id in fragility_sets:
-                    location = GeoUtil.get_location(b)
-                    loc = str(location.y) + "," + str(location.x)
-                    demands, units, adjusted_to_original = \
-                        AnalysisUtil.get_hazard_demand_types_units(b,
-                                                                   fragility_sets[bldg_id],
-                                                                   hazard_type,
-                                                                   allowed_demand_types)
-                    adjust_demand_types_mapping.update(adjusted_to_original)
-                    value = {
-                        "demands": demands,
-                        "units": units,
+                location = GeoUtil.get_location(b)
+                loc = str(location.y) + "," + str(location.x)
+                demands, units, adjusted_to_original = \
+                    AnalysisUtil.get_hazard_demand_types_units(b,
+                                                               fragility_sets[bldg_id],
+                                                               hazard_type,
+                                                               allowed_demand_types)
+                adjust_demand_types_mapping.update(adjusted_to_original)
+                value = {
+                    "demands": demands,
+                    "units": units,
+                    "loc": loc
+                }
+                values_payload.append(value)
+
+                if use_liquefaction and geology_dataset_id is not None:
+                    value_liq = {
+                        "demands": [""],
+                        "units": [""],
                         "loc": loc
                     }
-                    values_payload.append(value)
-                    mapped_buildings.append(b)
-
-                    if use_liquefaction and geology_dataset_id is not None:
-                        value_liq = {
-                            "demands": [""],
-                            "units": [""],
-                            "loc": loc
-                        }
-                        values_payload_liq.append(value_liq)
-                else:
-                    unmapped_buildings.append(b)
+                    values_payload_liq.append(value_liq)
 
             hazard_vals = hazard.read_hazard_values(values_payload, self.hazardsvc)
 
             # map demand type from payload to response
             # worst code I have ever written
             # e.g. 1.04 Sec Sa --> 1.04 SA --> 1.0 SA
             for payload, response in zip(values_payload, hazard_vals):
-                for i in range(len(payload["demands"])):
-                    adjust_demand_types_mapping[response["demands"][i]] = adjust_demand_types_mapping[payload[
-                        "demands"][i]]
+                adjust_demand_types_mapping.update({
+                    response_demand: adjust_demand_types_mapping[payload_demand]
+                    for payload_demand, response_demand in zip(payload["demands"], response["demands"])
+                })
 
             # record hazard value for each hazard type for later calculation
             multihazard_vals[hazard_type] = hazard_vals

diff --git a/pyincore/dfr3service.py b/pyincore/dfr3service.py
@@ -190,6 +190,7 @@ def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: Opt
         """
 
         dfr3_sets = {}
+        dfr3_sets_cache = {}
 
         # find default mapping entry key if not provided
         if entry_key is None:
@@ -214,40 +215,58 @@ def match_inventory(self, mapping: MappingSet, inventories: list, entry_key: Opt
             retrofit_entry_key = inventory["properties"]["retrofit_k"] if "retrofit_k" in \
                                                                           inventory["properties"] else None
 
-            for m in mapping.mappings:
-                # for old format rule matching [[]]
-                # [[ and ] or [ and ]]
-                if isinstance(m.rules, list):
-                    if self._property_match_legacy(rules=m.rules, properties=inventory["properties"]):
-                        if retrofit_entry_key is not None and retrofit_entry_key in m.entry.keys():
-                            curve = m.entry[retrofit_entry_key]
-                        else:
-                            curve = m.entry[entry_key]
-                        dfr3_sets[inventory['id']] = curve
-
-                        # if it's string:id; then need to fetch it from remote and cast to dfr3curve object
-                        if isinstance(curve, str) and curve not in matched_curve_ids:
-                            matched_curve_ids.append(curve)
-
-                        # use the first match
-                        break
-
-                # for new format rule matching {"AND/OR":[]}
-                # {"AND": [xx, "OR": [yy, yy], "AND": {"OR":["zz", "zz"]]}
-                elif isinstance(m.rules, dict):
-                    if self._property_match(rules=m.rules, properties=inventory["properties"]):
-                        if retrofit_entry_key is not None and retrofit_entry_key in m.entry.keys():
-                            curve = m.entry[retrofit_entry_key]
-                        else:
-                            curve = m.entry[entry_key]
-                        dfr3_sets[inventory['id']] = curve
+            cached_curve = self._check_cache(dfr3_sets_cache, inventory["properties"])
 
-                        # if it's string:id; then need to fetch it from remote and cast to dfr3 curve object
-                        if isinstance(curve, str) and curve not in matched_curve_ids:
-                            matched_curve_ids.append(curve)
+            if cached_curve is not None:
+                dfr3_sets[inventory['id']] = cached_curve
 
-                        # use the first match
-                        break
+            else:
+                for m in mapping.mappings:
+                    # for old format rule matching [[]]
+                    # [[ and ] or [ and ]]
+                    if isinstance(m.rules, list):
+                        if self._property_match_legacy(rules=m.rules, properties=inventory["properties"]):
+                            if retrofit_entry_key is not None and retrofit_entry_key in m.entry:
+                                curve = m.entry[retrofit_entry_key]
+                            else:
+                                curve = m.entry[entry_key]
+
+                            dfr3_sets[inventory['id']] = curve
+
+                            matched_properties_dict = self._convert_properties_to_dict(m.rules, inventory["properties"])
+
+                            if retrofit_entry_key is not None:
+                                matched_properties_dict["retrofit_k"] = retrofit_entry_key
+                            # Add the matched inventory properties so other matching inventory can avoid rule matching
+                            dfr3_sets_cache[curve] = matched_properties_dict
+
+                            # if it's string:id; then need to fetch it from remote and cast to dfr3curve object
+                            if isinstance(curve, str) and curve not in matched_curve_ids:
+                                matched_curve_ids.append(curve)
+
+                            # use the first match
+                            break
+
+                    # for new format rule matching {"AND/OR":[]}
+                    # {"AND": [xx, "OR": [yy, yy], "AND": {"OR":["zz", "zz"]]}
+                    elif isinstance(m.rules, dict):
+                        if self._property_match(rules=m.rules, properties=inventory["properties"]):
+                            if retrofit_entry_key is not None and retrofit_entry_key in m.entry:
+                                curve = m.entry[retrofit_entry_key]
+                            else:
+                                curve = m.entry[entry_key]
+                            dfr3_sets[inventory['id']] = curve
+
+                            matched_properties_dict = self._convert_properties_to_dict(m.rules, inventory["properties"])
+                            # Add the matched inventory properties so other matching inventory can avoid rule matching
+                            dfr3_sets_cache[curve] = matched_properties_dict
+
+                            # if it's string:id; then need to fetch it from remote and cast to dfr3 curve object
+                            if isinstance(curve, str) and curve not in matched_curve_ids:
+                                matched_curve_ids.append(curve)
+
+                            # use the first match
+                            break
 
         batch_dfr3_sets = self.batch_get_dfr3_set(matched_curve_ids)
 
@@ -333,6 +352,71 @@ def match_list_of_dicts(self, mapping: MappingSet, inventories: list, entry_key:
 
         return dfr3_sets
 
+    @staticmethod
+    def _check_cache(dfr3_sets_dict, properties):
+        """Look up a previously matched fragility curve for an inventory with the same properties.
+
+        Args:
+            dfr3_sets_dict (dict): Cache keyed by the matched curve; each value holds the inventory property
+                values recorded for that match, e.g.
+                {"fragility-curve-id-1": {"struct_typ": "W1", "no_stories": "2"}, etc.}
+            properties (obj): A fiona Properties object that contains properties of the inventory row.
+
+        Returns:
+            The cache key (fragility curve id) of the first entry whose recorded property values all equal
+            this inventory's values, or None if the cache is empty or no entry matches.
+
+        """
+        # NOTE(review): a hit returns the cached curve without re-evaluating the mapping rules; confirm
+        # with the caller that equal values for one mapping's properties are enough to identify it.
+        if not dfr3_sets_dict:
+            return None
+
+        retrofit_entry_key = properties["retrofit_k"] if "retrofit_k" in properties else None
+        for entry_key, entry_dict in dfr3_sets_dict.items():
+            # An inventory that lacks one of the cached properties cannot equal the cached entry;
+            # treat it as a miss instead of raising KeyError on the lookup below.
+            if any(rule_key not in properties for rule_key in entry_dict):
+                continue
+
+            inventory_dict = {rule_key: properties[rule_key] for rule_key in entry_dict}
+
+            # Adding the retrofit key makes the comparison fail against any entry that does not
+            # carry the same retrofit_k value.
+            if retrofit_entry_key is not None:
+                inventory_dict["retrofit_k"] = retrofit_entry_key
+
+            if entry_dict == inventory_dict:
+                return entry_key
+
+        return None
+
+    @staticmethod
+    def _convert_properties_to_dict(rules, properties):
+        """Collect the inventory values of every property referenced by a rule set.
+
+        Args:
+            rules (obj): Legacy rules as a list of lists, [[A and B] or [C and D]], or new style rules as a
+                dict, {"AND/OR": [...]}, whose criteria are rule strings or nested {"AND/OR": [...]} dicts.
+            properties (dict): A dictionary that contains properties of the inventory row.
+
+        Returns:
+            Dictionary of property values for the inventory object so the matched fragility can be cached
+
+        Raises:
+            ValueError: If a new style criterion is neither a rule string nor a dict.
+
+        """
+        matched_properties = {}
+
+        def collect(criteria):
+            # Walk a list of criteria; nested boolean groups are followed to any depth.
+            for criterion in criteria:
+                if isinstance(criterion, dict):
+                    for nested_criteria in criterion.values():
+                        collect(nested_criteria)
+                elif isinstance(criterion, str):
+                    matched_properties.update(Dfr3Service._eval_property_from_inventory(criterion, properties))
+                else:
+                    raise ValueError("Cannot evaluate criterion, unsupported format!")
+
+        # Handle legacy rules
+        if isinstance(rules, list):
+            for and_rules in rules:
+                for rule in and_rules:
+                    matched_properties.update(Dfr3Service._eval_property_from_inventory(rule, properties))
+        elif isinstance(rules, dict):
+            # Handles new style of rules; only the first key (AND or OR) is read at the top level
+            boolean = list(rules.keys())[0]
+            collect(rules[boolean])
+
+        return matched_properties
+
     @staticmethod
     def _property_match_legacy(rules, properties):
         """A method to determine whether current set of rules rules applied to the inventory row (legacy rule format).
@@ -352,16 +436,10 @@ def _property_match_legacy(rules, properties):
 
         else:
             # rules = [[A and B], OR [C and D], OR [E and F]]
-            or_matched = [False for i in range(len(rules))]  # initiate all false state outer list
-            for i, and_rules in enumerate(rules):
-                and_matched = [False for j in range(len(and_rules))]  # initialte all false state for inner list
-                for j, rule in enumerate(and_rules):
-                    # evaluate, return True or False. And place it in the corresponding place
-                    and_matched[j] = Dfr3Service._eval_criterion(rule, properties)
-
-                # for inner list, AND boolean applied
-                if all(and_matched):
-                    or_matched[i] = True
+            or_matched = [
+                all(map(lambda rule: Dfr3Service._eval_criterion(rule, properties), and_rules))
+                for and_rules in rules
+            ]
 
         # for outer list, OR boolean is applied
         return any(or_matched)
@@ -404,6 +482,25 @@ def _property_match(rules, properties):
             else:
                 raise ValueError("boolean " + boolean + " not supported!")
 
+    @staticmethod
+    def _eval_property_from_inventory(rule, properties):
+        """Return the inventory value of the property that a single rule refers to.
+
+        Args:
+            rule (str): e.g. "int no_stories EQ 1"; the property name is the second space-separated token.
+            properties (dict): dictionary of properties of an inventory item. e.g. {"guid":xxx,
+                "num_stories":xxx, ...}
+
+        Returns:
+            Single-entry dictionary {property name: inventory value}, or an empty dictionary when the
+            inventory does not have that property.
+
+        """
+        property_key = rule.split(" ", 3)[1]
+
+        # _eval_criterion checks that the property exists before reading it; do the same here so a
+        # rule naming a property this inventory lacks does not raise KeyError.
+        if property_key not in properties:
+            return {}
+
+        return {property_key: properties[property_key]}
+
     @staticmethod
     def _eval_criterion(rule, properties):
         """A method to evaluate individual rule and see if it appies to a certain inventory row.
@@ -425,18 +522,18 @@ def _eval_criterion(rule, properties):
         # e.g. "java.lang.String struct_typ EQUALS W2"
 
         rule_type = elements[0]  # e.g. int, str, double, java.lang.String, etc...
-        if rule_type not in known_types.keys():
+        if rule_type not in known_types:
             raise ValueError(rule_type + " Unknown. Cannot parse the rules of this mapping!")
 
         rule_key = elements[1]  # e.g. no_storeis, year_built, etc...
 
         rule_operator = elements[2]  # e.g. EQ, GE, LE, EQUALS
-        if rule_operator not in known_operators.keys():
+        if rule_operator not in known_operators:
             raise ValueError(rule_operator + " Unknown. Cannot parse the rules of this mapping!")
 
         rule_value = elements[3].strip('\'').strip('\"')
 
-        if rule_key in properties.keys():
+        if rule_key in properties:
             # validate if the rule is written correctly by comparing variable type, e.g. no_stories properties
             # should be integer
             if isinstance(properties[rule_key], eval(known_types[rule_type])):

diff --git a/pyincore/utils/analysisutil.py b/pyincore/utils/analysisutil.py
@@ -261,14 +261,17 @@ def adjust_damage_for_liquefaction(limit_state_probabilities, ground_failure_pro
         keys = list(limit_state_probabilities.keys())
         adjusted_limit_state_probabilities = collections.OrderedDict()
 
-        for i in range(len(keys)):
+        ground_failure_probabilities_len = len(ground_failure_probabilities)
+        keys_len = len(keys)
+
+        for i in range(keys_len):
             # check and see...if we are trying to use the last ground failure
             # number for something other than the
             # last limit-state-probability, then we should use the
             # second-to-last probability of ground failure instead.
 
-            if i > len(ground_failure_probabilities) - 1:
-                prob_ground_failure = ground_failure_probabilities[len(ground_failure_probabilities) - 2]
+            if i > ground_failure_probabilities_len - 1:
+                prob_ground_failure = ground_failure_probabilities[ground_failure_probabilities_len - 2]
             else:
                 prob_ground_failure = ground_failure_probabilities[i]
 

diff --git a/tests/pyincore/analyses/buildingdamage/test_slc_buildingdamage.py b/tests/pyincore/analyses/buildingdamage/test_slc_buildingdamage.py
@@ -0,0 +1,36 @@
+from pyincore import IncoreClient, FragilityService, MappingSet, Earthquake, HazardService, DataService
+from pyincore.analyses.buildingdamage import BuildingDamage
+import time
+
+
+def main():
+    """Run building damage with liquefaction on the Salt Lake County buildings and print the runtime."""
+    client = IncoreClient()
+
+    # Initiate fragility service
+    fragility_services = FragilityService(client)
+    hazard_services = HazardService(client)
+    data_services = DataService(client)
+
+    # Analysis setup; the timer covers setup, remote dataset loading and the analysis itself
+    start_time = time.time()
+    bldg_dmg = BuildingDamage(client)
+
+    mapping_set = MappingSet(fragility_services.get_mapping("6309005ad76c6d0e1f6be081"))
+    bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set)
+
+    bldg_dmg.load_remote_input_dataset("buildings", "62fea288f5438e1f8c515ef8")  # Salt Lake County All Building
+    bldg_dmg.set_parameter("result_name", "SLC_bldg_dmg_no_retrofit-withLIQ7.1")
+
+    eq = Earthquake.from_hazard_service("640a03ea73a1642180262450", hazard_services)  # Mw 7.1
+    # eq = Earthquake.from_hazard_service("64108b6486a52d419dd69a41", hazard_services) #  Mw 7.0
+    bldg_dmg.set_input_hazard("hazard", eq)
+
+    bldg_dmg.set_parameter("use_liquefaction", True)
+    bldg_dmg.set_parameter("liquefaction_geology_dataset_id", "62fe9ab685ac6b569e372429")
+    bldg_dmg.set_parameter("num_cpu", 8)
+
+    # Run building damage with liquefaction (use_liquefaction is enabled above)
+    bldg_dmg.run_analysis()
+
+    end_time = time.time()
+    print(f"total runtime: {end_time - start_time}")
+
+
+if __name__ == "__main__":
+    main()