From 48f057f90eac4eac23dad1985e8b0a9e9425c1c7 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 12 Jul 2023 14:25:31 -0500 Subject: [PATCH 1/5] leave nan out but keep their count --- pyincore/utils/dataprocessutil.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyincore/utils/dataprocessutil.py b/pyincore/utils/dataprocessutil.py index 7208762d2..00d8b3255 100644 --- a/pyincore/utils/dataprocessutil.py +++ b/pyincore/utils/dataprocessutil.py @@ -202,6 +202,10 @@ def _sum_average(series): # unify mcs and bldg func naming bldg_func.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True) + # drop nan but count their numbers + count_nan = bldg_func.isnull().sum() + bldg_func = bldg_func.dropna(subset=func_state, how="all") + func_merged = pd.merge(inventory, bldg_func, on="guid") mapped_df = pd.merge(func_merged, arch_mapping, on=arch_col) unique_categories = arch_mapping.groupby(by=["category"], sort=False, as_index=False).count()["category"] @@ -249,7 +253,7 @@ def _group_by(by_column, unique): json_by_cluster = json.loads(cluster_records) json_by_category = json.loads(category_records) - return {"by_cluster": json_by_cluster, "by_category": json_by_category} + return {"by_cluster": json_by_cluster, "by_category": json_by_category, "nan_count": count_nan} @staticmethod def get_max_damage_state(dmg_result): From dee992785053454c85dbde8b7ca46e38c03b96d3 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Tue, 18 Jul 2023 13:57:15 -0500 Subject: [PATCH 2/5] count empty strings --- pyincore/utils/dataprocessutil.py | 6 +- .../buildingdamage/test_buildingdamage.py | 135 ++++++++++++++++++ tests/pyincore/utils/test_dataprocessutil.py | 4 + 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/pyincore/utils/dataprocessutil.py b/pyincore/utils/dataprocessutil.py index 00d8b3255..a5c579d21 100644 --- a/pyincore/utils/dataprocessutil.py +++ b/pyincore/utils/dataprocessutil.py @@ -203,8 +203,8 @@ def _sum_average(series): 
bldg_func.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True) # drop nan but count their numbers - count_nan = bldg_func.isnull().sum() - bldg_func = bldg_func.dropna(subset=func_state, how="all") + count_null = (bldg_func["failure"] == "").sum() + bldg_func = bldg_func[bldg_func['failure'] != ""] func_merged = pd.merge(inventory, bldg_func, on="guid") mapped_df = pd.merge(func_merged, arch_mapping, on=arch_col) @@ -253,7 +253,7 @@ def _group_by(by_column, unique): json_by_cluster = json.loads(cluster_records) json_by_category = json.loads(category_records) - return {"by_cluster": json_by_cluster, "by_category": json_by_category, "nan_count": count_nan} + return {"by_cluster": json_by_cluster, "by_category": json_by_category, "NA": int(count_null)} @staticmethod def get_max_damage_state(dmg_result): diff --git a/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py b/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py index 4daa7283f..610069b17 100644 --- a/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py +++ b/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py @@ -139,7 +139,142 @@ def run_with_base_class(): bldg_dmg.set_parameter("result_name", result_name) bldg_dmg.set_parameter("hazard_type", hazard_type) bldg_dmg.set_parameter("hazard_id", hazard_id) + bldg_dmg.set_parameter("num_cpu", 4)# Memphis Earthquake damage + # New madrid earthquake using Atkinson Boore 1995 + hazard_type = "earthquake" + hazard_id = "5b902cb273c3371e1236b36b" + + # Geology dataset + liq_geology_dataset_id = "5a284f53c7d30d13bc08249c" + + # Building dataset + # 5a284f0bc7d30d13bc081a28 5kb + # 5bcf2fcbf242fe047ce79dad 300kb + # 5a284f37c7d30d13bc08219c 20mb + bldg_dataset_id = "5a284f0bc7d30d13bc081a28" + + bldg_dmg = BuildingDamage(client) + bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) + + # Earthquake mapping + mapping_id = "5b47b350337d4a3629076f2c" + fragility_service = FragilityService(client) 
+ mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) + bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) + + result_name = "memphis_eq_bldg_dmg_result" + bldg_dmg.set_parameter("result_name", result_name) + bldg_dmg.set_parameter("hazard_type", hazard_type) + bldg_dmg.set_parameter("hazard_id", hazard_id) + bldg_dmg.set_parameter("num_cpu", 4) + bldg_dmg.set_parameter("use_liquefaction", True) + bldg_dmg.set_parameter("liquefaction_geology_dataset_id", liq_geology_dataset_id) + + # Run Analysis + bldg_dmg.run_analysis() + + # TSUNAMI + + hazard_type = "tsunami" + hazard_id = "5bc9e25ef7b08533c7e610dc" + + # Seaside building dataset + bldg_dataset_id = "5bcf2fcbf242fe047ce79dad" + + # Run seaside tsunami building damage + bldg_dmg = BuildingDamage(client) + bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) + + # Tsunami mapping + mapping_id = "5b48fb1f337d4a478e7bd54d" + fragility_service = FragilityService(client) + mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) + bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) + + result_name = "seaside_tsunami_dmg_result" + bldg_dmg.set_parameter("result_name", result_name) + bldg_dmg.set_parameter("hazard_type", hazard_type) + bldg_dmg.set_parameter("hazard_id", hazard_id) + bldg_dmg.set_parameter("num_cpu", 4) + bldg_dmg.run_analysis() + + # Hurricane + + hazard_type = "hurricane" + hazard_id = "5f11e50cc6491311a814584c" + + # Galveston building dataset 602eba8bb1db9c28aef01358 + bldg_dataset_id = "602eba8bb1db9c28aef01358" # 19k buildings with age_group + # bldg_dataset_id = "602d61d0b1db9c28aeedea03" # 40 buildings without age_group + + bldg_dmg = BuildingDamage(client) + bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) + + # Hurricane building mapping (with equation) + mapping_id = "602c381a1d85547cdc9f0675" + fragility_service = FragilityService(client) + mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) + 
bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) + bldg_dmg.set_parameter("fragility_key", "Hurricane SurgeLevel and WaveHeight Fragility ID Code") + + result_name = "galveston_hurr_dmg_result" + bldg_dmg.set_parameter("result_name", result_name) + bldg_dmg.set_parameter("hazard_type", hazard_type) + bldg_dmg.set_parameter("hazard_id", hazard_id) bldg_dmg.set_parameter("num_cpu", 4) + bldg_dmg.run_analysis() + + # lumberton flood + hazard_type = "flood" + hazard_id = "5f4d02e99f43ee0dde768406" + + # lumberton building inventory v7 + # bldg_dataset_id = "603010f7b1db9c28aef53214" # 40 building subset + bldg_dataset_id = "603010a4b1db9c28aef5319f" # 21k full building + + bldg_dmg = BuildingDamage(client) + bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) + + # lumberton building mapping (with equation) + mapping_id = "602f3cf981bd2c09ad8f4f9d" + fragility_service = FragilityService(client) + mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) + bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) + bldg_dmg.set_parameter("fragility_key", "Lumberton Flood Building Fragility ID Code") + + result_name = "lumberton_flood_dmg_result" + bldg_dmg.set_parameter("result_name", result_name) + bldg_dmg.set_parameter("hazard_type", hazard_type) + bldg_dmg.set_parameter("hazard_id", hazard_id) + bldg_dmg.set_parameter("num_cpu", 4) + bldg_dmg.run_analysis() + + # joplin tornado with retrofit strategy + bldg_dataset_id = "5df7d0de425e0b00092d0082" # joplin building v6 + retrofit_strategy_id = "6091d5a8daa06e14ee96d502" # plan 1 + # retrofit_strategy_id = "6091d5ffdaa06e14ee96d5ef" # plan 2 + + bldg_dmg = BuildingDamage(client) + bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) + bldg_dmg.load_remote_input_dataset("retrofit_strategy", retrofit_strategy_id) + + # lumberton building mapping (with equation) + mapping_id = "6091d9fbb53ed4646fd276ca" # 19 archetype with retrofit + # mapping_id = "60994a1906d63d5ded1d6dcc" 
# 19 archetype with retrofit new format mapping + fragility_service = FragilityService(client) + mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) + bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) + bldg_dmg.set_parameter("fragility_key", "Fragility ID Code") + + hazard_type = "tornado" + hazard_id = "5dfa32bbc0601200080893fb" + result_name = "joplin_tornado_dmg_result_w_retrofit" + bldg_dmg.set_parameter("result_name", result_name) + bldg_dmg.set_parameter("hazard_type", hazard_type) + bldg_dmg.set_parameter("hazard_id", hazard_id) + bldg_dmg.set_parameter("num_cpu", 4) + bldg_dmg.set_parameter("seed", 1000) + bldg_dmg.run_analysis() bldg_dmg.set_parameter("seed", 1000) bldg_dmg.run_analysis() diff --git a/tests/pyincore/utils/test_dataprocessutil.py b/tests/pyincore/utils/test_dataprocessutil.py index db0d20d79..4026694d6 100644 --- a/tests/pyincore/utils/test_dataprocessutil.py +++ b/tests/pyincore/utils/test_dataprocessutil.py @@ -68,6 +68,10 @@ def _functionality_cluster(client, archetype_mapping="5fca915fb34b193f7a44059b", bldg_func_state_dataset_path = bldg_func_state_dataset.get_file_path() bldg_func_state = pd.read_csv(bldg_func_state_dataset_path) + # manufacturing the nan rows for testing + bldg_func_state.loc[0, "failure"] = "" + bldg_func_state.loc[1, "failure"] = "" + ret_json = util.create_mapped_func_result(buildings, bldg_func_state, arch_mapping, arch_column) with open(title + "_cluster.json", "w") as f: From 236c2fb304020a46eb0e46bbae0c681b14e8ae0b Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Tue, 18 Jul 2023 13:58:28 -0500 Subject: [PATCH 3/5] add entry in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7eaacef8..4ab9ecdc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). 
## [Unreleased] ### Fixed +- Post-processing cluster function handles empty rows from mcs [#365](https://github.com/IN-CORE/pyincore/issues/365) - Expose all the incore client parameters [#295](https://github.com/IN-CORE/pyincore/issues/295) ### Changed From 3a3e4c9b4f6e545d8c51f5b9457f03b36e8c9b87 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 26 Jul 2023 14:45:58 -0500 Subject: [PATCH 4/5] bldg func doesn't have failure field --- tests/pyincore/utils/test_dataprocessutil.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/pyincore/utils/test_dataprocessutil.py b/tests/pyincore/utils/test_dataprocessutil.py index 4026694d6..3d36c0cb5 100644 --- a/tests/pyincore/utils/test_dataprocessutil.py +++ b/tests/pyincore/utils/test_dataprocessutil.py @@ -69,8 +69,9 @@ def _functionality_cluster(client, archetype_mapping="5fca915fb34b193f7a44059b", bldg_func_state = pd.read_csv(bldg_func_state_dataset_path) # manufacturing the nan rows for testing - bldg_func_state.loc[0, "failure"] = "" - bldg_func_state.loc[1, "failure"] = "" + if "failure" in bldg_func_state.columns: + bldg_func_state.loc[0, "failure"] = "" + bldg_func_state.loc[1, "failure"] = "" ret_json = util.create_mapped_func_result(buildings, bldg_func_state, arch_mapping, arch_column) From b028079673fbc52174965ad6641693361bd44925 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 26 Jul 2023 14:47:54 -0500 Subject: [PATCH 5/5] no change needed for building damage --- .../buildingdamage/test_buildingdamage.py | 135 ------------------ 1 file changed, 135 deletions(-) diff --git a/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py b/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py index 610069b17..4daa7283f 100644 --- a/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py +++ b/tests/pyincore/analyses/buildingdamage/test_buildingdamage.py @@ -139,142 +139,7 @@ def run_with_base_class(): bldg_dmg.set_parameter("result_name", result_name) 
bldg_dmg.set_parameter("hazard_type", hazard_type) bldg_dmg.set_parameter("hazard_id", hazard_id) - bldg_dmg.set_parameter("num_cpu", 4)# Memphis Earthquake damage - # New madrid earthquake using Atkinson Boore 1995 - hazard_type = "earthquake" - hazard_id = "5b902cb273c3371e1236b36b" - - # Geology dataset - liq_geology_dataset_id = "5a284f53c7d30d13bc08249c" - - # Building dataset - # 5a284f0bc7d30d13bc081a28 5kb - # 5bcf2fcbf242fe047ce79dad 300kb - # 5a284f37c7d30d13bc08219c 20mb - bldg_dataset_id = "5a284f0bc7d30d13bc081a28" - - bldg_dmg = BuildingDamage(client) - bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) - - # Earthquake mapping - mapping_id = "5b47b350337d4a3629076f2c" - fragility_service = FragilityService(client) - mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) - bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) - - result_name = "memphis_eq_bldg_dmg_result" - bldg_dmg.set_parameter("result_name", result_name) - bldg_dmg.set_parameter("hazard_type", hazard_type) - bldg_dmg.set_parameter("hazard_id", hazard_id) - bldg_dmg.set_parameter("num_cpu", 4) - bldg_dmg.set_parameter("use_liquefaction", True) - bldg_dmg.set_parameter("liquefaction_geology_dataset_id", liq_geology_dataset_id) - - # Run Analysis - bldg_dmg.run_analysis() - - # TSUNAMI - - hazard_type = "tsunami" - hazard_id = "5bc9e25ef7b08533c7e610dc" - - # Seaside building dataset - bldg_dataset_id = "5bcf2fcbf242fe047ce79dad" - - # Run seaside tsunami building damage - bldg_dmg = BuildingDamage(client) - bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) - - # Tsunami mapping - mapping_id = "5b48fb1f337d4a478e7bd54d" - fragility_service = FragilityService(client) - mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) - bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) - - result_name = "seaside_tsunami_dmg_result" - bldg_dmg.set_parameter("result_name", result_name) - bldg_dmg.set_parameter("hazard_type", 
hazard_type) - bldg_dmg.set_parameter("hazard_id", hazard_id) - bldg_dmg.set_parameter("num_cpu", 4) - bldg_dmg.run_analysis() - - # Hurricane - - hazard_type = "hurricane" - hazard_id = "5f11e50cc6491311a814584c" - - # Galveston building dataset 602eba8bb1db9c28aef01358 - bldg_dataset_id = "602eba8bb1db9c28aef01358" # 19k buildings with age_group - # bldg_dataset_id = "602d61d0b1db9c28aeedea03" # 40 buildings without age_group - - bldg_dmg = BuildingDamage(client) - bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) - - # Hurricane building mapping (with equation) - mapping_id = "602c381a1d85547cdc9f0675" - fragility_service = FragilityService(client) - mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) - bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) - bldg_dmg.set_parameter("fragility_key", "Hurricane SurgeLevel and WaveHeight Fragility ID Code") - - result_name = "galveston_hurr_dmg_result" - bldg_dmg.set_parameter("result_name", result_name) - bldg_dmg.set_parameter("hazard_type", hazard_type) - bldg_dmg.set_parameter("hazard_id", hazard_id) bldg_dmg.set_parameter("num_cpu", 4) - bldg_dmg.run_analysis() - - # lumberton flood - hazard_type = "flood" - hazard_id = "5f4d02e99f43ee0dde768406" - - # lumberton building inventory v7 - # bldg_dataset_id = "603010f7b1db9c28aef53214" # 40 building subset - bldg_dataset_id = "603010a4b1db9c28aef5319f" # 21k full building - - bldg_dmg = BuildingDamage(client) - bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) - - # lumberton building mapping (with equation) - mapping_id = "602f3cf981bd2c09ad8f4f9d" - fragility_service = FragilityService(client) - mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) - bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) - bldg_dmg.set_parameter("fragility_key", "Lumberton Flood Building Fragility ID Code") - - result_name = "lumberton_flood_dmg_result" - bldg_dmg.set_parameter("result_name", result_name) - 
bldg_dmg.set_parameter("hazard_type", hazard_type) - bldg_dmg.set_parameter("hazard_id", hazard_id) - bldg_dmg.set_parameter("num_cpu", 4) - bldg_dmg.run_analysis() - - # joplin tornado with retrofit strategy - bldg_dataset_id = "5df7d0de425e0b00092d0082" # joplin building v6 - retrofit_strategy_id = "6091d5a8daa06e14ee96d502" # plan 1 - # retrofit_strategy_id = "6091d5ffdaa06e14ee96d5ef" # plan 2 - - bldg_dmg = BuildingDamage(client) - bldg_dmg.load_remote_input_dataset("buildings", bldg_dataset_id) - bldg_dmg.load_remote_input_dataset("retrofit_strategy", retrofit_strategy_id) - - # lumberton building mapping (with equation) - mapping_id = "6091d9fbb53ed4646fd276ca" # 19 archetype with retrofit - # mapping_id = "60994a1906d63d5ded1d6dcc" # 19 archetype with retrofit new format mapping - fragility_service = FragilityService(client) - mapping_set = MappingSet(fragility_service.get_mapping(mapping_id)) - bldg_dmg.set_input_dataset('dfr3_mapping_set', mapping_set) - bldg_dmg.set_parameter("fragility_key", "Fragility ID Code") - - hazard_type = "tornado" - hazard_id = "5dfa32bbc0601200080893fb" - result_name = "joplin_tornado_dmg_result_w_retrofit" - bldg_dmg.set_parameter("result_name", result_name) - bldg_dmg.set_parameter("hazard_type", hazard_type) - bldg_dmg.set_parameter("hazard_id", hazard_id) - bldg_dmg.set_parameter("num_cpu", 4) - bldg_dmg.set_parameter("seed", 1000) - bldg_dmg.run_analysis() bldg_dmg.set_parameter("seed", 1000) bldg_dmg.run_analysis()