IN-CORE · longshuicy · Aug 1, 2023 · Jul 12, 2023 · Jul 18, 2023 · Jul 18, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 - Method to get allow Hazard demands from hazard service [#363](https://github.com/IN-CORE/pyincore/issues/363)
 
 ### Fixed
+- Post-processing cluster function handles empty rows from mcs [#365](https://github.com/IN-CORE/pyincore/issues/365)
 - Expose all the incore client parameters [#295](https://github.com/IN-CORE/pyincore/issues/295)
 - Fixed testing datasets not being cleaned in the database [#367](https://github.com/IN-CORE/pyincore/issues/367)
 - Space services methods missing timeout parameters [#375](https://github.com/IN-CORE/pyincore/issues/375)

diff --git a/pyincore/utils/dataprocessutil.py b/pyincore/utils/dataprocessutil.py
@@ -202,6 +202,10 @@ def _sum_average(series):
         # unify mcs and bldg func naming
         bldg_func.rename(columns={"building_guid": "guid", "samples": "failure"}, inplace=True)
 
+        # drop rows whose failure value is an empty string, but keep a count of them
+        count_null = (bldg_func["failure"] == "").sum()
+        bldg_func = bldg_func[bldg_func['failure'] != ""]
+
         func_merged = pd.merge(inventory, bldg_func, on="guid")
         mapped_df = pd.merge(func_merged, arch_mapping, on=arch_col)
         unique_categories = arch_mapping.groupby(by=["category"], sort=False, as_index=False).count()["category"]
@@ -249,7 +253,7 @@ def _group_by(by_column, unique):
         json_by_cluster = json.loads(cluster_records)
         json_by_category = json.loads(category_records)
 
-        return {"by_cluster": json_by_cluster, "by_category": json_by_category}
+        return {"by_cluster": json_by_cluster, "by_category": json_by_category, "NA": int(count_null)}
 
     @staticmethod
     def get_max_damage_state(dmg_result):

diff --git a/tests/pyincore/utils/test_dataprocessutil.py b/tests/pyincore/utils/test_dataprocessutil.py
@@ -68,6 +68,11 @@ def _functionality_cluster(client, archetype_mapping="5fca915fb34b193f7a44059b",
     bldg_func_state_dataset_path = bldg_func_state_dataset.get_file_path()
     bldg_func_state = pd.read_csv(bldg_func_state_dataset_path)
 
+    # manufacture rows with an empty failure value for testing
+    if "failure" in bldg_func_state.columns:
+        bldg_func_state.loc[0, "failure"] = ""
+        bldg_func_state.loc[1, "failure"] = ""
+
     ret_json = util.create_mapped_func_result(buildings, bldg_func_state, arch_mapping, arch_column)
 
     with open(title + "_cluster.json", "w") as f: