From 11ca944de7c618d5469c4a0c13ff0dd82ce85964 Mon Sep 17 00:00:00 2001
From: Hitesh Tolani
Date: Mon, 21 Oct 2024 22:36:47 +0530
Subject: [PATCH] Added Inbuilt function to extract nested JSON (#798)

* Added Inbuilt function to extract nested JSON

* Renamed json_to_pandas to dict_to_dataframe

* Fixing a pylint error in network_plot.py (this is due to an update to pylint, not the code in this PR)

* fixed linting errors

---------

Co-authored-by: Hitesh Tolani
Co-authored-by: ianhelle
---
 msticpy/init/pivot_core/pivot_pd_accessor.py | 80 ++++++++++++++++++++
 msticpy/vis/network_plot.py                  |  2 +-
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/msticpy/init/pivot_core/pivot_pd_accessor.py b/msticpy/init/pivot_core/pivot_pd_accessor.py
index 9eb6fe813..becae8437 100644
--- a/msticpy/init/pivot_core/pivot_pd_accessor.py
+++ b/msticpy/init/pivot_core/pivot_pd_accessor.py
@@ -442,6 +442,53 @@ def parse_json(self, cols: Union[str, Iterable[str]]) -> pd.DataFrame:
             data = data.drop([col], axis=1).rename(columns={col_parsed: col})
         return data
 
+    def dict_to_dataframe(self, col: str) -> pd.DataFrame:
+        """
+        Construct a new dataframe having keys as column and values as row.
+
+        Nested dicts and lists are flattened: each leaf value becomes a
+        column named by joining its parent keys and list indexes with
+        ".", so `{"a": {"b": [1, 2]}}` gives columns "a.b.0" and "a.b.1".
+
+        Parameters
+        ----------
+        col : str
+            Name of the column which contains json data
+
+        Returns
+        -------
+        pd.DataFrame
+            A new dataframe containing the json data in tabular form.
+
+        Notes
+        -----
+        The result has a new default index. Strings that are not valid
+        JSON, or that do not decode to a JSON object, are skipped and
+        produce no row. Values that are neither dict nor str produce
+        an empty row.
+
+        """
+        unnested_col = []
+
+        for row_value in self._df[col]:
+            record = {}
+            if isinstance(row_value, dict):
+                record = row_value
+            elif isinstance(row_value, str):
+                try:
+                    record = json.loads(row_value)
+                except json.JSONDecodeError:
+                    continue
+                # Valid JSON may be a list or scalar, which has no keys.
+                if not isinstance(record, dict):
+                    continue
+            unnest_row = {}
+            for key, value in record.items():
+                unnest_row.update(_extract_values(value, key))
+            unnested_col.append(unnest_row)
+
+        return pd.DataFrame(unnested_col)
+
 
 def _name_match(cur_cols: Iterable[str], col_filter, match_case):
     col_filter = re.sub(r"[^.]\*", ".*", col_filter)
@@ -455,3 +502,36 @@ def _json_safe_conv(val):
         with contextlib.suppress(TypeError, JSONDecodeError):
             return json.loads(val)
     return val
+
+
+def _extract_values(data: Union[dict, list, str], key_name: str = "") -> dict:
+    """
+    Recursively extracts column values from the given key's values.
+
+    Parameters
+    ----------
+    data : Union[dict, list, str]
+        Values for the given key in the dictionary.
+    key_name : str
+        Column name prefix for `data`, obtained by joining all parent
+        key names and list indexes with ".".
+
+    Returns
+    -------
+    unnested : dict
+        A dict containing unnested json for a specific key. Empty dicts
+        and lists contribute no entries.
+
+    """
+    unnested = {}
+
+    if isinstance(data, dict):
+        for key, value in data.items():
+            unnested.update(_extract_values(value, f"{key_name}.{key}"))
+    elif isinstance(data, list):
+        for idx, elm in enumerate(data):
+            unnested.update(_extract_values(elm, f"{key_name}.{idx}"))
+    else:
+        unnested[key_name] = data
+
+    return unnested
diff --git a/msticpy/vis/network_plot.py b/msticpy/vis/network_plot.py
index a5260da83..1ed2eb249 100644
--- a/msticpy/vis/network_plot.py
+++ b/msticpy/vis/network_plot.py
@@ -209,7 +209,7 @@ def _get_graph_layout(nx_graph: nx.Graph, layout: GraphLayout, **kwargs):
     if callable(layout):
         return layout(nx_graph, **kwargs)
     layout_func = getattr(nx, f"{layout}_layout", None)
-    if layout_func:
+    if layout_func and callable(layout_func):  # pylint: disable=not-callable
         return layout_func(nx_graph, **kwargs)
     return nx.spring_layout(nx_graph, **kwargs)
 