Skip to content

Commit

Permalink
Added Inbuilt function to extract nested JSON (#798)
Browse files Browse the repository at this point in the history
* Added Inbuilt function to extract nested JSON

* Renamed json_to_pandas to dict_to_dataframe

* Fixing a pylint error in network_plot.py
(this is due to an update to pylint, not the code in this PR)

* fixed linting errors

---------

Co-authored-by: Hitesh Tolani <hitesh.ht.2003@gmail.com>
Co-authored-by: ianhelle <ianhelle@microsoft.com>
  • Loading branch information
3 people authored Oct 21, 2024
1 parent e9adaff commit 11ca944
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 1 deletion.
63 changes: 63 additions & 0 deletions msticpy/init/pivot_core/pivot_pd_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,39 @@ def parse_json(self, cols: Union[str, Iterable[str]]) -> pd.DataFrame:
data = data.drop([col], axis=1).rename(columns={col_parsed: col})
return data

def dict_to_dataframe(self, col: str) -> pd.DataFrame:
    """
    Construct a new dataframe having keys as column and values as row.

    Each value in `col` is expanded into one output row. Nested
    dictionaries and lists are flattened by `_extract_values`, so a
    nested item ends up in a column named after its dotted key path.

    Parameters
    ----------
    col : str
        Name of the column which contains json data. Values may be
        dicts or strings holding JSON text.

    Returns
    -------
    pd.DataFrame
        A new dataframe containing the json data in tabular form.
        Strings that cannot be decoded as JSON produce no output row.
        Values that are not JSON objects (for example None, numbers,
        or JSON text that decodes to a list or scalar) produce a row
        with no populated columns.

    """
    unnested_rows = []

    for row_value in self._df[col]:
        record = row_value
        if isinstance(row_value, str):
            try:
                record = json.loads(row_value)
            except json.JSONDecodeError:
                # Undecodable text is skipped entirely (no output row).
                continue
        if not isinstance(record, dict):
            # json.loads can legitimately return a list, number, string
            # or None; only JSON objects have keys to expand, so treat
            # anything else as an empty record instead of failing on
            # a missing dict interface.
            record = {}

        unnest_row = {}
        for key, value in record.items():
            unnest_row.update(_extract_values(value, key))
        unnested_rows.append(unnest_row)

    return pd.DataFrame(unnested_rows)


def _name_match(cur_cols: Iterable[str], col_filter, match_case):
col_filter = re.sub(r"[^.]\*", ".*", col_filter)
Expand All @@ -455,3 +488,33 @@ def _json_safe_conv(val):
with contextlib.suppress(TypeError, JSONDecodeError):
return json.loads(val)
return val


def _extract_values(data: Union[dict, list, str], key_name: str = "") -> dict:
"""
Recursively extracts column values from the given key's values.
Parameters
----------
data: Union[dict, list, str]
Values for the given key in the dictionary.
key_name : str
Key for unnested and is obtained by joining all parent key names.
Returns
-------
unnested : dict
A dict containing unnested json for a specific key.
"""
unnested = {}

if isinstance(data, dict):
for key in data.keys():
unnested.update(_extract_values(data[key], f"{key_name}.{key}"))
elif isinstance(data, list):
for idx, elm in enumerate(data):
unnested.update(_extract_values(elm, f"{key_name}.{idx}"))
else:
unnested.update({key_name: data})

return unnested
2 changes: 1 addition & 1 deletion msticpy/vis/network_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def _get_graph_layout(nx_graph: nx.Graph, layout: GraphLayout, **kwargs):
if callable(layout):
return layout(nx_graph, **kwargs)
layout_func = getattr(nx, f"{layout}_layout", None)
if layout_func:
if layout_func and callable(layout_func):
# pylint: disable=not-callable
return layout_func(nx_graph, **kwargs)
return nx.spring_layout(nx_graph, **kwargs)
Expand Down

0 comments on commit 11ca944

Please sign in to comment.