Skip to content

Commit

Permalink
Merge pull request #232 from PowerGridModel/feature/Support-UUID-Visi…
Browse files Browse the repository at this point in the history
…on-input

Feature/support UUID vision input. Backward compatible.
  • Loading branch information
Jerry-Jinfeng-Guo authored Mar 4, 2024
2 parents 51ec22f + 5a6f5ed commit ea38690
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 21 deletions.
18 changes: 12 additions & 6 deletions src/power_grid_model_io/converters/vision_excel_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Any, Mapping, Optional, Union

from power_grid_model_io.converters.tabular_converter import TabularConverter
from power_grid_model_io.data_stores.base_data_store import LANGUAGE_EN
from power_grid_model_io.data_stores.vision_excel_file_store import VisionExcelFileStore

DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "config" / "excel" / "vision_{language:s}.yaml"
Expand All @@ -36,17 +37,22 @@ class VisionExcelConverter(TabularConverter):
def __init__(
self,
source_file: Optional[Union[Path, str]] = None,
language: str = "en",
mapping_file: Optional[Path] = None,
language: str = LANGUAGE_EN,
terms_changed: Optional[dict] = None,
mapping_file: Optional[Union[Path, str]] = None,
log_level: int = logging.INFO,
):
_mapping_file = _mapping_file = (
mapping_file if mapping_file is not None else Path(str(DEFAULT_MAPPING_FILE).format(language=language))
): # pylint: disable=too-many-arguments
_mapping_file = Path(
mapping_file if mapping_file is not None else str(DEFAULT_MAPPING_FILE).format(language=language)
)
if not _mapping_file.exists():
raise FileNotFoundError(f"No Vision Excel mapping available for language '{language}'")
self._id_reference: Optional[IdReferenceFields] = None
source = VisionExcelFileStore(file_path=Path(source_file)) if source_file else None
source = (
VisionExcelFileStore(file_path=Path(source_file), language=language, terms_changed=terms_changed)
if source_file
else None
)
super().__init__(mapping_file=_mapping_file, source=source, log_level=log_level)

def set_mapping(self, mapping: Mapping[str, Any]) -> None:
Expand Down
15 changes: 15 additions & 0 deletions src/power_grid_model_io/data_stores/base_data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,21 @@

T = TypeVar("T")

# Language codes used to select the Vision Excel terminology
LANGUAGE_EN = "en"
LANGUAGE_NL = "nl"

# Keys of the per-language term dictionaries in VISION_EXCEL_LAN_DICT
DICT_KEY_NUMBER = "key_number"
DICT_KEY_SUBNUMBER = "key_subnumber"

# Language specific column terms: {language code: {term key: column term}}
VISION_EXCEL_LAN_DICT = {
    LANGUAGE_EN: {
        DICT_KEY_NUMBER: "Number",
        DICT_KEY_SUBNUMBER: "Subnumber",
    },
    LANGUAGE_NL: {
        DICT_KEY_NUMBER: "Nummer",
        DICT_KEY_SUBNUMBER: "Subnummer",
    },
}


class BaseDataStore(Generic[T], ABC):
"""
Expand Down
57 changes: 54 additions & 3 deletions src/power_grid_model_io/data_stores/excel_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,21 @@

import pandas as pd

from power_grid_model_io.data_stores.base_data_store import BaseDataStore
from power_grid_model_io.data_stores.base_data_store import (
DICT_KEY_NUMBER,
DICT_KEY_SUBNUMBER,
VISION_EXCEL_LAN_DICT,
BaseDataStore,
)
from power_grid_model_io.data_types import LazyDataFrame, TabularData
from power_grid_model_io.utils.uuid_excel_cvtr import (
UUID2IntCvtr,
add_guid_values_to_cvtr,
get_special_key_map,
special_nodes_en,
special_nodes_nl,
update_column_names,
)


class ExcelFileStore(BaseDataStore[TabularData]):
Expand All @@ -28,9 +41,15 @@ class ExcelFileStore(BaseDataStore[TabularData]):

_unnamed_pattern: re.Pattern = re.compile(r"Unnamed: \d+_level_\d+")

def __init__(self, file_path: Optional[Path] = None, **extra_paths: Path):
def __init__(
self,
file_path: Optional[Path] = None,
*,
language: str = "en",
terms_changed: Optional[dict] = None,
**extra_paths: Path,
):
super().__init__()

# Create a dictionary of all supplied file paths:
# {"": file_path, extra_name[0]: extra_path[0], extra_name[1]: extra_path[1], ...}
self._file_paths: Dict[str, Path] = {}
Expand All @@ -45,6 +64,10 @@ def __init__(self, file_path: Optional[Path] = None, **extra_paths: Path):
raise ValueError(f"{name} file should be a .xls or .xlsx file, {path.suffix} provided.")

self._header_rows: List[int] = [0]
self._language = language
self._vision_excel_key_mapping = VISION_EXCEL_LAN_DICT[self._language]
self._terms_changed = terms_changed if terms_changed is not None else {}
self._uuid_cvtr = UUID2IntCvtr()

def files(self) -> Dict[str, Path]:
"""
Expand All @@ -68,6 +91,8 @@ def sheet_loader():
sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows)
sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data)
sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=xls_sheet_name)
sheet_data = self._process_uuid_columns(data=sheet_data, sheet_name=xls_sheet_name)
sheet_data = self._update_column_names(data=sheet_data)
return sheet_data

return sheet_loader
Expand Down Expand Up @@ -197,6 +222,32 @@ def _check_duplicate_values(self, sheet_name: str, data: pd.DataFrame) -> Dict[i

return to_rename

def _process_uuid_columns(self, data: pd.DataFrame, sheet_name: str) -> pd.DataFrame:
    """
    Add a converted id column directly after every GUID column of a sheet.

    Every first-level column name ending in "GUID" is registered in the store's UUID converter. A new column,
    holding the result of `self._uuid_cvtr.query` for each GUID value, is then inserted right behind it. The new
    column is named after the GUID column, with "GUID" replaced by the language specific "Number" term. A plain
    "GUID" column on one of the special node sheets gets the "Subnumber" term of that sheet's language instead.

    Args:
        data: The parsed sheet; it is modified in place
        sheet_name: The name of the Excel sheet the data was read from

    Returns:
        The same DataFrame object, including the newly added columns
    """
    first_level = data.columns.get_level_values(0)
    guid_columns = first_level[first_level.str.endswith("GUID")]
    if guid_columns.empty:
        return data

    # Both lookups are independent of the individual GUID column, so do them only once
    number_term = VISION_EXCEL_LAN_DICT[self._language][DICT_KEY_NUMBER]
    sheet_key_mapping = get_special_key_map(
        sheet_name=sheet_name, nodes_en=special_nodes_en, nodes_nl=special_nodes_nl
    )

    for guid_column in guid_columns:
        add_guid_values_to_cvtr(data, guid_column, self._uuid_cvtr)

        if guid_column == "GUID" and sheet_key_mapping:
            new_column_name = sheet_key_mapping[DICT_KEY_SUBNUMBER]
        else:
            new_column_name = guid_column.replace("GUID", number_term)

        converted = data[guid_column].apply(self._uuid_cvtr.query)

        # Determine the position on the *current* columns: every insert below shifts the columns to its right,
        # so positions taken from the column index as it was before the loop are stale after the first insert.
        guid_column_pos = data.columns.get_level_values(0).tolist().index(guid_column)
        try:
            data.insert(guid_column_pos + 1, new_column_name, converted)
        except ValueError:
            # DataFrame.insert refuses to add a column that already exists; overwrite its values instead
            data[new_column_name] = converted

    return data

def _update_column_names(self, data: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the configured term changes to the column names of a sheet.

    Delegates to `update_column_names` with the store's `terms_changed` dictionary.
    NOTE(review): the helper's return value is ignored, so it presumably renames the columns in place - confirm.

    Args:
        data: The parsed sheet

    Returns:
        The DataFrame object that was passed in
    """
    update_column_names(data, self._terms_changed)
    return data

@staticmethod
def _group_columns_by_index(data: pd.DataFrame) -> Dict[Union[str, Tuple[str, ...]], Set[int]]:
grouped: Dict[Union[str, Tuple[str, ...]], Set[int]] = {}
Expand Down
11 changes: 9 additions & 2 deletions src/power_grid_model_io/data_stores/vision_excel_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
Vision Excel file store
"""
from pathlib import Path
from typing import Optional

from power_grid_model_io.data_stores.base_data_store import LANGUAGE_EN
from power_grid_model_io.data_stores.excel_file_store import ExcelFileStore


Expand All @@ -17,10 +19,15 @@ class VisionExcelFileStore(ExcelFileStore):
Therefore, row 1 (which is row 2 in Excel) is added to the header_rows in the constructor.
"""

def __init__(self, file_path: Path):
def __init__(
self,
file_path: Path,
language: str = LANGUAGE_EN,
terms_changed: Optional[dict] = None,
):
"""
Args:
file_path: The main Vision Excel export file
"""
super().__init__(file_path)
super().__init__(file_path, language=language, terms_changed=terms_changed)
self._header_rows.append(1) # Units are stored in the row below the column names
44 changes: 35 additions & 9 deletions src/power_grid_model_io/utils/uuid_excel_cvtr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,19 @@

import os
import re
from typing import Optional
from pathlib import Path
from typing import Optional, Union

import pandas as pd

from power_grid_model_io.data_stores.base_data_store import (
DICT_KEY_NUMBER,
DICT_KEY_SUBNUMBER,
LANGUAGE_EN,
LANGUAGE_NL,
VISION_EXCEL_LAN_DICT,
)

special_nodes_en = [
"Transformer loads",
"Sources",
Expand Down Expand Up @@ -111,7 +120,7 @@ def get_size(self) -> int:
return self._counter


def load_excel_file(file_name: str) -> pd.ExcelFile:
def load_excel_file(file_name: Union[Path, str]) -> pd.ExcelFile:
"""Load an excel file
Args:
Expand Down Expand Up @@ -147,6 +156,20 @@ def add_guid_values_to_cvtr(df: pd.DataFrame, guid_column: str, cvtr: UUID2IntCv
cvtr.add_list(df[guid_column].tolist())


def get_special_key_map(sheet_name: str, nodes_en: list[str], nodes_nl: list[str]) -> dict:
    """Get the language specific key mapping for a special node sheet

    Args:
        sheet_name (str): the sheet name
        nodes_en (list[str]): the English sheet names of the special nodes
        nodes_nl (list[str]): the Dutch sheet names of the special nodes

    Returns:
        dict: the English or Dutch entry of VISION_EXCEL_LAN_DICT, depending on the list that contains the
        sheet name (English is checked first), or an empty dictionary if the sheet is not a special node sheet
    """
    for language, special_nodes in ((LANGUAGE_EN, nodes_en), (LANGUAGE_NL, nodes_nl)):
        if sheet_name in special_nodes:
            return VISION_EXCEL_LAN_DICT[language]
    return {}


def insert_or_update_number_column(
df: pd.DataFrame, guid_column: str, sheet_name: str, cvtr: UUID2IntCvtr, number: str
) -> None:
Expand All @@ -159,11 +182,10 @@ def insert_or_update_number_column(
number (str): "Number" or "Nummer" depending on the language
"""
new_column_name = guid_column.replace("GUID", number)
if guid_column == "GUID":
if sheet_name in special_nodes_en:
new_column_name = guid_column.replace("GUID", "Subnumber")
elif sheet_name in special_nodes_nl:
new_column_name = guid_column.replace("GUID", "Subnummer")
special_key_mapping = get_special_key_map(sheet_name, special_nodes_en, special_nodes_nl)

if guid_column == "GUID" and special_key_mapping not in (None, {}):
new_column_name = guid_column.replace("GUID", special_key_mapping[DICT_KEY_SUBNUMBER])
try:
df.insert(df.columns.get_loc(guid_column) + 1, new_column_name, df[guid_column].apply(cvtr.query))
except ValueError:
Expand Down Expand Up @@ -196,11 +218,15 @@ def save_df_to_excel(df: pd.DataFrame, file_name: str, sheet_name: str, i: int)
df.to_excel(writer, sheet_name=sheet_name, index=False)


def convert_guid_vision_excel(excel_file: str, number: str = "Number", terms_changed: Optional[dict] = None) -> str:
def convert_guid_vision_excel(
excel_file: Union[Path, str],
number: str = VISION_EXCEL_LAN_DICT[LANGUAGE_EN][DICT_KEY_NUMBER],
terms_changed: Optional[dict] = None,
) -> str:
"""Main entry function. Convert the GUID based Vision excel files to a number based format
Args:
excel_file (str): Vision excel file name
excel_file (Path | str): Vision excel file name
number (str): "Number" or "Nummer" depending on the language. Defaults to "Number".
terms_changed (dict): the dictionary containing the terms to be changed. Defaults to {}.
Expand Down
5 changes: 4 additions & 1 deletion tests/unit/utils/test_uuid_excel_cvtr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path

from power_grid_model_io.converters import VisionExcelConverter
from power_grid_model_io.data_stores.base_data_store import DICT_KEY_NUMBER, LANGUAGE_EN, VISION_EXCEL_LAN_DICT
from power_grid_model_io.utils.uuid_excel_cvtr import convert_guid_vision_excel

terms_chaged = {"Grounding1": "N1", "Grounding2": "N2", "Grounding3": "N3", "Load.Behaviour": "Behaviour"}
Expand All @@ -15,7 +16,9 @@


def test_convert_guid_vision_excel():
new_file = convert_guid_vision_excel(SOURCE_FILE, number="Number", terms_changed=terms_chaged)
new_file = convert_guid_vision_excel(
SOURCE_FILE, number=VISION_EXCEL_LAN_DICT[LANGUAGE_EN][DICT_KEY_NUMBER], terms_changed=terms_chaged
)
vision_cvtr_new = VisionExcelConverter(source_file=new_file)
vision_cvtr_ref = VisionExcelConverter(source_file=REFERENCE_FILE)

Expand Down
16 changes: 16 additions & 0 deletions tests/validation/converters/test_vision_excel_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,22 @@
from power_grid_model.data_types import SingleDataset

from power_grid_model_io.converters import VisionExcelConverter
from power_grid_model_io.data_stores.base_data_store import DICT_KEY_NUMBER, LANGUAGE_EN, VISION_EXCEL_LAN_DICT
from power_grid_model_io.data_types import ExtraInfo
from power_grid_model_io.utils.json import JsonEncoder
from power_grid_model_io.utils.uuid_excel_cvtr import convert_guid_vision_excel

from ..utils import compare_extra_info, component_attributes, component_objects, load_json_single_dataset, select_values

DATA_PATH = Path(__file__).parents[2] / "data" / "vision"
SOURCE_FILE = DATA_PATH / "vision_{language:s}.xlsx"
SOURCE_FILE_97 = DATA_PATH / "vision_97_{language:s}.xlsx"
VALIDATION_FILE = DATA_PATH / "pgm_input_data_{language:s}.json"
LANGUAGES = ["en", "nl"]
LANGUAGES_97 = ["en"]
VALIDATION_EN = Path(str(VALIDATION_FILE).format(language="en"))
CUSTOM_MAPPING_FILE = DATA_PATH / "vision_9_5_{language:s}.yaml"
terms_changed = {"Grounding1": "N1", "Grounding2": "N2", "Grounding3": "N3", "Load.Behaviour": "Behaviour"}


@lru_cache
Expand Down Expand Up @@ -297,3 +302,14 @@ def test_log_levels(capsys):
cvtr5.set_log_level(logging.CRITICAL)
outerr = capsys.readouterr()
assert "debug" not in outerr.out


def test_uuid_excel_input():
    """
    Loading a GUID based Vision export directly should give as many entries as loading its converted copy.

    The same source file is read twice: once directly by the VisionExcelConverter, and once after it has been
    converted to a number based Excel file by convert_guid_vision_excel.
    """
    source_file = Path(str(SOURCE_FILE_97).format(language=LANGUAGE_EN))
    # Reference: convert the GUID based file to a number based file first
    ref_file_97 = convert_guid_vision_excel(
        excel_file=source_file, number=VISION_EXCEL_LAN_DICT[LANGUAGE_EN][DICT_KEY_NUMBER], terms_changed=terms_changed
    )
    # Native: let the converter handle the GUID columns and the changed terms itself
    data_native, _ = VisionExcelConverter(source_file, language="en", terms_changed=terms_changed).load_input_data()
    data_convtd, _ = VisionExcelConverter(source_file=ref_file_97).load_input_data()

    # Only the number of top-level entries is compared, not their contents
    assert len(data_native) == len(data_convtd)

0 comments on commit ea38690

Please sign in to comment.