Skip to content

Commit

Permalink
Add InputData Module
Browse files Browse the repository at this point in the history
  • Loading branch information
sevisal committed Dec 19, 2023
1 parent c40ef35 commit 8fb32a6
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 29 deletions.
38 changes: 31 additions & 7 deletions src/vai_lab/Data/Data_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,33 @@ def __init__(self: DataT) -> None:
def _import_csv(self: DataT,
filename: str,
data_name: str,
strip_whitespace: bool = True) -> None:
strip_whitespace: bool = True,
index_col = None,
delimiter=',',
quotechar='|',
usecols=None) -> None:
"""import data directly into DataFrame
:param filename: str, filename of csv file to be loaded
:param data_name: str, name of dict key in which data will be stored
:param strip_whitespace: bool, remove spaces from before & after header names
TODO: pandas has a lot of inbuilt read functions, including excel - implement
"""
self.data[data_name] = pd.read_csv(filename,
delimiter=',',
quotechar='|')
delimiter=delimiter,
quotechar=quotechar,
index_col = index_col,
usecols = usecols)
if strip_whitespace:
self.data[data_name].columns = [c.strip()
for c in self.data[data_name].columns]

def _import_png(self: DataT,
filename: str,
data_name: str) -> None:
data_name: str,
index_col = None,
delimiter=',',
quotechar='|',
usecols=None) -> None:
"""Loads png into PIL.Image class. Adds instance to self.data
The image is stored as a function (not a matrix - can be added if needed)
:param filename: str, filename of csv file to be loaded
Expand All @@ -56,7 +66,11 @@ def _import_png(self: DataT,

def _import_dir(self: DataT,
folder_dir: str,
data_name: str) -> None:
data_name: str,
index_col = None,
delimiter=',',
quotechar='|',
usecols=None) -> None:
"""Explores folder, and imports all data items recursively
:param folder_dir: str, directory to be explored
Expand Down Expand Up @@ -85,7 +99,11 @@ def _get_ext(self: DataT, path_dir: str) -> str:

def import_data(self: DataT,
filename: str,
data_name: str = "data") -> None:
data_name: str = "data",
index_col = None,
delimiter=',',
quotechar='|',
usecols=None) -> None:
"""Import file directly into DataFrame
Translates relative files to absolute before parsing - not ideal
Filename to parsing method based on extension name.
Expand All @@ -94,7 +112,13 @@ def import_data(self: DataT,
"""
filename = rel_to_abs(filename)
ext = self._get_ext(filename)
getattr(self, "_import_{0}".format(ext))(filename, data_name)
getattr(self, "_import_{0}".format(ext))(filename,
data_name,
index_col = index_col,
delimiter=delimiter,
quotechar=quotechar,
usecols=usecols)
return self.data

def import_data_from_config(self: DataT, config: dict) -> None:
for c in config.keys():
Expand Down
55 changes: 33 additions & 22 deletions src/vai_lab/InputData/InputData_core.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,20 @@
from vai_lab.Data.Data_core import Data
from vai_lab._import_helper import import_plugin_absolute
from vai_lab._types import PluginSpecsInterface, DataInterface
from pandas import DataFrame
from numpy import array

class InputData(Data):
def __init__(self):
super().__init__()
self.node_name = None
self.plugin_name = None
self.output_data = None
class InputData:
    """Module that holds incoming data and the plugin specs it may use."""

    def __init__(self) -> None:
        # Declaration only: no value is bound to `output_data` here.
        self.output_data: DataInterface

    def set_avail_plugins(self, avail_plugins: PluginSpecsInterface) -> None:
        """Remember the plugin specifications handed to this module."""
        self._avail_plugins = avail_plugins

    def set_data_in(self, data_in: DataInterface) -> None:
        """Keep a reference to data produced by another module."""
        self._data_in = data_in

def load_data_from_file(self, filename: str, data_id: str) -> None:
"""Load data from file. Calls parent class method to store data in self.data"""
super().import_data(filename, data_id)

def set_options(self, module_config: dict) -> None:
"""Send configuration arguments to plugin
:param module_config: dict of settings to configure the plugin
"""
self._module_config = module_config

def set_avail_plugins(self, avail_plugins: PluginSpecsInterface) -> None:
self._avail_plugins = avail_plugins

def _load_plugin(self, data_in: DataInterface) -> None:
avail_plugins = self._avail_plugins.find_from_readable_name(
self._module_config["plugin"]["plugin_name"])
Expand All @@ -36,5 +24,28 @@ def _load_plugin(self, data_in: DataInterface) -> None:
avail_plugins["_PLUGIN_CLASS_NAME"])\
.__call__(self._module_config["plugin"], data_in)

def get_result(self):
return self._data_in
def set_options(self, module_config: dict) -> None:
    """Store the module configuration on this instance.

    :param module_config: dict of settings to configure the plugin
    """
    self._module_config = module_config

def launch(self) -> None:
    """Call the configured plugin methods in order and store the last result.

    The method names are read from
    ``self._module_config["plugin"]["methods"]["_order"]``. Each one is looked
    up on ``self._plugin`` and called with its parsed ``options`` entry when
    one is present, otherwise with no arguments. Only the return value of the
    last method is kept.

    If at least one method ran, the result is unpacked and written into a copy
    of ``self._data_in``, which becomes ``self.output_data``. Any ``Exception``
    raised while unpacking is swallowed and ``self.output_data`` is left
    untouched (best effort).
    """
    methods = self._module_config["plugin"]["methods"]
    order = methods["_order"]

    for method in order:
        handler = getattr(self._plugin, "{}".format(method))
        method_config = methods[method]
        if "options" in method_config:
            out = handler(
                self._plugin._parse_options_dict(method_config["options"]))
        else:
            out = handler()

    if len(order) > 0:
        try:
            # NOTE(review): `out` is rebound to the first entry of out[0]
            # before `out[1]` is read below, so `out[1]` indexes the extracted
            # entry rather than the value the plugin returned. Confirm this
            # is intended.
            out = out[0][next(iter(out[0]))]
            self.output_data = self._data_in.copy()
            self.output_data.data[list(out[1])[0]] = out
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer silently discarded.
            return

def get_result(self) -> DataInterface:
    """Return whatever is currently stored in ``self.output_data``."""
    result = self.output_data
    return result
39 changes: 39 additions & 0 deletions src/vai_lab/InputData/plugins/Input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from vai_lab._plugin_templates import InputDataPluginT

from vai_lab.Data.Data_core import Data as model
import pandas as pd

_PLUGIN_READABLE_NAMES = {"Input": "default",
"input": "alias"} # type:ignore
_PLUGIN_MODULE_OPTIONS = {} # type:ignore
_PLUGIN_REQUIRED_SETTINGS = {} # type:ignore
_PLUGIN_OPTIONAL_SETTINGS = {} # type:ignore
_PLUGIN_REQUIRED_DATA = {} # type:ignore
_PLUGIN_OPTIONAL_DATA = {"X", "Y", "X_tst", 'Y_tst'} # type:ignore

class Input(InputDataPluginT):
    """
    Import data to the pipeline or append column to existing data
    """

    def __init__(self, config=None, data_in=None, ini=False):
        """Initialises parent class.
        Passes `globals` dict of all current variables

        :param config: plugin configuration passed to ``self.configure``;
            an empty dict is used when omitted
        :param data_in: incoming data passed to ``self.set_data_in``;
            ``[None]`` is used when omitted
        :param ini: when true, skip configuration and use the ``model``
            class itself instead of building an instance from the options
        """
        # Build the defaults per call instead of sharing one mutable object
        # between every instance created without arguments.
        if config is None:
            config = {}
        if data_in is None:
            data_in = [None]

        super().__init__(globals())
        if not ini:
            # Model configuration
            self.set_data_in(data_in)
            self.configure(config)
            # Model initialisation
            try:
                self.model = model(**self._config["options"])
            except Exception as exc:
                names = self._PLUGIN_READABLE_NAMES
                default_name = list(names.keys())[
                    list(names.values()).index('default')]
                print('The plugin encountered an error on the parameters of '
                      + str(default_name) + ': ' + str(exc) + '.')
                raise
        else:
            # NOTE(review): this stores the `model` class, not an instance, so
            # the two attributes below are taken from the class. Confirm that
            # callers of the `ini` path never invoke them.
            self.model = model

        self.import_plugin = self.model.import_data
        self.append_plugin = self.model.append_data_column
36 changes: 36 additions & 0 deletions src/vai_lab/_plugin_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,42 @@ def save_file_as(self):
pass


class InputDataPluginT(PluginTemplate, ABC):
    """Template for input-data plugins.

    Subclasses are expected to provide ``self.import_plugin`` and
    ``self.append_plugin`` callables; the methods here only unpack the
    options and forward them.
    """

    def __init__(self, plugin_globals: dict) -> None:
        super().__init__(plugin_globals)

    def _default_readable_name(self) -> str:
        """Return the readable plugin name whose value is ``'default'``."""
        names = self._PLUGIN_READABLE_NAMES
        return str(list(names.keys())[list(names.values()).index('default')])

    @staticmethod
    def _call_with_options(func, options):
        """Call ``func`` with a list as positionals, a dict as keywords,
        or anything else as a single argument."""
        if isinstance(options, list):
            return func(*options)
        if isinstance(options, dict):
            return func(**options)
        return func(options)

    def import_data(self, options=None):
        """Sends params to import data, then import data

        :param options: list (positional args), dict (keyword args) or a
            single value forwarded to ``self.import_plugin``; an empty dict
            is used when omitted
        :returns: for dict options, a tuple of the ``import_plugin`` result
            and the option keys; otherwise the ``import_plugin`` result alone
        :raises Exception: whatever ``import_plugin`` raises, re-raised after
            a message is printed
        """
        if options is None:
            options = {}
        try:
            result = self._call_with_options(self.import_plugin, options)
            # Only the dict form also reports which option keys were used.
            if isinstance(options, dict) and not isinstance(options, list):
                return result, options.keys()
            return result
        except Exception as exc:
            print('The plugin encountered an error when importing '
                  + self._default_readable_name() + ': ' + str(exc) + '.')
            raise

    def append_data_column(self, options=None):
        """Appends a column to the dataframe

        :param options: list (positional args), dict (keyword args) or a
            single value forwarded to ``self.append_plugin``; an empty dict
            is used when omitted
        :returns: whatever ``self.append_plugin`` returns
        :raises Exception: whatever ``append_plugin`` raises, re-raised after
            a message is printed
        """
        if options is None:
            options = {}
        try:
            return self._call_with_options(self.append_plugin, options)
        except Exception as exc:
            print('The plugin encountered an error when appending '
                  + self._default_readable_name() + ': ' + str(exc) + '.')
            raise

class EnvironmentPluginT(PluginTemplate, ABC):

@abstractmethod
Expand Down
56 changes: 56 additions & 0 deletions src/vai_lab/examples/xml_files/InputData_demo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<pipeline>
<Initialiser name="Initialiser">
<relationships>
<child name="Input Data" />
</relationships>
<coordinates>
[(350.0, 50), 0, {}]
</coordinates>
</Initialiser>
<InputData name="Input Data">
<inputdata>
<X module="Initialiser" />
</inputdata>
<relationships>
<parent name="Initialiser" />
<child name="Output" />
</relationships>
<coordinates>
[(350.0, 350.0), 2, {0: 'd0-u2'}]
</coordinates>
<plugin type="Input">
<options />
<method type="import_data">
<options>
<filename>
.\examples\crystalDesign\20190606-R1-JT\BMP\RGB\Calibrated\Samples.csv
</filename>
<index_col>
0
</index_col>
</options>
</method>
</plugin>
</InputData>
<Output name="Output">
<inputdata>
<X module="Input Data" />
</inputdata>
<relationships>
<parent name="Input Data" />
</relationships>
<coordinates>
[(350.0, 650), 1, {2: 'd2-u1'}]
</coordinates>
<plugin type="Output">
<options>
<outdata>
Input Data
</outdata>
<outpath>
.\examples\results\output.pkl
</outpath>
</options>
</plugin>
</Output>
</pipeline>

0 comments on commit 8fb32a6

Please sign in to comment.