Skip to content

Commit

Permalink
Adding load_set_text parameter to gdx.GdxSymbol.load, gdxpds.to_dataframe, and gdxpds.to_dataframes so set values can be loaded as str instead of c_bool if desired. If exercised, the returned text should match what is shown in the Text column of GAMS Studio.
Browse files Browse the repository at this point in the history
  • Loading branch information
elainethale committed Jul 7, 2023
1 parent 9f79c77 commit 43b9a28
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 14 deletions.
31 changes: 24 additions & 7 deletions gdxpds/gdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def read(self,filename):
sym = GdxSymbol(name,data_type,dims=dims,file=self,index=index)
self.append(sym)
except Exception as e:
logger.error(f"Unable to initialize GdxSymbol {name!r}, because {e}. SKIPPING.")
logger.error(f"Unable to initialize GdxSymbol {name!r}, because {e}. SKIPPING.")

# read all symbols if not lazy_load
if not self.lazy_load:
Expand Down Expand Up @@ -556,7 +556,10 @@ def __init__(self,name,data_type,dims=0,file=None,index=None,
self.dims = dims
assert self._dataframe is not None
self._file = file
self._index = index
self._index = index

# adding this flag to implement ability to load set text instead of boolean values
self._fixup_set_vals = True

if self.file is not None:
# reading from file
Expand Down Expand Up @@ -950,7 +953,9 @@ def _fixup_set_value(self):
logger.warning(f"Filling null values in {self} with True. To be "
"filled:\n{self._dataframe[self._dataframe[colname].isnull()]}")
replace_df_column(self._dataframe, colname, self._dataframe[colname].fillna(value=True))
replace_df_column(self._dataframe,colname,self._dataframe[colname].apply(lambda x: c_bool(x)))
if self._fixup_set_vals:
replace_df_column(self._dataframe,colname,self._dataframe[colname].apply(lambda x: c_bool(x)))
self._fixup_set_vals = True
return

@property
Expand Down Expand Up @@ -986,9 +991,16 @@ def __str__(self):
s += ", loaded" if self.loaded else ", not loaded"
return s

def load(self):
def load(self, load_set_text=False):
"""
Loads this :py:class:`GdxSymbol` from its :py:attr:`file`
Loads this :py:class:`GdxSymbol` from its :py:attr:`file`, thereby populating
:py:attr:`dataframe`.
Parameters
----------
load_set_text : bool
If True (default is False) and this symbol is a :class:`GamsDataType.Set <GamsDataType>`,
loads the GDX Text field into the :py:attr:`dataframe` rather than a `c_bool`.
"""
if self.loaded:
logger.info("Nothing to do. Symbol already loaded.")
Expand All @@ -1011,8 +1023,13 @@ def reader():
yield gdxcc.gdxDataReadStr(handle)

vc = self.value_cols # do this for speed in the next line
data = [elements + [values[col_ind] for col_name, col_ind in vc] for ret, elements, values, afdim in reader()]
# gdxdict called gdxGetElemText here, but I do not currently see value in doing that
if load_set_text and (self.data_type == GamsDataType.Set):
data = [elements + [gdxcc.gdxGetElemText(self.file.H,int(values[col_ind]))[1]
for _col_name, col_ind in vc]
for _ret, elements, values, _afdim in reader()]
self._fixup_set_vals = False
else:
data = [elements + [values[col_ind] for col_name, col_ind in vc] for ret, elements, values, afdim in reader()]
self.dataframe = data
if not self.data_type in (GamsDataType.Set, GamsDataType.Alias):
self.dataframe = special.convert_gdx_to_np_svs(self.dataframe, self.num_dims)
Expand Down
35 changes: 28 additions & 7 deletions gdxpds/read_gdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,25 @@ def dataframes(self):
def symbols(self):
return [symbol_name for symbol_name in self.gdx]

def dataframe(self, symbol_name):
def dataframe(self, symbol_name, load_set_text=False):
if not symbol_name in self.gdx:
raise Error("No symbol named '{}' in '{}'.".format(symbol_name, self.gdx_file))
if not self.gdx[symbol_name].loaded:
self.gdx[symbol_name].load()
self.gdx[symbol_name].load(load_set_text=load_set_text)
# This was returning { symbol_name: dataframe }, which seems intuitively off.
return self.gdx[symbol_name].dataframe.copy()

def _get_dataframes(self, load_set_text=False):
if self.__dataframes is None:
self.__dataframes = OrderedDict()
for symbol in self.__gdx:
if not symbol.loaded:
symbol.load(load_set_text=load_set_text)
self.__dataframes[symbol.name] = symbol.dataframe.copy()
return self.__dataframes


def to_dataframes(gdx_file,gams_dir=None):
def to_dataframes(gdx_file,gams_dir=None,load_set_text=False):
"""
Primary interface for converting a GAMS GDX file to pandas DataFrames.
Expand All @@ -75,15 +85,20 @@ def to_dataframes(gdx_file,gams_dir=None):
Path to the GDX file to read
gams_dir : None or pathlib.Path or str
optional path to GAMS directory
load_set_text : bool
If True (default is False), loads the GDX Text field into the dataframe
rather than a `c_bool` for every symbol that is a Set.
Returns
-------
dict of str to pd.DataFrame
Returns a dict of Pandas DataFrames, one item for each symbol in the GDX
file, keyed with the symbol name.
"""
dfs = Translator(gdx_file,gams_dir=gams_dir).dataframes
return dfs
if load_set_text:
return Translator(gdx_file,gams_dir=gams_dir,lazy_load=True)._get_dataframes(load_set_text=load_set_text)
return Translator(gdx_file,gams_dir=gams_dir).dataframes


def list_symbols(gdx_file,gams_dir=None):
"""
Expand All @@ -104,7 +119,8 @@ def list_symbols(gdx_file,gams_dir=None):
symbols = Translator(gdx_file,gams_dir=gams_dir,lazy_load=True).symbols
return symbols

def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True):

def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True,load_set_text=False):
"""
Interface for getting the data for a single symbol
Expand All @@ -119,6 +135,9 @@ def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True):
old_interface : bool
Whether to use the old interface and return a dict, or the new interface,
and simply return a pd.DataFrame
load_set_text : bool
If True (default is False) and symbol_name is a Set, loads the GDX Text
field into the dataframe rather than a `c_bool`.
Returns
-------
Expand All @@ -128,5 +147,7 @@ def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True):
pd.DataFrame. Otherwise (if not old_interface), returns just the
pd.DataFrame.
"""
df = Translator(gdx_file,gams_dir=gams_dir,lazy_load=True).dataframe(symbol_name)
df = Translator(gdx_file,gams_dir=gams_dir,lazy_load=True).dataframe(
symbol_name,
load_set_text=load_set_text)
return {symbol_name: df} if old_interface else df

0 comments on commit 43b9a28

Please sign in to comment.