Skip to content

Commit

Permalink
Merge pull request #98 from NREL/eh/text_field_gdxdump
Browse files Browse the repository at this point in the history
Adding load_set_text parameter to gdx.GdxSymbol.load, gdxpds.to_dataframe, and gdxpds.to_dataframes
  • Loading branch information
elainethale authored Jul 21, 2023
2 parents 8de37b7 + fe83083 commit ef994d0
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 13 deletions.
31 changes: 24 additions & 7 deletions gdxpds/gdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def read(self,filename):
sym = GdxSymbol(name,data_type,dims=dims,file=self,index=index)
self.append(sym)
except Exception as e:
logger.error(f"Unable to initialize GdxSymbol {name!r}, because {e}. SKIPPING.")
logger.error(f"Unable to initialize GdxSymbol {name!r}, because {e}. SKIPPING.")

# read all symbols if not lazy_load
if not self.lazy_load:
Expand Down Expand Up @@ -556,7 +556,10 @@ def __init__(self,name,data_type,dims=0,file=None,index=None,
self.dims = dims
assert self._dataframe is not None
self._file = file
self._index = index
self._index = index

# adding this flag to implement ability to load set text instead of boolean values
self._fixup_set_vals = True

if self.file is not None:
# reading from file
Expand Down Expand Up @@ -950,7 +953,9 @@ def _fixup_set_value(self):
logger.warning(f"Filling null values in {self} with True. To be "
"filled:\n{self._dataframe[self._dataframe[colname].isnull()]}")
replace_df_column(self._dataframe, colname, self._dataframe[colname].fillna(value=True))
replace_df_column(self._dataframe,colname,self._dataframe[colname].apply(lambda x: c_bool(x)))
if self._fixup_set_vals:
replace_df_column(self._dataframe,colname,self._dataframe[colname].apply(lambda x: c_bool(x)))
self._fixup_set_vals = True
return

@property
Expand Down Expand Up @@ -986,9 +991,16 @@ def __str__(self):
s += ", loaded" if self.loaded else ", not loaded"
return s

def load(self):
def load(self, load_set_text=False):
"""
Loads this :py:class:`GdxSymbol` from its :py:attr:`file`
Loads this :py:class:`GdxSymbol` from its :py:attr:`file`, thereby populating
:py:attr:`dataframe`.
Parameters
----------
load_set_text : bool
If True (default is False) and this symbol is a :class:`GamsDataType.Set <GamsDataType>`,
loads the GDX Text field into the :py:attr:`dataframe` rather than a `c_bool`.
"""
if self.loaded:
logger.info("Nothing to do. Symbol already loaded.")
Expand All @@ -1011,8 +1023,13 @@ def reader():
yield gdxcc.gdxDataReadStr(handle)

vc = self.value_cols # do this for speed in the next line
data = [elements + [values[col_ind] for col_name, col_ind in vc] for ret, elements, values, afdim in reader()]
# gdxdict called gdxGetElemText here, but I do not currently see value in doing that
if load_set_text and (self.data_type == GamsDataType.Set):
data = [elements + [gdxcc.gdxGetElemText(self.file.H,int(values[col_ind]))[1]
for _col_name, col_ind in vc]
for _ret, elements, values, _afdim in reader()]
self._fixup_set_vals = False
else:
data = [elements + [values[col_ind] for col_name, col_ind in vc] for ret, elements, values, afdim in reader()]
self.dataframe = data
if not self.data_type in (GamsDataType.Set, GamsDataType.Alias):
self.dataframe = special.convert_gdx_to_np_svs(self.dataframe, self.num_dims)
Expand Down
32 changes: 26 additions & 6 deletions gdxpds/read_gdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,25 @@ def symbols(self):
def data_types(self):
return {symbol.name: symbol.data_type for symbol in self.gdx}

def dataframe(self, symbol_name):
def dataframe(self, symbol_name, load_set_text=False):
if not symbol_name in self.gdx:
raise Error("No symbol named '{}' in '{}'.".format(symbol_name, self.gdx_file))
if not self.gdx[symbol_name].loaded:
self.gdx[symbol_name].load()
self.gdx[symbol_name].load(load_set_text=load_set_text)
# This was returning { symbol_name: dataframe }, which seems intuitively off.
return self.gdx[symbol_name].dataframe.copy()

def _get_dataframes(self, load_set_text=False):
    """
    Return the cached mapping of symbol name to a copy of that symbol's dataframe.

    The mapping is built on the first call: every symbol in the GDX file that is
    not yet loaded is loaded with the given ``load_set_text`` flag, and a copy of
    its dataframe is stored under the symbol's name. Later calls return the
    cached mapping as-is, regardless of the ``load_set_text`` value passed.

    Parameters
    ----------
    load_set_text : bool
        Forwarded to each not-yet-loaded symbol's ``load`` method.

    Returns
    -------
    OrderedDict
        Symbol name -> copy of the symbol's dataframe, in file iteration order.
    """
    # Already built on a previous call: hand back the cache untouched.
    if self.__dataframes is not None:
        return self.__dataframes

    # Install the cache before filling it, exactly as the lazy build requires.
    self.__dataframes = OrderedDict()
    for sym in self.__gdx:
        already_loaded = sym.loaded
        if not already_loaded:
            sym.load(load_set_text=load_set_text)
        self.__dataframes[sym.name] = sym.dataframe.copy()
    return self.__dataframes



def to_dataframes(gdx_file,gams_dir=None):
def to_dataframes(gdx_file,gams_dir=None,load_set_text=False):
"""
Primary interface for converting a GAMS GDX file to pandas DataFrames.
Expand All @@ -80,13 +89,18 @@ def to_dataframes(gdx_file,gams_dir=None):
Path to the GDX file to read
gams_dir : None or pathlib.Path or str
optional path to GAMS directory
load_set_text : bool
If True (default is False), loads the GDX Text field of each Set symbol
into its dataframe rather than a `c_bool`.
Returns
-------
dict of str to pd.DataFrame
Returns a dict of Pandas DataFrames, one item for each symbol in the GDX
file, keyed with the symbol name.
"""
if load_set_text:
return Translator(gdx_file,gams_dir=gams_dir,lazy_load=True)._get_dataframes(load_set_text=load_set_text)
return Translator(gdx_file,gams_dir=gams_dir).dataframes


Expand Down Expand Up @@ -128,7 +142,8 @@ def get_data_types(gdx_file,gams_dir=None):
return Translator(gdx_file,gams_dir=gams_dir,lazy_load=True).data_types


def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True):

def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True,load_set_text=False):
"""
Interface for getting the data for a single symbol
Expand All @@ -143,6 +158,9 @@ def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True):
old_interface : bool
Whether to use the old interface and return a dict, or the new interface,
and simply return a pd.DataFrame
load_set_text : bool
If True (default is False) and symbol_name is a Set, loads the GDX Text
field into the dataframe rather than a `c_bool`.
Returns
-------
Expand All @@ -152,5 +170,7 @@ def to_dataframe(gdx_file,symbol_name,gams_dir=None,old_interface=True):
pd.DataFrame. Otherwise (if not old_interface), returns just the
pd.DataFrame.
"""
df = Translator(gdx_file,gams_dir=gams_dir,lazy_load=True).dataframe(symbol_name)
df = Translator(gdx_file,gams_dir=gams_dir,lazy_load=True).dataframe(
symbol_name,
load_set_text=load_set_text)
return {symbol_name: df} if old_interface else df

0 comments on commit ef994d0

Please sign in to comment.