Skip to content

Commit

Permalink
make nanoset compatible with python
Browse files (browse the repository at this point in the history)
  • Loading branch information
eliebak committed Sep 5, 2024
1 parent 03d67f2 commit 7323ce1
Showing 1 changed file with 2 additions and 7 deletions.
9 changes: 2 additions & 7 deletions src/nanotron/config/config.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,18 +93,13 @@ def __post_init__(self):

@dataclass
class NanosetDatasetsArgs:
- dataset_folder: Union[str, dict, List[str]]
+ dataset_folder: Union[str, List[str]]
dataset_weights: Optional[List[float]] = None

def __post_init__(self):
if isinstance(self.dataset_folder, str): # Case 1: 1 Dataset folder
self.dataset_folder = [self.dataset_folder]
self.dataset_weights = [1]
- elif isinstance(self.dataset_folder, List): # Case 2: > 1 Dataset folder
- self.dataset_weights = None # Set to None so we consume all the samples randomly
- elif isinstance(self.dataset_folder, dict): # Case 3: dict with > 1 dataset_folder and weights
- tmp_dataset_folder = self.dataset_folder.copy()
- self.dataset_folder = list(tmp_dataset_folder.keys())
- self.dataset_weights = list(tmp_dataset_folder.values())


@dataclass
Expand Down

1 comment on commit 7323ce1

@TJ-Solergibert
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should update the docs then!

Please sign in to comment.