Skip to content

Commit

Permalink
Merge pull request #228 from eliebak/nanoset-python-compatible
Browse files Browse the repository at this point in the history
change nanoset args definition to make it compatible with the parser
  • Loading branch information
3outeille committed Sep 5, 2024
2 parents 4a2ddca + 7323ce1 commit 3be44ef
Showing 1 changed file with 2 additions and 7 deletions.
9 changes: 2 additions & 7 deletions src/nanotron/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,13 @@ def __post_init__(self):

 @dataclass
 class NanosetDatasetsArgs:
-    dataset_folder: Union[str, dict, List[str]]
+    dataset_folder: Union[str, List[str]]
+    dataset_weights: Optional[List[float]] = None

     def __post_init__(self):
         if isinstance(self.dataset_folder, str): # Case 1: 1 Dataset folder
             self.dataset_folder = [self.dataset_folder]
             self.dataset_weights = [1]
-        elif isinstance(self.dataset_folder, List): # Case 2: > 1 Dataset folder
-            self.dataset_weights = None # Set to None so we consume all the samples randomly
-        elif isinstance(self.dataset_folder, dict): # Case 3: dict with > 1 dataset_folder and weights
-            tmp_dataset_folder = self.dataset_folder.copy()
-            self.dataset_folder = list(tmp_dataset_folder.keys())
-            self.dataset_weights = list(tmp_dataset_folder.values())


@dataclass
Expand Down

0 comments on commit 3be44ef

Please sign in to comment.