
Commit

Merge branch 'main' into docs-v1
BobaZooba authored Nov 14, 2023
2 parents 4af44ca + b152c19 commit e27d5da
Showing 12 changed files with 58 additions and 28 deletions.
29 changes: 20 additions & 9 deletions Makefile
@@ -103,13 +103,33 @@ mypy: ## Run static code analyzer
lint: check-codestyle mypy unit-test ## Run all checks

#* Develop
.PHONY: check-version-var
check-version-var: ## Check version var is not default
@if [ "$(VERSION)" = "0.0.0" ]; then \
echo "VERSION is equal to default 0.0.0"; \
echo "Please specify correct version"; \
echo "For example:"; \
echo "make test-pypi-release VERSION=1.2.3"; \
exit 1; \
else \
exit 0; \
fi

.PHONY: push-new-version
push-new-version: ## Push new version to GitHub
make check-version-var
git add .
git commit -m "Release: $(VERSION)"
git tag $(VERSION) -m 'Adds tag $(VERSION) for pypi'
git push --tags origin main

.PHONY: push-dev-version
push-dev-version: ## Push new dev version to GitHub
make check-version-var
git add .
git commit -m "New dev version: $(VERSION)"
git push origin main

.PHONY: delete-dist
delete-dist: ## Delete all dist builds
rm -rf ./dist
@@ -130,15 +150,6 @@ pypi-upload: ## Upload package to the pypi
# make test-pypi-release VERSION=0.1.0
.PHONY: test-pypi-release
test-pypi-release: ## Release test pypi package
@if [ "$(VERSION)" = "0.0.0" ]; then \
echo "VERSION is equal to default 0.0.0"; \
echo "Please specify correct version"; \
echo "For example:"; \
echo "make test-pypi-release VERSION=1.2.3"; \
exit 1; \
else \
exit 0; \
fi
make codestyle
make check-codestyle
make mypy
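For orientation, here is how the new Makefile targets behave from the command line. This is only a sketch: it assumes VERSION defaults to 0.0.0 elsewhere in the Makefile (as the guard's error message suggests), and the version numbers are placeholders.

# Fails the check-version-var guard because VERSION is still the default 0.0.0
make push-new-version

# Passes the guard, commits "Release: 0.1.0", tags 0.1.0, and pushes the tag to main
make push-new-version VERSION=0.1.0

# Commits "New dev version: 0.2.0-dev" and pushes to main without tagging
make push-dev-version VERSION=0.2.0-dev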
12 changes: 11 additions & 1 deletion setup.py
@@ -57,7 +57,7 @@
# Setup
setup(
name="xllm",
version="0.0.1",
version="0.0.4-dev",
description="Simple & Cutting Edge LLM Finetuning",
license_files=["LICENSE"],
long_description=open("README.md", "r", encoding="utf-8").read(),
@@ -97,6 +97,16 @@
)

# Release checklist
# 1. Change the version in __init__.py and setup.py
# 2. Run "make test-pypi-release VERSION=VERSION"
# 3. Make sure the test PyPI package works correctly
# 4. Run "make pypi-upload"
# 5. Add release notes to the tag in github once everything is looking hunky-dory
# 6. Change the version to dev "-dev" in __init__.py and setup.py
# 7. Push the new dev version: "make push-dev-version VERSION=VERSION"


# Legacy Release checklist
# 1. Change the version in __init__.py and setup.py.
# 2. Commit these changes with the message: "Release: VERSION"
# 3. Add a tag in git to mark the release: "git tag VERSION -m 'Adds tag VERSION for pypi' "
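Read together with the Makefile changes above, the new release checklist corresponds roughly to the command sequence below. This is a sketch only: the version numbers are placeholders, and steps 1, 3, 5, and 6 remain manual edits or checks.

# 1. Bump the version in src/xllm/__init__.py and setup.py, then:
make test-pypi-release VERSION=0.0.4      # 2. release the test PyPI package
# 3. Check that the test PyPI package works
make pypi-upload                          # 4. upload the package to PyPI
# 5. Add release notes to the tag on GitHub
# 6. Switch the version back to a "-dev" value, then:
make push-dev-version VERSION=0.0.5-dev   # 7. push the new dev version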
4 changes: 2 additions & 2 deletions src/xllm/__init__.py
@@ -14,11 +14,11 @@

# ruff: noqa: F401

__version__ = "0.0.1"
__version__ = "0.0.4-dev"

from . import enums, types
from .cli.fuse import cli_run_fuse
from .cli.prepare import cli_run_preprare
from .cli.prepare import cli_run_prepare
from .cli.train import cli_run_train
from .core.config import Config
from .run.fuse import fuse
2 changes: 1 addition & 1 deletion src/xllm/cli/__init__.py
@@ -15,6 +15,6 @@
# ruff: noqa: F401

from .fuse import cli_run_fuse
from .prepare import cli_run_preprare
from .prepare import cli_run_prepare
from .quantize import cli_run_quantize
from .train import cli_run_train
4 changes: 2 additions & 2 deletions src/xllm/cli/prepare.py
@@ -21,7 +21,7 @@
from ..utils.cli import setup_cli


def cli_run_preprare(
def cli_run_prepare(
config_cls: Type[Config] = Config,
) -> Tuple[PreTrainedTokenizer, PreTrainedModel]:
parser = HfArgumentParser(config_cls)
@@ -32,4 +32,4 @@ def cli_run_preprare(


if __name__ == "__main__":
cli_run_preprare(config_cls=Config)
cli_run_prepare(config_cls=HuggingFaceConfig)
8 changes: 3 additions & 5 deletions src/xllm/core/config.py
@@ -84,7 +84,7 @@ class Config:
"help": "HuggingFace Hub token. You can also set this key using .env file",
},
)
deepspeed_stage: Union[int, str, None] = field(
deepspeed_stage: Optional[int] = field(
default=0,
metadata={
"help": "DeepSpeed stage",
@@ -441,13 +441,11 @@ class Config:
eval_delay: float = field(
default=0,
metadata={
"helps": "Number of epochs or steps to wait for before the first "
"help": "Number of epochs or steps to wait for before the first "
"evaluation can be performed, depending on the evaluation_strategy"
},
)
eval_steps: Union[int, float, None] = field(
default=1000, metadata={"helps": "Number of update steps between two evaluations"}
)
eval_steps: Optional[int] = field(default=1000, metadata={"help": "Number of update steps between two evaluations"})
warmup_steps: int = field(
default=1000,
metadata={
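The config.py changes narrow deepspeed_stage and eval_steps from Union types to plain Optional[int]. A small usage sketch of the affected fields (the values are illustrative only; Config is re-exported from the package root, as shown in src/xllm/__init__.py above):

from xllm import Config

config = Config(
    deepspeed_stage=2,  # previously Union[int, str, None], now Optional[int]
    eval_steps=500,     # number of update steps between two evaluations
    eval_delay=0,       # epochs or steps to wait before the first evaluation
)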
6 changes: 4 additions & 2 deletions src/xllm/experiments/base.py
@@ -119,11 +119,13 @@ def build(self):
self.before_eval_dataset_build()
self.eval_dataset = self.build_eval_dataset()
if self.eval_dataset is not None:
if self.training_arguments is not None:
self.training_arguments.do_eval = True
dist_logger(
f"Valid dataset {self.eval_dataset.__class__.__name__} was built. Size: {len(self.eval_dataset)}"
f"Eval dataset {self.eval_dataset.__class__.__name__} was built. Size: {len(self.eval_dataset)}"
)
else:
dist_logger("Valid dataset is None")
dist_logger("Eval dataset is None")
self.after_eval_dataset_build()

if self.tokenizer is None:
2 changes: 1 addition & 1 deletion src/xllm/utils/post_training.py
@@ -72,7 +72,7 @@ def fuse_lora(config: Config) -> Tuple[PreTrainedTokenizer, PreTrainedModel]:
json.dump(tokenizer_config, file_object, indent=2)
logger.info(f"Model saved locally to {config.fused_model_local_path}")

if config.push_to_hub:
if config.push_to_hub or config.hub_model_id is not None:
logger.info(f"Pushing model to the hub {config.hub_model_id}")
if config.hub_model_id is not None:
tokenizer.push_to_hub(
10 changes: 8 additions & 2 deletions tests/conftest.py
@@ -110,9 +110,9 @@ def llama_lm_collator(llama_tokenizer: PreTrainedTokenizer) -> LMCollator:


@pytest.fixture(scope="session")
def training_arguments() -> TrainingArguments:
def training_arguments(path_to_outputs: str) -> TrainingArguments:
arguments = TrainingArguments(
output_dir="./outputs/",
output_dir=path_to_outputs,
per_device_train_batch_size=2,
gradient_accumulation_steps=2,
warmup_steps=50,
@@ -191,3 +191,9 @@ def path_to_fused_model_local_path(tmp_path_factory: TempPathFactory) -> str:
def path_to_download_result(tmp_path_factory: TempPathFactory) -> str:
path = tmp_path_factory.mktemp("tmp") / "data.jsonl"
return os.path.abspath(path)


@pytest.fixture(scope="session")
def path_to_outputs(tmp_path_factory: TempPathFactory) -> str:
path = tmp_path_factory.mktemp("tmp") / "outputs/"
return os.path.abspath(path)
3 changes: 2 additions & 1 deletion tests/unit/experiments/test_base.py
@@ -19,7 +19,7 @@ def test_base_experiment_init(monkeypatch: MonkeyPatch, path_to_train_dummy_data
Experiment(config=config)


def test_base_experiment_train(monkeypatch: MonkeyPatch, path_to_train_prepared_dummy_data: str):
def test_base_experiment_train(monkeypatch: MonkeyPatch, path_to_train_prepared_dummy_data: str, path_to_outputs: str):
os.environ["TOKENIZERS_PARALLELISM"] = "false"
config = Config(
push_to_hub=False,
@@ -29,6 +29,7 @@ def test_base_experiment_train(monkeypatch: MonkeyPatch, path_to_train_prepared_
save_total_limit=0,
max_steps=2,
tokenizer_name_or_path=LLAMA_TOKENIZER_DIR,
output_dir=path_to_outputs,
)

with patch_from_pretrained_auto_causal_lm(monkeypatch=monkeypatch):
5 changes: 3 additions & 2 deletions tests/unit/run/test_train.py
@@ -6,15 +6,16 @@
from tests.helpers.patches import patch_from_pretrained_auto_causal_lm, patch_trainer_train


def test_train(monkeypatch: MonkeyPatch, path_to_train_prepared_dummy_data: str):
config = Config(
def test_train(monkeypatch: MonkeyPatch, path_to_train_prepared_dummy_data: str, path_to_outputs: str):
config = HuggingFaceConfig(
push_to_hub=False,
deepspeed_stage=0,
train_local_path_to_data=path_to_train_prepared_dummy_data,
report_to_wandb=False,
save_total_limit=0,
max_steps=2,
tokenizer_name_or_path=LLAMA_TOKENIZER_DIR,
output_dir=path_to_outputs,
)
with patch_from_pretrained_auto_causal_lm(monkeypatch=monkeypatch):
with patch_trainer_train(monkeypatch=monkeypatch):
1 change: 1 addition & 0 deletions tests/unit/trainers/test_registry.py
@@ -15,6 +15,7 @@ def test_get_lm_trainer(
training_arguments: TrainingArguments,
llama_lm_collator: LMCollator,
soda_dataset: SodaDataset,
path_to_outputs: str,
):
trainer_cls = trainers_registry.get(key=enums.Trainers.lm)
trainer = trainer_cls(
