Commit
[CI] Replace load_boston with randomly generated regression data (#428)

* Update compatibility tester

* Set up hypothesis in XGBoost integration test

* Add isort config

* Update test_skl_importer

* Update test_gtil

* Update test_lightgbm_integration

* [CI] Install hypothesis

* Remove unnecessary edit

* [CI] Show run time for all unit tests

* Relax hypothesis assertion

* Exclude invalid regression cases

* Relax test_lightgbm_regression

* [CI] Use local temp dir in Windows

* Relax
hcho3 authored Dec 19, 2022
1 parent cce130c commit a6f4802
Showing 17 changed files with 1,070 additions and 593 deletions.
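Background for the change: scikit-learn deprecated load_boston in 1.0 and removed it in 1.2, so tests that trained on the Boston dataset need a synthetic substitute. A minimal sketch of the replacement pattern (the sample counts and seed here are illustrative, not taken from the diff):

import numpy as np
from sklearn.datasets import make_regression

# Before (removed upstream): X, y = load_boston(return_X_y=True)
# After: draw a synthetic regression problem of comparable shape.
X, y = make_regression(n_samples=150, n_features=100, random_state=0)
X, y = X.astype(np.float32), y.astype(np.float32)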
3 changes: 3 additions & 0 deletions .isort.cfg
@@ -0,0 +1,3 @@
[settings]
profile=black
known_first_party=treelite,treelite_runtime
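To sanity-check the new config, isort can also be driven from Python. This snippet is illustrative and not part of the commit; isort.code and its config kwargs are the public isort 5 API:

import isort

# profile="black" and known_first_party mirror the new .isort.cfg: isort
# groups stdlib first, then third-party (numpy), then first-party (treelite).
messy = "import treelite\nimport numpy as np\nimport os\n"
print(isort.code(messy, profile="black",
                 known_first_party=["treelite", "treelite_runtime"]))
# CLI equivalent from the repo root (picks up .isort.cfg): python -m isort .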
1 change: 1 addition & 0 deletions ops/conda_env/dev.yml
@@ -8,6 +8,7 @@ dependencies:
 - pandas
 - pytest
 - pytest-cov
+- hypothesis
 - scikit-learn
 - coverage
 - codecov
2 changes: 1 addition & 1 deletion ops/cpp-python-coverage.sh
@@ -22,7 +22,7 @@ cd ../..

 echo "##[section]Running Python integration tests..."
 export PYTHONPATH='./python:./runtime/python'
-python -m pytest --cov=treelite --cov=treelite_runtime -v -rxXs --fulltrace tests/python tests/cython
+python -m pytest --cov=treelite --cov=treelite_runtime -v -rxXs --fulltrace --durations=0 tests/python tests/cython

 echo "##[section]Collecting coverage data..."
 lcov --directory . --capture --output-file coverage.info
2 changes: 1 addition & 1 deletion ops/macos-python-coverage.sh
@@ -17,7 +17,7 @@ cd ..

 ./build/treelite_cpp_test
 PYTHONPATH=./python:./runtime/python python -m pytest --cov=treelite --cov=treelite_runtime -v -rxXs \
-  --fulltrace tests/python
+  --fulltrace --durations=0 tests/python
 lcov --directory . --capture --output-file coverage.info
 lcov --remove coverage.info '*dmlccore*' --output-file coverage.info
 lcov --remove coverage.info '*fmtlib*' --output-file coverage.info
2 changes: 1 addition & 1 deletion ops/test-linux-python-wheel.sh
@@ -6,7 +6,7 @@ echo "##[section]Installing Treelite into Python environment..."
 pip install python/dist/*.whl runtime/python/dist/*.whl

 echo "##[section]Running Python tests..."
-python -m pytest -v -rxXs --fulltrace tests/python/test_basic.py
+python -m pytest -v -rxXs --fulltrace --durations=0 tests/python/test_basic.py

 echo "##[section]Uploading Python wheels..."
 python -m awscli s3 cp python/dist/*.whl s3://treelite-wheels/ --acl public-read || true
2 changes: 1 addition & 1 deletion ops/test-macos-python-wheel.sh
@@ -6,4 +6,4 @@ echo "##[section]Installing Treelite into Python environment..."
 pip install wheelhouse/*.whl

 echo "##[section]Running Python tests..."
-python -m pytest -v -rxXs --fulltrace tests/python/test_basic.py
+python -m pytest -v -rxXs --fulltrace --durations=0 tests/python/test_basic.py
2 changes: 1 addition & 1 deletion ops/test-sdist.sh
@@ -8,7 +8,7 @@ make pippack
 echo "##[section]Testing the source distribution..."
 python -m pip install -v treelite-*.tar.gz
 python -m pip install -v treelite_runtime-*.tar.gz
-python -m pytest -v -rxXs --fulltrace tests/python/test_basic.py
+python -m pytest -v -rxXs --fulltrace --durations=0 tests/python/test_basic.py

 # Deploy source distribution to S3
 for file in ./treelite-*.tar.gz ./treelite_runtime-*.tar.gz
2 changes: 1 addition & 1 deletion ops/test-win-python-wheel.bat
@@ -18,7 +18,7 @@ for /R %%i in (runtime\python\dist\*.whl) DO (

 echo ##[section]Running Python tests...
 mkdir temp
-python -m pytest --basetemp="%WORKING_DIR%\temp" -v -rxXs --fulltrace tests\python\test_basic.py
+python -m pytest --basetemp="%WORKING_DIR%\temp" -v -rxXs --fulltrace --durations=0 tests\python\test_basic.py
 if %errorlevel% neq 0 exit /b %errorlevel%

 echo ##[section]Uploading Python wheels...
3 changes: 2 additions & 1 deletion ops/win-python-coverage.bat
@@ -14,7 +14,8 @@ mkdir temp
 call micromamba activate dev
 if %errorlevel% neq 0 exit /b %errorlevel%
 set "PYTHONPATH=./python;./runtime/python"
-python -m pytest --basetemp="%WORKING_DIR%\temp" --cov=treelite --cov=treelite_runtime --cov-report xml -v -rxXs --fulltrace tests\python
+set "PYTEST_TMPDIR=%WORKING_DIR%\temp"
+python -m pytest --basetemp="%WORKING_DIR%\temp" --cov=treelite --cov=treelite_runtime --cov-report xml -v -rxXs --fulltrace --durations=0 tests\python
 if %errorlevel% neq 0 exit /b %errorlevel%

 echo ##[section]Submitting code coverage data to CodeCov...
3 changes: 2 additions & 1 deletion src/annotator.cc
@@ -228,7 +228,8 @@ AnnotateImpl(
   // change layout of counts
   std::vector<std::vector<uint64_t>>& counts = *out_counts;
   for (size_t i = 0; i < ntree; ++i) {
-    counts.emplace_back(&new_counts[count_row_ptr[i]], &new_counts[count_row_ptr[i + 1]]);
+    counts.emplace_back(new_counts.begin() + count_row_ptr[i],
+                        new_counts.begin() + count_row_ptr[i + 1]);
   }
 }

13 changes: 8 additions & 5 deletions tests/cython/compatibility_tester.py
@@ -1,9 +1,12 @@
-import pickle
 import argparse
 import os
+import pickle

+import lightgbm as lgb
 import numpy as np
-import treelite
 from sklearn.datasets import load_iris
 from sklearn.ensemble import RandomForestClassifier

+import treelite
+

 def _fetch_data():
@@ -12,7 +15,7 @@ def _fetch_data():


 def _train_model(X, y):
-    clf = RandomForestClassifier(max_depth=3, random_state=0, n_estimators=3)
+    clf = lgb.LGBMClassifier(max_depth=3, random_state=0, n_estimators=3)
     clf.fit(X, y)
     return clf

@@ -22,7 +25,7 @@ def save(args):
     clf = _train_model(X, y)
     with open(args.model_pickle_path, "wb") as f:
         pickle.dump(clf, f)
-    tl_model = treelite.sklearn.import_model(clf)
+    tl_model = treelite.Model.from_lightgbm(clf.booster_)
     tl_model.serialize(args.checkpoint_path)

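The loading side of this tester is not visible in this view; here is a hedged sketch of what the counterpart would look like after this change (treelite.Model.deserialize is the real checkpoint-loading API, but the function body and argument names are illustrative, mirroring save() above):

def load(args):
    # Reload the pickled LightGBM classifier and the Treelite checkpoint
    # written by save(), so their predictions can be compared downstream.
    with open(args.model_pickle_path, "rb") as f:
        clf = pickle.load(f)
    tl_model = treelite.Model.deserialize(args.checkpoint_path)
    return clf, tl_model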
98 changes: 98 additions & 0 deletions tests/python/hypothesis_util.py
@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
"""Utility functions for hypothesis-based testing"""

from sys import platform as _platform

import numpy as np
from hypothesis.strategies import composite, integers, just, none
from sklearn.datasets import make_regression


@composite
def standard_regression_datasets(
    draw,
    n_samples=integers(min_value=100, max_value=200),
    n_features=integers(min_value=100, max_value=200),
    *,
    n_informative=None,
    n_targets=just(1),
    bias=just(0.0),
    effective_rank=none(),
    tail_strength=just(0.5),
    noise=just(0.0),
    shuffle=just(True),
    random_state=None,
):
    """
    Returns a strategy to generate regression problem input datasets.

    Note:
    This function uses the sklearn.datasets.make_regression function to
    generate the regression problem from the provided search strategies.

    Credit: Carl Simon Adorf (@csadorf)
    https://github.com/rapidsai/cuml/blob/447bded/python/cuml/testing/strategies.py

    Parameters
    ----------
    n_samples: SearchStrategy[int]
        Returned arrays will have number of rows drawn from these values.
    n_features: SearchStrategy[int]
        Returned arrays will have number of columns drawn from these values.
    n_informative: SearchStrategy[int], default=None
        A search strategy for the number of informative features. If None,
        will use 10% of the actual number of features, but not less than 1
        unless the number of features is zero.
    n_targets: SearchStrategy[int], default=just(1)
        A search strategy for the number of targets, i.e. the number of
        columns of the returned y output array.
    bias: SearchStrategy[float], default=just(0.0)
        A search strategy for the bias term.
    effective_rank: SearchStrategy[int], default=none()
        If not None, a search strategy for the effective rank of the input
        data for the regression problem. See sklearn.datasets.make_regression()
        for a detailed explanation of this parameter.
    tail_strength: SearchStrategy[float], default=just(0.5)
        See sklearn.datasets.make_regression() for a detailed explanation of
        this parameter.
    noise: SearchStrategy[float], default=just(0.0)
        A search strategy for the standard deviation of the gaussian noise.
    shuffle: SearchStrategy[bool], default=just(True)
        A boolean search strategy to determine whether samples and features
        are shuffled.
    random_state: int, RandomState instance or None, default=None
        Pass a random state or integer to determine the random number
        generation for data set generation.

    Returns
    -------
    (X, y): SearchStrategy[array], SearchStrategy[array]
        A tuple of search strategies for arrays subject to the constraints of
        the provided parameters.
    """
    n_features_ = draw(n_features)
    if n_informative is None:
        n_informative = just(max(min(n_features_, 1), int(0.1 * n_features_)))
    X, y = make_regression(
        n_samples=draw(n_samples),
        n_features=n_features_,
        n_informative=draw(n_informative),
        n_targets=draw(n_targets),
        bias=draw(bias),
        effective_rank=draw(effective_rank),
        tail_strength=draw(tail_strength),
        noise=draw(noise),
        shuffle=draw(shuffle),
        random_state=random_state,
    )
    return X.astype(np.float32), y.astype(np.float32)


def standard_settings():
    """Default hypothesis settings; use a smaller max_examples on Windows."""
    kwargs = {
        "deadline": None,
        "max_examples": 20,
        "print_blob": True,
    }
    if _platform == "win32":
        kwargs["max_examples"] = 3
    return kwargs
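To show how the two helpers compose, here is a hedged sketch of a consuming test (the test name and assertions are illustrative; the commit's integration tests draw datasets the same way):

import numpy as np
from hypothesis import given, settings

from hypothesis_util import standard_regression_datasets, standard_settings


@given(dataset=standard_regression_datasets())
@settings(**standard_settings())
def test_dataset_shape(dataset):
    # Each example is a freshly drawn (X, y) pair within the default
    # 100-200 sample/feature bounds declared by the strategy above.
    X, y = dataset
    assert X.shape[0] == y.shape[0]
    assert X.dtype == np.float32 and y.dtype == np.float32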
(Diff listing truncated; the remaining changed files are not shown in this view.)
