From 8fb404e30b18d162a4baa468f6ae1856a9406a6f Mon Sep 17 00:00:00 2001
From: KOLANICH <kolan_n@mail.ru>
Date: Sun, 22 Jan 2017 23:33:00 +0300
Subject: [PATCH] Initial commit

---
 .ci/aptPackagesToInstall.txt |   2 +
 .ci/pythonStdlibFixes.sh     |   2 +
 .editorconfig                |  12 +
 .github/.templateMarker      |   1 +
 .github/dependabot.yml       |   8 +
 .github/workflows/CI.yml     |  15 +
 .gitignore                   |   7 +
 .gitlab-ci.yml               |  42 +++
 Chassis.py                   | 371 +++++++++++++++++++
 Code_Of_Conduct.md           |   1 +
 MANIFEST.in                  |   4 +
 ReadMe.md                    |  19 +
 Tutorial.ipynb               | 686 +++++++++++++++++++++++++++++++++++
 UNLICENSE                    |  24 ++
 pyproject.toml               |  34 ++
 tests/tests.py               |  70 ++++
 16 files changed, 1298 insertions(+)
 create mode 100644 .ci/aptPackagesToInstall.txt
 create mode 100644 .ci/pythonStdlibFixes.sh
 create mode 100755 .editorconfig
 create mode 100644 .github/.templateMarker
 create mode 100644 .github/dependabot.yml
 create mode 100644 .github/workflows/CI.yml
 create mode 100644 .gitignore
 create mode 100644 .gitlab-ci.yml
 create mode 100644 Chassis.py
 create mode 100644 Code_Of_Conduct.md
 create mode 100644 MANIFEST.in
 create mode 100644 ReadMe.md
 create mode 100644 Tutorial.ipynb
 create mode 100644 UNLICENSE
 create mode 100644 pyproject.toml
 create mode 100644 tests/tests.py

diff --git a/.ci/aptPackagesToInstall.txt b/.ci/aptPackagesToInstall.txt
new file mode 100644
index 0000000..bd4d2a3
--- /dev/null
+++ b/.ci/aptPackagesToInstall.txt
@@ -0,0 +1,2 @@
+python3-pandas
+python3-numpy
diff --git a/.ci/pythonStdlibFixes.sh b/.ci/pythonStdlibFixes.sh
new file mode 100644
index 0000000..b6f8fba
--- /dev/null
+++ b/.ci/pythonStdlibFixes.sh
@@ -0,0 +1,2 @@
+if $( python -c "import sys;sys.exit(int(not (sys.version_info < (3, 5)) ))" ); then curl -O https://raw.githubusercontent.com/python/cpython/3.6/Lib/typing.py; fi;
+if $( python -c "import sys;sys.exit(int(not (sys.version_info < (3, 6)) ))" ); then curl -O https://raw.githubusercontent.com/python/cpython/3.7/Lib/enum.py; fi;
diff --git a/.editorconfig b/.editorconfig
new file mode 100755
index 0000000..c9162b9
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+root = true
+
+[*]
+charset = utf-8
+indent_style = tab
+indent_size = 4
+insert_final_newline = true
+end_of_line = lf
+
+[*.{yml,yaml}]
+indent_style = space
+indent_size = 2
diff --git a/.github/.templateMarker b/.github/.templateMarker
new file mode 100644
index 0000000..5e3a3e0
--- /dev/null
+++ b/.github/.templateMarker
@@ -0,0 +1 @@
+KOLANICH/python_project_boilerplate.py
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..89ff339
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,8 @@
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "daily"
+    allow:
+      - dependency-type: "all"
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..7fe33b3
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,15 @@
+name: CI
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    steps:
+      - name: typical python workflow
+        uses: KOLANICH-GHActions/typical-python-workflow@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..59e6afa
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+__pycache__
+*.pyc
+*.pyo
+/*.egg-info
+/build
+/dist
+/.eggs
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..7e11603
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,42 @@
+#image: travisci/ci-garnet:packer-1512502276-986baf0
+#image: pypy:latest
+image: python:latest
+
+build:
+  tags:
+    - shared
+  stage: build
+  variables:
+    GIT_DEPTH: "1"
+
+  before_script:
+    - source ./.ci/pythonStdlibFixes.sh
+    - pip3 install --upgrade setuptools
+    - pip3 install --upgrade pandas numpy
+    - pip3 install --upgrade coveralls setuptools_scm
+
+  script:
+    - python3 setup.py bdist_wheel
+    - coverage run --source=Chassis setup.py test
+    - pip3 install --upgrade ./dist/*.whl
+  cache:
+    paths:
+      - /usr/local/site-packages
+  artifacts:
+    paths:
+      - dist
+
+sast:
+  tags:
+    - shared
+  image: docker:latest
+  variables:
+    DOCKER_DRIVER: overlay2
+  allow_failure: true
+  services:
+    - docker:dind
+  script:
+    - docker run --env SAST_CONFIDENCE_LEVEL=5 --volume "$PWD:/code" --volume /var/run/docker.sock:/var/run/docker.sock "registry.gitlab.com/gitlab-org/security-products/sast:latest" /app/bin/run /code
+  artifacts:
+    paths:
+      - gl-sast-report.json
diff --git a/Chassis.py b/Chassis.py
new file mode 100644
index 0000000..898f074
--- /dev/null
+++ b/Chassis.py
@@ -0,0 +1,371 @@
+__all__ = ("Chassis", "MissingColumnsExplainer", "resampleDataFrame")
+
+
+import typing
+from inspect import signature
+
+import warnings
+from functools import wraps
+
+import numpy as np
+import pandas
+
+
+class RecomputingDict(dict):
+	"""A `dict` subclass that adds a `mutateCallback` method; construction and item assignment are delegated to `dict` unchanged."""
+
+	def mutateCallback(self):
+		"""Does nothing and returns `None`; nothing in this class calls it."""
+
+	def __setitem__(self, *args, **kwargs):
+		# plain passthrough to `dict.__setitem__`
+		super().__setitem__(*args, **kwargs)
+
+
+#_datasetsDb: typing.Optional[typing.Mapping[str, typing.Callable[[], object]]] = None
+_datasetsDb = None
+
+
+def _checkAndInitDatasetsDb() -> None:
+	"""Lazily fills `_datasetsDb` with a dict mapping datasets names to the `sklearn.datasets` functions loading them"""
+	global _datasetsDb  # pylint:disable=global-statement
+	if _datasetsDb is not None:
+		return  # already initialized
+
+	#pylint:disable=import-outside-toplevel
+	import re
+
+	import sklearn.datasets
+
+	# `load_<name>` / `fetch_<name>` -> `<name>`; on a clash the name met later in `dir()` wins
+	loaderNameRx = re.compile("^(load|fetch)_(.+)")
+	matches = ((loaderNameRx.match(name), name) for name in dir(sklearn.datasets))
+	_datasetsDb = {m.group(2): getattr(sklearn.datasets, name) for m, name in matches if m}
+
+
+def resampleDataFrame(pdf: pandas.DataFrame, balancer, columns: typing.Set[str] = None) -> pandas.DataFrame:
+	"""Resamples `pdf` with an imblearn-like `balancer`, once per column of `columns` (default: all), using that column as the target and the rest as features. Returns a new dataframe with the original column order."""
+	origCols = list(pdf.columns)
+	if columns is None:
+		columns = origCols
+	resample = getattr(balancer, "fit_resample", None) or balancer.fit_sample  # newer imblearn only has `fit_resample`
+	for cn in columns:
+		x = pdf.loc[:, [c for c in origCols if c != cn]]  # a list, not a set: keeps the feature order stable
+		y = pdf.loc[:, cn]
+		x1, y1 = resample(x, y)
+		x1 = pandas.DataFrame(x1, columns=x.columns)
+		y1 = pandas.Series(y1, name=y.name)
+		pdf = pandas.concat([x1, y1], axis=1)
+	return pdf.reindex(origCols, axis=1)  # `reindex` is not in-place: its result has to be returned
+
+
+class MissingColumnsExplainer:
+	"""If a column is missing in the covariates matrix (the one with encoded categoricals and without `stop` columns), tries to find an explanation and fix it"""
+
+	__slots__ = ("parent", "categorical", "unexplained")
+
+	def __init__(self, parent, missingColumns):
+		"""`parent` is the object whose `groups["categorical"]` names the categorical columns; `missingColumns` are the names absent from the matrix."""
+		self.parent = parent  # the slot was declared but never filled, so reading it raised AttributeError
+		self.unexplained = set(missingColumns)
+		self.categorical = []
+		for mcn in missingColumns:
+			# one-hot columns are named `<column>_<value>`; requiring the separator keeps `count` from matching a categorical `c`
+			if any(mcn.startswith(ccn + "_") for ccn in parent.groups["categorical"]):
+				self.unexplained.discard(mcn)
+				self.categorical.append(mcn)
+
+	def fix(self, dmat, raiseUnexplained=True, warnFixable=False):
+		"""Adds the explainable columns to `dmat` in place, filled with 0.0. `warnFixable` only controls the warning. Raises ValueError for unexplained columns when `raiseUnexplained` is set."""
+		if self.categorical and warnFixable:
+			warnings.warn("the design matrix resulted from the dataset has no " + repr(self.categorical) + " columns, but they are present in the model. The cause are likely categorical variables values one-hot encoded missing in your partition of the dataset, so creating their columns with 0.-filled")
+		for mcn in self.categorical:
+			dmat.loc[:, mcn] = 0.0
+
+		if self.unexplained and raiseUnexplained:
+			raise ValueError("Columns `" + repr(self.unexplained) + "` are missing from the design matrix and we cannot explain this.")
+
+
+StrOrStrIter = typing.Union[str, typing.Iterable[str]]
+
+
+def allowAcceptMultipleColumns(fOrSeriesClass=None, *, seriesClass: type = None):
+	"""Decorator turning a method `f(self, cn: str, ...)` working on one column into one accepting either a column name or an iterable of names (the results are then concatenated column-wise). Usable bare, with a class as the positional argument, or with `seriesClass=`; a single-name result is converted with that class."""
+	if isinstance(fOrSeriesClass, type):
+		seriesClass = fOrSeriesClass
+		f = None
+	elif fOrSeriesClass is None:
+		f = None  # `@allowAcceptMultipleColumns(seriesClass=...)`: act as a decorator factory instead of raising
+	elif callable(fOrSeriesClass):
+		f = fOrSeriesClass
+	else:
+		raise ValueError("Expected a function or a class, got " + repr(fOrSeriesClass))
+
+	def _allowAcceptMultipleColumns(f):
+		s = signature(f)
+		firstParam = list(s.parameters.values())[1]  # [0] is `self`
+		assert firstParam.name == "cn", firstParam.name
+		assert firstParam.annotation == str, firstParam.annotation
+		assert s.return_annotation == np.ndarray or s.return_annotation == pandas.Series or s.return_annotation == pandas.DataFrame, s.return_annotation
+
+		if seriesClass is None or seriesClass is s.return_annotation:
+			singleSeriesCall = f
+			seriesClass_ = s.return_annotation
+		else:
+			seriesClass_ = seriesClass
+
+			def singleSeriesCall(self, cn: str, *args, **kwargs) -> seriesClass_:
+				return seriesClass(f(self, cn, *args, **kwargs))
+
+		@wraps(f)
+		def modifiedF(self, cns: StrOrStrIter, *args, **kwargs) -> typing.Union[pandas.DataFrame, seriesClass_]:
+			if isinstance(cns, str):
+				return singleSeriesCall(self, cns, *args, **kwargs)
+
+			return pandas.concat((f(self, cn, *args, **kwargs) for cn in cns), axis=1)
+
+		modifiedF.__annotations__["cn"] = StrOrStrIter  # `wraps` has already copied `__name__`
+		return modifiedF
+
+	if f is None:
+		return _allowAcceptMultipleColumns
+	return _allowAcceptMultipleColumns(f)
+
+
+class Chassis:
+	"""Patsy is shit. This class prepares data, and it's more predictable than patsy"""
+
+	__slots__ = ("columns", "groups", "features", "stop", "weights", "catIndex", "catRemap", "pds", "dontWarnAboutMissingStopColumns")
+
+	#columns: typing.Set[str]
+	groupsTypes = {gn: gn for gn in ("categorical", "numerical", "stop", "weight", "binary")}
+
+	def __init__(self, spec: typing.Mapping[str, str], dataset: typing.Optional[pandas.DataFrame] = None) -> None:
+		"""Imports `dataset` according to the `spec`
+		`spec` is a dict specifying schema of your data. Its keys are columns names, its values are strings "Categoric", "Numeric", "Binary" and "Stop" (it takes into account only first letters). "Stop" are removed.
+		`dataset` is a `pandas.DataFrame` with your data, or another instance of this class (then its stored matrix is used, and its spec too if `spec` is None)."""
+		if isinstance(dataset, __class__):
+			if spec is None:
+				spec = dataset.features  # `importSpec` stores the spec in `features`; there is no `spec` slot
+			dataset = dataset.pds
+
+		self.dontWarnAboutMissingStopColumns = False
+		self.weights = None
+		self.importSpec(spec)
+		self.importDataset(dataset)
+
+	def _reprContents(self) -> str:
+		return "columns: " + str(len(self.columns)) + ", " + ", ".join(name + ": " + str(len(members)) for name, members in self.groups.items() if members)  # only non-empty groups are listed
+
+	def __repr__(self) -> str:
+		return "".join((self.__class__.__name__, "< ", self._reprContents(), " >"))  # e.g. `Chassis< columns: 2, numerical: 2 >`
+
+	def importSpec(self, spec: typing.Mapping[str, str]) -> None:
+		"""Imports specification dictionary. Only the first letter of each value matters, case-insensitively: "C", "Categoric" and "categorical" are equivalent. Raises KeyError for an unknown type."""
+		self.features = spec
+		self.groups = {gtn: set() for gtn in set(self.__class__.groupsTypes.values())}
+		byInitial = {gtn[0]: gtn for gtn in self.groups}  # "c" -> "categorical", "n" -> "numerical", ...; the initials are unique
+		for k, c in self.features.items():
+			self.groups[byInitial[c[:1].lower()]].add(k)
+		self.columns = set(self.features) - set(self.groups["stop"]) - set(self.groups["weight"])
+	#catIndex: typing.Mapping[str, pandas.Series]
+	#catRemap: typing.Mapping[str, typing.Any]
+
+	def importDataset(self, pds: pandas.DataFrame) -> None:
+		"""Transforms pandas.DataFrame `pds` into internal representation: stop and weight columns are stored separately, categorical columns are one-hot encoded, binary ones are cast to float32, numerical ones are coerced to numbers. `None` resets the stored data."""
+		if pds is None:
+			self.pds = None
+			self.stop = None  # keep the slot readable even without data
+			self.catIndex = {}
+			self.catRemap = {}
+			return
+
+		if hasattr(pds, "infer_objects"):
+			pds = pds.infer_objects()
+
+		presentStopColumns = self.groups["stop"] & set(pds.columns)
+		missingStopColumns = self.groups["stop"] - presentStopColumns
+
+		if missingStopColumns and not self.dontWarnAboutMissingStopColumns:
+			warnings.warn("Following stop columns are missing: " + repr(missingStopColumns) + ". Using only present columns.")
+		# forget the missing stop columns even when the warning is silenced, otherwise `select` asks `self.stop` for them
+		self.columns -= missingStopColumns
+		self.groups["stop"] = presentStopColumns
+
+		self.groups["weight"] = self.groups["weight"] & set(pds.columns)
+		if self.groups["weight"]:
+			assert len(self.groups["weight"]) == 1
+			self.weights = pds.loc[:, list(self.groups["weight"])]
+
+		# stop columns are kept aside; only `self.columns` enter the matrix built below
+		self.stop = pds.loc[:, list(presentStopColumns)]
+
+		pds = pds.loc[:, list(self.columns)]
+
+		colz = [pds[cn].astype("float32") for cn in self.groups["binary"]]
+		catColz = {cn: pds[cn].astype("category") for cn in self.groups["categorical"]}
+		dummiez = {cn: pandas.get_dummies(col, prefix=cn) for cn, col in catColz.items()}
+		# per categorical column: its category values (`catIndex`) and the names of its one-hot columns (`catRemap`)
+		self.catIndex = {cn: col.cat.categories for cn, col in catColz.items()}
+		self.catRemap = {cn: list(col.columns) for cn, col in dummiez.items()}
+		colz.extend(dummiez.values())
+		colz.extend([pandas.to_numeric(pds[c], errors="coerce") for c in self.groups["numerical"]])
+
+		self.pds = pandas.concat(colz, axis=1)
+
+	@allowAcceptMultipleColumns(pandas.DataFrame)
+	def _colsNaEquiv(self, cn: str) -> pandas.Series:
+		"""Returns the stored column used to test `cn` for missing values: the first one-hot column for a categorical `cn`, the column itself otherwise.
+		NOTE(review): this assumes a missing category shows up as NaN in its one-hot columns — confirm, `pandas.get_dummies` encodes NaN as all zeros by default."""
+		# `catRemap` only has entries for the categorical columns
+		if cn in self.catRemap:
+			return self.pds.loc[:, self.catRemap[cn][0]]
+		return self.pds.loc[:, cn]
+
+	def colsNotNA(self, cns: StrOrStrIter) -> pandas.Series:
+		"""Returns a boolean column that is True in the rows where none of the columns `cns` is NA (each column is judged by what `_colsNaEquiv` returns for it)"""
+		return self._colsNaEquiv(cns).notna().all(axis=1, skipna=False)
+
+	def colsIsNA(self, cns: StrOrStrIter) -> pandas.Series:
+		"""Returns a boolean column that is True in the rows where at least one of the columns `cns` is NA (each column is judged by what `_colsNaEquiv` returns for it)"""
+		return self._colsNaEquiv(cns).isna().any(axis=1, skipna=False)
+
+	def prepareCovariates(self, cns: typing.Optional[StrOrStrIter] = (), dmat: typing.Optional[pandas.DataFrame] = None, excludeColumns: typing.Set[str] = None) -> pandas.DataFrame:
+		"""Returns the matrix of covariates left after removing the columns `cns` (for a categorical column - all its one-hot columns) and `excludeColumns` from `dmat` (`self.pds` by default). The columns keep the order they have in `dmat`."""
+		if dmat is None:
+			dmat = self.pds
+
+		if cns is None:
+			cns = ()
+		elif isinstance(cns, str):
+			cns = (cns,)
+
+		dropped = set() if excludeColumns is None else set(excludeColumns)
+		for cn in cns:
+			if cn in self.catRemap:
+				# a categorical column is present only as its one-hot columns
+				dropped.update(self.catRemap[cn])
+			else:
+				dropped.add(cn)
+
+		# filtering `dmat.columns` instead of listing a `set` keeps the column order stable between runs
+		# (the iteration order of a set of strings changes with hash randomization)
+		return dmat.loc[:, [c for c in dmat.columns if c not in dropped]]
+
+	def oneHotToCategory(self, cn: str, oneHot: pandas.DataFrame, index=None) -> pandas.Series:
+		"""Reverses one-hot encoding for the categorical column `cn`: turns the matrix `oneHot` (it must contain ONLY the columns of `cn`, AND in the right order) into a column of type `category`, picking for every row the category with the largest value"""
+		categories = self.catIndex[cn]
+		assert len(categories) == len(self.catRemap[cn])
+		winners = np.argmax(oneHot, axis=1)  # position of the largest entry in every row
+		decoded = pandas.Categorical(categories[winners], categories=categories, ordered=False)  # TODO: NaN = null vec
+		return self.numpyToColumn(cn, decoded, index)
+
+	def numpyToColumn(self, cn: str, data: np.ndarray, index=None) -> pandas.Series:
+		"""Wraps array-like `data` into a `pandas.Series` named `cn`; `index` defaults to the index of the stored matrix."""
+		if index is None:
+			index = self.pds.index
+		# `pandas.Series`, not `pandas.pandas.Series`: the latter is not public API and only resolves through an import side effect inside pandas
+		res = pandas.Series(data, index=index, name=cn)
+		return res
+
+	def decodeCategory(self, cn: str, dmat: typing.Optional[pandas.DataFrame] = None) -> pandas.Series:
+		"""Returns the original (like in the initial pandas.DataFrame) representation of the categorical column `cn`, decoded from its one-hot columns in `dmat` (`self.pds` by default)."""
+		if dmat is None:
+			dmat = self.pds
+
+		oneHot = dmat.loc[:, list(self.catRemap[cn])]
+		# the result must carry the index of `dmat`: a matrix passed by the caller may have other rows than `self.pds`,
+		# and without an explicit index `oneHotToCategory` falls back to the one of `self.pds`
+		return self.oneHotToCategory(cn, np.array(oneHot), index=dmat.index)
+
+	@allowAcceptMultipleColumns
+	def prepareResults(self, cn: str, dmat: typing.Optional[pandas.DataFrame] = None) -> pandas.Series:
+		"""Returns the column `cn` of `dmat` (`self.pds` by default) in its original form: categorical columns are decoded from one-hot, the rest are returned as stored"""
+		source = self.pds if dmat is None else dmat
+
+		if cn not in self.catRemap:
+			return source.loc[:, cn]
+
+		return self.decodeCategory(cn, source)
+
+	def select(self, decodeCategories: bool = True, columns: typing.Optional[typing.Set[str]] = None):
+		"""Builds a matrix addressed by the original column names, not by the transformed ones.
+		decodeCategories: if True, one-hot encoded columns are transformed back to the original ones, otherwise they are copied as stored
+		columns: the subset of original columns to select. If it is None, all the original columns (stop ones included) are selected."""
+		if columns is None:
+			columns = self.columns | self.groups["stop"]
+
+		# binary columns first, then numerical ones, then the stop columns and the weights (if any)
+		plainNames = list(self.groups["binary"] & columns) + list(self.groups["numerical"] & columns)
+		parts = [self.pds.loc[:, cn] for cn in plainNames]
+		parts.append(self.stop[list(columns & self.groups["stop"])])
+		if self.weights is not None:
+			parts.append(self.weights)
+		res = pandas.concat(parts, axis=1)
+		for cn in set(self.catRemap) & columns:
+			if decodeCategories:
+				res[cn] = self.decodeCategory(cn)
+			else:
+				for vcn in self.catRemap[cn]:
+					res[vcn] = self.pds.loc[:, vcn]
+		return res
+
+	def reduceCategoricalCols(self, dmat: typing.Optional[pandas.DataFrame], columns: typing.Optional[typing.Set[str]] = None):
+		"""Sums the one-hot columns of every categorical column of `columns` (all of them by default) into a single column named after it; the other known columns present in `dmat` are passed through. In future may use other functions. Useful for combining additive values like SHAP scores."""
+		if dmat is None:
+			dmat = self.pds  # same default as in the sibling methods; `None` used to fail on `dmat.columns`
+		if columns is None:
+			columns = set(self.catRemap)
+
+		availCols = set(dmat.columns)
+		resCols = [dmat.loc[:, list((self.columns - columns) & availCols)]]
+		for cn in columns:
+			# only the one-hot columns actually present in `dmat` take part in the sum
+			colz = dmat.loc[:, list(set(self.catRemap[cn]) & availCols)]
+			resC = colz[colz.notna()].sum(axis=1)
+			resC.name = cn
+			resCols.append(resC)
+		return pandas.concat(resCols, axis=1)
+
+	def reverse(self, columns: typing.Optional[typing.Set[str]] = None):
+		"""Encodes design matrix back into initial representation, can return subset of the original columns. Shortcut for `select` with `decodeCategories=True`."""
+		return self.select(decodeCategories=True, columns=columns)
+
+	@staticmethod
+	def specFromPandas(ds: pandas.DataFrame) -> typing.Mapping[str, str]:
+		"""Tries to reverse-engineer spec from data.
+		Boolean columns and numeric columns whose non-missing values are exactly {0, 1} become "binary", other numeric columns "numerical",
+		object columns holding only strings (missing values aside) "categorical", everything else "stop"."""
+		spec = {}
+		for cn in ds.columns:
+			v = ds.loc[:, cn]
+			kind = v.dtype.kind
+			rT = "stop"  # fallback for everything not recognized below
+			if kind == "b":
+				rT = "binary"
+			elif kind in ("f", "i", "u"):
+				# compare the whole set of values: checking only that 0 and 1 occur would also call [0, 1, 2] binary,
+				# and peeking at `v[0]` is a label lookup that fails for a frame without the label 0
+				present = set(v.dropna().unique())
+				if present == {0, 1}:
+					rT = "binary"
+				else:
+					rT = "numerical"
+			elif kind == "O":
+				# missing values (None / NaN) do not count as a type of their own
+				types = set(v.dropna().map(type))
+				if len(types) == 1 and issubclass(next(iter(types)), str):
+					rT = "categorical"
+			spec[cn] = rT
+		return spec
+
+	@classmethod
+	def fromSKLearnDataset(cls, dataset: typing.Union[str, "sklearn.utils.Bunch"], targetName: str = "target", *args, **kwargs):
+		"""Converts an sklearn dataset (or the name of one, which is then loaded through `_datasetsDb`) into an instance of this class; the spec is guessed with `specFromPandas`, the target becomes the column `targetName`"""
+		if isinstance(dataset, str):
+			_checkAndInitDatasetsDb()
+			dataset = _datasetsDb[dataset]()
+		target = pandas.Series(dataset.target, name=targetName)
+		features = pandas.DataFrame(dataset.data, columns=dataset.feature_names)
+		ds = pandas.concat([target, features], axis=1).infer_objects()
+		return cls(cls.specFromPandas(ds), ds, *args, **kwargs)
diff --git a/Code_Of_Conduct.md b/Code_Of_Conduct.md
new file mode 100644
index 0000000..2b781c7
--- /dev/null
+++ b/Code_Of_Conduct.md
@@ -0,0 +1 @@
+No codes of conduct!
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..20f0fa8
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,4 @@
+include UNLICENSE
+include *.md
+include tests
+include .editorconfig
diff --git a/ReadMe.md b/ReadMe.md
new file mode 100644
index 0000000..c86cbb5
--- /dev/null
+++ b/ReadMe.md
@@ -0,0 +1,19 @@
+Chassis.py [![Unlicensed work](https://raw.githubusercontent.com/unlicense/unlicense.org/master/static/favicon.png)](https://unlicense.org/)
+===============
+~~![GitLab Build Status](https://gitlab.com/KOLANICH1/Chassis.py/badges/master/pipeline.svg)~~
+~~![GitLab Coverage](https://gitlab.com/KOLANICH1/Chassis.py/badges/master/coverage.svg)~~
+[![Libraries.io Status](https://img.shields.io/librariesio/github/KOLANICH/Chassis.py.svg)](https://libraries.io/github/KOLANICH/Chassis.py)
+[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://codeberg.org/KOLANICH-tools/antiflash.py) 
+
+This is the library to transform a `pandas.DataFrame` into another `DataFrame` suitable for machine learning. It's my own reinvention of a ~~wheel~~ ![PyPI Status](https://img.shields.io/pypi/status/formulaic.svg)[![Build](https://img.shields.io/github/actions/workflow/status/matthewwardrop/formulaic/tests.yml?branch=main)](https://github.com/matthewwardrop/formulaic/actions?query=workflow%3A%22Run+Tox+Tests%22)[![docs](https://img.shields.io/github/actions/workflow/status/matthewwardrop/formulaic/publish_docs.yml?label=docs)](https://matthewwardrop.github.io/formulaic/)[![codecov](https://codecov.io/gh/matthewwardrop/formulaic/branch/main/graph/badge.svg)](https://codecov.io/gh/matthewwardrop/formulaic)[![Libraries.io Status](https://img.shields.io/librariesio/github/pydata/patsy.svg)](https://libraries.io/github/pydata/patsy), which doesn't fit my needs.
+
+It solves the following drawbacks of patsy:
+* unpredictability
+ * the column names are changed in an unpredictable way depending on the **content** of the dataframe you pass to it. You also cannot retrieve the names and have to write very dirty code. Here you can retrieve columns by names.
+ * The content is often not what you asked for, but what `patsy` decides that we need. For example, it can remove a column if it finds the columns linearly dependent. Such matrices are not suitable for all ML algorithms, and currently there is no way to disable this behavior.
+* lack of automation - I have to do everything myself: construct expression and evaluate it.
+
+Requirements
+------------
+* [`numpy`](https://github.com/numpy/numpy) ![Licence](https://img.shields.io/github/license/numpy/numpy.svg) [![PyPi Status](https://img.shields.io/pypi/v/numpy.svg)](https://pypi.org/project/numpy) [![Build status](https://github.com/numpy/numpy/actions/workflows/linux.yml/badge.svg?branch=main)](https://github.com/numpy/numpy/actions/workflows/linux.yml) [![Libraries.io Status](https://img.shields.io/librariesio/github/numpy/numpy.svg)](https://libraries.io/github/numpy/numpy) 
+* [`pandas`](https://github.com/pandas-dev/pandas) ![Licence](https://img.shields.io/github/license/pandas-dev/pandas.svg) [![PyPi Status](https://img.shields.io/pypi/v/pandas.svg)](https://pypi.python.org/pypi/pandas) [![CI](https://github.com/pandas-dev/pandas/actions/workflows/unit-tests.yml/badge.svg)](https://github.com/pandas-dev/pandas/actions/workflows/unit-tests.yml) [![CodeCov Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master)](https://codecov.io/github/pandas-dev/pandas/) [![Conda Latest Release](https://anaconda.org/conda-forge/pandas/badges/version.svg)](https://anaconda.org/conda-forge/pandas) [![License - BSD 3-Clause](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/main/LICENSE) [![Libraries.io Status](https://img.shields.io/librariesio/github/pandas-dev/pandas.svg)](https://libraries.io/github/pandas-dev/pandas) [![Gitter.im](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
diff --git a/Tutorial.ipynb b/Tutorial.ipynb
new file mode 100644
index 0000000..c2033a2
--- /dev/null
+++ b/Tutorial.ipynb
@@ -0,0 +1,686 @@
+{
+	"cells": 
+	[		
+		{
+			"cell_type": "code",
+			"execution_count": 1,
+			"metadata": 
+			{
+				"collapsed": true
+			},
+			"outputs": [],
+			"source": 
+			[
+				"import pandas\n",
+				"from Chassis import *"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 2,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/html": 
+						[
+							"<div>\n",
+							"<style scoped>\n",
+							" .dataframe tbody tr th:only-of-type {\n",
+							" \tvertical-align: middle;\n",
+							" }\n",
+							"\n",
+							" .dataframe tbody tr th {\n",
+							" \tvertical-align: top;\n",
+							" }\n",
+							"\n",
+							" .dataframe thead th {\n",
+							" \ttext-align: right;\n",
+							" }\n",
+							"<\/style>\n",
+							"<table border=\"1\" class=\"dataframe\">\n",
+							" <thead>\n",
+							" <tr style=\"text-align: right;\">\n",
+							" <th><\/th>\n",
+							" <th>b<\/th>\n",
+							" <th>c<\/th>\n",
+							" <th>n<\/th>\n",
+							" <th>s<\/th>\n",
+							" <\/tr>\n",
+							" <\/thead>\n",
+							" <tbody>\n",
+							" <tr>\n",
+							" <th>0<\/th>\n",
+							" <td>True<\/td>\n",
+							" <td>A<\/td>\n",
+							" <td>10<\/td>\n",
+							" <td>1<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>1<\/th>\n",
+							" <td>False<\/td>\n",
+							" <td>B<\/td>\n",
+							" <td>11<\/td>\n",
+							" <td>1<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>2<\/th>\n",
+							" <td>0.5<\/td>\n",
+							" <td>C<\/td>\n",
+							" <td>20<\/td>\n",
+							" <td>1<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>3<\/th>\n",
+							" <td>0.3<\/td>\n",
+							" <td>A<\/td>\n",
+							" <td>42<\/td>\n",
+							" <td>1<\/td>\n",
+							" <\/tr>\n",
+							" <\/tbody>\n",
+							"<\/table>\n",
+							"<\/div>"
+						],
+						"text/plain": 
+						[
+							" b\tc n s\n",
+							"0 True A 10\t1\n",
+							"1 False B 11\t1\n",
+							"2 0.5 C\t20 1\n",
+							"3 0.3 A\t42 1"
+						]
+					},
+					"execution_count": 2,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"ds=pandas.DataFrame.from_records([\n",
+				"\t{\"c\": \"A\", \"n\":10, \"b\": True, \"s\":1},\n",
+				"\t{\"c\": \"B\", \"n\":11, \"b\": False, \"s\":1},\n",
+				"\t{\"c\": \"C\", \"n\":20, \"b\": 0.5, \"s\":1},\n",
+				"\t{\"c\": \"A\", \"n\":42, \"b\": 0.3, \"s\":1},\n",
+				"])\n",
+				"ds"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 3,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/html": 
+						[
+							"<div>\n",
+							"<style scoped>\n",
+							" .dataframe tbody tr th:only-of-type {\n",
+							" \tvertical-align: middle;\n",
+							" }\n",
+							"\n",
+							" .dataframe tbody tr th {\n",
+							" \tvertical-align: top;\n",
+							" }\n",
+							"\n",
+							" .dataframe thead th {\n",
+							" \ttext-align: right;\n",
+							" }\n",
+							"<\/style>\n",
+							"<table border=\"1\" class=\"dataframe\">\n",
+							" <thead>\n",
+							" <tr style=\"text-align: right;\">\n",
+							" <th><\/th>\n",
+							" <th>b<\/th>\n",
+							" <th>c_A<\/th>\n",
+							" <th>c_B<\/th>\n",
+							" <th>c_C<\/th>\n",
+							" <th>n<\/th>\n",
+							" <\/tr>\n",
+							" <\/thead>\n",
+							" <tbody>\n",
+							" <tr>\n",
+							" <th>0<\/th>\n",
+							" <td>1.0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>10<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>1<\/th>\n",
+							" <td>0.0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>11<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>2<\/th>\n",
+							" <td>0.5<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>20<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>3<\/th>\n",
+							" <td>0.3<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>42<\/td>\n",
+							" <\/tr>\n",
+							" <\/tbody>\n",
+							"<\/table>\n",
+							"<\/div>"
+						],
+						"text/plain": 
+						[
+							" b c_A\tc_B c_C n\n",
+							"0 1.0 1\t 0 0 10\n",
+							"1 0.0 0\t 1 0 11\n",
+							"2 0.5 0\t 0 1 20\n",
+							"3 0.3 1\t 0 0 42"
+						]
+					},
+					"execution_count": 3,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"spec={\n",
+				"\t\"c\": \"C\", #categoric\n",
+				"\t\"n\": \"N\", #numerical\n",
+				"\t\"b\": \"B\", #binary\n",
+				"\t\"s\": \"S\", #STOP, this won't appear in the dmat\n",
+				"}\n",
+				"chs=Chassis(spec, ds)\n",
+				"chs.pds"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 4,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/html": 
+						[
+							"<div>\n",
+							"<style scoped>\n",
+							" .dataframe tbody tr th:only-of-type {\n",
+							" \tvertical-align: middle;\n",
+							" }\n",
+							"\n",
+							" .dataframe tbody tr th {\n",
+							" \tvertical-align: top;\n",
+							" }\n",
+							"\n",
+							" .dataframe thead th {\n",
+							" \ttext-align: right;\n",
+							" }\n",
+							"<\/style>\n",
+							"<table border=\"1\" class=\"dataframe\">\n",
+							" <thead>\n",
+							" <tr style=\"text-align: right;\">\n",
+							" <th><\/th>\n",
+							" <th>n<\/th>\n",
+							" <th>c_C<\/th>\n",
+							" <th>c_A<\/th>\n",
+							" <th>c_B<\/th>\n",
+							" <\/tr>\n",
+							" <\/thead>\n",
+							" <tbody>\n",
+							" <tr>\n",
+							" <th>0<\/th>\n",
+							" <td>10<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>1<\/th>\n",
+							" <td>11<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>2<\/th>\n",
+							" <td>20<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>3<\/th>\n",
+							" <td>42<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <\/tr>\n",
+							" <\/tbody>\n",
+							"<\/table>\n",
+							"<\/div>"
+						],
+						"text/plain": 
+						[
+							" n c_C\tc_A c_B\n",
+							"0 10 0\t 1 0\n",
+							"1 11 0\t 0 1\n",
+							"2 20 1\t 0 0\n",
+							"3 42 0\t 1 0"
+						]
+					},
+					"execution_count": 4,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.prepareCovariates(\"b\")"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 5,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/plain": 
+						[
+							"0 1.0\n",
+							"1 0.0\n",
+							"2 0.5\n",
+							"3 0.3\n",
+							"Name: b, dtype: float32"
+						]
+					},
+					"execution_count": 5,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.prepareResults(\"b\")"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 6,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/html": 
+						[
+							"<div>\n",
+							"<style scoped>\n",
+							" .dataframe tbody tr th:only-of-type {\n",
+							" \tvertical-align: middle;\n",
+							" }\n",
+							"\n",
+							" .dataframe tbody tr th {\n",
+							" \tvertical-align: top;\n",
+							" }\n",
+							"\n",
+							" .dataframe thead th {\n",
+							" \ttext-align: right;\n",
+							" }\n",
+							"<\/style>\n",
+							"<table border=\"1\" class=\"dataframe\">\n",
+							" <thead>\n",
+							" <tr style=\"text-align: right;\">\n",
+							" <th><\/th>\n",
+							" <th>n<\/th>\n",
+							" <th>b<\/th>\n",
+							" <\/tr>\n",
+							" <\/thead>\n",
+							" <tbody>\n",
+							" <tr>\n",
+							" <th>0<\/th>\n",
+							" <td>10<\/td>\n",
+							" <td>1.0<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>1<\/th>\n",
+							" <td>11<\/td>\n",
+							" <td>0.0<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>2<\/th>\n",
+							" <td>20<\/td>\n",
+							" <td>0.5<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>3<\/th>\n",
+							" <td>42<\/td>\n",
+							" <td>0.3<\/td>\n",
+							" <\/tr>\n",
+							" <\/tbody>\n",
+							"<\/table>\n",
+							"<\/div>"
+						],
+						"text/plain": 
+						[
+							" n \tb\n",
+							"0 10 1.0\n",
+							"1 11 0.0\n",
+							"2 20 0.5\n",
+							"3 42 0.3"
+						]
+					},
+					"execution_count": 6,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.prepareCovariates(\"c\")"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 7,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/plain": 
+						[
+							"0 A\n",
+							"1 B\n",
+							"2 C\n",
+							"3 A\n",
+							"dtype: category\n",
+							"Categories (3, object): [A, B, C]"
+						]
+					},
+					"execution_count": 7,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.prepareResult(\"c\")"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 8,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/html": 
+						[
+							"<div>\n",
+							"<style scoped>\n",
+							" .dataframe tbody tr th:only-of-type {\n",
+							" \tvertical-align: middle;\n",
+							" }\n",
+							"\n",
+							" .dataframe tbody tr th {\n",
+							" \tvertical-align: top;\n",
+							" }\n",
+							"\n",
+							" .dataframe thead th {\n",
+							" \ttext-align: right;\n",
+							" }\n",
+							"<\/style>\n",
+							"<table border=\"1\" class=\"dataframe\">\n",
+							" <thead>\n",
+							" <tr style=\"text-align: right;\">\n",
+							" <th><\/th>\n",
+							" <th>c_C<\/th>\n",
+							" <th>c_A<\/th>\n",
+							" <th>b<\/th>\n",
+							" <th>c_B<\/th>\n",
+							" <\/tr>\n",
+							" <\/thead>\n",
+							" <tbody>\n",
+							" <tr>\n",
+							" <th>0<\/th>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>1.0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>1<\/th>\n",
+							" <td>0<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0.0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>2<\/th>\n",
+							" <td>1<\/td>\n",
+							" <td>0<\/td>\n",
+							" <td>0.5<\/td>\n",
+							" <td>0<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>3<\/th>\n",
+							" <td>0<\/td>\n",
+							" <td>1<\/td>\n",
+							" <td>0.3<\/td>\n",
+							" <td>0<\/td>\n",
+							" <\/tr>\n",
+							" <\/tbody>\n",
+							"<\/table>\n",
+							"<\/div>"
+						],
+						"text/plain": 
+						[
+							" c_C c_A \tb c_B\n",
+							"0 0 \t1 1.0 0\n",
+							"1 0 \t0 0.0 1\n",
+							"2 1 \t0 0.5 0\n",
+							"3 0 \t1 0.3 0"
+						]
+					},
+					"execution_count": 8,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.prepareCovariates(\"n\")"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 9,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/plain": 
+						[
+							"0 10\n",
+							"1 11\n",
+							"2 20\n",
+							"3 42\n",
+							"Name: n, dtype: int64"
+						]
+					},
+					"execution_count": 9,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.prepareResult(\"n\")"
+			]
+		},		
+		{
+			"cell_type": "code",
+			"execution_count": 10,
+			"metadata": 
+			{
+				"collapsed": false
+			},
+			"outputs": 
+			[				
+				{
+					"data": 
+					{
+						"text/html": 
+						[
+							"<div>\n",
+							"<style scoped>\n",
+							" .dataframe tbody tr th:only-of-type {\n",
+							" \tvertical-align: middle;\n",
+							" }\n",
+							"\n",
+							" .dataframe tbody tr th {\n",
+							" \tvertical-align: top;\n",
+							" }\n",
+							"\n",
+							" .dataframe thead th {\n",
+							" \ttext-align: right;\n",
+							" }\n",
+							"<\/style>\n",
+							"<table border=\"1\" class=\"dataframe\">\n",
+							" <thead>\n",
+							" <tr style=\"text-align: right;\">\n",
+							" <th><\/th>\n",
+							" <th>b<\/th>\n",
+							" <th>n<\/th>\n",
+							" <th>c<\/th>\n",
+							" <\/tr>\n",
+							" <\/thead>\n",
+							" <tbody>\n",
+							" <tr>\n",
+							" <th>0<\/th>\n",
+							" <td>1.0<\/td>\n",
+							" <td>10<\/td>\n",
+							" <td>A<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>1<\/th>\n",
+							" <td>0.0<\/td>\n",
+							" <td>11<\/td>\n",
+							" <td>B<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>2<\/th>\n",
+							" <td>0.5<\/td>\n",
+							" <td>20<\/td>\n",
+							" <td>C<\/td>\n",
+							" <\/tr>\n",
+							" <tr>\n",
+							" <th>3<\/th>\n",
+							" <td>0.3<\/td>\n",
+							" <td>42<\/td>\n",
+							" <td>A<\/td>\n",
+							" <\/tr>\n",
+							" <\/tbody>\n",
+							"<\/table>\n",
+							"<\/div>"
+						],
+						"text/plain": 
+						[
+							" b n\tc\n",
+							"0 1.0 10 A\n",
+							"1 0.0 11 B\n",
+							"2 0.5 20 C\n",
+							"3 0.3 42 A"
+						]
+					},
+					"execution_count": 10,
+					"metadata": {},
+					"output_type": "execute_result"
+				}
+			],
+			"source": 
+			[
+				"chs.reverse()"
+			]
+		}
+	],
+	"metadata": 
+	{
+		"kernelspec": 
+		{
+			"display_name": "Python 3",
+			"language": "python",
+			"name": "python3"
+		},
+		"language_info": 
+		{
+			"codemirror_mode": 
+			{
+				"name": "ipython",
+				"version": 3
+			},
+			"file_extension": ".py",
+			"mimetype": "text/x-python",
+			"name": "python",
+			"nbconvert_exporter": "python",
+			"pygments_lexer": "ipython3",
+			"version": "3.6.1"
+		},
+		"widgets": 
+		{
+			"state": {},
+			"version": "1.1.2"
+		}
+	},
+	"nbformat": 4,
+	"nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/UNLICENSE b/UNLICENSE
new file mode 100644
index 0000000..efb9808
--- /dev/null
+++ b/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org/>
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b3db89b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,34 @@
+[build-system]
+requires = ["setuptools>=61.2.0", "wheel", "setuptools_scm[toml]>=3.4.3"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "Chassis"
+authors = [{name = "KOLANICH"}]
+description = "A replacement for patsy better suitable for fully automated use"
+readme = "ReadMe.md"
+keywords = ["patsy", "data science", "machine learning", "design matrix"]
+license = {text = "Unlicense"}
+classifiers = [
+	"Programming Language :: Python",
+	"Programming Language :: Python :: 3",
+	"Development Status :: 4 - Beta",
+	"Environment :: Other Environment",
+	"Intended Audience :: Developers",
+	"License :: Public Domain",
+	"Operating System :: OS Independent",
+	"Topic :: Software Development :: Libraries :: Python Modules",
+]
+urls = {Homepage = "https://codeberg.org/KOLANICH-ML/Chassis.py"}
+requires-python = ">=3.4"
+dependencies = [
+	"numpy",
+	"pandas",
+]
+dynamic = ["version"]
+
+[tool.setuptools]
+zip-safe = true
+py-modules = ["Chassis"]
+
+[tool.setuptools_scm]
diff --git a/tests/tests.py b/tests/tests.py
new file mode 100644
index 0000000..63a2be8
--- /dev/null
+++ b/tests/tests.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+import sys
+from pathlib import Path
+import unittest
+sys.path.insert(0, str(Path(__file__).parent.absolute()))
+
+from collections import OrderedDict
+dict=OrderedDict
+
+import pandas
+from pandas.testing import assert_frame_equal, assert_series_equal
+from Chassis import Chassis
+
+class SimpleTests(unittest.TestCase):
+	def setUp(self):
+		ds=pandas.DataFrame.from_records([
+			{"c": "A", "n":10, "b": True, "s":1, "w": 1},
+			{"c": "B", "n":11, "b": False, "s":1, "w": 10},
+			{"c": "C", "n":20, "b": 0.5, "s":1, "w": 5},
+			{"c": "A", "n":42, "b": 0.3, "s":1, "w": 6},
+		])
+		if hasattr(ds, "to_dense"):
+			ds = ds.to_dense()
+		ds.loc[:,"c"]=pandas.Series(pandas.Categorical( ds.loc[:,"c"]))
+		
+		self.schema={
+			"c": "categorical",
+			"n": "numerical",
+			"b": "binary",
+			"w": "weight",
+			"s": "stop", #this won't appear in the dmat
+		}
+		self.chs=Chassis(self.schema, ds)
+		b=ds.loc[:,"b"]
+		n=ds.loc[:,"n"]
+		abc=pandas.concat([ds.loc[:,"c"]=="A", ds.loc[:,"c"]=="B", ds.loc[:,"c"]=="C"], axis=1, ).astype(int)
+		abc.columns=("c_A", "c_B", "c_C")
+		
+		self.expected={
+			'c' : (ds.loc[:,["b", "n"]], ds["c"]),
+			'n' : (pandas.concat([abc, b], axis=1), n),
+			'b' : (pandas.concat([abc, n], axis=1), b)
+		}
+		self.ds=ds
+	
+	def shouldColumnBePresent(self, cn):
+		return self.schema[cn] not in {"stop", "weight"}
+	
+	def checkCovariates(self, cn):
+		if self.shouldColumnBePresent(cn):
+			assert_frame_equal(self.chs.prepareCovariates(cn), self.expected[cn][0], check_like=True, check_dtype=False)
+	
+	def checkResult(self, cn):
+		if self.shouldColumnBePresent(cn):
+			assert_series_equal(self.chs.prepareResults(cn), self.expected[cn][1], check_dtype=False, check_categorical=False)
+		else:
+			with self.assertRaises(Exception):
+				self.chs.prepareResults(cn)
+	
+	def testCovariates(self):
+		for cn in self.schema:
+			self.checkCovariates(cn)
+	
+	def testResult(self):
+		for cn in self.schema:
+			self.checkResult(cn)
+
+
+if __name__ == '__main__': # run the suite only when this file is executed as a script
+	unittest.main()