Skip to content

Commit

Permalink
8 enh compare two and only two dataframes by common attributes (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
hadarsharon authored Apr 20, 2024
2 parents f428c14 + e7894b9 commit abbc5e8
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @hadarsharon
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@

<hr>

compars is a simple-yet-comprehensive, bear-agnostic :bear: :panda_face: :koala: DataFrame comparison library, powered by Rust with polars.
DataFrame comparison done right, powered by Rust with polars

(AKA the bear-agnostic :bear: :panda_face: :koala: DataFrame comparison library)
Empty file added compars/__init__.py
Empty file.
Empty file added compars/assertions/__init__.py
Empty file.
Empty file added compars/assertions/dataframe.py
Empty file.
Empty file added compars/dataframe/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions compars/dataframe/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import Iterable, Any

from dask.dataframe.core import DataFrame as DaskDataFrame
from modin.pandas.dataframe import DataFrame as ModinDataFrame
from pandas.core.frame import DataFrame as PandasDataFrame
from polars.dataframe.frame import DataFrame as PolarsDataFrame
from pyspark.sql.dataframe import DataFrame as SparkDataFrame


class DataFrame:
    """Generic (bear-agnostic) DataFrame type.

    Thin wrapper around a single backend DataFrame (polars, pandas, Spark,
    Dask or modin) that exposes a small, backend-independent interface.
    """

    def __init__(self, df: PolarsDataFrame | PandasDataFrame | SparkDataFrame | DaskDataFrame | ModinDataFrame):
        """Wrap ``df`` without copying or converting it."""
        self._df = df

    @property
    def df(self) -> PolarsDataFrame | PandasDataFrame | SparkDataFrame | DaskDataFrame | ModinDataFrame:
        """Return the wrapped backend DataFrame.

        The methods below read ``self.df``; the constructor only stores
        ``self._df``, so this read-only accessor is what makes them work.
        """
        return self._df

    # TODO: property
    def shape(self) -> tuple[int, int]:
        """Return ``(n_rows, n_columns)`` of the wrapped DataFrame.

        Raises:
            TypeError: if the wrapped object is not one of the supported
                DataFrame types.
        """
        frame = self.df
        if isinstance(frame, (PolarsDataFrame, PandasDataFrame, ModinDataFrame)):
            # Eager backends expose a concrete (rows, columns) tuple directly.
            return frame.shape
        if isinstance(frame, DaskDataFrame):
            # Dask's own ``.shape`` holds a lazy row count; ``len`` yields a
            # concrete int (this evaluates the collection).
            return len(frame), len(frame.columns)
        if isinstance(frame, SparkDataFrame):
            # Spark has no ``.shape``; ``count()`` returns the number of rows
            # (this runs a Spark action).
            return frame.count(), len(frame.columns)
        raise TypeError(f"Unsupported DataFrame type: {type(frame).__name__}")

    # TODO: property
    def columns(self) -> Iterable[Any]:
        """Return the column labels as reported by the wrapped DataFrame."""
        return self.df.columns

    def n_columns(self) -> int:
        """Return the number of columns in the wrapped DataFrame."""
        return len(self.columns())
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "compars"
description = "Bear-agnostic DataFrame comparison library"
version = "0.0.1"
description = "DataFrame comparison done right (AKA the Bear-agnostic DataFrame comparison library)"
version = "0.0.0"
authors = [
{ name = "Hadar Sharon", email = "hadar.sharon94@gmail.com" },
]
Expand Down Expand Up @@ -53,4 +53,4 @@ exclude = [
[tool.ruff.format]
quote-style = "double"
skip-magic-trailing-comma = false
preview = true
preview = true
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
dask==2024.2.1 # support for dask DataFrames
modin[dask]==0.27.0 # support for modin DataFrames
pandas==2.2.1 # support for pandas DataFrames
polars==0.20.10 # support for polars DataFrames
pre-commit==3.6.2 # pre-commit hooks
pyspark==3.5.0 # support for spark DataFrames
pytest==8.0.1 # testing framework
ruff==0.2.2 # formatter/linter

0 comments on commit abbc5e8

Please sign in to comment.