Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions dataframely/columns/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,15 +381,6 @@ def _attributes_match(
if name == "check":
return _compare_checks(lhs, rhs, column_expr)

lhs_is_series = isinstance(lhs, pl.Series)
rhs_is_series = isinstance(rhs, pl.Series)

if lhs_is_series != rhs_is_series:
return False

if lhs_is_series and rhs_is_series:
return _compare_series(lhs, rhs)

return lhs == rhs

# -------------------------------- DUNDER METHODS -------------------------------- #
Expand All @@ -413,10 +404,6 @@ def __str__(self) -> str:
return self.__class__.__name__.lower()


def _compare_series(lhs: pl.Series, rhs: pl.Series) -> bool:
return (len(lhs) == len(rhs)) and lhs.equals(rhs)


def _compare_checks(lhs: Check | None, rhs: Check | None, expr: pl.Expr) -> bool:
match (lhs, rhs):
case (None, None):
Expand Down
12 changes: 4 additions & 8 deletions dataframely/columns/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,8 @@ def __init__(
metadata=metadata,
)
if isclass(categories) and issubclass(categories, enum.Enum):
categories = pl.Series(
values=[getattr(v, "value", v) for v in categories.__members__.values()]
)
elif not isinstance(categories, pl.Series):
categories = pl.Series(values=categories)
self.categories = categories
categories = (item.value for item in categories)
self.categories = list(categories)

@property
def dtype(self) -> pl.DataType:
Expand All @@ -81,7 +77,7 @@ def dtype(self) -> pl.DataType:
def validate_dtype(self, dtype: PolarsDataType) -> bool:
if not isinstance(dtype, pl.Enum):
return False
return self.categories.equals(dtype.categories)
return self.categories == dtype.categories.to_list()

def sqlalchemy_dtype(self, dialect: sa.Dialect) -> sa_TypeEngine:
category_lengths = [len(c) for c in self.categories]
Expand All @@ -102,6 +98,6 @@ def pyarrow_dtype(self) -> pa.DataType:
def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
return generator.sample_choice(
n,
choices=self.categories.to_list(),
choices=self.categories,
null_probability=self._null_probability,
).cast(self.dtype)
11 changes: 11 additions & 0 deletions tests/schema/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,14 @@ def test_repr_with_rules() -> None:
- "my_rule": [(col("a")) < (dyn int: 100)]
- "my_group_rule": [(col("a").sum()) > (dyn int: 50)] grouped by ['a']
""")


def test_repr_enum() -> None:
class SchemaNoRules(dy.Schema):
a = dy.Enum(["a"])

assert repr(SchemaNoRules) == textwrap.dedent("""\
[Schema "SchemaNoRules"]
Columns:
- "a": Enum(categories=['a'], nullable=True)
""")
1 change: 1 addition & 0 deletions tests/schema/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_simple_serialization() -> None:
create_schema("test", {"a": dy.Int64(check=[lambda expr: expr > 5])}),
create_schema("test", {"a": dy.Int64(check={"x": lambda expr: expr > 5})}),
create_schema("test", {"a": dy.Int64(alias="foo")}),
create_schema("test", {"a": dy.Enum(["a"])}),
create_schema("test", {"a": dy.Decimal(scale=2, min=Decimal("1.5"))}),
create_schema("test", {"a": dy.Date(min=dt.date(2020, 1, 1))}),
create_schema("test", {"a": dy.Datetime(min=dt.datetime(2020, 1, 1))}),
Expand Down
Loading