From 5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e Mon Sep 17 00:00:00 2001 From: sotech117 Date: Thu, 31 Jul 2025 17:27:24 -0400 Subject: add code for analysis of data --- .../site-packages/narwhals/_compliant/__init__.py | 84 ++ .../narwhals/_compliant/any_namespace.py | 85 ++ .../site-packages/narwhals/_compliant/dataframe.py | 500 +++++++++ .../site-packages/narwhals/_compliant/expr.py | 1140 ++++++++++++++++++++ .../site-packages/narwhals/_compliant/group_by.py | 233 ++++ .../site-packages/narwhals/_compliant/namespace.py | 194 ++++ .../site-packages/narwhals/_compliant/selectors.py | 332 ++++++ .../site-packages/narwhals/_compliant/series.py | 396 +++++++ .../site-packages/narwhals/_compliant/typing.py | 154 +++ .../site-packages/narwhals/_compliant/when_then.py | 232 ++++ .../site-packages/narwhals/_compliant/window.py | 15 + 11 files changed, 3365 insertions(+) create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/series.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/_compliant/window.py (limited to 'venv/lib/python3.8/site-packages/narwhals/_compliant') diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py new file mode 100644 index 0000000..cebafbd --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from narwhals._compliant.dataframe import ( + CompliantDataFrame, + CompliantLazyFrame, + EagerDataFrame, +) +from narwhals._compliant.expr import CompliantExpr, EagerExpr, LazyExpr +from narwhals._compliant.group_by import ( + CompliantGroupBy, + DepthTrackingGroupBy, + EagerGroupBy, + LazyGroupBy, +) +from narwhals._compliant.namespace import ( + CompliantNamespace, + EagerNamespace, + LazyNamespace, +) +from narwhals._compliant.selectors import ( + CompliantSelector, + CompliantSelectorNamespace, + EagerSelectorNamespace, + LazySelectorNamespace, +) +from narwhals._compliant.series import CompliantSeries, EagerSeries +from narwhals._compliant.typing import ( + CompliantExprT, + CompliantFrameT, + CompliantSeriesOrNativeExprT_co, + CompliantSeriesT, + EagerDataFrameT, + EagerSeriesT, + EvalNames, + EvalSeries, + IntoCompliantExpr, + NativeFrameT_co, + NativeSeriesT_co, +) +from narwhals._compliant.when_then import ( + CompliantThen, + CompliantWhen, + EagerWhen, + LazyThen, + LazyWhen, +) + +__all__ = [ + "CompliantDataFrame", + "CompliantExpr", + "CompliantExprT", + "CompliantFrameT", + "CompliantGroupBy", + "CompliantLazyFrame", + "CompliantNamespace", + "CompliantSelector", + "CompliantSelectorNamespace", + "CompliantSeries", + "CompliantSeriesOrNativeExprT_co", + "CompliantSeriesT", + "CompliantThen", + "CompliantWhen", + 
"DepthTrackingGroupBy", + "EagerDataFrame", + "EagerDataFrameT", + "EagerExpr", + "EagerGroupBy", + "EagerNamespace", + "EagerSelectorNamespace", + "EagerSeries", + "EagerSeriesT", + "EagerWhen", + "EvalNames", + "EvalSeries", + "IntoCompliantExpr", + "LazyExpr", + "LazyGroupBy", + "LazyNamespace", + "LazySelectorNamespace", + "LazyThen", + "LazyWhen", + "NativeFrameT_co", + "NativeSeriesT_co", +] diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py new file mode 100644 index 0000000..3365d25 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py @@ -0,0 +1,85 @@ +"""`Expr` and `Series` namespace accessor protocols.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +from narwhals._utils import CompliantT_co, _StoresCompliant + +if TYPE_CHECKING: + from typing import Callable + + from narwhals.typing import TimeUnit + +__all__ = [ + "CatNamespace", + "DateTimeNamespace", + "ListNamespace", + "NameNamespace", + "StringNamespace", + "StructNamespace", +] + + +class CatNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def get_categories(self) -> CompliantT_co: ... + + +class DateTimeNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def to_string(self, format: str) -> CompliantT_co: ... + def replace_time_zone(self, time_zone: str | None) -> CompliantT_co: ... + def convert_time_zone(self, time_zone: str) -> CompliantT_co: ... + def timestamp(self, time_unit: TimeUnit) -> CompliantT_co: ... + def date(self) -> CompliantT_co: ... + def year(self) -> CompliantT_co: ... + def month(self) -> CompliantT_co: ... + def day(self) -> CompliantT_co: ... + def hour(self) -> CompliantT_co: ... + def minute(self) -> CompliantT_co: ... + def second(self) -> CompliantT_co: ... + def millisecond(self) -> CompliantT_co: ... + def microsecond(self) -> CompliantT_co: ... + def nanosecond(self) -> CompliantT_co: ... + def ordinal_day(self) -> CompliantT_co: ... + def weekday(self) -> CompliantT_co: ... + def total_minutes(self) -> CompliantT_co: ... + def total_seconds(self) -> CompliantT_co: ... + def total_milliseconds(self) -> CompliantT_co: ... + def total_microseconds(self) -> CompliantT_co: ... + def total_nanoseconds(self) -> CompliantT_co: ... + + +class ListNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def len(self) -> CompliantT_co: ... + + +class NameNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def keep(self) -> CompliantT_co: ... + def map(self, function: Callable[[str], str]) -> CompliantT_co: ... + def prefix(self, prefix: str) -> CompliantT_co: ... + def suffix(self, suffix: str) -> CompliantT_co: ... + def to_lowercase(self) -> CompliantT_co: ... + def to_uppercase(self) -> CompliantT_co: ... + + +class StringNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def len_chars(self) -> CompliantT_co: ... + def replace( + self, pattern: str, value: str, *, literal: bool, n: int + ) -> CompliantT_co: ... + def replace_all( + self, pattern: str, value: str, *, literal: bool + ) -> CompliantT_co: ... + def strip_chars(self, characters: str | None) -> CompliantT_co: ... + def starts_with(self, prefix: str) -> CompliantT_co: ... + def ends_with(self, suffix: str) -> CompliantT_co: ... + def contains(self, pattern: str, *, literal: bool) -> CompliantT_co: ... 
+ def slice(self, offset: int, length: int | None) -> CompliantT_co: ... + def split(self, by: str) -> CompliantT_co: ... + def to_datetime(self, format: str | None) -> CompliantT_co: ... + def to_lowercase(self) -> CompliantT_co: ... + def to_uppercase(self) -> CompliantT_co: ... + + +class StructNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): + def field(self, name: str) -> CompliantT_co: ... diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py new file mode 100644 index 0000000..5f21055 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py @@ -0,0 +1,500 @@ +from __future__ import annotations + +from itertools import chain +from typing import ( + TYPE_CHECKING, + Any, + Iterator, + Literal, + Mapping, + Protocol, + Sequence, + Sized, + TypeVar, + overload, +) + +from narwhals._compliant.typing import ( + CompliantDataFrameAny, + CompliantExprT_contra, + CompliantLazyFrameAny, + CompliantSeriesT, + EagerExprT, + EagerSeriesT, + NativeExprT, + NativeFrameT, +) +from narwhals._translate import ( + ArrowConvertible, + DictConvertible, + FromNative, + NumpyConvertible, + ToNarwhals, + ToNarwhalsT_co, +) +from narwhals._typing_compat import deprecated +from narwhals._utils import ( + Version, + _StoresNative, + check_columns_exist, + is_compliant_series, + is_index_selector, + is_range, + is_sequence_like, + is_sized_multi_index_selector, + is_slice_index, + is_slice_none, +) + +if TYPE_CHECKING: + from io import BytesIO + from pathlib import Path + + import pandas as pd + import polars as pl + import pyarrow as pa + from typing_extensions import Self, TypeAlias + + from narwhals._compliant.expr import LazyExpr + from narwhals._compliant.group_by import CompliantGroupBy, DataFrameGroupBy + from narwhals._compliant.namespace import EagerNamespace + from narwhals._compliant.window import WindowInputs + from narwhals._translate import IntoArrowTable + from narwhals._utils import Implementation, _FullContext + from narwhals.dataframe import DataFrame + from narwhals.dtypes import DType + from narwhals.exceptions import ColumnNotFoundError + from narwhals.schema import Schema + from narwhals.typing import ( + AsofJoinStrategy, + JoinStrategy, + LazyUniqueKeepStrategy, + MultiColSelector, + MultiIndexSelector, + PivotAgg, + SingleIndexSelector, + SizedMultiIndexSelector, + SizedMultiNameSelector, + SizeUnit, + UniqueKeepStrategy, + _2DArray, + _SliceIndex, + _SliceName, + ) + + Incomplete: TypeAlias = Any + +__all__ = ["CompliantDataFrame", "CompliantLazyFrame", "EagerDataFrame"] + +T = TypeVar("T") + +_ToDict: TypeAlias = "dict[str, CompliantSeriesT] | dict[str, list[Any]]" # noqa: PYI047 + + +class CompliantDataFrame( + NumpyConvertible["_2DArray", "_2DArray"], + DictConvertible["_ToDict[CompliantSeriesT]", Mapping[str, Any]], + ArrowConvertible["pa.Table", "IntoArrowTable"], + _StoresNative[NativeFrameT], + FromNative[NativeFrameT], + ToNarwhals[ToNarwhalsT_co], + Sized, + Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], +): + _native_frame: NativeFrameT + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + + def __narwhals_dataframe__(self) -> Self: ... + def __narwhals_namespace__(self) -> Any: ... + @classmethod + def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self: ... 
+ @classmethod + def from_dict( + cls, + data: Mapping[str, Any], + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | None, + ) -> Self: ... + @classmethod + def from_native(cls, data: NativeFrameT, /, *, context: _FullContext) -> Self: ... + @classmethod + def from_numpy( + cls, + data: _2DArray, + /, + *, + context: _FullContext, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> Self: ... + + def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray: ... + def __getitem__( + self, + item: tuple[ + SingleIndexSelector | MultiIndexSelector[CompliantSeriesT], + MultiColSelector[CompliantSeriesT], + ], + ) -> Self: ... + def simple_select(self, *column_names: str) -> Self: + """`select` where all args are column names.""" + ... + + def aggregate(self, *exprs: CompliantExprT_contra) -> Self: + """`select` where all args are aggregations or literals. + + (so, no broadcasting is necessary). + """ + # NOTE: Ignore is to avoid an intermittent false positive + return self.select(*exprs) # pyright: ignore[reportArgumentType] + + def _with_version(self, version: Version) -> Self: ... + + @property + def native(self) -> NativeFrameT: + return self._native_frame + + @property + def columns(self) -> Sequence[str]: ... + @property + def schema(self) -> Mapping[str, DType]: ... + @property + def shape(self) -> tuple[int, int]: ... + def clone(self) -> Self: ... + def collect( + self, backend: Implementation | None, **kwargs: Any + ) -> CompliantDataFrameAny: ... + def collect_schema(self) -> Mapping[str, DType]: ... + def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... + def drop_nulls(self, subset: Sequence[str] | None) -> Self: ... + def estimated_size(self, unit: SizeUnit) -> int | float: ... + def explode(self, columns: Sequence[str]) -> Self: ... + def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ... + def gather_every(self, n: int, offset: int) -> Self: ... + def get_column(self, name: str) -> CompliantSeriesT: ... + def group_by( + self, + keys: Sequence[str] | Sequence[CompliantExprT_contra], + *, + drop_null_keys: bool, + ) -> DataFrameGroupBy[Self, Any]: ... + def head(self, n: int) -> Self: ... + def item(self, row: int | None, column: int | str | None) -> Any: ... + def iter_columns(self) -> Iterator[CompliantSeriesT]: ... + def iter_rows( + self, *, named: bool, buffer_size: int + ) -> Iterator[tuple[Any, ...]] | Iterator[Mapping[str, Any]]: ... + def is_unique(self) -> CompliantSeriesT: ... + def join( + self, + other: Self, + *, + how: JoinStrategy, + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, + suffix: str, + ) -> Self: ... + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + by_left: Sequence[str] | None, + by_right: Sequence[str] | None, + strategy: AsofJoinStrategy, + suffix: str, + ) -> Self: ... + def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrameAny: ... + def pivot( + self, + on: Sequence[str], + *, + index: Sequence[str] | None, + values: Sequence[str] | None, + aggregate_function: PivotAgg | None, + sort_columns: bool, + separator: str, + ) -> Self: ... + def rename(self, mapping: Mapping[str, str]) -> Self: ... + def row(self, index: int) -> tuple[Any, ...]: ... + def rows( + self, *, named: bool + ) -> Sequence[tuple[Any, ...]] | Sequence[Mapping[str, Any]]: ... + def sample( + self, + n: int | None, + *, + fraction: float | None, + with_replacement: bool, + seed: int | None, + ) -> Self: ... 
+ def select(self, *exprs: CompliantExprT_contra) -> Self: ... + def sort( + self, *by: str, descending: bool | Sequence[bool], nulls_last: bool + ) -> Self: ... + def tail(self, n: int) -> Self: ... + def to_arrow(self) -> pa.Table: ... + def to_pandas(self) -> pd.DataFrame: ... + def to_polars(self) -> pl.DataFrame: ... + @overload + def to_dict(self, *, as_series: Literal[True]) -> dict[str, CompliantSeriesT]: ... + @overload + def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ... + def to_dict( + self, *, as_series: bool + ) -> dict[str, CompliantSeriesT] | dict[str, list[Any]]: ... + def unique( + self, + subset: Sequence[str] | None, + *, + keep: UniqueKeepStrategy, + maintain_order: bool | None = None, + ) -> Self: ... + def unpivot( + self, + on: Sequence[str] | None, + index: Sequence[str] | None, + variable_name: str, + value_name: str, + ) -> Self: ... + def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ... + def with_row_index(self, name: str) -> Self: ... + @overload + def write_csv(self, file: None) -> str: ... + @overload + def write_csv(self, file: str | Path | BytesIO) -> None: ... + def write_csv(self, file: str | Path | BytesIO | None) -> str | None: ... + def write_parquet(self, file: str | Path | BytesIO) -> None: ... + + def _evaluate_aliases(self, *exprs: CompliantExprT_contra) -> list[str]: + it = (expr._evaluate_aliases(self) for expr in exprs) + return list(chain.from_iterable(it)) + + def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None: + return check_columns_exist(subset, available=self.columns) + + +class CompliantLazyFrame( + _StoresNative[NativeFrameT], + FromNative[NativeFrameT], + ToNarwhals[ToNarwhalsT_co], + Protocol[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co], +): + _native_frame: NativeFrameT + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + + def __narwhals_lazyframe__(self) -> Self: ... + def __narwhals_namespace__(self) -> Any: ... + + @classmethod + def from_native(cls, data: NativeFrameT, /, *, context: _FullContext) -> Self: ... + + def simple_select(self, *column_names: str) -> Self: + """`select` where all args are column names.""" + ... + + def aggregate(self, *exprs: CompliantExprT_contra) -> Self: + """`select` where all args are aggregations or literals. + + (so, no broadcasting is necessary). + """ + ... + + def _with_version(self, version: Version) -> Self: ... + + @property + def native(self) -> NativeFrameT: + return self._native_frame + + @property + def columns(self) -> Sequence[str]: ... + @property + def schema(self) -> Mapping[str, DType]: ... + def _iter_columns(self) -> Iterator[Any]: ... + def collect( + self, backend: Implementation | None, **kwargs: Any + ) -> CompliantDataFrameAny: ... + def collect_schema(self) -> Mapping[str, DType]: ... + def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... + def drop_nulls(self, subset: Sequence[str] | None) -> Self: ... + def explode(self, columns: Sequence[str]) -> Self: ... + def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ... + @deprecated( + "`LazyFrame.gather_every` is deprecated and will be removed in a future version." + ) + def gather_every(self, n: int, offset: int) -> Self: ... + def group_by( + self, + keys: Sequence[str] | Sequence[CompliantExprT_contra], + *, + drop_null_keys: bool, + ) -> CompliantGroupBy[Self, CompliantExprT_contra]: ... + def head(self, n: int) -> Self: ... 
+ def join( + self, + other: Self, + *, + how: Literal["left", "inner", "cross", "anti", "semi"], + left_on: Sequence[str] | None, + right_on: Sequence[str] | None, + suffix: str, + ) -> Self: ... + def join_asof( + self, + other: Self, + *, + left_on: str, + right_on: str, + by_left: Sequence[str] | None, + by_right: Sequence[str] | None, + strategy: AsofJoinStrategy, + suffix: str, + ) -> Self: ... + def rename(self, mapping: Mapping[str, str]) -> Self: ... + def select(self, *exprs: CompliantExprT_contra) -> Self: ... + def sort( + self, *by: str, descending: bool | Sequence[bool], nulls_last: bool + ) -> Self: ... + @deprecated("`LazyFrame.tail` is deprecated and will be removed in a future version.") + def tail(self, n: int) -> Self: ... + def unique( + self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy + ) -> Self: ... + def unpivot( + self, + on: Sequence[str] | None, + index: Sequence[str] | None, + variable_name: str, + value_name: str, + ) -> Self: ... + def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ... + def with_row_index(self, name: str) -> Self: ... + def _evaluate_expr(self, expr: CompliantExprT_contra, /) -> Any: + result = expr(self) + assert len(result) == 1 # debug assertion # noqa: S101 + return result[0] + + def _evaluate_window_expr( + self, + expr: LazyExpr[Self, NativeExprT], + /, + window_inputs: WindowInputs[NativeExprT], + ) -> NativeExprT: + result = expr.window_function(self, window_inputs) + assert len(result) == 1 # debug assertion # noqa: S101 + return result[0] + + def _evaluate_aliases(self, *exprs: CompliantExprT_contra) -> list[str]: + it = (expr._evaluate_aliases(self) for expr in exprs) + return list(chain.from_iterable(it)) + + def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None: + return check_columns_exist(subset, available=self.columns) + + +class EagerDataFrame( + CompliantDataFrame[EagerSeriesT, EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"], + CompliantLazyFrame[EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"], + Protocol[EagerSeriesT, EagerExprT, NativeFrameT], +): + def __narwhals_namespace__( + self, + ) -> EagerNamespace[Self, EagerSeriesT, EagerExprT, NativeFrameT]: ... + + def to_narwhals(self) -> DataFrame[NativeFrameT]: + return self._version.dataframe(self, level="full") + + def _evaluate_expr(self, expr: EagerExprT, /) -> EagerSeriesT: + """Evaluate `expr` and ensure it has a **single** output.""" + result: Sequence[EagerSeriesT] = expr(self) + assert len(result) == 1 # debug assertion # noqa: S101 + return result[0] + + def _evaluate_into_exprs(self, *exprs: EagerExprT) -> Sequence[EagerSeriesT]: + # NOTE: Ignore is to avoid an intermittent false positive + return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) # pyright: ignore[reportArgumentType] + + def _evaluate_into_expr(self, expr: EagerExprT, /) -> Sequence[EagerSeriesT]: + """Return list of raw columns. + + For eager backends we alias operations at each step. + + As a safety precaution, here we can check that the expected result names match those + we were expecting from the various `evaluate_output_names` / `alias_output_names` calls. + + Note that for PySpark / DuckDB, we are less free to liberally set aliases whenever we want. 
+ """ + aliases = expr._evaluate_aliases(self) + result = expr(self) + if list(aliases) != ( + result_aliases := [s.name for s in result] + ): # pragma: no cover + msg = f"Safety assertion failed, expected {aliases}, got {result_aliases}" + raise AssertionError(msg) + return result + + def _extract_comparand(self, other: EagerSeriesT, /) -> Any: + """Extract native Series, broadcasting to `len(self)` if necessary.""" + ... + + @staticmethod + def _numpy_column_names( + data: _2DArray, columns: Sequence[str] | None, / + ) -> list[str]: + return list(columns or (f"column_{x}" for x in range(data.shape[1]))) + + def _gather(self, rows: SizedMultiIndexSelector[Any]) -> Self: ... + def _gather_slice(self, rows: _SliceIndex | range) -> Self: ... + def _select_multi_index(self, columns: SizedMultiIndexSelector[Any]) -> Self: ... + def _select_multi_name(self, columns: SizedMultiNameSelector[Any]) -> Self: ... + def _select_slice_index(self, columns: _SliceIndex | range) -> Self: ... + def _select_slice_name(self, columns: _SliceName) -> Self: ... + def __getitem__( # noqa: C901, PLR0912 + self, + item: tuple[ + SingleIndexSelector | MultiIndexSelector[EagerSeriesT], + MultiColSelector[EagerSeriesT], + ], + ) -> Self: + rows, columns = item + compliant = self + if not is_slice_none(columns): + if isinstance(columns, Sized) and len(columns) == 0: + return compliant.select() + if is_index_selector(columns): + if is_slice_index(columns) or is_range(columns): + compliant = compliant._select_slice_index(columns) + elif is_compliant_series(columns): + compliant = self._select_multi_index(columns.native) + else: + compliant = compliant._select_multi_index(columns) + elif isinstance(columns, slice): + compliant = compliant._select_slice_name(columns) + elif is_compliant_series(columns): + compliant = self._select_multi_name(columns.native) + elif is_sequence_like(columns): + compliant = self._select_multi_name(columns) + else: # pragma: no cover + msg = f"Unreachable code, got unexpected type: {type(columns)}" + raise AssertionError(msg) + + if not is_slice_none(rows): + if isinstance(rows, int): + compliant = compliant._gather([rows]) + elif isinstance(rows, (slice, range)): + compliant = compliant._gather_slice(rows) + elif is_compliant_series(rows): + compliant = compliant._gather(rows.native) + elif is_sized_multi_index_selector(rows): + compliant = compliant._gather(rows) + else: # pragma: no cover + msg = f"Unreachable code, got unexpected type: {type(rows)}" + raise AssertionError(msg) + + return compliant diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py new file mode 100644 index 0000000..965469e --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py @@ -0,0 +1,1140 @@ +from __future__ import annotations + +from functools import partial +from operator import methodcaller +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Generic, + Literal, + Mapping, + Protocol, + Sequence, +) + +from narwhals._compliant.any_namespace import ( + CatNamespace, + DateTimeNamespace, + ListNamespace, + NameNamespace, + StringNamespace, + StructNamespace, +) +from narwhals._compliant.namespace import CompliantNamespace +from narwhals._compliant.typing import ( + AliasName, + AliasNames, + CompliantExprT_co, + CompliantFrameT, + CompliantLazyFrameT, + CompliantSeriesOrNativeExprT_co, + EagerDataFrameT, + EagerExprT, + EagerSeriesT, + LazyExprT, + NativeExprT, +) +from narwhals._typing_compat import 
Protocol38, deprecated +from narwhals._utils import _StoresCompliant, not_implemented +from narwhals.dependencies import get_numpy, is_numpy_array + +if TYPE_CHECKING: + from typing import Mapping + + from typing_extensions import Self, TypeIs + + from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace + from narwhals._compliant.series import CompliantSeries + from narwhals._compliant.typing import ( + AliasNames, + EvalNames, + EvalSeries, + ScalarKwargs, + WindowFunction, + ) + from narwhals._expression_parsing import ExprKind, ExprMetadata + from narwhals._utils import Implementation, Version, _FullContext + from narwhals.typing import ( + FillNullStrategy, + IntoDType, + NonNestedLiteral, + NumericLiteral, + RankMethod, + RollingInterpolationMethod, + TemporalLiteral, + TimeUnit, + ) + +__all__ = ["CompliantExpr", "EagerExpr", "LazyExpr", "NativeExpr"] + + +class NativeExpr(Protocol): + """An `Expr`-like object from a package with [Lazy-only support](https://narwhals-dev.github.io/narwhals/extending/#levels-of-support). + + Protocol members are chosen *purely* for matching statically - as they + are common to all currently supported packages. + """ + + def between(self, *args: Any, **kwds: Any) -> Any: ... + def isin(self, *args: Any, **kwds: Any) -> Any: ... + + +class CompliantExpr(Protocol38[CompliantFrameT, CompliantSeriesOrNativeExprT_co]): + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + _evaluate_output_names: EvalNames[CompliantFrameT] + _alias_output_names: AliasNames | None + _metadata: ExprMetadata | None + + def __call__( + self, df: CompliantFrameT + ) -> Sequence[CompliantSeriesOrNativeExprT_co]: ... + def __narwhals_expr__(self) -> None: ... + def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Self]: ... + @classmethod + def from_column_names( + cls, + evaluate_column_names: EvalNames[CompliantFrameT], + /, + *, + context: _FullContext, + ) -> Self: ... + @classmethod + def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self: ... + @staticmethod + def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[CompliantFrameT]: + def fn(df: CompliantFrameT) -> Sequence[str]: + column_names = df.columns + return [column_names[i] for i in indices] + + return fn + + def is_null(self) -> Self: ... + def abs(self) -> Self: ... + def all(self) -> Self: ... + def any(self) -> Self: ... + def alias(self, name: str) -> Self: ... + def cast(self, dtype: IntoDType) -> Self: ... + def count(self) -> Self: ... + def min(self) -> Self: ... + def max(self) -> Self: ... + def arg_min(self) -> Self: ... + def arg_max(self) -> Self: ... + def arg_true(self) -> Self: ... + def mean(self) -> Self: ... + def sum(self) -> Self: ... + def median(self) -> Self: ... + def skew(self) -> Self: ... + def std(self, *, ddof: int) -> Self: ... + def var(self, *, ddof: int) -> Self: ... + def n_unique(self) -> Self: ... + def null_count(self) -> Self: ... + def drop_nulls(self) -> Self: ... + def fill_null( + self, + value: Self | NonNestedLiteral, + strategy: FillNullStrategy | None, + limit: int | None, + ) -> Self: ... + def diff(self) -> Self: ... + def exp(self) -> Self: ... + def unique(self) -> Self: ... + def len(self) -> Self: ... + def log(self, base: float) -> Self: ... + def round(self, decimals: int) -> Self: ... + def mode(self) -> Self: ... + def head(self, n: int) -> Self: ... + def tail(self, n: int) -> Self: ... + def shift(self, n: int) -> Self: ... 
+ def is_finite(self) -> Self: ... + def is_nan(self) -> Self: ... + def is_unique(self) -> Self: ... + def is_first_distinct(self) -> Self: ... + def is_last_distinct(self) -> Self: ... + def cum_sum(self, *, reverse: bool) -> Self: ... + def cum_count(self, *, reverse: bool) -> Self: ... + def cum_min(self, *, reverse: bool) -> Self: ... + def cum_max(self, *, reverse: bool) -> Self: ... + def cum_prod(self, *, reverse: bool) -> Self: ... + def is_in(self, other: Any) -> Self: ... + def sort(self, *, descending: bool, nulls_last: bool) -> Self: ... + def rank(self, method: RankMethod, *, descending: bool) -> Self: ... + def replace_strict( + self, + old: Sequence[Any] | Mapping[Any, Any], + new: Sequence[Any], + *, + return_dtype: IntoDType | None, + ) -> Self: ... + def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self: ... + def sample( + self, + n: int | None, + *, + fraction: float | None, + with_replacement: bool, + seed: int | None, + ) -> Self: ... + def quantile( + self, quantile: float, interpolation: RollingInterpolationMethod + ) -> Self: ... + def map_batches( + self, + function: Callable[[CompliantSeries[Any]], CompliantExpr[Any, Any]], + return_dtype: IntoDType | None, + ) -> Self: ... + + def clip( + self, + lower_bound: Self | NumericLiteral | TemporalLiteral | None, + upper_bound: Self | NumericLiteral | TemporalLiteral | None, + ) -> Self: ... + + def ewm_mean( + self, + *, + com: float | None, + span: float | None, + half_life: float | None, + alpha: float | None, + adjust: bool, + min_samples: int, + ignore_nulls: bool, + ) -> Self: ... + + def rolling_sum( + self, window_size: int, *, min_samples: int, center: bool + ) -> Self: ... + + def rolling_mean( + self, window_size: int, *, min_samples: int, center: bool + ) -> Self: ... + + def rolling_var( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: ... + + def rolling_std( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: ... + + @deprecated("Since `1.22.0`") + def gather_every(self, n: int, offset: int) -> Self: ... + def __and__(self, other: Any) -> Self: ... + def __or__(self, other: Any) -> Self: ... + def __add__(self, other: Any) -> Self: ... + def __sub__(self, other: Any) -> Self: ... + def __mul__(self, other: Any) -> Self: ... + def __floordiv__(self, other: Any) -> Self: ... + def __truediv__(self, other: Any) -> Self: ... + def __mod__(self, other: Any) -> Self: ... + def __pow__(self, other: Any) -> Self: ... + def __gt__(self, other: Any) -> Self: ... + def __ge__(self, other: Any) -> Self: ... + def __lt__(self, other: Any) -> Self: ... + def __le__(self, other: Any) -> Self: ... + def __invert__(self) -> Self: ... + def broadcast( + self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL] + ) -> Self: ... + def _is_multi_output_unnamed(self) -> bool: + """Return `True` for multi-output aggregations without names. + + For example, column `'a'` only appears in the output as a grouping key: + + df.group_by('a').agg(nw.all().sum()) + + It does not get included in: + + nw.all().sum(). + """ + assert self._metadata is not None # noqa: S101 + return self._metadata.expansion_kind.is_multi_unnamed() + + def _evaluate_aliases( + self: CompliantExpr[CompliantFrameT, Any], frame: CompliantFrameT, / + ) -> Sequence[str]: + names = self._evaluate_output_names(frame) + return alias(names) if (alias := self._alias_output_names) else names + + @property + def str(self) -> Any: ... + @property + def name(self) -> Any: ... 
+ @property + def dt(self) -> Any: ... + @property + def cat(self) -> Any: ... + @property + def list(self) -> Any: ... + @property + def struct(self) -> Any: ... + + +class DepthTrackingExpr( + CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co], + Protocol38[CompliantFrameT, CompliantSeriesOrNativeExprT_co], +): + _depth: int + _function_name: str + + @classmethod + def from_column_names( + cls: type[Self], + evaluate_column_names: EvalNames[CompliantFrameT], + /, + *, + context: _FullContext, + function_name: str = "", + ) -> Self: ... + + def _is_elementary(self) -> bool: + """Check if expr is elementary. + + Examples: + - nw.col('a').mean() # depth 1 + - nw.mean('a') # depth 1 + - nw.len() # depth 0 + + as opposed to, say + + - nw.col('a').filter(nw.col('b')>nw.col('c')).max() + + Elementary expressions are the only ones supported properly in + pandas, PyArrow, and Dask. + """ + return self._depth < 2 + + def __repr__(self) -> str: # pragma: no cover + return f"{type(self).__name__}(depth={self._depth}, function_name={self._function_name})" + + +class EagerExpr( + DepthTrackingExpr[EagerDataFrameT, EagerSeriesT], + Protocol38[EagerDataFrameT, EagerSeriesT], +): + _call: EvalSeries[EagerDataFrameT, EagerSeriesT] + _scalar_kwargs: ScalarKwargs + + def __init__( + self, + call: EvalSeries[EagerDataFrameT, EagerSeriesT], + *, + depth: int, + function_name: str, + evaluate_output_names: EvalNames[EagerDataFrameT], + alias_output_names: AliasNames | None, + implementation: Implementation, + backend_version: tuple[int, ...], + version: Version, + scalar_kwargs: ScalarKwargs | None = None, + ) -> None: ... + + def __call__(self, df: EagerDataFrameT) -> Sequence[EagerSeriesT]: + return self._call(df) + + def __narwhals_namespace__( + self, + ) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Self, Any]: ... + def __narwhals_expr__(self) -> None: ... + + @classmethod + def _from_callable( + cls, + func: EvalSeries[EagerDataFrameT, EagerSeriesT], + *, + depth: int, + function_name: str, + evaluate_output_names: EvalNames[EagerDataFrameT], + alias_output_names: AliasNames | None, + context: _FullContext, + scalar_kwargs: ScalarKwargs | None = None, + ) -> Self: + return cls( + func, + depth=depth, + function_name=function_name, + evaluate_output_names=evaluate_output_names, + alias_output_names=alias_output_names, + implementation=context._implementation, + backend_version=context._backend_version, + version=context._version, + scalar_kwargs=scalar_kwargs, + ) + + @classmethod + def _from_series(cls, series: EagerSeriesT) -> Self: + return cls( + lambda _df: [series], + depth=0, + function_name="series", + evaluate_output_names=lambda _df: [series.name], + alias_output_names=None, + implementation=series._implementation, + backend_version=series._backend_version, + version=series._version, + ) + + def _reuse_series( + self, + method_name: str, + *, + returns_scalar: bool = False, + scalar_kwargs: ScalarKwargs | None = None, + **expressifiable_args: Any, + ) -> Self: + """Reuse Series implementation for expression. + + If Series.foo is already defined, and we'd like Expr.foo to be the same, we can + leverage this method to do that for us. + + Arguments: + method_name: name of method. + returns_scalar: whether the Series version returns a scalar. In this case, + the expression version should return a 1-row Series. + scalar_kwargs: non-expressifiable args which we may need to reuse in `agg` or `over`, + such as `ddof` for `std` and `var`. 
+ expressifiable_args: keyword arguments to pass to function, which may + be expressifiable (e.g. `nw.col('a').is_between(3, nw.col('b')))`). + """ + func = partial( + self._reuse_series_inner, + method_name=method_name, + returns_scalar=returns_scalar, + scalar_kwargs=scalar_kwargs or {}, + expressifiable_args=expressifiable_args, + ) + return self._from_callable( + func, + depth=self._depth + 1, + function_name=f"{self._function_name}->{method_name}", + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + scalar_kwargs=scalar_kwargs, + context=self, + ) + + # For PyArrow.Series, we return Python Scalars (like Polars does) instead of PyArrow Scalars. + # However, when working with expressions, we keep everything PyArrow-native. + def _reuse_series_extra_kwargs( + self, *, returns_scalar: bool = False + ) -> dict[str, Any]: + return {} + + @classmethod + def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]: + return hasattr(obj, "__narwhals_expr__") + + def _reuse_series_inner( + self, + df: EagerDataFrameT, + *, + method_name: str, + returns_scalar: bool, + scalar_kwargs: ScalarKwargs, + expressifiable_args: dict[str, Any], + ) -> Sequence[EagerSeriesT]: + kwargs = { + **scalar_kwargs, + **{ + name: df._evaluate_expr(value) if self._is_expr(value) else value + for name, value in expressifiable_args.items() + }, + } + method = methodcaller( + method_name, + **self._reuse_series_extra_kwargs(returns_scalar=returns_scalar), + **kwargs, + ) + out: Sequence[EagerSeriesT] = [ + series._from_scalar(method(series)) if returns_scalar else method(series) + for series in self(df) + ] + aliases = self._evaluate_aliases(df) + if [s.name for s in out] != list(aliases): # pragma: no cover + msg = ( + f"Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues\n" + f"Expression aliases: {aliases}\n" + f"Series names: {[s.name for s in out]}" + ) + raise AssertionError(msg) + return out + + def _reuse_series_namespace( + self, + series_namespace: Literal["cat", "dt", "list", "name", "str", "struct"], + method_name: str, + **kwargs: Any, + ) -> Self: + """Reuse Series implementation for expression. + + Just like `_reuse_series`, but for e.g. `Expr.dt.foo` instead + of `Expr.foo`. + + Arguments: + series_namespace: The Series namespace. + method_name: name of method, within `series_namespace`. + kwargs: keyword arguments to pass to function. + """ + return self._from_callable( + lambda df: [ + getattr(getattr(series, series_namespace), method_name)(**kwargs) + for series in self(df) + ], + depth=self._depth + 1, + function_name=f"{self._function_name}->{series_namespace}.{method_name}", + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + scalar_kwargs=self._scalar_kwargs, + context=self, + ) + + def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: + # Mark the resulting Series with `_broadcast = True`. + # Then, when extracting native objects, `extract_native` will + # know what to do. 
+ def func(df: EagerDataFrameT) -> list[EagerSeriesT]: + results = [] + for result in self(df): + result._broadcast = True + results.append(result) + return results + + return type(self)( + func, + depth=self._depth, + function_name=self._function_name, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + backend_version=self._backend_version, + implementation=self._implementation, + version=self._version, + scalar_kwargs=self._scalar_kwargs, + ) + + def cast(self, dtype: IntoDType) -> Self: + return self._reuse_series("cast", dtype=dtype) + + def __eq__(self, other: Self | Any) -> Self: # type: ignore[override] + return self._reuse_series("__eq__", other=other) + + def __ne__(self, other: Self | Any) -> Self: # type: ignore[override] + return self._reuse_series("__ne__", other=other) + + def __ge__(self, other: Self | Any) -> Self: + return self._reuse_series("__ge__", other=other) + + def __gt__(self, other: Self | Any) -> Self: + return self._reuse_series("__gt__", other=other) + + def __le__(self, other: Self | Any) -> Self: + return self._reuse_series("__le__", other=other) + + def __lt__(self, other: Self | Any) -> Self: + return self._reuse_series("__lt__", other=other) + + def __and__(self, other: Self | bool | Any) -> Self: + return self._reuse_series("__and__", other=other) + + def __or__(self, other: Self | bool | Any) -> Self: + return self._reuse_series("__or__", other=other) + + def __add__(self, other: Self | Any) -> Self: + return self._reuse_series("__add__", other=other) + + def __sub__(self, other: Self | Any) -> Self: + return self._reuse_series("__sub__", other=other) + + def __rsub__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rsub__", other=other) + + def __mul__(self, other: Self | Any) -> Self: + return self._reuse_series("__mul__", other=other) + + def __truediv__(self, other: Self | Any) -> Self: + return self._reuse_series("__truediv__", other=other) + + def __rtruediv__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rtruediv__", other=other) + + def __floordiv__(self, other: Self | Any) -> Self: + return self._reuse_series("__floordiv__", other=other) + + def __rfloordiv__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rfloordiv__", other=other) + + def __pow__(self, other: Self | Any) -> Self: + return self._reuse_series("__pow__", other=other) + + def __rpow__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rpow__", other=other) + + def __mod__(self, other: Self | Any) -> Self: + return self._reuse_series("__mod__", other=other) + + def __rmod__(self, other: Self | Any) -> Self: + return self.alias("literal")._reuse_series("__rmod__", other=other) + + # Unary + def __invert__(self) -> Self: + return self._reuse_series("__invert__") + + # Reductions + def null_count(self) -> Self: + return self._reuse_series("null_count", returns_scalar=True) + + def n_unique(self) -> Self: + return self._reuse_series("n_unique", returns_scalar=True) + + def sum(self) -> Self: + return self._reuse_series("sum", returns_scalar=True) + + def count(self) -> Self: + return self._reuse_series("count", returns_scalar=True) + + def mean(self) -> Self: + return self._reuse_series("mean", returns_scalar=True) + + def median(self) -> Self: + return self._reuse_series("median", returns_scalar=True) + + def std(self, *, ddof: int) -> Self: + return self._reuse_series( + "std", returns_scalar=True, 
scalar_kwargs={"ddof": ddof} + ) + + def var(self, *, ddof: int) -> Self: + return self._reuse_series( + "var", returns_scalar=True, scalar_kwargs={"ddof": ddof} + ) + + def skew(self) -> Self: + return self._reuse_series("skew", returns_scalar=True) + + def any(self) -> Self: + return self._reuse_series("any", returns_scalar=True) + + def all(self) -> Self: + return self._reuse_series("all", returns_scalar=True) + + def max(self) -> Self: + return self._reuse_series("max", returns_scalar=True) + + def min(self) -> Self: + return self._reuse_series("min", returns_scalar=True) + + def arg_min(self) -> Self: + return self._reuse_series("arg_min", returns_scalar=True) + + def arg_max(self) -> Self: + return self._reuse_series("arg_max", returns_scalar=True) + + # Other + + def clip( + self, + lower_bound: Self | NumericLiteral | TemporalLiteral | None, + upper_bound: Self | NumericLiteral | TemporalLiteral | None, + ) -> Self: + return self._reuse_series( + "clip", lower_bound=lower_bound, upper_bound=upper_bound + ) + + def is_null(self) -> Self: + return self._reuse_series("is_null") + + def is_nan(self) -> Self: + return self._reuse_series("is_nan") + + def fill_null( + self, + value: Self | NonNestedLiteral, + strategy: FillNullStrategy | None, + limit: int | None, + ) -> Self: + return self._reuse_series( + "fill_null", value=value, strategy=strategy, limit=limit + ) + + def is_in(self, other: Any) -> Self: + return self._reuse_series("is_in", other=other) + + def arg_true(self) -> Self: + return self._reuse_series("arg_true") + + def filter(self, *predicates: Self) -> Self: + plx = self.__narwhals_namespace__() + predicate = plx.all_horizontal(*predicates) + return self._reuse_series("filter", predicate=predicate) + + def drop_nulls(self) -> Self: + return self._reuse_series("drop_nulls") + + def replace_strict( + self, + old: Sequence[Any] | Mapping[Any, Any], + new: Sequence[Any], + *, + return_dtype: IntoDType | None, + ) -> Self: + return self._reuse_series( + "replace_strict", old=old, new=new, return_dtype=return_dtype + ) + + def sort(self, *, descending: bool, nulls_last: bool) -> Self: + return self._reuse_series("sort", descending=descending, nulls_last=nulls_last) + + def abs(self) -> Self: + return self._reuse_series("abs") + + def unique(self) -> Self: + return self._reuse_series("unique", maintain_order=False) + + def diff(self) -> Self: + return self._reuse_series("diff") + + def sample( + self, + n: int | None, + *, + fraction: float | None, + with_replacement: bool, + seed: int | None, + ) -> Self: + return self._reuse_series( + "sample", n=n, fraction=fraction, with_replacement=with_replacement, seed=seed + ) + + def alias(self, name: str) -> Self: + def alias_output_names(names: Sequence[str]) -> Sequence[str]: + if len(names) != 1: + msg = f"Expected function with single output, found output names: {names}" + raise ValueError(msg) + return [name] + + # Define this one manually, so that we can + # override `output_names` and not increase depth + return type(self)( + lambda df: [series.alias(name) for series in self(df)], + depth=self._depth, + function_name=self._function_name, + evaluate_output_names=self._evaluate_output_names, + alias_output_names=alias_output_names, + backend_version=self._backend_version, + implementation=self._implementation, + version=self._version, + scalar_kwargs=self._scalar_kwargs, + ) + + def is_unique(self) -> Self: + return self._reuse_series("is_unique") + + def is_first_distinct(self) -> Self: + return 
self._reuse_series("is_first_distinct") + + def is_last_distinct(self) -> Self: + return self._reuse_series("is_last_distinct") + + def quantile( + self, quantile: float, interpolation: RollingInterpolationMethod + ) -> Self: + return self._reuse_series( + "quantile", + quantile=quantile, + interpolation=interpolation, + returns_scalar=True, + ) + + def head(self, n: int) -> Self: + return self._reuse_series("head", n=n) + + def tail(self, n: int) -> Self: + return self._reuse_series("tail", n=n) + + def round(self, decimals: int) -> Self: + return self._reuse_series("round", decimals=decimals) + + def len(self) -> Self: + return self._reuse_series("len", returns_scalar=True) + + def gather_every(self, n: int, offset: int) -> Self: + return self._reuse_series("gather_every", n=n, offset=offset) + + def mode(self) -> Self: + return self._reuse_series("mode") + + def is_finite(self) -> Self: + return self._reuse_series("is_finite") + + def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self: + return self._reuse_series( + "rolling_mean", + window_size=window_size, + min_samples=min_samples, + center=center, + ) + + def rolling_std( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: + return self._reuse_series( + "rolling_std", + window_size=window_size, + min_samples=min_samples, + center=center, + ddof=ddof, + ) + + def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self: + return self._reuse_series( + "rolling_sum", window_size=window_size, min_samples=min_samples, center=center + ) + + def rolling_var( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: + return self._reuse_series( + "rolling_var", + window_size=window_size, + min_samples=min_samples, + center=center, + ddof=ddof, + ) + + def map_batches( + self, function: Callable[[Any], Any], return_dtype: IntoDType | None + ) -> Self: + def func(df: EagerDataFrameT) -> Sequence[EagerSeriesT]: + input_series_list = self(df) + output_names = [input_series.name for input_series in input_series_list] + result = [function(series) for series in input_series_list] + if is_numpy_array(result[0]) or ( + (np := get_numpy()) is not None and np.isscalar(result[0]) + ): + from_numpy = partial( + self.__narwhals_namespace__()._series.from_numpy, context=self + ) + result = [ + from_numpy(array).alias(output_name) + for array, output_name in zip(result, output_names) + ] + if return_dtype is not None: + result = [series.cast(return_dtype) for series in result] + return result + + return self._from_callable( + func, + depth=self._depth + 1, + function_name=self._function_name + "->map_batches", + evaluate_output_names=self._evaluate_output_names, + alias_output_names=self._alias_output_names, + context=self, + ) + + @property + def cat(self) -> EagerExprCatNamespace[Self]: + return EagerExprCatNamespace(self) + + @property + def dt(self) -> EagerExprDateTimeNamespace[Self]: + return EagerExprDateTimeNamespace(self) + + @property + def list(self) -> EagerExprListNamespace[Self]: + return EagerExprListNamespace(self) + + @property + def name(self) -> EagerExprNameNamespace[Self]: + return EagerExprNameNamespace(self) + + @property + def str(self) -> EagerExprStringNamespace[Self]: + return EagerExprStringNamespace(self) + + @property + def struct(self) -> EagerExprStructNamespace[Self]: + return EagerExprStructNamespace(self) + + +class LazyExpr( + CompliantExpr[CompliantLazyFrameT, NativeExprT], + Protocol38[CompliantLazyFrameT, 
NativeExprT], +): + arg_min: not_implemented = not_implemented() + arg_max: not_implemented = not_implemented() + arg_true: not_implemented = not_implemented() + head: not_implemented = not_implemented() + tail: not_implemented = not_implemented() + mode: not_implemented = not_implemented() + sort: not_implemented = not_implemented() + sample: not_implemented = not_implemented() + map_batches: not_implemented = not_implemented() + ewm_mean: not_implemented = not_implemented() + gather_every: not_implemented = not_implemented() + replace_strict: not_implemented = not_implemented() + cat: not_implemented = not_implemented() # pyright: ignore[reportAssignmentType] + + @property + def window_function(self) -> WindowFunction[CompliantLazyFrameT, NativeExprT]: ... + + @classmethod + def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]: + return hasattr(obj, "__narwhals_expr__") + + def _with_callable(self, call: Callable[..., Any], /) -> Self: ... + def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: ... + def alias(self, name: str) -> Self: + def fn(names: Sequence[str]) -> Sequence[str]: + if len(names) != 1: + msg = f"Expected function with single output, found output names: {names}" + raise ValueError(msg) + return [name] + + return self._with_alias_output_names(fn) + + @classmethod + def _alias_native(cls, expr: NativeExprT, name: str, /) -> NativeExprT: ... + + @property + def name(self) -> LazyExprNameNamespace[Self]: + return LazyExprNameNamespace(self) + + +class _ExprNamespace( # type: ignore[misc] + _StoresCompliant[CompliantExprT_co], Protocol[CompliantExprT_co] +): + _compliant_expr: CompliantExprT_co + + @property + def compliant(self) -> CompliantExprT_co: + return self._compliant_expr + + +class EagerExprNamespace(_ExprNamespace[EagerExprT], Generic[EagerExprT]): + def __init__(self, expr: EagerExprT, /) -> None: + self._compliant_expr = expr + + +class LazyExprNamespace(_ExprNamespace[LazyExprT], Generic[LazyExprT]): + def __init__(self, expr: LazyExprT, /) -> None: + self._compliant_expr = expr + + +class EagerExprCatNamespace( + EagerExprNamespace[EagerExprT], CatNamespace[EagerExprT], Generic[EagerExprT] +): + def get_categories(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("cat", "get_categories") + + +class EagerExprDateTimeNamespace( + EagerExprNamespace[EagerExprT], DateTimeNamespace[EagerExprT], Generic[EagerExprT] +): + def to_string(self, format: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "to_string", format=format) + + def replace_time_zone(self, time_zone: str | None) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "dt", "replace_time_zone", time_zone=time_zone + ) + + def convert_time_zone(self, time_zone: str) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "dt", "convert_time_zone", time_zone=time_zone + ) + + def timestamp(self, time_unit: TimeUnit) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "dt", "timestamp", time_unit=time_unit + ) + + def date(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "date") + + def year(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "year") + + def month(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "month") + + def day(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "day") + + def hour(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "hour") + + def minute(self) -> 
EagerExprT: + return self.compliant._reuse_series_namespace("dt", "minute") + + def second(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "second") + + def millisecond(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "millisecond") + + def microsecond(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "microsecond") + + def nanosecond(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "nanosecond") + + def ordinal_day(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "ordinal_day") + + def weekday(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "weekday") + + def total_minutes(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_minutes") + + def total_seconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_seconds") + + def total_milliseconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_milliseconds") + + def total_microseconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_microseconds") + + def total_nanoseconds(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "total_nanoseconds") + + def truncate(self, every: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("dt", "truncate", every=every) + + +class EagerExprListNamespace( + EagerExprNamespace[EagerExprT], ListNamespace[EagerExprT], Generic[EagerExprT] +): + def len(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("list", "len") + + +class CompliantExprNameNamespace( # type: ignore[misc] + _ExprNamespace[CompliantExprT_co], + NameNamespace[CompliantExprT_co], + Protocol[CompliantExprT_co], +): + def keep(self) -> CompliantExprT_co: + return self._from_callable(lambda name: name, alias=False) + + def map(self, function: AliasName) -> CompliantExprT_co: + return self._from_callable(function) + + def prefix(self, prefix: str) -> CompliantExprT_co: + return self._from_callable(lambda name: f"{prefix}{name}") + + def suffix(self, suffix: str) -> CompliantExprT_co: + return self._from_callable(lambda name: f"{name}{suffix}") + + def to_lowercase(self) -> CompliantExprT_co: + return self._from_callable(str.lower) + + def to_uppercase(self) -> CompliantExprT_co: + return self._from_callable(str.upper) + + @staticmethod + def _alias_output_names(func: AliasName, /) -> AliasNames: + def fn(output_names: Sequence[str], /) -> Sequence[str]: + return [func(name) for name in output_names] + + return fn + + def _from_callable( + self, func: AliasName, /, *, alias: bool = True + ) -> CompliantExprT_co: ... 
+ + +class EagerExprNameNamespace( + EagerExprNamespace[EagerExprT], + CompliantExprNameNamespace[EagerExprT], + Generic[EagerExprT], +): + def _from_callable(self, func: AliasName, /, *, alias: bool = True) -> EagerExprT: + expr = self.compliant + return type(expr)( + lambda df: [ + series.alias(func(name)) + for series, name in zip(expr(df), expr._evaluate_output_names(df)) + ], + depth=expr._depth, + function_name=expr._function_name, + evaluate_output_names=expr._evaluate_output_names, + alias_output_names=self._alias_output_names(func) if alias else None, + backend_version=expr._backend_version, + implementation=expr._implementation, + version=expr._version, + scalar_kwargs=expr._scalar_kwargs, + ) + + +class LazyExprNameNamespace( + LazyExprNamespace[LazyExprT], + CompliantExprNameNamespace[LazyExprT], + Generic[LazyExprT], +): + def _from_callable(self, func: AliasName, /, *, alias: bool = True) -> LazyExprT: + expr = self.compliant + output_names = self._alias_output_names(func) if alias else None + return expr._with_alias_output_names(output_names) + + +class EagerExprStringNamespace( + EagerExprNamespace[EagerExprT], StringNamespace[EagerExprT], Generic[EagerExprT] +): + def len_chars(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "len_chars") + + def replace(self, pattern: str, value: str, *, literal: bool, n: int) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "replace", pattern=pattern, value=value, literal=literal, n=n + ) + + def replace_all(self, pattern: str, value: str, *, literal: bool) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "replace_all", pattern=pattern, value=value, literal=literal + ) + + def strip_chars(self, characters: str | None) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "strip_chars", characters=characters + ) + + def starts_with(self, prefix: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "starts_with", prefix=prefix) + + def ends_with(self, suffix: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "ends_with", suffix=suffix) + + def contains(self, pattern: str, *, literal: bool) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "contains", pattern=pattern, literal=literal + ) + + def slice(self, offset: int, length: int | None) -> EagerExprT: + return self.compliant._reuse_series_namespace( + "str", "slice", offset=offset, length=length + ) + + def split(self, by: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "split", by=by) + + def to_datetime(self, format: str | None) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "to_datetime", format=format) + + def to_lowercase(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "to_lowercase") + + def to_uppercase(self) -> EagerExprT: + return self.compliant._reuse_series_namespace("str", "to_uppercase") + + +class EagerExprStructNamespace( + EagerExprNamespace[EagerExprT], StructNamespace[EagerExprT], Generic[EagerExprT] +): + def field(self, name: str) -> EagerExprT: + return self.compliant._reuse_series_namespace("struct", "field", name=name).alias( + name + ) diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py new file mode 100644 index 0000000..778e9bc --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py @@ -0,0 +1,233 @@ 
+from __future__ import annotations + +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Iterable, + Iterator, + Literal, + Mapping, + Sequence, + TypeVar, +) + +from narwhals._compliant.typing import ( + CompliantDataFrameAny, + CompliantDataFrameT, + CompliantDataFrameT_co, + CompliantExprT_contra, + CompliantFrameT, + CompliantFrameT_co, + CompliantLazyFrameAny, + CompliantLazyFrameT, + DepthTrackingExprAny, + DepthTrackingExprT_contra, + EagerExprT_contra, + LazyExprT_contra, + NativeExprT_co, +) +from narwhals._typing_compat import Protocol38 +from narwhals._utils import is_sequence_of + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + _SameFrameT = TypeVar("_SameFrameT", CompliantDataFrameAny, CompliantLazyFrameAny) + + +__all__ = [ + "CompliantGroupBy", + "DepthTrackingGroupBy", + "EagerGroupBy", + "LazyGroupBy", + "NarwhalsAggregation", +] + +NativeAggregationT_co = TypeVar( + "NativeAggregationT_co", bound="str | Callable[..., Any]", covariant=True +) +NarwhalsAggregation: TypeAlias = Literal[ + "sum", "mean", "median", "max", "min", "std", "var", "len", "n_unique", "count" +] + + +_RE_LEAF_NAME: re.Pattern[str] = re.compile(r"(\w+->)") + + +class CompliantGroupBy(Protocol38[CompliantFrameT_co, CompliantExprT_contra]): + _compliant_frame: Any + + @property + def compliant(self) -> CompliantFrameT_co: + return self._compliant_frame # type: ignore[no-any-return] + + def __init__( + self, + compliant_frame: CompliantFrameT_co, + keys: Sequence[CompliantExprT_contra] | Sequence[str], + /, + *, + drop_null_keys: bool, + ) -> None: ... + + def agg(self, *exprs: CompliantExprT_contra) -> CompliantFrameT_co: ... + + +class DataFrameGroupBy( + CompliantGroupBy[CompliantDataFrameT_co, CompliantExprT_contra], + Protocol38[CompliantDataFrameT_co, CompliantExprT_contra], +): + def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ... + + +class ParseKeysGroupBy( + CompliantGroupBy[CompliantFrameT, CompliantExprT_contra], + Protocol38[CompliantFrameT, CompliantExprT_contra], +): + def _parse_keys( + self, + compliant_frame: CompliantFrameT, + keys: Sequence[CompliantExprT_contra] | Sequence[str], + ) -> tuple[CompliantFrameT, list[str], list[str]]: + if is_sequence_of(keys, str): + keys_str = list(keys) + return compliant_frame, keys_str, keys_str.copy() + else: + return self._parse_expr_keys(compliant_frame, keys=keys) + + @staticmethod + def _parse_expr_keys( + compliant_frame: _SameFrameT, keys: Sequence[CompliantExprT_contra] + ) -> tuple[_SameFrameT, list[str], list[str]]: + """Parses key expressions to set up `.agg` operation with correct information. + + Since keys are expressions, it's possible to alias any such key to match + other dataframe column names. + + In order to match polars behavior and not overwrite columns when evaluating keys: + + - We evaluate what the output key names should be, in order to remap temporary column + names to the expected ones, and to exclude those from unnamed expressions in + `.agg(...)` context (see https://github.com/narwhals-dev/narwhals/pull/2325#issuecomment-2800004520) + - Create temporary names for evaluated key expressions that are guaranteed to have + no overlap with any existing column name. + - Add these temporary columns to the compliant dataframe. 
+ """ + tmp_name_length = max(len(str(c)) for c in compliant_frame.columns) + 1 + + def _temporary_name(key: str) -> str: + # 5 is the length of `__tmp` + key_str = str(key) # pandas allows non-string column names :sob: + return f"_{key_str}_tmp{'_' * (tmp_name_length - len(key_str) - 5)}" + + output_names = compliant_frame._evaluate_aliases(*keys) + + safe_keys = [ + # multi-output expression cannot have duplicate names, hence it's safe to suffix + key.name.map(_temporary_name) + if (metadata := key._metadata) and metadata.expansion_kind.is_multi_output() + # otherwise it's single named and we can use Expr.alias + else key.alias(_temporary_name(new_name)) + for key, new_name in zip(keys, output_names) + ] + return ( + compliant_frame.with_columns(*safe_keys), + compliant_frame._evaluate_aliases(*safe_keys), + output_names, + ) + + +class DepthTrackingGroupBy( + ParseKeysGroupBy[CompliantFrameT, DepthTrackingExprT_contra], + Protocol38[CompliantFrameT, DepthTrackingExprT_contra, NativeAggregationT_co], +): + """`CompliantGroupBy` variant, deals with `Eager` and other backends that utilize `CompliantExpr._depth`.""" + + _REMAP_AGGS: ClassVar[Mapping[NarwhalsAggregation, Any]] + """Mapping from `narwhals` to native representation. + + Note: + - `Dask` *may* return a `Callable` instead of a `str` referring to one. + """ + + def _ensure_all_simple(self, exprs: Sequence[DepthTrackingExprT_contra]) -> None: + for expr in exprs: + if not self._is_simple(expr): + name = self.compliant._implementation.name.lower() + msg = ( + f"Non-trivial complex aggregation found.\n\n" + f"Hint: you were probably trying to apply a non-elementary aggregation with a" + f"{name!r} table.\n" + "Please rewrite your query such that group-by aggregations " + "are elementary. For example, instead of:\n\n" + " df.group_by('a').agg(nw.col('b').round(2).mean())\n\n" + "use:\n\n" + " df.with_columns(nw.col('b').round(2)).group_by('a').agg(nw.col('b').mean())\n\n" + ) + raise ValueError(msg) + + @classmethod + def _is_simple(cls, expr: DepthTrackingExprAny, /) -> bool: + """Return `True` is we can efficiently use `expr` in a native `group_by` context.""" + return expr._is_elementary() and cls._leaf_name(expr) in cls._REMAP_AGGS + + @classmethod + def _remap_expr_name( + cls, name: NarwhalsAggregation | Any, / + ) -> NativeAggregationT_co: + """Replace `name`, with some native representation. + + Arguments: + name: Name of a `nw.Expr` aggregation method. + + Returns: + A native compatible representation. + """ + return cls._REMAP_AGGS.get(name, name) + + @classmethod + def _leaf_name(cls, expr: DepthTrackingExprAny, /) -> NarwhalsAggregation | Any: + """Return the last function name in the chain defined by `expr`.""" + return _RE_LEAF_NAME.sub("", expr._function_name) + + +class EagerGroupBy( + DepthTrackingGroupBy[CompliantDataFrameT, EagerExprT_contra, NativeAggregationT_co], + DataFrameGroupBy[CompliantDataFrameT, EagerExprT_contra], + Protocol38[CompliantDataFrameT, EagerExprT_contra, NativeAggregationT_co], +): ... 
+ + +class LazyGroupBy( + ParseKeysGroupBy[CompliantLazyFrameT, LazyExprT_contra], + CompliantGroupBy[CompliantLazyFrameT, LazyExprT_contra], + Protocol38[CompliantLazyFrameT, LazyExprT_contra, NativeExprT_co], +): + _keys: list[str] + _output_key_names: list[str] + + def _evaluate_expr(self, expr: LazyExprT_contra, /) -> Iterator[NativeExprT_co]: + output_names = expr._evaluate_output_names(self.compliant) + aliases = ( + expr._alias_output_names(output_names) + if expr._alias_output_names + else output_names + ) + native_exprs = expr(self.compliant) + if expr._is_multi_output_unnamed(): + exclude = {*self._keys, *self._output_key_names} + for native_expr, name, alias in zip(native_exprs, output_names, aliases): + if name not in exclude: + yield expr._alias_native(native_expr, alias) + else: + for native_expr, alias in zip(native_exprs, aliases): + yield expr._alias_native(native_expr, alias) + + def _evaluate_exprs( + self, exprs: Iterable[LazyExprT_contra], / + ) -> Iterator[NativeExprT_co]: + for expr in exprs: + yield from self._evaluate_expr(expr) diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py new file mode 100644 index 0000000..e73ccc2 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +from functools import partial +from typing import ( + TYPE_CHECKING, + Any, + Container, + Iterable, + Mapping, + Protocol, + Sequence, + overload, +) + +from narwhals._compliant.typing import ( + CompliantExprT, + CompliantFrameT, + CompliantLazyFrameT, + DepthTrackingExprT, + EagerDataFrameT, + EagerExprT, + EagerSeriesT, + LazyExprT, + NativeFrameT, + NativeFrameT_co, +) +from narwhals._utils import ( + exclude_column_names, + get_column_names, + passthrough_column_names, +) +from narwhals.dependencies import is_numpy_array_2d + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + from narwhals._compliant.selectors import CompliantSelectorNamespace + from narwhals._compliant.when_then import CompliantWhen, EagerWhen + from narwhals._utils import Implementation, Version + from narwhals.dtypes import DType + from narwhals.schema import Schema + from narwhals.typing import ( + ConcatMethod, + Into1DArray, + IntoDType, + NonNestedLiteral, + _2DArray, + ) + + Incomplete: TypeAlias = Any + +__all__ = ["CompliantNamespace", "EagerNamespace"] + + +class CompliantNamespace(Protocol[CompliantFrameT, CompliantExprT]): + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + + def all(self) -> CompliantExprT: + return self._expr.from_column_names(get_column_names, context=self) + + def col(self, *column_names: str) -> CompliantExprT: + return self._expr.from_column_names( + passthrough_column_names(column_names), context=self + ) + + def exclude(self, excluded_names: Container[str]) -> CompliantExprT: + return self._expr.from_column_names( + partial(exclude_column_names, names=excluded_names), context=self + ) + + def nth(self, *column_indices: int) -> CompliantExprT: + return self._expr.from_column_indices(*column_indices, context=self) + + def len(self) -> CompliantExprT: ... + def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> CompliantExprT: ... + def all_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ... + def any_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ... 
+ def sum_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ... + def mean_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ... + def min_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ... + def max_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ... + def concat( + self, items: Iterable[CompliantFrameT], *, how: ConcatMethod + ) -> CompliantFrameT: ... + def when( + self, predicate: CompliantExprT + ) -> CompliantWhen[CompliantFrameT, Incomplete, CompliantExprT]: ... + def concat_str( + self, *exprs: CompliantExprT, separator: str, ignore_nulls: bool + ) -> CompliantExprT: ... + @property + def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... + @property + def _expr(self) -> type[CompliantExprT]: ... + + +class DepthTrackingNamespace( + CompliantNamespace[CompliantFrameT, DepthTrackingExprT], + Protocol[CompliantFrameT, DepthTrackingExprT], +): + def all(self) -> DepthTrackingExprT: + return self._expr.from_column_names( + get_column_names, function_name="all", context=self + ) + + def col(self, *column_names: str) -> DepthTrackingExprT: + return self._expr.from_column_names( + passthrough_column_names(column_names), function_name="col", context=self + ) + + def exclude(self, excluded_names: Container[str]) -> DepthTrackingExprT: + return self._expr.from_column_names( + partial(exclude_column_names, names=excluded_names), + function_name="exclude", + context=self, + ) + + +class LazyNamespace( + CompliantNamespace[CompliantLazyFrameT, LazyExprT], + Protocol[CompliantLazyFrameT, LazyExprT, NativeFrameT_co], +): + @property + def _lazyframe(self) -> type[CompliantLazyFrameT]: ... + + def from_native(self, data: NativeFrameT_co | Any, /) -> CompliantLazyFrameT: + if self._lazyframe._is_native(data): + return self._lazyframe.from_native(data, context=self) + else: # pragma: no cover + msg = f"Unsupported type: {type(data).__name__!r}" + raise TypeError(msg) + + +class EagerNamespace( + DepthTrackingNamespace[EagerDataFrameT, EagerExprT], + Protocol[EagerDataFrameT, EagerSeriesT, EagerExprT, NativeFrameT], +): + @property + def _dataframe(self) -> type[EagerDataFrameT]: ... + @property + def _series(self) -> type[EagerSeriesT]: ... + def when( + self, predicate: EagerExprT + ) -> EagerWhen[EagerDataFrameT, EagerSeriesT, EagerExprT]: ... + + def from_native(self, data: Any, /) -> EagerDataFrameT | EagerSeriesT: + if self._dataframe._is_native(data): + return self._dataframe.from_native(data, context=self) + elif self._series._is_native(data): + return self._series.from_native(data, context=self) + msg = f"Unsupported type: {type(data).__name__!r}" + raise TypeError(msg) + + @overload + def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT: ... + + @overload + def from_numpy( + self, + data: _2DArray, + /, + schema: Mapping[str, DType] | Schema | Sequence[str] | None, + ) -> EagerDataFrameT: ... + + def from_numpy( + self, + data: Into1DArray | _2DArray, + /, + schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, + ) -> EagerDataFrameT | EagerSeriesT: + if is_numpy_array_2d(data): + return self._dataframe.from_numpy(data, schema=schema, context=self) + return self._series.from_numpy(data, context=self) + + def _concat_diagonal(self, dfs: Sequence[NativeFrameT], /) -> NativeFrameT: ... + def _concat_horizontal( + self, dfs: Sequence[NativeFrameT | Any], / + ) -> NativeFrameT: ... + def _concat_vertical(self, dfs: Sequence[NativeFrameT], /) -> NativeFrameT: ... 
+ def concat( + self, items: Iterable[EagerDataFrameT], *, how: ConcatMethod + ) -> EagerDataFrameT: + dfs = [item.native for item in items] + if how == "horizontal": + native = self._concat_horizontal(dfs) + elif how == "vertical": + native = self._concat_vertical(dfs) + elif how == "diagonal": + native = self._concat_diagonal(dfs) + else: # pragma: no cover + raise NotImplementedError + return self._dataframe.from_native(native, context=self) diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py new file mode 100644 index 0000000..9d4e468 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py @@ -0,0 +1,332 @@ +"""Almost entirely complete, generic `selectors` implementation.""" + +from __future__ import annotations + +import re +from functools import partial +from typing import ( + TYPE_CHECKING, + Collection, + Iterable, + Iterator, + Protocol, + Sequence, + TypeVar, + overload, +) + +from narwhals._compliant.expr import CompliantExpr +from narwhals._typing_compat import Protocol38 +from narwhals._utils import ( + _parse_time_unit_and_time_zone, + dtype_matches_time_unit_and_time_zone, + get_column_names, + is_compliant_dataframe, +) + +if TYPE_CHECKING: + from datetime import timezone + + from typing_extensions import Self, TypeAlias, TypeIs + + from narwhals._compliant.expr import NativeExpr + from narwhals._compliant.typing import ( + CompliantDataFrameAny, + CompliantExprAny, + CompliantFrameAny, + CompliantLazyFrameAny, + CompliantSeriesAny, + CompliantSeriesOrNativeExprAny, + EvalNames, + EvalSeries, + ScalarKwargs, + ) + from narwhals._utils import Implementation, Version, _FullContext + from narwhals.dtypes import DType + from narwhals.typing import TimeUnit + +__all__ = [ + "CompliantSelector", + "CompliantSelectorNamespace", + "EagerSelectorNamespace", + "LazySelectorNamespace", +] + + +SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeriesOrNativeExprAny") +SeriesT = TypeVar("SeriesT", bound="CompliantSeriesAny") +ExprT = TypeVar("ExprT", bound="NativeExpr") +FrameT = TypeVar("FrameT", bound="CompliantFrameAny") +DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrameAny") +LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrameAny") +SelectorOrExpr: TypeAlias = ( + "CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]" +) + + +class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]): + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + + @classmethod + def from_namespace(cls, context: _FullContext, /) -> Self: + obj = cls.__new__(cls) + obj._implementation = context._implementation + obj._backend_version = context._backend_version + obj._version = context._version + return obj + + @property + def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ... + + def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesOrExprT]: ... + + def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]: ... + + def _iter_columns_dtypes( + self, df: FrameT, / + ) -> Iterator[tuple[SeriesOrExprT, DType]]: ... 
+ + def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesOrExprT, str]]: + yield from zip(self._iter_columns(df), df.columns) + + def _is_dtype( + self: CompliantSelectorNamespace[FrameT, SeriesOrExprT], dtype: type[DType], / + ) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + return [ + ser for ser, tp in self._iter_columns_dtypes(df) if isinstance(tp, dtype) + ] + + def names(df: FrameT) -> Sequence[str]: + return [name for name, tp in self._iter_schema(df) if isinstance(tp, dtype)] + + return self._selector.from_callables(series, names, context=self) + + def by_dtype( + self, dtypes: Collection[DType | type[DType]] + ) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + return [ser for ser, tp in self._iter_columns_dtypes(df) if tp in dtypes] + + def names(df: FrameT) -> Sequence[str]: + return [name for name, tp in self._iter_schema(df) if tp in dtypes] + + return self._selector.from_callables(series, names, context=self) + + def matches(self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]: + p = re.compile(pattern) + + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + if ( + is_compliant_dataframe(df) + and not self._implementation.is_duckdb() + and not self._implementation.is_ibis() + ): + return [df.get_column(col) for col in df.columns if p.search(col)] + + return [ser for ser, name in self._iter_columns_names(df) if p.search(name)] + + def names(df: FrameT) -> Sequence[str]: + return [col for col in df.columns if p.search(col)] + + return self._selector.from_callables(series, names, context=self) + + def numeric(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + return [ser for ser, tp in self._iter_columns_dtypes(df) if tp.is_numeric()] + + def names(df: FrameT) -> Sequence[str]: + return [name for name, tp in self._iter_schema(df) if tp.is_numeric()] + + return self._selector.from_callables(series, names, context=self) + + def categorical(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + return self._is_dtype(self._version.dtypes.Categorical) + + def string(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + return self._is_dtype(self._version.dtypes.String) + + def boolean(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + return self._is_dtype(self._version.dtypes.Boolean) + + def all(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + return list(self._iter_columns(df)) + + return self._selector.from_callables(series, get_column_names, context=self) + + def datetime( + self, + time_unit: TimeUnit | Iterable[TimeUnit] | None, + time_zone: str | timezone | Iterable[str | timezone | None] | None, + ) -> CompliantSelector[FrameT, SeriesOrExprT]: + time_units, time_zones = _parse_time_unit_and_time_zone(time_unit, time_zone) + matches = partial( + dtype_matches_time_unit_and_time_zone, + dtypes=self._version.dtypes, + time_units=time_units, + time_zones=time_zones, + ) + + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + return [ser for ser, tp in self._iter_columns_dtypes(df) if matches(tp)] + + def names(df: FrameT) -> Sequence[str]: + return [name for name, tp in self._iter_schema(df) if matches(tp)] + + return self._selector.from_callables(series, names, context=self) + + +class EagerSelectorNamespace( + CompliantSelectorNamespace[DataFrameT, SeriesT], Protocol[DataFrameT, SeriesT] +): + def _iter_schema(self, df: DataFrameT, /) -> 
Iterator[tuple[str, DType]]: + for ser in self._iter_columns(df): + yield ser.name, ser.dtype + + def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]: + yield from df.iter_columns() + + def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]: + for ser in self._iter_columns(df): + yield ser, ser.dtype + + +class LazySelectorNamespace( + CompliantSelectorNamespace[LazyFrameT, ExprT], Protocol[LazyFrameT, ExprT] +): + def _iter_schema(self, df: LazyFrameT) -> Iterator[tuple[str, DType]]: + yield from df.schema.items() + + def _iter_columns(self, df: LazyFrameT) -> Iterator[ExprT]: + yield from df._iter_columns() + + def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[ExprT, DType]]: + yield from zip(self._iter_columns(df), df.schema.values()) + + +class CompliantSelector( + CompliantExpr[FrameT, SeriesOrExprT], Protocol38[FrameT, SeriesOrExprT] +): + _call: EvalSeries[FrameT, SeriesOrExprT] + _window_function: None + _function_name: str + _depth: int + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + _scalar_kwargs: ScalarKwargs + + @classmethod + def from_callables( + cls, + call: EvalSeries[FrameT, SeriesOrExprT], + evaluate_output_names: EvalNames[FrameT], + *, + context: _FullContext, + ) -> Self: + obj = cls.__new__(cls) + obj._call = call + obj._window_function = None + obj._depth = 0 + obj._function_name = "selector" + obj._evaluate_output_names = evaluate_output_names + obj._alias_output_names = None + obj._implementation = context._implementation + obj._backend_version = context._backend_version + obj._version = context._version + obj._scalar_kwargs = {} + return obj + + @property + def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesOrExprT]: + return self.__narwhals_namespace__().selectors + + def _to_expr(self) -> CompliantExpr[FrameT, SeriesOrExprT]: ... + + def _is_selector( + self, other: Self | CompliantExpr[FrameT, SeriesOrExprT] + ) -> TypeIs[CompliantSelector[FrameT, SeriesOrExprT]]: + return isinstance(other, type(self)) + + @overload + def __sub__(self, other: Self) -> Self: ... + @overload + def __sub__( + self, other: CompliantExpr[FrameT, SeriesOrExprT] + ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... + def __sub__( + self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: + if self._is_selector(other): + + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) + return [ + x for x, name in zip(self(df), lhs_names) if name not in rhs_names + ] + + def names(df: FrameT) -> Sequence[str]: + lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) + return [x for x in lhs_names if x not in rhs_names] + + return self.selectors._selector.from_callables(series, names, context=self) + return self._to_expr() - other + + @overload + def __or__(self, other: Self) -> Self: ... + @overload + def __or__( + self, other: CompliantExpr[FrameT, SeriesOrExprT] + ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... 
+ def __or__( + self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: + if self._is_selector(other): + + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) + return [ + *(x for x, name in zip(self(df), lhs_names) if name not in rhs_names), + *other(df), + ] + + def names(df: FrameT) -> Sequence[str]: + lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) + return [*(x for x in lhs_names if x not in rhs_names), *rhs_names] + + return self.selectors._selector.from_callables(series, names, context=self) + return self._to_expr() | other + + @overload + def __and__(self, other: Self) -> Self: ... + @overload + def __and__( + self, other: CompliantExpr[FrameT, SeriesOrExprT] + ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... + def __and__( + self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: + if self._is_selector(other): + + def series(df: FrameT) -> Sequence[SeriesOrExprT]: + lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) + return [x for x, name in zip(self(df), lhs_names) if name in rhs_names] + + def names(df: FrameT) -> Sequence[str]: + lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) + return [x for x in lhs_names if x in rhs_names] + + return self.selectors._selector.from_callables(series, names, context=self) + return self._to_expr() & other + + def __invert__(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + return self.selectors.all() - self + + +def _eval_lhs_rhs( + df: CompliantFrameAny, lhs: CompliantExprAny, rhs: CompliantExprAny +) -> tuple[Sequence[str], Sequence[str]]: + return lhs._evaluate_output_names(df), rhs._evaluate_output_names(df) diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/series.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/series.py new file mode 100644 index 0000000..706fd2b --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/series.py @@ -0,0 +1,396 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Generic, + Iterable, + Iterator, + Mapping, + Protocol, + Sequence, +) + +from narwhals._compliant.any_namespace import ( + CatNamespace, + DateTimeNamespace, + ListNamespace, + StringNamespace, + StructNamespace, +) +from narwhals._compliant.typing import ( + CompliantSeriesT_co, + EagerSeriesT_co, + NativeSeriesT, + NativeSeriesT_co, +) +from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals +from narwhals._utils import ( + _StoresCompliant, + _StoresNative, + is_compliant_series, + is_sized_multi_index_selector, + unstable, +) + +if TYPE_CHECKING: + from types import ModuleType + + import pandas as pd + import polars as pl + import pyarrow as pa + from typing_extensions import Self + + from narwhals._compliant.dataframe import CompliantDataFrame + from narwhals._compliant.expr import CompliantExpr, EagerExpr + from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace + from narwhals._utils import Implementation, Version, _FullContext + from narwhals.dtypes import DType + from narwhals.series import Series + from narwhals.typing import ( + ClosedInterval, + FillNullStrategy, + Into1DArray, + IntoDType, + MultiIndexSelector, + NonNestedLiteral, + NumericLiteral, + RankMethod, + RollingInterpolationMethod, + SizedMultiIndexSelector, + TemporalLiteral, + _1DArray, + _SliceIndex, + ) + +__all__ = ["CompliantSeries", "EagerSeries"] + + +class CompliantSeries( + 
NumpyConvertible["_1DArray", "Into1DArray"], + FromIterable, + FromNative[NativeSeriesT], + ToNarwhals["Series[NativeSeriesT]"], + Protocol[NativeSeriesT], +): + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + + @property + def dtype(self) -> DType: ... + @property + def name(self) -> str: ... + @property + def native(self) -> NativeSeriesT: ... + def __narwhals_series__(self) -> Self: + return self + + def __narwhals_namespace__(self) -> CompliantNamespace[Any, Any]: ... + def __native_namespace__(self) -> ModuleType: ... + def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ... + def __contains__(self, other: Any) -> bool: ... + def __getitem__(self, item: MultiIndexSelector[Self]) -> Any: ... + def __iter__(self) -> Iterator[Any]: ... + def __len__(self) -> int: + return len(self.native) + + def _with_native(self, series: Any) -> Self: ... + def _with_version(self, version: Version) -> Self: ... + def _to_expr(self) -> CompliantExpr[Any, Self]: ... + @classmethod + def from_native(cls, data: NativeSeriesT, /, *, context: _FullContext) -> Self: ... + @classmethod + def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ... + @classmethod + def from_iterable( + cls, + data: Iterable[Any], + /, + *, + context: _FullContext, + name: str = "", + dtype: IntoDType | None = None, + ) -> Self: ... + def to_narwhals(self) -> Series[NativeSeriesT]: + return self._version.series(self, level="full") + + # Operators + def __add__(self, other: Any) -> Self: ... + def __and__(self, other: Any) -> Self: ... + def __eq__(self, other: object) -> Self: ... # type: ignore[override] + def __floordiv__(self, other: Any) -> Self: ... + def __ge__(self, other: Any) -> Self: ... + def __gt__(self, other: Any) -> Self: ... + def __invert__(self) -> Self: ... + def __le__(self, other: Any) -> Self: ... + def __lt__(self, other: Any) -> Self: ... + def __mod__(self, other: Any) -> Self: ... + def __mul__(self, other: Any) -> Self: ... + def __ne__(self, other: object) -> Self: ... # type: ignore[override] + def __or__(self, other: Any) -> Self: ... + def __pow__(self, other: Any) -> Self: ... + def __radd__(self, other: Any) -> Self: ... + def __rand__(self, other: Any) -> Self: ... + def __rfloordiv__(self, other: Any) -> Self: ... + def __rmod__(self, other: Any) -> Self: ... + def __rmul__(self, other: Any) -> Self: ... + def __ror__(self, other: Any) -> Self: ... + def __rpow__(self, other: Any) -> Self: ... + def __rsub__(self, other: Any) -> Self: ... + def __rtruediv__(self, other: Any) -> Self: ... + def __sub__(self, other: Any) -> Self: ... + def __truediv__(self, other: Any) -> Self: ... + + def abs(self) -> Self: ... + def alias(self, name: str) -> Self: ... + def all(self) -> bool: ... + def any(self) -> bool: ... + def arg_max(self) -> int: ... + def arg_min(self) -> int: ... + def arg_true(self) -> Self: ... + def cast(self, dtype: IntoDType) -> Self: ... + def clip( + self, + lower_bound: Self | NumericLiteral | TemporalLiteral | None, + upper_bound: Self | NumericLiteral | TemporalLiteral | None, + ) -> Self: ... + def count(self) -> int: ... + def cum_count(self, *, reverse: bool) -> Self: ... + def cum_max(self, *, reverse: bool) -> Self: ... + def cum_min(self, *, reverse: bool) -> Self: ... + def cum_prod(self, *, reverse: bool) -> Self: ... + def cum_sum(self, *, reverse: bool) -> Self: ... + def diff(self) -> Self: ... + def drop_nulls(self) -> Self: ... 
+ def ewm_mean( + self, + *, + com: float | None, + span: float | None, + half_life: float | None, + alpha: float | None, + adjust: bool, + min_samples: int, + ignore_nulls: bool, + ) -> Self: ... + def exp(self) -> Self: ... + def fill_null( + self, + value: Self | NonNestedLiteral, + strategy: FillNullStrategy | None, + limit: int | None, + ) -> Self: ... + def filter(self, predicate: Any) -> Self: ... + def gather_every(self, n: int, offset: int) -> Self: ... + @unstable + def hist( + self, + bins: list[float | int] | None, + *, + bin_count: int | None, + include_breakpoint: bool, + ) -> CompliantDataFrame[Self, Any, Any, Any]: ... + def head(self, n: int) -> Self: ... + def is_between( + self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval + ) -> Self: ... + def is_finite(self) -> Self: ... + def is_first_distinct(self) -> Self: ... + def is_in(self, other: Any) -> Self: ... + def is_last_distinct(self) -> Self: ... + def is_nan(self) -> Self: ... + def is_null(self) -> Self: ... + def is_sorted(self, *, descending: bool) -> bool: ... + def is_unique(self) -> Self: ... + def item(self, index: int | None) -> Any: ... + def len(self) -> int: ... + def log(self, base: float) -> Self: ... + def max(self) -> Any: ... + def mean(self) -> float: ... + def median(self) -> float: ... + def min(self) -> Any: ... + def mode(self) -> Self: ... + def n_unique(self) -> int: ... + def null_count(self) -> int: ... + def quantile( + self, quantile: float, interpolation: RollingInterpolationMethod + ) -> float: ... + def rank(self, method: RankMethod, *, descending: bool) -> Self: ... + def replace_strict( + self, + old: Sequence[Any] | Mapping[Any, Any], + new: Sequence[Any], + *, + return_dtype: IntoDType | None, + ) -> Self: ... + def rolling_mean( + self, window_size: int, *, min_samples: int, center: bool + ) -> Self: ... + def rolling_std( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: ... + def rolling_sum( + self, window_size: int, *, min_samples: int, center: bool + ) -> Self: ... + def rolling_var( + self, window_size: int, *, min_samples: int, center: bool, ddof: int + ) -> Self: ... + def round(self, decimals: int) -> Self: ... + def sample( + self, + n: int | None, + *, + fraction: float | None, + with_replacement: bool, + seed: int | None, + ) -> Self: ... + def scatter(self, indices: int | Sequence[int], values: Any) -> Self: ... + def shift(self, n: int) -> Self: ... + def skew(self) -> float | None: ... + def sort(self, *, descending: bool, nulls_last: bool) -> Self: ... + def std(self, *, ddof: int) -> float: ... + def sum(self) -> float: ... + def tail(self, n: int) -> Self: ... + def to_arrow(self) -> pa.Array[Any]: ... + def to_dummies( + self, *, separator: str, drop_first: bool + ) -> CompliantDataFrame[Self, Any, Any, Any]: ... + def to_frame(self) -> CompliantDataFrame[Self, Any, Any, Any]: ... + def to_list(self) -> list[Any]: ... + def to_pandas(self) -> pd.Series[Any]: ... + def to_polars(self) -> pl.Series: ... + def unique(self, *, maintain_order: bool) -> Self: ... + def value_counts( + self, *, sort: bool, parallel: bool, name: str | None, normalize: bool + ) -> CompliantDataFrame[Self, Any, Any, Any]: ... + def var(self, *, ddof: int) -> float: ... + def zip_with(self, mask: Any, other: Any) -> Self: ... + + @property + def str(self) -> Any: ... + @property + def dt(self) -> Any: ... + @property + def cat(self) -> Any: ... + @property + def list(self) -> Any: ... + @property + def struct(self) -> Any: ... 
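The `CompliantSeries` protocol above is consumed by duck-typing rather than inheritance checks; a backend series only needs to expose the `__narwhals_series__` hook. A toy sketch under that assumption (the real helper is `is_compliant_series` from `narwhals._utils`, imported in this module; the check shown here is a simplification):

    from typing import Any

    def looks_like_compliant_series(obj: Any) -> bool:
        # assumed simplification: detect the hook method instead of a base class
        return hasattr(obj, "__narwhals_series__")

    class FakeSeries:
        def __narwhals_series__(self) -> "FakeSeries":
            return self

    assert looks_like_compliant_series(FakeSeries())
    assert not looks_like_compliant_series([1, 2, 3])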
+ + +class EagerSeries(CompliantSeries[NativeSeriesT], Protocol[NativeSeriesT]): + _native_series: Any + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + _broadcast: bool + + def _from_scalar(self, value: Any) -> Self: + return self.from_iterable([value], name=self.name, context=self) + + def _with_native( + self, series: NativeSeriesT, *, preserve_broadcast: bool = False + ) -> Self: + """Return a new `CompliantSeries`, wrapping the native `series`. + + In cases when operations are known to not affect whether a result should + be broadcast, we can pass `preserve_broadcast=True`. + Set this with care - it should only be set for unary expressions which don't + change length or order, such as `.alias` or `.fill_null`. If in doubt, don't + set it, you probably don't need it. + """ + ... + + def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any, Any]: ... + + def _to_expr(self) -> EagerExpr[Any, Any]: + return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return] + + def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ... + def _gather_slice(self, rows: _SliceIndex | range) -> Self: ... + def __getitem__(self, item: MultiIndexSelector[Self]) -> Self: + if isinstance(item, (slice, range)): + return self._gather_slice(item) + elif is_compliant_series(item): + return self._gather(item.native) + elif is_sized_multi_index_selector(item): + return self._gather(item) + else: # pragma: no cover + msg = f"Unreachable code, got unexpected type: {type(item)}" + raise AssertionError(msg) + + @property + def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ... + @property + def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT]: ... + @property + def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT]: ... + @property + def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT]: ... + @property + def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT]: ... + + +class _SeriesNamespace( # type: ignore[misc] + _StoresCompliant[CompliantSeriesT_co], + _StoresNative[NativeSeriesT_co], + Protocol[CompliantSeriesT_co, NativeSeriesT_co], +): + _compliant_series: CompliantSeriesT_co + + @property + def compliant(self) -> CompliantSeriesT_co: + return self._compliant_series + + @property + def native(self) -> NativeSeriesT_co: + return self._compliant_series.native # type: ignore[no-any-return] + + def with_native(self, series: Any, /) -> CompliantSeriesT_co: + return self.compliant._with_native(series) + + +class EagerSeriesNamespace( + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + Generic[EagerSeriesT_co, NativeSeriesT_co], +): + _compliant_series: EagerSeriesT_co + + def __init__(self, series: EagerSeriesT_co, /) -> None: + self._compliant_series = series + + +class EagerSeriesCatNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + CatNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesDateTimeNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + DateTimeNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesListNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + ListNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... 
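A toy model of the broadcast flag documented in `EagerSeries._with_native` above: a length-1 series created from a scalar is marked broadcastable, a length-preserving unary op may carry the flag forward, and a later binary op expands it. `ToySeries` and its methods are illustrative only, not the narwhals implementation.

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class ToySeries:
        values: List[float]
        broadcast: bool = False

        def alias_like(self) -> "ToySeries":
            # unary and length-preserving, so preserving the broadcast flag is safe
            return ToySeries(list(self.values), broadcast=self.broadcast)

        def add(self, other: "ToySeries") -> "ToySeries":
            rhs = other.values * len(self.values) if other.broadcast else other.values
            return ToySeries([a + b for a, b in zip(self.values, rhs)])

    scalar_like = ToySeries([10.0], broadcast=True)  # e.g. the result of `_from_scalar`
    column = ToySeries([1.0, 2.0, 3.0])
    assert column.add(scalar_like.alias_like()).values == [11.0, 12.0, 13.0]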
+ + +class EagerSeriesStringNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + StringNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... + + +class EagerSeriesStructNamespace( # type: ignore[misc] + _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co], + StructNamespace[EagerSeriesT_co], + Protocol[EagerSeriesT_co, NativeSeriesT_co], +): ... diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py new file mode 100644 index 0000000..4c3685b --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Callable, Sequence, TypedDict, TypeVar + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + from narwhals._compliant.dataframe import ( + CompliantDataFrame, + CompliantLazyFrame, + EagerDataFrame, + ) + from narwhals._compliant.expr import ( + CompliantExpr, + DepthTrackingExpr, + EagerExpr, + LazyExpr, + NativeExpr, + ) + from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace + from narwhals._compliant.series import CompliantSeries, EagerSeries + from narwhals._compliant.window import WindowInputs + from narwhals.typing import FillNullStrategy, NativeFrame, NativeSeries, RankMethod + + class ScalarKwargs(TypedDict, total=False): + """Non-expressifiable args which we may need to reuse in `agg` or `over`.""" + + center: int + ddof: int + descending: bool + limit: int | None + method: RankMethod + min_samples: int + n: int + reverse: bool + strategy: FillNullStrategy | None + window_size: int + + +__all__ = [ + "AliasName", + "AliasNames", + "CompliantDataFrameT", + "CompliantFrameT", + "CompliantLazyFrameT", + "CompliantSeriesT", + "EvalNames", + "EvalSeries", + "IntoCompliantExpr", + "NativeFrameT_co", + "NativeSeriesT_co", +] +CompliantExprAny: TypeAlias = "CompliantExpr[Any, Any]" +CompliantSeriesAny: TypeAlias = "CompliantSeries[Any]" +CompliantSeriesOrNativeExprAny: TypeAlias = "CompliantSeriesAny | NativeExpr" +CompliantDataFrameAny: TypeAlias = "CompliantDataFrame[Any, Any, Any, Any]" +CompliantLazyFrameAny: TypeAlias = "CompliantLazyFrame[Any, Any, Any]" +CompliantFrameAny: TypeAlias = "CompliantDataFrameAny | CompliantLazyFrameAny" +CompliantNamespaceAny: TypeAlias = "CompliantNamespace[Any, Any]" + +DepthTrackingExprAny: TypeAlias = "DepthTrackingExpr[Any, Any]" + +EagerDataFrameAny: TypeAlias = "EagerDataFrame[Any, Any, Any]" +EagerSeriesAny: TypeAlias = "EagerSeries[Any]" +EagerExprAny: TypeAlias = "EagerExpr[Any, Any]" +EagerNamespaceAny: TypeAlias = ( + "EagerNamespace[EagerDataFrameAny, EagerSeriesAny, EagerExprAny, NativeFrame]" +) + +LazyExprAny: TypeAlias = "LazyExpr[Any, Any]" + +NativeExprT = TypeVar("NativeExprT", bound="NativeExpr") +NativeExprT_co = TypeVar("NativeExprT_co", bound="NativeExpr", covariant=True) +NativeSeriesT = TypeVar("NativeSeriesT", bound="NativeSeries") +NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True) +NativeFrameT = TypeVar("NativeFrameT", bound="NativeFrame") +NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True) +NativeFrameT_contra = TypeVar( + "NativeFrameT_contra", bound="NativeFrame", contravariant=True +) + +CompliantExprT = TypeVar("CompliantExprT", bound=CompliantExprAny) +CompliantExprT_co = TypeVar("CompliantExprT_co", bound=CompliantExprAny, covariant=True) 
+CompliantExprT_contra = TypeVar( + "CompliantExprT_contra", bound=CompliantExprAny, contravariant=True +) +CompliantSeriesT = TypeVar("CompliantSeriesT", bound=CompliantSeriesAny) +CompliantSeriesT_co = TypeVar( + "CompliantSeriesT_co", bound=CompliantSeriesAny, covariant=True +) +CompliantSeriesOrNativeExprT = TypeVar( + "CompliantSeriesOrNativeExprT", bound=CompliantSeriesOrNativeExprAny +) +CompliantSeriesOrNativeExprT_co = TypeVar( + "CompliantSeriesOrNativeExprT_co", + bound=CompliantSeriesOrNativeExprAny, + covariant=True, +) +CompliantFrameT = TypeVar("CompliantFrameT", bound=CompliantFrameAny) +CompliantFrameT_co = TypeVar( + "CompliantFrameT_co", bound=CompliantFrameAny, covariant=True +) +CompliantDataFrameT = TypeVar("CompliantDataFrameT", bound=CompliantDataFrameAny) +CompliantDataFrameT_co = TypeVar( + "CompliantDataFrameT_co", bound=CompliantDataFrameAny, covariant=True +) +CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound=CompliantLazyFrameAny) +CompliantLazyFrameT_co = TypeVar( + "CompliantLazyFrameT_co", bound=CompliantLazyFrameAny, covariant=True +) +CompliantNamespaceT = TypeVar("CompliantNamespaceT", bound=CompliantNamespaceAny) +CompliantNamespaceT_co = TypeVar( + "CompliantNamespaceT_co", bound=CompliantNamespaceAny, covariant=True +) + +IntoCompliantExpr: TypeAlias = "CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | CompliantSeriesOrNativeExprT_co" + +DepthTrackingExprT = TypeVar("DepthTrackingExprT", bound=DepthTrackingExprAny) +DepthTrackingExprT_contra = TypeVar( + "DepthTrackingExprT_contra", bound=DepthTrackingExprAny, contravariant=True +) + +EagerExprT = TypeVar("EagerExprT", bound=EagerExprAny) +EagerExprT_contra = TypeVar("EagerExprT_contra", bound=EagerExprAny, contravariant=True) +EagerSeriesT = TypeVar("EagerSeriesT", bound=EagerSeriesAny) +EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound=EagerSeriesAny, covariant=True) + +# NOTE: `pyright` gives false (8) positives if this uses `EagerDataFrameAny`? +EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any]") + +LazyExprT = TypeVar("LazyExprT", bound=LazyExprAny) +LazyExprT_contra = TypeVar("LazyExprT_contra", bound=LazyExprAny, contravariant=True) + +AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]] +"""A function aliasing a *sequence* of column names.""" + +AliasName: TypeAlias = Callable[[str], str] +"""A function aliasing a *single* column name.""" + +EvalSeries: TypeAlias = Callable[ + [CompliantFrameT], Sequence[CompliantSeriesOrNativeExprT] +] +"""A function from a `Frame` to a sequence of `Series`*. + +See [underwater unicorn magic](https://narwhals-dev.github.io/narwhals/how_it_works/). 
+""" + +EvalNames: TypeAlias = Callable[[CompliantFrameT], Sequence[str]] +"""A function from a `Frame` to a sequence of columns names *before* any aliasing takes place.""" + +WindowFunction: TypeAlias = ( + "Callable[[CompliantFrameT, WindowInputs[NativeExprT]], Sequence[NativeExprT]]" +) +"""A function evaluated with `over(partition_by=..., order_by=...)`.""" diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py new file mode 100644 index 0000000..1de91f9 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py @@ -0,0 +1,232 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, cast + +from narwhals._compliant.expr import CompliantExpr +from narwhals._compliant.typing import ( + CompliantExprAny, + CompliantFrameAny, + CompliantLazyFrameT, + CompliantSeriesOrNativeExprAny, + EagerDataFrameT, + EagerExprT, + EagerSeriesT, + LazyExprAny, + NativeExprT, + WindowFunction, +) +from narwhals._typing_compat import Protocol38 + +if TYPE_CHECKING: + from typing_extensions import Self, TypeAlias + + from narwhals._compliant.typing import EvalSeries, ScalarKwargs + from narwhals._compliant.window import WindowInputs + from narwhals._utils import Implementation, Version, _FullContext + from narwhals.typing import NonNestedLiteral + + +__all__ = ["CompliantThen", "CompliantWhen", "EagerWhen", "LazyThen", "LazyWhen"] + +ExprT = TypeVar("ExprT", bound=CompliantExprAny) +LazyExprT = TypeVar("LazyExprT", bound=LazyExprAny) +SeriesT = TypeVar("SeriesT", bound=CompliantSeriesOrNativeExprAny) +FrameT = TypeVar("FrameT", bound=CompliantFrameAny) + +Scalar: TypeAlias = Any +"""A native literal value.""" + +IntoExpr: TypeAlias = "SeriesT | ExprT | NonNestedLiteral | Scalar" +"""Anything that is convertible into a `CompliantExpr`.""" + + +class CompliantWhen(Protocol38[FrameT, SeriesT, ExprT]): + _condition: ExprT + _then_value: IntoExpr[SeriesT, ExprT] + _otherwise_value: IntoExpr[SeriesT, ExprT] | None + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + + @property + def _then(self) -> type[CompliantThen[FrameT, SeriesT, ExprT]]: ... + def __call__(self, compliant_frame: FrameT, /) -> Sequence[SeriesT]: ... + def _window_function( + self, compliant_frame: FrameT, window_inputs: WindowInputs[Any] + ) -> Sequence[SeriesT]: ... + + def then( + self, value: IntoExpr[SeriesT, ExprT], / + ) -> CompliantThen[FrameT, SeriesT, ExprT]: + return self._then.from_when(self, value) + + @classmethod + def from_expr(cls, condition: ExprT, /, *, context: _FullContext) -> Self: + obj = cls.__new__(cls) + obj._condition = condition + obj._then_value = None + obj._otherwise_value = None + obj._implementation = context._implementation + obj._backend_version = context._backend_version + obj._version = context._version + return obj + + +class CompliantThen(CompliantExpr[FrameT, SeriesT], Protocol38[FrameT, SeriesT, ExprT]): + _call: EvalSeries[FrameT, SeriesT] + _when_value: CompliantWhen[FrameT, SeriesT, ExprT] + _function_name: str + _depth: int + _implementation: Implementation + _backend_version: tuple[int, ...] 
+ _version: Version + _scalar_kwargs: ScalarKwargs + + @classmethod + def from_when( + cls, + when: CompliantWhen[FrameT, SeriesT, ExprT], + then: IntoExpr[SeriesT, ExprT], + /, + ) -> Self: + when._then_value = then + obj = cls.__new__(cls) + obj._call = when + obj._when_value = when + obj._depth = 0 + obj._function_name = "whenthen" + obj._evaluate_output_names = getattr( + then, "_evaluate_output_names", lambda _df: ["literal"] + ) + obj._alias_output_names = getattr(then, "_alias_output_names", None) + obj._implementation = when._implementation + obj._backend_version = when._backend_version + obj._version = when._version + obj._scalar_kwargs = {} + return obj + + def otherwise(self, otherwise: IntoExpr[SeriesT, ExprT], /) -> ExprT: + self._when_value._otherwise_value = otherwise + self._function_name = "whenotherwise" + return cast("ExprT", self) + + +class LazyThen( + CompliantThen[CompliantLazyFrameT, NativeExprT, LazyExprT], + Protocol38[CompliantLazyFrameT, NativeExprT, LazyExprT], +): + _window_function: WindowFunction[CompliantLazyFrameT, NativeExprT] | None + + @classmethod + def from_when( + cls, + when: CompliantWhen[CompliantLazyFrameT, NativeExprT, LazyExprT], + then: IntoExpr[NativeExprT, LazyExprT], + /, + ) -> Self: + when._then_value = then + obj = cls.__new__(cls) + obj._call = when + + obj._window_function = when._window_function + + obj._when_value = when + obj._depth = 0 + obj._function_name = "whenthen" + obj._evaluate_output_names = getattr( + then, "_evaluate_output_names", lambda _df: ["literal"] + ) + obj._alias_output_names = getattr(then, "_alias_output_names", None) + obj._implementation = when._implementation + obj._backend_version = when._backend_version + obj._version = when._version + obj._scalar_kwargs = {} + return obj + + +class EagerWhen( + CompliantWhen[EagerDataFrameT, EagerSeriesT, EagerExprT], + Protocol38[EagerDataFrameT, EagerSeriesT, EagerExprT], +): + def _if_then_else( + self, when: EagerSeriesT, then: EagerSeriesT, otherwise: EagerSeriesT | None, / + ) -> EagerSeriesT: ... 
+ + def __call__(self, df: EagerDataFrameT, /) -> Sequence[EagerSeriesT]: + is_expr = self._condition._is_expr + when: EagerSeriesT = self._condition(df)[0] + then: EagerSeriesT + + if is_expr(self._then_value): + then = self._then_value(df)[0] + else: + then = when.alias("literal")._from_scalar(self._then_value) + then._broadcast = True + + if is_expr(self._otherwise_value): + otherwise = self._otherwise_value(df)[0] + elif self._otherwise_value is not None: + otherwise = when._from_scalar(self._otherwise_value) + otherwise._broadcast = True + else: + otherwise = self._otherwise_value + return [self._if_then_else(when, then, otherwise)] + + +class LazyWhen( + CompliantWhen[CompliantLazyFrameT, NativeExprT, LazyExprT], + Protocol38[CompliantLazyFrameT, NativeExprT, LazyExprT], +): + when: Callable[..., NativeExprT] + lit: Callable[..., NativeExprT] + + def __call__(self, df: CompliantLazyFrameT) -> Sequence[NativeExprT]: + is_expr = self._condition._is_expr + when = self.when + lit = self.lit + condition = df._evaluate_expr(self._condition) + then_ = self._then_value + then = df._evaluate_expr(then_) if is_expr(then_) else lit(then_) + other_ = self._otherwise_value + if other_ is None: + result = when(condition, then) + else: + otherwise = df._evaluate_expr(other_) if is_expr(other_) else lit(other_) + result = when(condition, then).otherwise(otherwise) # type: ignore # noqa: PGH003 + return [result] + + @classmethod + def from_expr(cls, condition: LazyExprT, /, *, context: _FullContext) -> Self: + obj = cls.__new__(cls) + obj._condition = condition + + obj._then_value = None + obj._otherwise_value = None + obj._implementation = context._implementation + obj._backend_version = context._backend_version + obj._version = context._version + return obj + + def _window_function( + self, df: CompliantLazyFrameT, window_inputs: WindowInputs[NativeExprT] + ) -> Sequence[NativeExprT]: + is_expr = self._condition._is_expr + condition = self._condition.window_function(df, window_inputs)[0] + then_ = self._then_value + then = ( + then_.window_function(df, window_inputs)[0] + if is_expr(then_) + else self.lit(then_) + ) + + other_ = self._otherwise_value + if other_ is None: + result = self.when(condition, then) + else: + other = ( + other_.window_function(df, window_inputs)[0] + if is_expr(other_) + else self.lit(other_) + ) + result = self.when(condition, then).otherwise(other) # type: ignore # noqa: PGH003 + return [result] diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/window.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/window.py new file mode 100644 index 0000000..07d37cc --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/window.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Generic, Sequence + +from narwhals._compliant.typing import NativeExprT_co + + +class WindowInputs(Generic[NativeExprT_co]): + __slots__ = ("order_by", "partition_by") + + def __init__( + self, partition_by: Sequence[str | NativeExprT_co], order_by: Sequence[str] + ) -> None: + self.partition_by = partition_by + self.order_by = order_by -- cgit v1.2.3-70-g09d2
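For reference, the `WindowInputs` container added in `window.py` above simply bundles the `over(...)` arguments that backends pass to a window function. A small usage sketch; the column names are illustrative, and `partition_by` may also hold native expressions rather than strings.

    from narwhals._compliant.window import WindowInputs

    inputs = WindowInputs(partition_by=["region"], order_by=["date"])
    assert list(inputs.partition_by) == ["region"]
    assert list(inputs.order_by) == ["date"]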