aboutsummaryrefslogtreecommitdiff
path: root/venv/lib/python3.8/site-packages/narwhals
diff options
context:
space:
mode:
authorsotech117 <michael_foiani@brown.edu>2025-07-31 17:27:24 -0400
committersotech117 <michael_foiani@brown.edu>2025-07-31 17:27:24 -0400
commit5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e (patch)
tree8dacb0f195df1c0788d36dd0064f6bbaa3143ede /venv/lib/python3.8/site-packages/narwhals
parentb832d364da8c2efe09e3f75828caf73c50d01ce3 (diff)
add code for analysis of data
Diffstat (limited to 'venv/lib/python3.8/site-packages/narwhals')
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/__init__.py186
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/dataframe.py771
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/expr.py205
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/group_by.py159
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/namespace.py283
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/selectors.py29
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/series.py1183
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/series_cat.py18
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/series_dt.py194
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/series_list.py16
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/series_str.py62
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/series_struct.py15
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/typing.py72
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_arrow/utils.py470
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py84
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py85
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py500
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py1140
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py233
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py194
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py332
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/series.py396
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py154
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py232
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_compliant/window.py15
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/dataframe.py443
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/expr.py675
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/expr_dt.py162
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/expr_str.py98
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/group_by.py122
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/namespace.py320
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/selectors.py30
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_dask/utils.py160
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/dataframe.py512
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/expr.py898
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_dt.py160
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_list.py18
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_str.py103
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_struct.py20
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/group_by.py31
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/namespace.py207
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/selectors.py31
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/series.py44
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duckdb/utils.py287
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_duration.py60
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_enum.py42
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py609
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/dataframe.py430
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/expr.py698
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py98
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py14
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py103
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py19
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py30
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py227
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py30
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/series.py41
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py227
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_interchange/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_interchange/dataframe.py156
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_interchange/series.py47
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_namespace.py397
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py1148
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py402
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py293
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py332
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py34
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py1109
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py17
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py237
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py33
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py79
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py16
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py15
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py673
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/dataframe.py770
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/expr.py415
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/group_by.py78
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/namespace.py313
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/series.py757
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/typing.py22
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_polars/utils.py249
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/__init__.py0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/dataframe.py531
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/expr.py930
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_dt.py193
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_list.py14
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_str.py115
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_struct.py19
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/group_by.py35
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/namespace.py290
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/selectors.py29
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_spark_like/utils.py285
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_translate.py185
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_typing_compat.py76
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/_utils.py2010
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/dataframe.py3234
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/dependencies.py472
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/dtypes.py773
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/exceptions.py125
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr.py2544
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr_cat.py42
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr_dt.py784
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr_list.py47
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr_name.py161
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr_str.py449
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/expr_struct.py48
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/functions.py1793
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/group_by.py190
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/py.typed0
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/schema.py208
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/selectors.py352
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/series.py2665
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/series_cat.py30
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/series_dt.py683
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/series_list.py38
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/series_str.py400
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/series_struct.py33
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/__init__.py5
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py1875
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py135
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py10
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py65
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py77
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py23
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py209
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/this.py17
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/translate.py809
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/typing.py428
-rw-r--r--venv/lib/python3.8/site-packages/narwhals/utils.py6
136 files changed, 45011 insertions, 0 deletions
diff --git a/venv/lib/python3.8/site-packages/narwhals/__init__.py b/venv/lib/python3.8/site-packages/narwhals/__init__.py
new file mode 100644
index 0000000..a910f0e
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/__init__.py
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import typing as _t
+
+from narwhals import dependencies, dtypes, exceptions, selectors
+from narwhals._utils import (
+ Implementation,
+ generate_temporary_column_name,
+ is_ordered_categorical,
+ maybe_align_index,
+ maybe_convert_dtypes,
+ maybe_get_index,
+ maybe_reset_index,
+ maybe_set_index,
+)
+from narwhals.dataframe import DataFrame, LazyFrame
+from narwhals.dtypes import (
+ Array,
+ Binary,
+ Boolean,
+ Categorical,
+ Date,
+ Datetime,
+ Decimal,
+ Duration,
+ Enum,
+ Field,
+ Float32,
+ Float64,
+ Int8,
+ Int16,
+ Int32,
+ Int64,
+ Int128,
+ List,
+ Object,
+ String,
+ Struct,
+ Time,
+ UInt8,
+ UInt16,
+ UInt32,
+ UInt64,
+ UInt128,
+ Unknown,
+)
+from narwhals.expr import Expr
+from narwhals.functions import (
+ all_ as all,
+ all_horizontal,
+ any_horizontal,
+ col,
+ concat,
+ concat_str,
+ exclude,
+ from_arrow,
+ from_dict,
+ from_numpy,
+ get_level,
+ len_ as len,
+ lit,
+ max,
+ max_horizontal,
+ mean,
+ mean_horizontal,
+ median,
+ min,
+ min_horizontal,
+ new_series,
+ nth,
+ read_csv,
+ read_parquet,
+ scan_csv,
+ scan_parquet,
+ show_versions,
+ sum,
+ sum_horizontal,
+ when,
+)
+from narwhals.schema import Schema
+from narwhals.series import Series
+from narwhals.translate import (
+ from_native,
+ get_native_namespace,
+ narwhalify,
+ to_native,
+ to_py_scalar,
+)
+
+__version__: str
+
+__all__ = [
+ "Array",
+ "Binary",
+ "Boolean",
+ "Categorical",
+ "DataFrame",
+ "Date",
+ "Datetime",
+ "Decimal",
+ "Duration",
+ "Enum",
+ "Expr",
+ "Field",
+ "Float32",
+ "Float64",
+ "Implementation",
+ "Int8",
+ "Int16",
+ "Int32",
+ "Int64",
+ "Int128",
+ "LazyFrame",
+ "List",
+ "Object",
+ "Schema",
+ "Series",
+ "String",
+ "Struct",
+ "Time",
+ "UInt8",
+ "UInt16",
+ "UInt32",
+ "UInt64",
+ "UInt128",
+ "Unknown",
+ "all",
+ "all_horizontal",
+ "any_horizontal",
+ "col",
+ "concat",
+ "concat_str",
+ "dependencies",
+ "dtypes",
+ "exceptions",
+ "exclude",
+ "from_arrow",
+ "from_dict",
+ "from_native",
+ "from_numpy",
+ "generate_temporary_column_name",
+ "get_level",
+ "get_native_namespace",
+ "is_ordered_categorical",
+ "len",
+ "lit",
+ "max",
+ "max_horizontal",
+ "maybe_align_index",
+ "maybe_convert_dtypes",
+ "maybe_get_index",
+ "maybe_reset_index",
+ "maybe_set_index",
+ "mean",
+ "mean_horizontal",
+ "median",
+ "min",
+ "min_horizontal",
+ "narwhalify",
+ "new_series",
+ "nth",
+ "read_csv",
+ "read_parquet",
+ "scan_csv",
+ "scan_parquet",
+ "selectors",
+ "show_versions",
+ "sum",
+ "sum_horizontal",
+ "to_native",
+ "to_py_scalar",
+ "when",
+]
+
+
+def __getattr__(name: _t.Literal["__version__"]) -> str: # type: ignore[misc]
+ if name == "__version__":
+ global __version__ # noqa: PLW0603
+
+ from importlib import metadata
+
+ __version__ = metadata.version(__name__)
+ return __version__
+ else:
+ msg = f"module {__name__!r} has no attribute {name!r}"
+ raise AttributeError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/dataframe.py
new file mode 100644
index 0000000..19763b9
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/dataframe.py
@@ -0,0 +1,771 @@
+from __future__ import annotations
+
+from functools import partial
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Collection,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._arrow.series import ArrowSeries
+from narwhals._arrow.utils import align_series_full_broadcast, native_to_narwhals_dtype
+from narwhals._compliant import EagerDataFrame
+from narwhals._expression_parsing import ExprKind
+from narwhals._utils import (
+ Implementation,
+ Version,
+ check_column_names_are_unique,
+ convert_str_slice_to_int_slice,
+ generate_temporary_column_name,
+ not_implemented,
+ parse_columns_to_drop,
+ parse_version,
+ scale_bytes,
+ supports_arrow_c_stream,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_numpy_array_1d
+from narwhals.exceptions import ShapeError
+
+if TYPE_CHECKING:
+ from io import BytesIO
+ from pathlib import Path
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._arrow.expr import ArrowExpr
+ from narwhals._arrow.group_by import ArrowGroupBy
+ from narwhals._arrow.namespace import ArrowNamespace
+ from narwhals._arrow.typing import ( # type: ignore[attr-defined]
+ ChunkedArrayAny,
+ Mask,
+ Order,
+ )
+ from narwhals._compliant.typing import CompliantDataFrameAny, CompliantLazyFrameAny
+ from narwhals._translate import IntoArrowTable
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.typing import (
+ JoinStrategy,
+ SizedMultiIndexSelector,
+ SizedMultiNameSelector,
+ SizeUnit,
+ UniqueKeepStrategy,
+ _1DArray,
+ _2DArray,
+ _SliceIndex,
+ _SliceName,
+ )
+
+ JoinType: TypeAlias = Literal[
+ "left semi",
+ "right semi",
+ "left anti",
+ "right anti",
+ "inner",
+ "left outer",
+ "right outer",
+ "full outer",
+ ]
+ PromoteOptions: TypeAlias = Literal["none", "default", "permissive"]
+
+
+class ArrowDataFrame(EagerDataFrame["ArrowSeries", "ArrowExpr", "pa.Table"]):
+ def __init__(
+ self,
+ native_dataframe: pa.Table,
+ *,
+ backend_version: tuple[int, ...],
+ version: Version,
+ validate_column_names: bool,
+ ) -> None:
+ if validate_column_names:
+ check_column_names_are_unique(native_dataframe.column_names)
+ self._native_frame = native_dataframe
+ self._implementation = Implementation.PYARROW
+ self._backend_version = backend_version
+ self._version = version
+ validate_backend_version(self._implementation, self._backend_version)
+
+ @classmethod
+ def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self:
+ backend_version = context._backend_version
+ if cls._is_native(data):
+ native = data
+ elif backend_version >= (14,) or isinstance(data, Collection):
+ native = pa.table(data)
+ elif supports_arrow_c_stream(data): # pragma: no cover
+ msg = f"'pyarrow>=14.0.0' is required for `from_arrow` for object of type {type(data).__name__!r}."
+ raise ModuleNotFoundError(msg)
+ else: # pragma: no cover
+ msg = f"`from_arrow` is not supported for object of type {type(data).__name__!r}."
+ raise TypeError(msg)
+ return cls.from_native(native, context=context)
+
+ @classmethod
+ def from_dict(
+ cls,
+ data: Mapping[str, Any],
+ /,
+ *,
+ context: _FullContext,
+ schema: Mapping[str, DType] | Schema | None,
+ ) -> Self:
+ from narwhals.schema import Schema
+
+ pa_schema = Schema(schema).to_arrow() if schema is not None else schema
+ native = pa.Table.from_pydict(data, schema=pa_schema)
+ return cls.from_native(native, context=context)
+
+ @staticmethod
+ def _is_native(obj: pa.Table | Any) -> TypeIs[pa.Table]:
+ return isinstance(obj, pa.Table)
+
+ @classmethod
+ def from_native(cls, data: pa.Table, /, *, context: _FullContext) -> Self:
+ return cls(
+ data,
+ backend_version=context._backend_version,
+ version=context._version,
+ validate_column_names=True,
+ )
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: _2DArray,
+ /,
+ *,
+ context: _FullContext,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+ ) -> Self:
+ from narwhals.schema import Schema
+
+ arrays = [pa.array(val) for val in data.T]
+ if isinstance(schema, (Mapping, Schema)):
+ native = pa.Table.from_arrays(arrays, schema=Schema(schema).to_arrow())
+ else:
+ native = pa.Table.from_arrays(arrays, cls._numpy_column_names(data, schema))
+ return cls.from_native(native, context=context)
+
+ def __narwhals_namespace__(self) -> ArrowNamespace:
+ from narwhals._arrow.namespace import ArrowNamespace
+
+ return ArrowNamespace(
+ backend_version=self._backend_version, version=self._version
+ )
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation is Implementation.PYARROW:
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected pyarrow, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def __narwhals_dataframe__(self) -> Self:
+ return self
+
+ def __narwhals_lazyframe__(self) -> Self:
+ return self
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native,
+ backend_version=self._backend_version,
+ version=version,
+ validate_column_names=False,
+ )
+
+ def _with_native(self, df: pa.Table, *, validate_column_names: bool = True) -> Self:
+ return self.__class__(
+ df,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=validate_column_names,
+ )
+
+ @property
+ def shape(self) -> tuple[int, int]:
+ return self.native.shape
+
+ def __len__(self) -> int:
+ return len(self.native)
+
+ def row(self, index: int) -> tuple[Any, ...]:
+ return tuple(col[index] for col in self.native.itercolumns())
+
+ @overload
+ def rows(self, *, named: Literal[True]) -> list[dict[str, Any]]: ...
+
+ @overload
+ def rows(self, *, named: Literal[False]) -> list[tuple[Any, ...]]: ...
+
+ @overload
+ def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ...
+
+ def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]:
+ if not named:
+ return list(self.iter_rows(named=False, buffer_size=512)) # type: ignore[return-value]
+ return self.native.to_pylist()
+
+ def iter_columns(self) -> Iterator[ArrowSeries]:
+ for name, series in zip(self.columns, self.native.itercolumns()):
+ yield ArrowSeries.from_native(series, context=self, name=name)
+
+ _iter_columns = iter_columns
+
+ def iter_rows(
+ self, *, named: bool, buffer_size: int
+ ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]:
+ df = self.native
+ num_rows = df.num_rows
+
+ if not named:
+ for i in range(0, num_rows, buffer_size):
+ rows = df[i : i + buffer_size].to_pydict().values()
+ yield from zip(*rows)
+ else:
+ for i in range(0, num_rows, buffer_size):
+ yield from df[i : i + buffer_size].to_pylist()
+
+ def get_column(self, name: str) -> ArrowSeries:
+ if not isinstance(name, str):
+ msg = f"Expected str, got: {type(name)}"
+ raise TypeError(msg)
+ return ArrowSeries.from_native(self.native[name], context=self, name=name)
+
+ def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray:
+ return self.native.__array__(dtype, copy=copy)
+
+ def _gather(self, rows: SizedMultiIndexSelector[ChunkedArrayAny]) -> Self:
+ if len(rows) == 0:
+ return self._with_native(self.native.slice(0, 0))
+ if self._backend_version < (18,) and isinstance(rows, tuple):
+ rows = list(rows)
+ return self._with_native(self.native.take(rows))
+
+ def _gather_slice(self, rows: _SliceIndex | range) -> Self:
+ start = rows.start or 0
+ stop = rows.stop if rows.stop is not None else len(self.native)
+ if start < 0:
+ start = len(self.native) + start
+ if stop < 0:
+ stop = len(self.native) + stop
+ if rows.step is not None and rows.step != 1:
+ msg = "Slicing with step is not supported on PyArrow tables"
+ raise NotImplementedError(msg)
+ return self._with_native(self.native.slice(start, stop - start))
+
+ def _select_slice_name(self, columns: _SliceName) -> Self:
+ start, stop, step = convert_str_slice_to_int_slice(columns, self.columns)
+ return self._with_native(self.native.select(self.columns[start:stop:step]))
+
+ def _select_slice_index(self, columns: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.select(self.columns[columns.start : columns.stop : columns.step])
+ )
+
+ def _select_multi_index(
+ self, columns: SizedMultiIndexSelector[ChunkedArrayAny]
+ ) -> Self:
+ selector: Sequence[int]
+ if isinstance(columns, pa.ChunkedArray):
+ # TODO @dangotbanned: Fix upstream with `pa.ChunkedArray.to_pylist(self) -> list[Any]:`
+ selector = cast("Sequence[int]", columns.to_pylist())
+ # TODO @dangotbanned: Fix upstream, it is actually much narrower
+ # **Doesn't accept `ndarray`**
+ elif is_numpy_array_1d(columns):
+ selector = columns.tolist()
+ else:
+ selector = columns
+ return self._with_native(self.native.select(selector))
+
+ def _select_multi_name(
+ self, columns: SizedMultiNameSelector[ChunkedArrayAny]
+ ) -> Self:
+ selector: Sequence[str] | _1DArray
+ if isinstance(columns, pa.ChunkedArray):
+ # TODO @dangotbanned: Fix upstream with `pa.ChunkedArray.to_pylist(self) -> list[Any]:`
+ selector = cast("Sequence[str]", columns.to_pylist())
+ else:
+ selector = columns
+ # NOTE: Fixed in https://github.com/zen-xu/pyarrow-stubs/pull/221
+ return self._with_native(self.native.select(selector)) # pyright: ignore[reportArgumentType]
+
+ @property
+ def schema(self) -> dict[str, DType]:
+ schema = self.native.schema
+ return {
+ name: native_to_narwhals_dtype(dtype, self._version)
+ for name, dtype in zip(schema.names, schema.types)
+ }
+
+ def collect_schema(self) -> dict[str, DType]:
+ return self.schema
+
+ def estimated_size(self, unit: SizeUnit) -> int | float:
+ sz = self.native.nbytes
+ return scale_bytes(sz, unit)
+
+ explode = not_implemented()
+
+ @property
+ def columns(self) -> list[str]:
+ return self.native.column_names
+
+ def simple_select(self, *column_names: str) -> Self:
+ return self._with_native(
+ self.native.select(list(column_names)), validate_column_names=False
+ )
+
+ def select(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
+ new_series = self._evaluate_into_exprs(*exprs)
+ if not new_series:
+ # return empty dataframe, like Polars does
+ return self._with_native(
+ self.native.__class__.from_arrays([]), validate_column_names=False
+ )
+ names = [s.name for s in new_series]
+ reshaped = align_series_full_broadcast(*new_series)
+ df = pa.Table.from_arrays([s.native for s in reshaped], names=names)
+ return self._with_native(df, validate_column_names=True)
+
+ def _extract_comparand(self, other: ArrowSeries) -> ChunkedArrayAny:
+ length = len(self)
+ if not other._broadcast:
+ if (len_other := len(other)) != length:
+ msg = f"Expected object of length {length}, got: {len_other}."
+ raise ShapeError(msg)
+ return other.native
+
+ value = other.native[0]
+ return pa.chunked_array([pa.repeat(value, length)])
+
+ def with_columns(self: ArrowDataFrame, *exprs: ArrowExpr) -> ArrowDataFrame:
+ # NOTE: We use a faux-mutable variable and repeatedly "overwrite" (native_frame)
+ # All `pyarrow` data is immutable, so this is fine
+ native_frame = self.native
+ new_columns = self._evaluate_into_exprs(*exprs)
+ columns = self.columns
+
+ for col_value in new_columns:
+ col_name = col_value.name
+ column = self._extract_comparand(col_value)
+ native_frame = (
+ native_frame.set_column(columns.index(col_name), col_name, column=column)
+ if col_name in columns
+ else native_frame.append_column(col_name, column=column)
+ )
+
+ return self._with_native(native_frame, validate_column_names=False)
+
+ def group_by(
+ self, keys: Sequence[str] | Sequence[ArrowExpr], *, drop_null_keys: bool
+ ) -> ArrowGroupBy:
+ from narwhals._arrow.group_by import ArrowGroupBy
+
+ return ArrowGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
+ def join(
+ self,
+ other: Self,
+ *,
+ how: JoinStrategy,
+ left_on: Sequence[str] | None,
+ right_on: Sequence[str] | None,
+ suffix: str,
+ ) -> Self:
+ how_to_join_map: dict[str, JoinType] = {
+ "anti": "left anti",
+ "semi": "left semi",
+ "inner": "inner",
+ "left": "left outer",
+ "full": "full outer",
+ }
+
+ if how == "cross":
+ plx = self.__narwhals_namespace__()
+ key_token = generate_temporary_column_name(
+ n_bytes=8, columns=[*self.columns, *other.columns]
+ )
+
+ return self._with_native(
+ self.with_columns(
+ plx.lit(0, None).alias(key_token).broadcast(ExprKind.LITERAL)
+ )
+ .native.join(
+ other.with_columns(
+ plx.lit(0, None).alias(key_token).broadcast(ExprKind.LITERAL)
+ ).native,
+ keys=key_token,
+ right_keys=key_token,
+ join_type="inner",
+ right_suffix=suffix,
+ )
+ .drop([key_token])
+ )
+
+ coalesce_keys = how != "full" # polars full join does not coalesce keys
+ return self._with_native(
+ self.native.join(
+ other.native,
+ keys=left_on or [], # type: ignore[arg-type]
+ right_keys=right_on, # type: ignore[arg-type]
+ join_type=how_to_join_map[how],
+ right_suffix=suffix,
+ coalesce_keys=coalesce_keys,
+ )
+ )
+
+ join_asof = not_implemented()
+
+ def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
+ to_drop = parse_columns_to_drop(self, columns, strict=strict)
+ return self._with_native(self.native.drop(to_drop), validate_column_names=False)
+
+ def drop_nulls(self: ArrowDataFrame, subset: Sequence[str] | None) -> ArrowDataFrame:
+ if subset is None:
+ return self._with_native(self.native.drop_null(), validate_column_names=False)
+ plx = self.__narwhals_namespace__()
+ return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))
+
+ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
+ if isinstance(descending, bool):
+ order: Order = "descending" if descending else "ascending"
+ sorting: list[tuple[str, Order]] = [(key, order) for key in by]
+ else:
+ sorting = [
+ (key, "descending" if is_descending else "ascending")
+ for key, is_descending in zip(by, descending)
+ ]
+
+ null_placement = "at_end" if nulls_last else "at_start"
+
+ return self._with_native(
+ self.native.sort_by(sorting, null_placement=null_placement),
+ validate_column_names=False,
+ )
+
+ def to_pandas(self) -> pd.DataFrame:
+ return self.native.to_pandas()
+
+ def to_polars(self) -> pl.DataFrame:
+ import polars as pl # ignore-banned-import
+
+ return pl.from_arrow(self.native) # type: ignore[return-value]
+
+ def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
+ import numpy as np # ignore-banned-import
+
+ arr: Any = np.column_stack([col.to_numpy() for col in self.native.columns])
+ return arr
+
+ @overload
+ def to_dict(self, *, as_series: Literal[True]) -> dict[str, ArrowSeries]: ...
+
+ @overload
+ def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
+
+ def to_dict(
+ self, *, as_series: bool
+ ) -> dict[str, ArrowSeries] | dict[str, list[Any]]:
+ it = self.iter_columns()
+ if as_series:
+ return {ser.name: ser for ser in it}
+ return {ser.name: ser.to_list() for ser in it}
+
+ def with_row_index(self, name: str) -> Self:
+ df = self.native
+ cols = self.columns
+
+ row_indices = pa.array(range(df.num_rows))
+ return self._with_native(
+ df.append_column(name, row_indices).select([name, *cols])
+ )
+
+ def filter(
+ self: ArrowDataFrame, predicate: ArrowExpr | list[bool | None]
+ ) -> ArrowDataFrame:
+ if isinstance(predicate, list):
+ mask_native: Mask | ChunkedArrayAny = predicate
+ else:
+ # `[0]` is safe as the predicate's expression only returns a single column
+ mask_native = self._evaluate_into_exprs(predicate)[0].native
+ return self._with_native(
+ self.native.filter(mask_native), validate_column_names=False
+ )
+
+ def head(self, n: int) -> Self:
+ df = self.native
+ if n >= 0:
+ return self._with_native(df.slice(0, n), validate_column_names=False)
+ else:
+ num_rows = df.num_rows
+ return self._with_native(
+ df.slice(0, max(0, num_rows + n)), validate_column_names=False
+ )
+
+ def tail(self, n: int) -> Self:
+ df = self.native
+ if n >= 0:
+ num_rows = df.num_rows
+ return self._with_native(
+ df.slice(max(0, num_rows - n)), validate_column_names=False
+ )
+ else:
+ return self._with_native(df.slice(abs(n)), validate_column_names=False)
+
+ def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAny:
+ if backend is None:
+ return self
+ elif backend is Implementation.DUCKDB:
+ import duckdb # ignore-banned-import
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+
+ df = self.native # noqa: F841
+ return DuckDBLazyFrame(
+ duckdb.table("df"),
+ backend_version=parse_version(duckdb),
+ version=self._version,
+ )
+ elif backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsLazyFrame
+
+ return PolarsLazyFrame(
+ cast("pl.DataFrame", pl.from_arrow(self.native)).lazy(),
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+ elif backend is Implementation.DASK:
+ import dask # ignore-banned-import
+ import dask.dataframe as dd # ignore-banned-import
+
+ from narwhals._dask.dataframe import DaskLazyFrame
+
+ return DaskLazyFrame(
+ dd.from_pandas(self.native.to_pandas()),
+ backend_version=parse_version(dask),
+ version=self._version,
+ )
+ raise AssertionError # pragma: no cover
+
+ def collect(
+ self, backend: Implementation | None, **kwargs: Any
+ ) -> CompliantDataFrameAny:
+ if backend is Implementation.PYARROW or backend is None:
+ from narwhals._arrow.dataframe import ArrowDataFrame
+
+ return ArrowDataFrame(
+ self.native,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ if backend is Implementation.PANDAS:
+ import pandas as pd # ignore-banned-import
+
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ return PandasLikeDataFrame(
+ self.native.to_pandas(),
+ implementation=Implementation.PANDAS,
+ backend_version=parse_version(pd),
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ if backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame(
+ cast("pl.DataFrame", pl.from_arrow(self.native)),
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+
+ msg = f"Unsupported `backend` value: {backend}" # pragma: no cover
+ raise AssertionError(msg) # pragma: no cover
+
+ def clone(self) -> Self:
+ return self._with_native(self.native, validate_column_names=False)
+
+ def item(self, row: int | None, column: int | str | None) -> Any:
+ from narwhals._arrow.series import maybe_extract_py_scalar
+
+ if row is None and column is None:
+ if self.shape != (1, 1):
+ msg = (
+ "can only call `.item()` if the dataframe is of shape (1, 1),"
+ " or if explicit row/col values are provided;"
+ f" frame has shape {self.shape!r}"
+ )
+ raise ValueError(msg)
+ return maybe_extract_py_scalar(self.native[0][0], return_py_scalar=True)
+
+ elif row is None or column is None:
+ msg = "cannot call `.item()` with only one of `row` or `column`"
+ raise ValueError(msg)
+
+ _col = self.columns.index(column) if isinstance(column, str) else column
+ return maybe_extract_py_scalar(self.native[_col][row], return_py_scalar=True)
+
+ def rename(self, mapping: Mapping[str, str]) -> Self:
+ names: dict[str, str] | list[str]
+ if self._backend_version >= (17,):
+ names = cast("dict[str, str]", mapping)
+ else: # pragma: no cover
+ names = [mapping.get(c, c) for c in self.columns]
+ return self._with_native(self.native.rename_columns(names))
+
+ def write_parquet(self, file: str | Path | BytesIO) -> None:
+ import pyarrow.parquet as pp
+
+ pp.write_table(self.native, file)
+
+ @overload
+ def write_csv(self, file: None) -> str: ...
+
+ @overload
+ def write_csv(self, file: str | Path | BytesIO) -> None: ...
+
+ def write_csv(self, file: str | Path | BytesIO | None) -> str | None:
+ import pyarrow.csv as pa_csv
+
+ if file is None:
+ csv_buffer = pa.BufferOutputStream()
+ pa_csv.write_csv(self.native, csv_buffer)
+ return csv_buffer.getvalue().to_pybytes().decode()
+ pa_csv.write_csv(self.native, file)
+ return None
+
+ def is_unique(self) -> ArrowSeries:
+ col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns)
+ row_index = pa.array(range(len(self)))
+ keep_idx = (
+ self.native.append_column(col_token, row_index)
+ .group_by(self.columns)
+ .aggregate([(col_token, "min"), (col_token, "max")])
+ )
+ native = pa.chunked_array(
+ pc.and_(
+ pc.is_in(row_index, keep_idx[f"{col_token}_min"]),
+ pc.is_in(row_index, keep_idx[f"{col_token}_max"]),
+ )
+ )
+ return ArrowSeries.from_native(native, context=self)
+
+ def unique(
+ self: ArrowDataFrame,
+ subset: Sequence[str] | None,
+ *,
+ keep: UniqueKeepStrategy,
+ maintain_order: bool | None = None,
+ ) -> ArrowDataFrame:
+ # The param `maintain_order` is only here for compatibility with the Polars API
+ # and has no effect on the output.
+ import numpy as np # ignore-banned-import
+
+ if subset and (error := self._check_columns_exist(subset)):
+ raise error
+ subset = list(subset or self.columns)
+
+ if keep in {"any", "first", "last"}:
+ from narwhals._arrow.group_by import ArrowGroupBy
+
+ agg_func = ArrowGroupBy._REMAP_UNIQUE[keep]
+ col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns)
+ keep_idx_native = (
+ self.native.append_column(col_token, pa.array(np.arange(len(self))))
+ .group_by(subset)
+ .aggregate([(col_token, agg_func)])
+ .column(f"{col_token}_{agg_func}")
+ )
+ return self._with_native(
+ self.native.take(keep_idx_native), validate_column_names=False
+ )
+
+ keep_idx = self.simple_select(*subset).is_unique()
+ plx = self.__narwhals_namespace__()
+ return self.filter(plx._expr._from_series(keep_idx))
+
+ def gather_every(self, n: int, offset: int) -> Self:
+ return self._with_native(self.native[offset::n], validate_column_names=False)
+
+ def to_arrow(self) -> pa.Table:
+ return self.native
+
+ def sample(
+ self,
+ n: int | None,
+ *,
+ fraction: float | None,
+ with_replacement: bool,
+ seed: int | None,
+ ) -> Self:
+ import numpy as np # ignore-banned-import
+
+ num_rows = len(self)
+ if n is None and fraction is not None:
+ n = int(num_rows * fraction)
+ rng = np.random.default_rng(seed=seed)
+ idx = np.arange(0, num_rows)
+ mask = rng.choice(idx, size=n, replace=with_replacement)
+ return self._with_native(self.native.take(mask), validate_column_names=False)
+
+ def unpivot(
+ self,
+ on: Sequence[str] | None,
+ index: Sequence[str] | None,
+ variable_name: str,
+ value_name: str,
+ ) -> Self:
+ n_rows = len(self)
+ index_ = [] if index is None else index
+ on_ = [c for c in self.columns if c not in index_] if on is None else on
+ concat = (
+ partial(pa.concat_tables, promote_options="permissive")
+ if self._backend_version >= (14, 0, 0)
+ else pa.concat_tables
+ )
+ names = [*index_, variable_name, value_name]
+ return self._with_native(
+ concat(
+ [
+ pa.Table.from_arrays(
+ [
+ *(self.native.column(idx_col) for idx_col in index_),
+ cast(
+ "ChunkedArrayAny",
+ pa.array([on_col] * n_rows, pa.string()),
+ ),
+ self.native.column(on_col),
+ ],
+ names=names,
+ )
+ for on_col in on_
+ ]
+ )
+ )
+ # TODO(Unassigned): Even with promote_options="permissive", pyarrow does not
+ # upcast numeric to non-numeric (e.g. string) datatypes
+
+ pivot = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/expr.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/expr.py
new file mode 100644
index 0000000..af7993c
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/expr.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Sequence
+
+import pyarrow.compute as pc
+
+from narwhals._arrow.series import ArrowSeries
+from narwhals._compliant import EagerExpr
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._utils import (
+ Implementation,
+ generate_temporary_column_name,
+ not_implemented,
+)
+
+if TYPE_CHECKING:
+ from typing_extensions import Self
+
+ from narwhals._arrow.dataframe import ArrowDataFrame
+ from narwhals._arrow.namespace import ArrowNamespace
+ from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries, ScalarKwargs
+ from narwhals._expression_parsing import ExprMetadata
+ from narwhals._utils import Version, _FullContext
+ from narwhals.typing import RankMethod
+
+
class ArrowExpr(EagerExpr["ArrowDataFrame", ArrowSeries]):
    """Eager expression evaluated against an `ArrowDataFrame`.

    Wraps a callable mapping a dataframe to a list of `ArrowSeries`, plus the
    metadata needed to name/alias its outputs.
    """

    _implementation: Implementation = Implementation.PYARROW

    def __init__(
        self,
        call: EvalSeries[ArrowDataFrame, ArrowSeries],
        *,
        depth: int,
        function_name: str,
        evaluate_output_names: EvalNames[ArrowDataFrame],
        alias_output_names: AliasNames | None,
        backend_version: tuple[int, ...],
        version: Version,
        scalar_kwargs: ScalarKwargs | None = None,
        implementation: Implementation | None = None,
    ) -> None:
        # `implementation` is accepted for signature compatibility with other
        # backends; PyArrow expressions always use `Implementation.PYARROW`.
        self._call = call
        # Fixed: the original assigned `self._depth` twice; once is enough.
        self._depth = depth
        self._function_name = function_name
        self._evaluate_output_names = evaluate_output_names
        self._alias_output_names = alias_output_names
        self._backend_version = backend_version
        self._version = version
        self._scalar_kwargs = scalar_kwargs or {}
        self._metadata: ExprMetadata | None = None

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[ArrowDataFrame],
        /,
        *,
        context: _FullContext,
        function_name: str = "",
    ) -> Self:
        """Build an expression selecting columns by (lazily evaluated) names."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            try:
                return [
                    ArrowSeries(
                        df.native[column_name],
                        name=column_name,
                        backend_version=df._backend_version,
                        version=df._version,
                    )
                    for column_name in evaluate_column_names(df)
                ]
            except KeyError as e:
                # Prefer narwhals' friendlier "columns not found" error.
                if error := df._check_columns_exist(evaluate_column_names(df)):
                    raise error from e
                raise

        return cls(
            func,
            depth=0,
            function_name=function_name,
            evaluate_output_names=evaluate_column_names,
            alias_output_names=None,
            backend_version=context._backend_version,
            version=context._version,
        )

    @classmethod
    def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
        """Build an expression selecting columns by positional index."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            tbl = df.native
            cols = df.columns
            return [
                ArrowSeries.from_native(tbl[i], name=cols[i], context=df)
                for i in column_indices
            ]

        return cls(
            func,
            depth=0,
            function_name="nth",
            evaluate_output_names=cls._eval_names_indices(column_indices),
            alias_output_names=None,
            backend_version=context._backend_version,
            version=context._version,
        )

    def __narwhals_namespace__(self) -> ArrowNamespace:
        from narwhals._arrow.namespace import ArrowNamespace

        return ArrowNamespace(
            backend_version=self._backend_version, version=self._version
        )

    def __narwhals_expr__(self) -> None: ...

    def _reuse_series_extra_kwargs(
        self, *, returns_scalar: bool = False
    ) -> dict[str, Any]:
        return {"_return_py_scalar": False} if returns_scalar else {}

    def cum_sum(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_sum", reverse=reverse)

    def shift(self, n: int) -> Self:
        return self._reuse_series("shift", n=n)

    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
        """Windowed evaluation over `partition_by` groups / `order_by` ordering."""
        assert self._metadata is not None  # noqa: S101
        if partition_by and not self._metadata.is_scalar_like:
            msg = "Only aggregation or literal operations are supported in grouped `over` context for PyArrow."
            raise NotImplementedError(msg)

        if not partition_by:
            # e.g. `nw.col('a').cum_sum().order_by(key)`
            # which we can always easily support, as it doesn't require grouping.
            assert order_by  # noqa: S101

            def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]:
                token = generate_temporary_column_name(8, df.columns)
                df = df.with_row_index(token).sort(
                    *order_by, descending=False, nulls_last=False
                )
                result = self(df.drop([token], strict=True))
                # TODO(marco): is there a way to do this efficiently without
                # doing 2 sorts? Here we're sorting the dataframe and then
                # again calling `sort_indices`. `ArrowSeries.scatter` would also sort.
                sorting_indices = pc.sort_indices(df.get_column(token).native)
                return [s._with_native(s.native.take(sorting_indices)) for s in result]
        else:

            def func(df: ArrowDataFrame) -> Sequence[ArrowSeries]:
                output_names, aliases = evaluate_output_names_and_aliases(self, df, [])
                if overlap := set(output_names).intersection(partition_by):
                    # E.g. `df.select(nw.all().sum().over('a'))`. This is well-defined,
                    # we just don't support it yet.
                    msg = (
                        f"Column names {overlap} appear in both expression output names and in `over` keys.\n"
                        "This is not yet supported."
                    )
                    raise NotImplementedError(msg)

                tmp = df.group_by(partition_by, drop_null_keys=False).agg(self)
                tmp = df.simple_select(*partition_by).join(
                    tmp,
                    how="left",
                    left_on=partition_by,
                    right_on=partition_by,
                    suffix="_right",
                )
                return [tmp.get_column(alias) for alias in aliases]

        return self.__class__(
            func,
            depth=self._depth + 1,
            function_name=self._function_name + "->over",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )

    def cum_count(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_count", reverse=reverse)

    def cum_min(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_min", reverse=reverse)

    def cum_max(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_max", reverse=reverse)

    def cum_prod(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_prod", reverse=reverse)

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        return self._reuse_series("rank", method=method, descending=descending)

    def log(self, base: float) -> Self:
        return self._reuse_series("log", base=base)

    def exp(self) -> Self:
        return self._reuse_series("exp")

    ewm_mean = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/group_by.py
new file mode 100644
index 0000000..d61906a
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/group_by.py
@@ -0,0 +1,159 @@
+from __future__ import annotations
+
+import collections
+from typing import TYPE_CHECKING, Any, ClassVar, Iterator, Mapping, Sequence
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._arrow.utils import cast_to_comparable_string_types, extract_py_scalar
+from narwhals._compliant import EagerGroupBy
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._utils import generate_temporary_column_name
+
+if TYPE_CHECKING:
+ from narwhals._arrow.dataframe import ArrowDataFrame
+ from narwhals._arrow.expr import ArrowExpr
+ from narwhals._arrow.typing import ( # type: ignore[attr-defined]
+ AggregateOptions,
+ Aggregation,
+ Incomplete,
+ )
+ from narwhals._compliant.group_by import NarwhalsAggregation
+ from narwhals.typing import UniqueKeepStrategy
+
+
class ArrowGroupBy(EagerGroupBy["ArrowDataFrame", "ArrowExpr", "Aggregation"]):
    """Group-by implementation on top of `pa.TableGroupBy`."""

    # Narwhals aggregation name -> pyarrow aggregation name.
    _REMAP_AGGS: ClassVar[Mapping[NarwhalsAggregation, Aggregation]] = {
        "sum": "sum",
        "mean": "mean",
        "median": "approximate_median",
        "max": "max",
        "min": "min",
        "std": "stddev",
        "var": "variance",
        "len": "count",
        "n_unique": "count_distinct",
        "count": "count",
    }
    # `unique(keep=...)` strategy -> aggregation picking the surviving row index.
    _REMAP_UNIQUE: ClassVar[Mapping[UniqueKeepStrategy, Aggregation]] = {
        "any": "min",
        "first": "min",
        "last": "max",
    }

    def __init__(
        self,
        df: ArrowDataFrame,
        keys: Sequence[ArrowExpr] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        self._df = df
        frame, self._keys, self._output_key_names = self._parse_keys(df, keys=keys)
        self._compliant_frame = frame.drop_nulls(self._keys) if drop_null_keys else frame
        self._grouped = pa.TableGroupBy(self.compliant.native, self._keys)
        self._drop_null_keys = drop_null_keys

    def agg(self, *exprs: ArrowExpr) -> ArrowDataFrame:
        """Run simple aggregations and map pyarrow's output names back to aliases."""
        self._ensure_all_simple(exprs)
        aggs: list[tuple[str, Aggregation, AggregateOptions | None]] = []
        expected_pyarrow_column_names: list[str] = self._keys.copy()
        new_column_names: list[str] = self._keys.copy()
        exclude = (*self._keys, *self._output_key_names)

        for expr in exprs:
            output_names, aliases = evaluate_output_names_and_aliases(
                expr, self.compliant, exclude
            )

            if expr._depth == 0:
                # e.g. `agg(nw.len())`
                if expr._function_name != "len":  # pragma: no cover
                    msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
                    raise AssertionError(msg)

                new_column_names.append(aliases[0])
                expected_pyarrow_column_names.append(f"{self._keys[0]}_count")
                aggs.append((self._keys[0], "count", pc.CountOptions(mode="all")))
                continue

            function_name = self._leaf_name(expr)
            # Pick per-function pyarrow options before remapping the name.
            if function_name in {"std", "var"}:
                assert "ddof" in expr._scalar_kwargs  # noqa: S101
                option: Any = pc.VarianceOptions(ddof=expr._scalar_kwargs["ddof"])
            elif function_name in {"len", "n_unique"}:
                option = pc.CountOptions(mode="all")
            elif function_name == "count":
                option = pc.CountOptions(mode="only_valid")
            else:
                option = None

            function_name = self._remap_expr_name(function_name)
            new_column_names.extend(aliases)
            expected_pyarrow_column_names.extend(
                f"{output_name}_{function_name}" for output_name in output_names
            )
            aggs.extend(
                (output_name, function_name, option) for output_name in output_names
            )

        result_simple = self._grouped.aggregate(aggs)

        # Rename columns, being very careful: pyarrow may emit the aggregated
        # columns in a different order than requested, so match by name
        # (consuming duplicates left-to-right).
        expected_old_names_indices: dict[str, list[int]] = collections.defaultdict(list)
        for idx, item in enumerate(expected_pyarrow_column_names):
            expected_old_names_indices[item].append(idx)
        if not (
            set(result_simple.column_names) == set(expected_pyarrow_column_names)
            and len(result_simple.column_names) == len(expected_pyarrow_column_names)
        ):  # pragma: no cover
            msg = (
                f"Safety assertion failed, expected {expected_pyarrow_column_names} "
                f"got {result_simple.column_names}, "
                "please report a bug at https://github.com/narwhals-dev/narwhals/issues"
            )
            raise AssertionError(msg)
        index_map: list[int] = [
            expected_old_names_indices[item].pop(0) for item in result_simple.column_names
        ]
        new_column_names = [new_column_names[i] for i in index_map]
        result_simple = result_simple.rename_columns(new_column_names)
        if self.compliant._backend_version < (12, 0, 0):
            # Older pyarrow: ensure key columns come first.
            columns = result_simple.column_names
            result_simple = result_simple.select(
                [*self._keys, *[col for col in columns if col not in self._keys]]
            )

        return self.compliant._with_native(result_simple).rename(
            dict(zip(self._keys, self._output_key_names))
        )

    def __iter__(self) -> Iterator[tuple[Any, ArrowDataFrame]]:
        """Yield `(key_tuple, sub_frame)` pairs, one per distinct key combination."""
        col_token = generate_temporary_column_name(
            n_bytes=8, columns=self.compliant.columns
        )
        null_token: str = "__null_token_value__"  # noqa: S105

        table = self.compliant.native
        # Encode each row's key combination as a single string for partitioning.
        it, separator_scalar = cast_to_comparable_string_types(
            *(table[key] for key in self._keys), separator=""
        )
        # NOTE: stubs indicate `separator` must also be a `ChunkedArray`
        # Reality: `str` is fine
        concat_str: Incomplete = pc.binary_join_element_wise
        key_values = concat_str(
            *it, separator_scalar, null_handling="replace", null_replacement=null_token
        )
        table = table.add_column(i=0, field_=col_token, column=key_values)

        for v in pc.unique(key_values):
            t = self.compliant._with_native(
                table.filter(pc.equal(table[col_token], v)).drop([col_token])
            )
            row = t.simple_select(*self._keys).row(0)
            yield (
                tuple(extract_py_scalar(el) for el in row),
                t.simple_select(*self._df.columns),
            )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/namespace.py
new file mode 100644
index 0000000..02d4c69
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/namespace.py
@@ -0,0 +1,283 @@
+from __future__ import annotations
+
+import operator
+from functools import reduce
+from itertools import chain
+from typing import TYPE_CHECKING, Literal, Sequence
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._arrow.dataframe import ArrowDataFrame
+from narwhals._arrow.expr import ArrowExpr
+from narwhals._arrow.selectors import ArrowSelectorNamespace
+from narwhals._arrow.series import ArrowSeries
+from narwhals._arrow.utils import (
+ align_series_full_broadcast,
+ cast_to_comparable_string_types,
+)
+from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen
+from narwhals._expression_parsing import (
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._utils import Implementation
+
+if TYPE_CHECKING:
+ from narwhals._arrow.typing import Incomplete
+ from narwhals._utils import Version
+ from narwhals.typing import IntoDType, NonNestedLiteral
+
+
class ArrowNamespace(EagerNamespace[ArrowDataFrame, ArrowSeries, ArrowExpr, pa.Table]):
    """Expression-construction entry points for the PyArrow backend."""

    @property
    def _dataframe(self) -> type[ArrowDataFrame]:
        return ArrowDataFrame

    @property
    def _expr(self) -> type[ArrowExpr]:
        return ArrowExpr

    @property
    def _series(self) -> type[ArrowSeries]:
        return ArrowSeries

    # --- not in spec ---
    def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
        self._backend_version = backend_version
        self._implementation = Implementation.PYARROW
        self._version = version

    def len(self) -> ArrowExpr:
        """Expression returning the frame's row count as a single-element Series."""
        # coverage bug? this is definitely hit
        return self._expr(  # pragma: no cover
            lambda df: [
                ArrowSeries.from_iterable([len(df.native)], name="len", context=self)
            ],
            depth=0,
            function_name="len",
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            backend_version=self._backend_version,
            version=self._version,
        )

    def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> ArrowExpr:
        """Literal expression, optionally cast to `dtype`."""

        def _lit_arrow_series(_: ArrowDataFrame) -> ArrowSeries:
            series = ArrowSeries.from_iterable(
                data=[value], name="literal", context=self
            )
            return series.cast(dtype) if dtype else series

        return self._expr(
            lambda df: [_lit_arrow_series(df)],
            depth=0,
            function_name="lit",
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            backend_version=self._backend_version,
            version=self._version,
        )

    def all_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
        """Row-wise AND across all outputs of `exprs`."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            series = chain.from_iterable(expr(df) for expr in exprs)
            return [reduce(operator.and_, align_series_full_broadcast(*series))]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="all_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def any_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
        """Row-wise OR across all outputs of `exprs`."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            series = chain.from_iterable(expr(df) for expr in exprs)
            return [reduce(operator.or_, align_series_full_broadcast(*series))]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="any_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def sum_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
        """Row-wise sum across all outputs of `exprs`; nulls count as 0."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            it = chain.from_iterable(expr(df) for expr in exprs)
            filled = (s.fill_null(0, strategy=None, limit=None) for s in it)
            return [reduce(operator.add, align_series_full_broadcast(*filled))]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="sum_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def mean_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
        """Row-wise mean across all outputs of `exprs`, ignoring nulls."""
        int_64 = self._version.dtypes.Int64()

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            expr_results = list(chain.from_iterable(expr(df) for expr in exprs))
            # Numerator: null-filled values; denominator: count of non-null entries.
            series = align_series_full_broadcast(
                *(s.fill_null(0, strategy=None, limit=None) for s in expr_results)
            )
            non_na = align_series_full_broadcast(
                *(1 - s.is_null().cast(int_64) for s in expr_results)
            )
            return [reduce(operator.add, series) / reduce(operator.add, non_na)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="mean_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def min_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
        """Row-wise minimum across all outputs of `exprs`."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            init_series, *rest = list(chain.from_iterable(expr(df) for expr in exprs))
            init_series, *rest = align_series_full_broadcast(init_series, *rest)
            native_series = reduce(
                pc.min_element_wise, [s.native for s in rest], init_series.native
            )
            return [
                ArrowSeries(
                    native_series,
                    name=init_series.name,
                    backend_version=self._backend_version,
                    version=self._version,
                )
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="min_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def max_horizontal(self, *exprs: ArrowExpr) -> ArrowExpr:
        """Row-wise maximum across all outputs of `exprs`."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            init_series, *rest = list(chain.from_iterable(expr(df) for expr in exprs))
            init_series, *rest = align_series_full_broadcast(init_series, *rest)
            native_series = reduce(
                pc.max_element_wise, [s.native for s in rest], init_series.native
            )
            return [
                ArrowSeries(
                    native_series,
                    name=init_series.name,
                    backend_version=self._backend_version,
                    version=self._version,
                )
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="max_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def _concat_diagonal(self, dfs: Sequence[pa.Table], /) -> pa.Table:
        # `promote_options` only exists on pyarrow >= 14.
        if self._backend_version >= (14,):
            return pa.concat_tables(dfs, promote_options="default")
        return pa.concat_tables(dfs, promote=True)  # pragma: no cover

    def _concat_horizontal(self, dfs: Sequence[pa.Table], /) -> pa.Table:
        names = list(chain.from_iterable(df.column_names for df in dfs))
        arrays = list(chain.from_iterable(df.itercolumns() for df in dfs))
        return pa.Table.from_arrays(arrays, names=names)

    def _concat_vertical(self, dfs: Sequence[pa.Table], /) -> pa.Table:
        # All frames must share exactly the same column names, in order.
        cols_0 = dfs[0].column_names
        for i, df in enumerate(dfs[1:], start=1):
            cols_current = df.column_names
            if cols_current != cols_0:
                msg = (
                    "unable to vstack, column names don't match:\n"
                    f"   - dataframe 0: {cols_0}\n"
                    f"   - dataframe {i}: {cols_current}\n"
                )
                raise TypeError(msg)
        return pa.concat_tables(dfs)

    @property
    def selectors(self) -> ArrowSelectorNamespace:
        return ArrowSelectorNamespace.from_namespace(self)

    def when(self, predicate: ArrowExpr) -> ArrowWhen:
        return ArrowWhen.from_expr(predicate, context=self)

    def concat_str(
        self, *exprs: ArrowExpr, separator: str, ignore_nulls: bool
    ) -> ArrowExpr:
        """Row-wise string concatenation of all outputs of `exprs`."""

        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
            compliant_series_list = align_series_full_broadcast(
                *(chain.from_iterable(expr(df) for expr in exprs))
            )
            name = compliant_series_list[0].name
            null_handling: Literal["skip", "emit_null"] = (
                "skip" if ignore_nulls else "emit_null"
            )
            it, separator_scalar = cast_to_comparable_string_types(
                *(s.native for s in compliant_series_list), separator=separator
            )
            # NOTE: stubs indicate `separator` must also be a `ChunkedArray`
            # Reality: `str` is fine
            concat_str: Incomplete = pc.binary_join_element_wise
            compliant = self._series(
                concat_str(*it, separator_scalar, null_handling=null_handling),
                name=name,
                backend_version=self._backend_version,
                version=self._version,
            )
            return [compliant]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="concat_str",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
+
+
class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr]):
    """`when/then/otherwise` support for the PyArrow backend."""

    @property
    def _then(self) -> type[ArrowThen]:
        return ArrowThen

    def _if_then_else(
        self, when: ArrowSeries, then: ArrowSeries, otherwise: ArrowSeries | None, /
    ) -> ArrowSeries:
        # With no `otherwise`, unmatched rows become nulls of `then`'s type.
        if otherwise is None:
            when, then = align_series_full_broadcast(when, then)
            fallback = pa.nulls(len(when.native), then.native.type)
            result = pc.if_else(when.native, then.native, fallback)
        else:
            when, then, otherwise = align_series_full_broadcast(when, then, otherwise)
            result = pc.if_else(when.native, then.native, otherwise.native)
        return then._with_native(result)


class ArrowThen(CompliantThen[ArrowDataFrame, ArrowSeries, ArrowExpr], ArrowExpr): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/selectors.py
new file mode 100644
index 0000000..d72da05
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/selectors.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._arrow.expr import ArrowExpr
+from narwhals._compliant import CompliantSelector, EagerSelectorNamespace
+
+if TYPE_CHECKING:
+ from narwhals._arrow.dataframe import ArrowDataFrame # noqa: F401
+ from narwhals._arrow.series import ArrowSeries # noqa: F401
+
+
class ArrowSelectorNamespace(EagerSelectorNamespace["ArrowDataFrame", "ArrowSeries"]):
    """Column-selector namespace for the PyArrow backend."""

    @property
    def _selector(self) -> type[ArrowSelector]:
        return ArrowSelector


class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr):  # type: ignore[misc]
    """Selector expression, convertible to a plain `ArrowExpr`."""

    def _to_expr(self) -> ArrowExpr:
        # Copy all internal state into an equivalent plain expression.
        return ArrowExpr(
            self._call,
            depth=self._depth,
            function_name=self._function_name,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/series.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/series.py
new file mode 100644
index 0000000..0259620
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/series.py
@@ -0,0 +1,1183 @@
+from __future__ import annotations
+
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterable,
+ Iterator,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._arrow.series_cat import ArrowSeriesCatNamespace
+from narwhals._arrow.series_dt import ArrowSeriesDateTimeNamespace
+from narwhals._arrow.series_list import ArrowSeriesListNamespace
+from narwhals._arrow.series_str import ArrowSeriesStringNamespace
+from narwhals._arrow.series_struct import ArrowSeriesStructNamespace
+from narwhals._arrow.utils import (
+ cast_for_truediv,
+ chunked_array,
+ extract_native,
+ floordiv_compat,
+ lit,
+ narwhals_to_native_dtype,
+ native_to_narwhals_dtype,
+ nulls_like,
+ pad_series,
+)
+from narwhals._compliant import EagerSeries
+from narwhals._expression_parsing import ExprKind
+from narwhals._utils import (
+ Implementation,
+ generate_temporary_column_name,
+ is_list_of,
+ not_implemented,
+ requires,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_numpy_array_1d
+from narwhals.exceptions import InvalidOperationError
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._arrow.dataframe import ArrowDataFrame
+ from narwhals._arrow.namespace import ArrowNamespace
+ from narwhals._arrow.typing import ( # type: ignore[attr-defined]
+ ArrayAny,
+ ArrayOrChunkedArray,
+ ArrayOrScalar,
+ ChunkedArrayAny,
+ Incomplete,
+ NullPlacement,
+ Order,
+ TieBreaker,
+ _AsPyType,
+ _BasicDataType,
+ )
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ Into1DArray,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ PythonLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ SizedMultiIndexSelector,
+ TemporalLiteral,
+ _1DArray,
+ _2DArray,
+ _SliceIndex,
+ )
+
+
+# TODO @dangotbanned: move into `_arrow.utils`
+# Lots of modules are importing inline
+@overload
+def maybe_extract_py_scalar(
+    value: pa.Scalar[_BasicDataType[_AsPyType]],
+    return_py_scalar: bool,  # noqa: FBT001
+) -> _AsPyType: ...
+
+
+@overload
+def maybe_extract_py_scalar(
+    value: pa.Scalar[pa.StructType],
+    return_py_scalar: bool,  # noqa: FBT001
+) -> list[dict[str, Any]]: ...
+
+
+@overload
+def maybe_extract_py_scalar(
+    value: pa.Scalar[pa.ListType[_BasicDataType[_AsPyType]]],
+    return_py_scalar: bool,  # noqa: FBT001
+) -> list[_AsPyType]: ...
+
+
+@overload
+def maybe_extract_py_scalar(
+    value: pa.Scalar[Any] | Any,
+    return_py_scalar: bool,  # noqa: FBT001
+) -> Any: ...
+
+
+def maybe_extract_py_scalar(value: Any, return_py_scalar: bool) -> Any:  # noqa: FBT001
+    """Convert a pyarrow scalar to a Python object when `return_py_scalar` is set.
+
+    Non-scalar values (and anything without an `as_py` method) pass through
+    unchanged, so this is safe to call on plain Python values too.
+    """
+    if TYPE_CHECKING:
+        # Static-analysis-only branch: lets type checkers see the `as_py`
+        # return type without affecting runtime behaviour.
+        return value.as_py()
+    if return_py_scalar:
+        # Duck-typed: fall back to the value itself if there is no `as_py`.
+        return getattr(value, "as_py", lambda: value)()
+    return value
+
+
+class ArrowSeries(EagerSeries["ChunkedArrayAny"]):
+    def __init__(
+        self,
+        native_series: ChunkedArrayAny,
+        *,
+        name: str,
+        backend_version: tuple[int, ...],
+        version: Version,
+    ) -> None:
+        self._name = name
+        self._native_series: ChunkedArrayAny = native_series
+        self._implementation = Implementation.PYARROW
+        self._backend_version = backend_version
+        self._version = version
+        validate_backend_version(self._implementation, self._backend_version)
+        # Marks a length-1 series that should be broadcast against full-length
+        # columns when combined in a frame context.
+        self._broadcast = False
+
+    @property
+    def native(self) -> ChunkedArrayAny:
+        # The wrapped `pa.ChunkedArray`.
+        return self._native_series
+
+    def _with_version(self, version: Version) -> Self:
+        # Same data, different narwhals API version.
+        return self.__class__(
+            self.native,
+            name=self._name,
+            backend_version=self._backend_version,
+            version=version,
+        )
+
+    def _with_native(
+        self, series: ArrayOrScalar, *, preserve_broadcast: bool = False
+    ) -> Self:
+        # Wrap a fresh native result, keeping name/backend/version; opt in to
+        # carrying over the broadcast flag for length-preserving ops.
+        result = self.from_native(chunked_array(series), name=self.name, context=self)
+        if preserve_broadcast:
+            result._broadcast = self._broadcast
+        return result
+
+    @classmethod
+    def from_iterable(
+        cls,
+        data: Iterable[Any],
+        *,
+        context: _FullContext,
+        name: str = "",
+        dtype: IntoDType | None = None,
+    ) -> Self:
+        """Build a series from any iterable, optionally casting to `dtype`."""
+        version = context._version
+        dtype_pa = narwhals_to_native_dtype(dtype, version) if dtype else None
+        return cls.from_native(
+            chunked_array([data], dtype_pa), name=name, context=context
+        )
+
+    def _from_scalar(self, value: Any) -> Self:
+        # pyarrow < 13: unwrap the scalar first (older versions cannot build a
+        # chunked array directly from a pa.Scalar).
+        if self._backend_version < (13,) and hasattr(value, "as_py"):
+            value = value.as_py()
+        return super()._from_scalar(value)
+
+    @staticmethod
+    def _is_native(obj: ChunkedArrayAny | Any) -> TypeIs[ChunkedArrayAny]:
+        return isinstance(obj, pa.ChunkedArray)
+
+    @classmethod
+    def from_native(
+        cls, data: ChunkedArrayAny, /, *, context: _FullContext, name: str = ""
+    ) -> Self:
+        return cls(
+            data,
+            backend_version=context._backend_version,
+            version=context._version,
+            name=name,
+        )
+
+    @classmethod
+    def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
+        # 0-d input is wrapped in a list so it becomes a length-1 series.
+        return cls.from_iterable(
+            data if is_numpy_array_1d(data) else [data], context=context
+        )
+
+    def __narwhals_namespace__(self) -> ArrowNamespace:
+        from narwhals._arrow.namespace import ArrowNamespace
+
+        return ArrowNamespace(
+            backend_version=self._backend_version, version=self._version
+        )
+
+    # --- operators ---------------------------------------------------------
+    # `extract_native` unwraps `other` into a native array/scalar usable with
+    # `self.native` in a `pyarrow.compute` call (see `_arrow.utils`).
+
+    def __eq__(self, other: object) -> Self:  # type: ignore[override]
+        other = cast("PythonLiteral | ArrowSeries | None", other)
+        ser, rhs = extract_native(self, other)
+        return self._with_native(pc.equal(ser, rhs))
+
+    def __ne__(self, other: object) -> Self:  # type: ignore[override]
+        other = cast("PythonLiteral | ArrowSeries | None", other)
+        ser, rhs = extract_native(self, other)
+        return self._with_native(pc.not_equal(ser, rhs))
+
+    def __ge__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.greater_equal(ser, other))
+
+    def __gt__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.greater(ser, other))
+
+    def __le__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.less_equal(ser, other))
+
+    def __lt__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.less(ser, other))
+
+    # Boolean ops use Kleene (three-valued) logic so nulls propagate like
+    # Polars' `&`/`|`.
+    def __and__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.and_kleene(ser, other))  # type: ignore[arg-type]
+
+    def __rand__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.and_kleene(other, ser))  # type: ignore[arg-type]
+
+    def __or__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.or_kleene(ser, other))  # type: ignore[arg-type]
+
+    def __ror__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.or_kleene(other, ser))  # type: ignore[arg-type]
+
+    def __add__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.add(ser, other))
+
+    def __radd__(self, other: Any) -> Self:
+        return self + other
+
+    def __sub__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.subtract(ser, other))
+
+    def __rsub__(self, other: Any) -> Self:
+        # other - self == -(self - other)
+        return (self - other) * (-1)
+
+    def __mul__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.multiply(ser, other))
+
+    def __rmul__(self, other: Any) -> Self:
+        return self * other
+
+    def __pow__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.power(ser, other))
+
+    def __rpow__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.power(other, ser))
+
+    def __floordiv__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(floordiv_compat(ser, other))
+
+    def __rfloordiv__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(floordiv_compat(other, ser))
+
+    def __truediv__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        # `cast_for_truediv` promotes ints so division yields floats.
+        return self._with_native(pc.divide(*cast_for_truediv(ser, other)))  # type: ignore[type-var]
+
+    def __rtruediv__(self, other: Any) -> Self:
+        ser, other = extract_native(self, other)
+        return self._with_native(pc.divide(*cast_for_truediv(other, ser)))  # type: ignore[type-var]
+
+    def __mod__(self, other: Any) -> Self:
+        # a % b == a - (a // b) * b  (no native modulo in pyarrow.compute).
+        floor_div = (self // other).native
+        ser, other = extract_native(self, other)
+        res = pc.subtract(ser, pc.multiply(floor_div, other))
+        return self._with_native(res)
+
+    def __rmod__(self, other: Any) -> Self:
+        floor_div = (other // self).native
+        ser, other = extract_native(self, other)
+        res = pc.subtract(other, pc.multiply(floor_div, ser))
+        return self._with_native(res)
+
+    def __invert__(self) -> Self:
+        return self._with_native(pc.invert(self.native))
+
+    @property
+    def _type(self) -> pa.DataType:
+        # Native arrow type of the underlying chunked array.
+        return self.native.type
+
+    # --- scalar reductions --------------------------------------------------
+    # The `_return_py_scalar` flag lets internal callers keep results as
+    # pyarrow scalars; the public API extracts Python values.
+
+    def len(self, *, _return_py_scalar: bool = True) -> int:
+        return maybe_extract_py_scalar(len(self.native), _return_py_scalar)
+
+    def filter(self, predicate: ArrowSeries | list[bool | None]) -> Self:
+        other_native: Any
+        if not is_list_of(predicate, bool):
+            _, other_native = extract_native(self, predicate)
+        else:
+            # Plain list of bools can be passed straight to pyarrow.
+            other_native = predicate
+        return self._with_native(self.native.filter(other_native))
+
+    def mean(self, *, _return_py_scalar: bool = True) -> float:
+        return maybe_extract_py_scalar(pc.mean(self.native), _return_py_scalar)
+
+    def median(self, *, _return_py_scalar: bool = True) -> float:
+        from narwhals.exceptions import InvalidOperationError
+
+        if not self.dtype.is_numeric():
+            msg = "`median` operation not supported for non-numeric input type."
+            raise InvalidOperationError(msg)
+
+        # NOTE: pyarrow only offers an approximate (t-digest) median.
+        return maybe_extract_py_scalar(
+            pc.approximate_median(self.native), _return_py_scalar
+        )
+
+    def min(self, *, _return_py_scalar: bool = True) -> Any:
+        return maybe_extract_py_scalar(pc.min(self.native), _return_py_scalar)
+
+    def max(self, *, _return_py_scalar: bool = True) -> Any:
+        return maybe_extract_py_scalar(pc.max(self.native), _return_py_scalar)
+
+    def arg_min(self, *, _return_py_scalar: bool = True) -> int:
+        # Index of the first occurrence of the minimum.
+        index_min = pc.index(self.native, pc.min(self.native))
+        return maybe_extract_py_scalar(index_min, _return_py_scalar)
+
+    def arg_max(self, *, _return_py_scalar: bool = True) -> int:
+        index_max = pc.index(self.native, pc.max(self.native))
+        return maybe_extract_py_scalar(index_max, _return_py_scalar)
+
+    def sum(self, *, _return_py_scalar: bool = True) -> float:
+        # min_count=0 makes the sum of an all-null/empty series 0, not null.
+        return maybe_extract_py_scalar(
+            pc.sum(self.native, min_count=0), _return_py_scalar
+        )
+
+    def drop_nulls(self) -> Self:
+        return self._with_native(self.native.drop_null())
+
+    def shift(self, n: int) -> Self:
+        # Pad one end with nulls and trim the other; n == 0 is a no-op.
+        if n > 0:
+            arrays = [nulls_like(n, self), *self.native[:-n].chunks]
+        elif n < 0:
+            arrays = [*self.native[-n:].chunks, nulls_like(-n, self)]
+        else:
+            return self._with_native(self.native)
+        return self._with_native(pa.concat_arrays(arrays))
+
+    def std(self, ddof: int, *, _return_py_scalar: bool = True) -> float:
+        return maybe_extract_py_scalar(
+            pc.stddev(self.native, ddof=ddof), _return_py_scalar
+        )
+
+    def var(self, ddof: int, *, _return_py_scalar: bool = True) -> float:
+        return maybe_extract_py_scalar(
+            pc.variance(self.native, ddof=ddof), _return_py_scalar
+        )
+
+    def skew(self, *, _return_py_scalar: bool = True) -> float | None:
+        # Degenerate cases follow the convention: empty -> None,
+        # single value -> NaN, two values -> 0.
+        ser_not_null = self.native.drop_null()
+        if len(ser_not_null) == 0:
+            return None
+        elif len(ser_not_null) == 1:
+            return float("nan")
+        elif len(ser_not_null) == 2:
+            return 0.0
+        else:
+            # Biased population skewness: m3 / m2^1.5.
+            m = pc.subtract(ser_not_null, pc.mean(ser_not_null))
+            m2 = pc.mean(pc.power(m, lit(2)))
+            m3 = pc.mean(pc.power(m, lit(3)))
+            biased_population_skewness = pc.divide(m3, pc.power(m2, lit(1.5)))
+            return maybe_extract_py_scalar(biased_population_skewness, _return_py_scalar)
+
+    def count(self, *, _return_py_scalar: bool = True) -> int:
+        return maybe_extract_py_scalar(pc.count(self.native), _return_py_scalar)
+
+    def n_unique(self, *, _return_py_scalar: bool = True) -> int:
+        # mode="all" counts nulls as a distinct value.
+        return maybe_extract_py_scalar(
+            pc.count(self.native.unique(), mode="all"), _return_py_scalar
+        )
+
+    def __native_namespace__(self) -> ModuleType:
+        if self._implementation is Implementation.PYARROW:
+            return self._implementation.to_native_namespace()
+
+        msg = f"Expected pyarrow, got: {type(self._implementation)}"  # pragma: no cover
+        raise AssertionError(msg)
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def _gather(self, rows: SizedMultiIndexSelector[ChunkedArrayAny]) -> Self:
+        # Empty selector -> empty series of the same dtype.
+        if len(rows) == 0:
+            return self._with_native(self.native.slice(0, 0))
+        # pyarrow < 18 cannot `take` from a tuple of indices.
+        if self._backend_version < (18,) and isinstance(rows, tuple):
+            rows = list(rows)
+        return self._with_native(self.native.take(rows))
+
+    def _gather_slice(self, rows: _SliceIndex | range) -> Self:
+        # Normalise negative bounds relative to the series length.
+        start = rows.start or 0
+        stop = rows.stop if rows.stop is not None else len(self.native)
+        if start < 0:
+            start = len(self.native) + start
+        if stop < 0:
+            stop = len(self.native) + stop
+        if rows.step is not None and rows.step != 1:
+            msg = "Slicing with step is not supported on PyArrow tables"
+            raise NotImplementedError(msg)
+        return self._with_native(self.native.slice(start, stop - start))
+
+    def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
+        """Return a copy with `values` written at `indices` (out-of-place)."""
+        import numpy as np  # ignore-banned-import
+
+        values_native: ArrayAny
+        if isinstance(indices, int):
+            indices_native = pa.array([indices])
+            values_native = pa.array([values])
+        else:
+            # TODO(unassigned): we may also want to let `indices` be a Series.
+            # https://github.com/narwhals-dev/narwhals/issues/2155
+            indices_native = pa.array(indices)
+            if isinstance(values, self.__class__):
+                values_native = values.native.combine_chunks()
+            else:
+                # NOTE: Requires fixes in https://github.com/zen-xu/pyarrow-stubs/pull/209
+                pa_array: Incomplete = pa.array
+                values_native = pa_array(values)
+
+        # Sort indices (and values in lockstep) so that the boolean mask and
+        # the replacement order produced below line up positionally.
+        sorting_indices = pc.sort_indices(indices_native)
+        indices_native = indices_native.take(sorting_indices)
+        values_native = values_native.take(sorting_indices)
+
+        mask: _1DArray = np.zeros(self.len(), dtype=bool)
+        mask[indices_native] = True
+        # NOTE: Multiple issues
+        # - Missing `values` type
+        # - `mask` accepts a `np.ndarray`, but not mentioned in stubs
+        # - Missing `replacements` type
+        # - Missing return type
+        pc_replace_with_mask: Incomplete = pc.replace_with_mask
+        return self._with_native(
+            pc_replace_with_mask(self.native, mask, values_native.take(indices_native))
+        )
+
+    def to_list(self) -> list[Any]:
+        return self.native.to_pylist()
+
+    def __array__(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
+        return self.native.__array__(dtype=dtype, copy=copy)
+
+    def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
+        # `dtype`/`copy` exist for interface compatibility; pyarrow chooses.
+        return self.native.to_numpy()
+
+    def alias(self, name: str) -> Self:
+        # Rename without copying data; broadcast flag is preserved.
+        result = self.__class__(
+            self.native,
+            name=name,
+            backend_version=self._backend_version,
+            version=self._version,
+        )
+        result._broadcast = self._broadcast
+        return result
+
+    @property
+    def dtype(self) -> DType:
+        # Narwhals dtype corresponding to the native arrow type.
+        return native_to_narwhals_dtype(self.native.type, self._version)
+
+    def abs(self) -> Self:
+        return self._with_native(pc.abs(self.native))
+
+    def cum_sum(self, *, reverse: bool) -> Self:
+        # Reverse variant: flip, accumulate, flip back.
+        cum_sum = pc.cumulative_sum
+        result = (
+            cum_sum(self.native, skip_nulls=True)
+            if not reverse
+            else cum_sum(self.native[::-1], skip_nulls=True)[::-1]
+        )
+        return self._with_native(result)
+
+    def round(self, decimals: int) -> Self:
+        # half_towards_infinity matches Polars' rounding of .5 away from zero.
+        return self._with_native(
+            pc.round(self.native, decimals, round_mode="half_towards_infinity")
+        )
+
+    def diff(self) -> Self:
+        # `pairwise_diff` requires a contiguous array, hence combine_chunks.
+        return self._with_native(pc.pairwise_diff(self.native.combine_chunks()))
+
+    def any(self, *, _return_py_scalar: bool = True) -> bool:
+        # min_count=0: empty/all-null input yields False rather than null.
+        return maybe_extract_py_scalar(
+            pc.any(self.native, min_count=0), _return_py_scalar
+        )
+
+    def all(self, *, _return_py_scalar: bool = True) -> bool:
+        return maybe_extract_py_scalar(
+            pc.all(self.native, min_count=0), _return_py_scalar
+        )
+
+    def is_between(
+        self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
+    ) -> Self:
+        """Elementwise bound check; `closed` picks which ends are inclusive."""
+        _, lower_bound = extract_native(self, lower_bound)
+        _, upper_bound = extract_native(self, upper_bound)
+        if closed == "left":
+            ge = pc.greater_equal(self.native, lower_bound)
+            lt = pc.less(self.native, upper_bound)
+            res = pc.and_kleene(ge, lt)
+        elif closed == "right":
+            gt = pc.greater(self.native, lower_bound)
+            le = pc.less_equal(self.native, upper_bound)
+            res = pc.and_kleene(gt, le)
+        elif closed == "none":
+            gt = pc.greater(self.native, lower_bound)
+            lt = pc.less(self.native, upper_bound)
+            res = pc.and_kleene(gt, lt)
+        elif closed == "both":
+            ge = pc.greater_equal(self.native, lower_bound)
+            le = pc.less_equal(self.native, upper_bound)
+            res = pc.and_kleene(ge, le)
+        else:  # pragma: no cover
+            raise AssertionError
+        return self._with_native(res)
+
+    def is_null(self) -> Self:
+        return self._with_native(self.native.is_null(), preserve_broadcast=True)
+
+    def is_nan(self) -> Self:
+        return self._with_native(pc.is_nan(self.native), preserve_broadcast=True)
+
+    def cast(self, dtype: IntoDType) -> Self:
+        data_type = narwhals_to_native_dtype(dtype, self._version)
+        return self._with_native(pc.cast(self.native, data_type), preserve_broadcast=True)
+
+    def null_count(self, *, _return_py_scalar: bool = True) -> int:
+        return maybe_extract_py_scalar(self.native.null_count, _return_py_scalar)
+
+    def head(self, n: int) -> Self:
+        # Negative n: drop |n| rows from the end (Polars semantics).
+        if n >= 0:
+            return self._with_native(self.native.slice(0, n))
+        else:
+            num_rows = len(self)
+            return self._with_native(self.native.slice(0, max(0, num_rows + n)))
+
+    def tail(self, n: int) -> Self:
+        # Negative n: drop |n| rows from the start.
+        if n >= 0:
+            num_rows = len(self)
+            return self._with_native(self.native.slice(max(0, num_rows - n)))
+        else:
+            return self._with_native(self.native.slice(abs(n)))
+
+    def is_in(self, other: Any) -> Self:
+        if self._is_native(other):
+            value_set: ArrayOrChunkedArray = other
+        else:
+            value_set = pa.array(other)
+        return self._with_native(pc.is_in(self.native, value_set=value_set))
+
+    def arg_true(self) -> Self:
+        """Indices where the (boolean) series is True."""
+        import numpy as np  # ignore-banned-import
+
+        res = np.flatnonzero(self.native)
+        return self.from_iterable(res, name=self.name, context=self)
+
+    def item(self, index: int | None = None) -> Any:
+        # Without an index, the series must have exactly one element.
+        if index is None:
+            if len(self) != 1:
+                msg = (
+                    "can only call '.item()' if the Series is of length 1,"
+                    f" or an explicit index is provided (Series is of length {len(self)})"
+                )
+                raise ValueError(msg)
+            return maybe_extract_py_scalar(self.native[0], return_py_scalar=True)
+        return maybe_extract_py_scalar(self.native[index], return_py_scalar=True)
+
+    def value_counts(
+        self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
+    ) -> ArrowDataFrame:
+        """Parallel is unused, exists for compatibility."""
+        from narwhals._arrow.dataframe import ArrowDataFrame
+
+        index_name_ = "index" if self._name is None else self._name
+        value_name_ = name or ("proportion" if normalize else "count")
+
+        val_counts = pc.value_counts(self.native)
+        values = val_counts.field("values")
+        counts = cast("ChunkedArrayAny", val_counts.field("counts"))
+
+        if normalize:
+            # Proportions: counts / total (with int-to-float promotion).
+            arrays = [values, pc.divide(*cast_for_truediv(counts, pc.sum(counts)))]
+        else:
+            arrays = [values, counts]
+
+        val_count = pa.Table.from_arrays(arrays, names=[index_name_, value_name_])
+
+        if sort:
+            val_count = val_count.sort_by([(value_name_, "descending")])
+
+        return ArrowDataFrame(
+            val_count,
+            backend_version=self._backend_version,
+            version=self._version,
+            validate_column_names=True,
+        )
+
+    def zip_with(self, mask: Self, other: Self) -> Self:
+        # Elementwise: self where mask is True, else other.
+        cond = mask.native.combine_chunks()
+        return self._with_native(pc.if_else(cond, self.native, other.native))
+
+    def sample(
+        self,
+        n: int | None,
+        *,
+        fraction: float | None,
+        with_replacement: bool,
+        seed: int | None,
+    ) -> Self:
+        import numpy as np  # ignore-banned-import
+
+        num_rows = len(self)
+        if n is None and fraction is not None:
+            n = int(num_rows * fraction)
+
+        rng = np.random.default_rng(seed=seed)
+        idx = np.arange(0, num_rows)
+        mask = rng.choice(idx, size=n, replace=with_replacement)
+        return self._with_native(self.native.take(mask))
+
+    def fill_null(
+        self,
+        value: Self | NonNestedLiteral,
+        strategy: FillNullStrategy | None,
+        limit: int | None,
+    ) -> Self:
+        """Fill nulls with a value, or by forward/backward propagation.
+
+        Exactly one of `value`/`strategy` is expected to be set (validated by
+        the caller); `limit` bounds how far a strategy fill may propagate.
+        """
+        import numpy as np  # ignore-banned-import
+
+        def fill_aux(
+            arr: ChunkedArrayAny, limit: int, direction: FillNullStrategy | None
+        ) -> ArrayAny:
+            # this algorithm first finds the indices of the valid values to fill all the null value positions
+            # then it calculates the distance of each new index and the original index
+            # if the distance is equal to or less than the limit and the original value is null, it is replaced
+            valid_mask = pc.is_valid(arr)
+            indices = pa.array(np.arange(len(arr)), type=pa.int64())
+            if direction == "forward":
+                valid_index = np.maximum.accumulate(np.where(valid_mask, indices, -1))
+                distance = indices - valid_index
+            else:
+                valid_index = np.minimum.accumulate(
+                    np.where(valid_mask[::-1], indices[::-1], len(arr))
+                )[::-1]
+                distance = valid_index - indices
+            return pc.if_else(
+                pc.and_(pc.is_null(arr), pc.less_equal(distance, lit(limit))),  # pyright: ignore[reportArgumentType, reportCallIssue]
+                arr.take(valid_index),
+                arr,
+            )
+
+        if value is not None:
+            _, native_value = extract_native(self, value)
+            series: ArrayOrScalar = pc.fill_null(self.native, native_value)
+        elif limit is None:
+            # Unlimited propagation: pyarrow has dedicated kernels.
+            fill_func = (
+                pc.fill_null_forward if strategy == "forward" else pc.fill_null_backward
+            )
+            series = fill_func(self.native)
+        else:
+            series = fill_aux(self.native, limit, strategy)
+        return self._with_native(series, preserve_broadcast=True)
+
+    def to_frame(self) -> ArrowDataFrame:
+        # Single-column frame wrapping this series.
+        from narwhals._arrow.dataframe import ArrowDataFrame
+
+        df = pa.Table.from_arrays([self.native], names=[self.name])
+        return ArrowDataFrame(
+            df,
+            backend_version=self._backend_version,
+            version=self._version,
+            validate_column_names=False,
+        )
+
+    def to_pandas(self) -> pd.Series[Any]:
+        import pandas as pd  # ignore-banned-import()
+
+        return pd.Series(self.native, name=self.name)
+
+    def to_polars(self) -> pl.Series:
+        import polars as pl  # ignore-banned-import
+
+        return cast("pl.Series", pl.from_arrow(self.native))
+
+    def is_unique(self) -> ArrowSeries:
+        # Delegate to the frame-level implementation.
+        return self.to_frame().is_unique().alias(self.name)
+
+    def is_first_distinct(self) -> Self:
+        # Group by value, take the minimum row number per group, then mark
+        # rows whose index is such a minimum.
+        import numpy as np  # ignore-banned-import
+
+        row_number = pa.array(np.arange(len(self)))
+        col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
+        first_distinct_index = (
+            pa.Table.from_arrays([self.native], names=[self.name])
+            .append_column(col_token, row_number)
+            .group_by(self.name)
+            .aggregate([(col_token, "min")])
+            .column(f"{col_token}_min")
+        )
+
+        return self._with_native(pc.is_in(row_number, first_distinct_index))
+
+    def is_last_distinct(self) -> Self:
+        # Mirror of `is_first_distinct`, keeping the max row number per group.
+        import numpy as np  # ignore-banned-import
+
+        row_number = pa.array(np.arange(len(self)))
+        col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
+        last_distinct_index = (
+            pa.Table.from_arrays([self.native], names=[self.name])
+            .append_column(col_token, row_number)
+            .group_by(self.name)
+            .aggregate([(col_token, "max")])
+            .column(f"{col_token}_max")
+        )
+
+        return self._with_native(pc.is_in(row_number, last_distinct_index))
+
+    def is_sorted(self, *, descending: bool) -> bool:
+        if not isinstance(descending, bool):
+            msg = f"argument 'descending' should be boolean, found {type(descending)}"
+            raise TypeError(msg)
+        # Compare each element with its successor.
+        if descending:
+            result = pc.all(pc.greater_equal(self.native[:-1], self.native[1:]))
+        else:
+            result = pc.all(pc.less_equal(self.native[:-1], self.native[1:]))
+        return maybe_extract_py_scalar(result, return_py_scalar=True)
+
+    def unique(self, *, maintain_order: bool) -> Self:
+        # TODO(marco): `pc.unique` seems to always maintain order, is that guaranteed?
+        return self._with_native(self.native.unique())
+
+ def replace_strict(
+ self,
+ old: Sequence[Any] | Mapping[Any, Any],
+ new: Sequence[Any],
+ *,
+ return_dtype: IntoDType | None,
+ ) -> Self:
+ # https://stackoverflow.com/a/79111029/4451315
+ idxs = pc.index_in(self.native, pa.array(old))
+ result_native = pc.take(pa.array(new), idxs)
+ if return_dtype is not None:
+ result_native.cast(narwhals_to_native_dtype(return_dtype, self._version))
+ result = self._with_native(result_native)
+ if result.is_null().sum() != self.is_null().sum():
+ msg = (
+ "replace_strict did not replace all non-null values.\n\n"
+ "The following did not get replaced: "
+ f"{self.filter(~self.is_null() & result.is_null()).unique(maintain_order=False).to_list()}"
+ )
+ raise ValueError(msg)
+ return result
+
+    def sort(self, *, descending: bool, nulls_last: bool) -> Self:
+        order: Order = "descending" if descending else "ascending"
+        null_placement: NullPlacement = "at_end" if nulls_last else "at_start"
+        sorted_indices = pc.array_sort_indices(
+            self.native, order=order, null_placement=null_placement
+        )
+        return self._with_native(self.native.take(sorted_indices))
+
+    def to_dummies(self, *, separator: str, drop_first: bool) -> ArrowDataFrame:
+        """One-hot encode the series into a frame of 0/1 int8 columns."""
+        import numpy as np  # ignore-banned-import
+
+        from narwhals._arrow.dataframe import ArrowDataFrame
+
+        name = self._name
+        # NOTE: stub is missing attributes (https://arrow.apache.org/docs/python/generated/pyarrow.DictionaryArray.html)
+        da: Incomplete = self.native.combine_chunks().dictionary_encode("encode")
+
+        # One row per category, one column per input element; scatter 1s at
+        # (category-index, position), then transpose via from_arrays.
+        columns: _2DArray = np.zeros((len(da.dictionary), len(da)), np.int8)
+        columns[da.indices, np.arange(len(da))] = 1
+        # Arrow renders missing values as "None"; Polars names that column "null".
+        null_col_pa, null_col_pl = f"{name}{separator}None", f"{name}{separator}null"
+        cols = [
+            {null_col_pa: null_col_pl}.get(
+                f"{name}{separator}{v}", f"{name}{separator}{v}"
+            )
+            for v in da.dictionary
+        ]
+
+        # Null column (if any) first, remaining categories sorted; drop_first
+        # removes the first non-null category.
+        output_order = (
+            [
+                null_col_pl,
+                *sorted([c for c in cols if c != null_col_pl])[int(drop_first) :],
+            ]
+            if null_col_pl in cols
+            else sorted(cols)[int(drop_first) :]
+        )
+        return ArrowDataFrame(
+            pa.Table.from_arrays(columns, names=cols),
+            backend_version=self._backend_version,
+            version=self._version,
+            validate_column_names=True,
+        ).simple_select(*output_order)
+
+    def quantile(
+        self,
+        quantile: float,
+        interpolation: RollingInterpolationMethod,
+        *,
+        _return_py_scalar: bool = True,
+    ) -> float:
+        return maybe_extract_py_scalar(
+            pc.quantile(self.native, q=quantile, interpolation=interpolation)[0],
+            _return_py_scalar,
+        )
+
+    def gather_every(self, n: int, offset: int = 0) -> Self:
+        # Every n-th element starting at `offset`.
+        return self._with_native(self.native[offset::n])
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None,
+ ) -> Self:
+ _, lower = extract_native(self, lower_bound) if lower_bound else (None, None)
+ _, upper = extract_native(self, upper_bound) if upper_bound else (None, None)
+
+ if lower is None:
+ return self._with_native(pc.min_element_wise(self.native, upper))
+ if upper is None:
+ return self._with_native(pc.max_element_wise(self.native, lower))
+ return self._with_native(
+ pc.max_element_wise(pc.min_element_wise(self.native, upper), lower)
+ )
+
+    def to_arrow(self) -> ArrayAny:
+        # Contiguous (single-chunk) arrow array.
+        return self.native.combine_chunks()
+
+    def mode(self) -> ArrowSeries:
+        # Values whose count equals the maximum count.
+        plx = self.__narwhals_namespace__()
+        col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name])
+        counts = self.value_counts(
+            name=col_token, normalize=False, sort=False, parallel=False
+        )
+        return counts.filter(
+            plx.col(col_token)
+            == plx.col(col_token).max().broadcast(kind=ExprKind.AGGREGATION)
+        ).get_column(self.name)
+
+    def is_finite(self) -> Self:
+        return self._with_native(pc.is_finite(self.native))
+
+    def cum_count(self, *, reverse: bool) -> Self:
+        # Running count of non-null values.
+        dtypes = self._version.dtypes
+        return (~self.is_null()).cast(dtypes.UInt32()).cum_sum(reverse=reverse)
+
+    # The cumulative min/max/prod kernels were added in pyarrow 13.
+    @requires.backend_version((13,))
+    def cum_min(self, *, reverse: bool) -> Self:
+        result = (
+            pc.cumulative_min(self.native, skip_nulls=True)
+            if not reverse
+            else pc.cumulative_min(self.native[::-1], skip_nulls=True)[::-1]
+        )
+        return self._with_native(result)
+
+    @requires.backend_version((13,))
+    def cum_max(self, *, reverse: bool) -> Self:
+        result = (
+            pc.cumulative_max(self.native, skip_nulls=True)
+            if not reverse
+            else pc.cumulative_max(self.native[::-1], skip_nulls=True)[::-1]
+        )
+        return self._with_native(result)
+
+    @requires.backend_version((13,))
+    def cum_prod(self, *, reverse: bool) -> Self:
+        result = (
+            pc.cumulative_prod(self.native, skip_nulls=True)
+            if not reverse
+            else pc.cumulative_prod(self.native[::-1], skip_nulls=True)[::-1]
+        )
+        return self._with_native(result)
+
+    # --- rolling windows -----------------------------------------------------
+    # Strategy shared by rolling_sum/mean/var: pad the series (for centered
+    # windows), compute cumulative sums, and obtain each window's sum as
+    # cum[i] - cum[i - window]. Windows with fewer than `min_samples` valid
+    # values are nulled out, then the pad offset is sliced away.
+
+    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+        min_samples = min_samples if min_samples is not None else window_size
+        padded_series, offset = pad_series(self, window_size=window_size, center=center)
+
+        cum_sum = padded_series.cum_sum(reverse=False).fill_null(
+            value=None, strategy="forward", limit=None
+        )
+        rolling_sum = (
+            cum_sum
+            - cum_sum.shift(window_size).fill_null(value=0, strategy=None, limit=None)
+            if window_size != 0
+            else cum_sum
+        )
+
+        valid_count = padded_series.cum_count(reverse=False)
+        count_in_window = valid_count - valid_count.shift(window_size).fill_null(
+            value=0, strategy=None, limit=None
+        )
+
+        result = self._with_native(
+            pc.if_else((count_in_window >= min_samples).native, rolling_sum.native, None)
+        )
+        return result._gather_slice(slice(offset, None))
+
+    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+        # Same as rolling_sum, divided by the per-window valid count.
+        min_samples = min_samples if min_samples is not None else window_size
+        padded_series, offset = pad_series(self, window_size=window_size, center=center)
+
+        cum_sum = padded_series.cum_sum(reverse=False).fill_null(
+            value=None, strategy="forward", limit=None
+        )
+        rolling_sum = (
+            cum_sum
+            - cum_sum.shift(window_size).fill_null(value=0, strategy=None, limit=None)
+            if window_size != 0
+            else cum_sum
+        )
+
+        valid_count = padded_series.cum_count(reverse=False)
+        count_in_window = valid_count - valid_count.shift(window_size).fill_null(
+            value=0, strategy=None, limit=None
+        )
+
+        result = (
+            self._with_native(
+                pc.if_else(
+                    (count_in_window >= min_samples).native, rolling_sum.native, None
+                )
+            )
+            / count_in_window
+        )
+        return result._gather_slice(slice(offset, None))
+
+    def rolling_var(
+        self, window_size: int, *, min_samples: int, center: bool, ddof: int
+    ) -> Self:
+        # Var = (sum(x^2) - sum(x)^2 / n) / (n - ddof), per window.
+        min_samples = min_samples if min_samples is not None else window_size
+        padded_series, offset = pad_series(self, window_size=window_size, center=center)
+
+        cum_sum = padded_series.cum_sum(reverse=False).fill_null(
+            value=None, strategy="forward", limit=None
+        )
+        rolling_sum = (
+            cum_sum
+            - cum_sum.shift(window_size).fill_null(value=0, strategy=None, limit=None)
+            if window_size != 0
+            else cum_sum
+        )
+
+        cum_sum_sq = (
+            pow(padded_series, 2)
+            .cum_sum(reverse=False)
+            .fill_null(value=None, strategy="forward", limit=None)
+        )
+        rolling_sum_sq = (
+            cum_sum_sq
+            - cum_sum_sq.shift(window_size).fill_null(value=0, strategy=None, limit=None)
+            if window_size != 0
+            else cum_sum_sq
+        )
+
+        valid_count = padded_series.cum_count(reverse=False)
+        count_in_window = valid_count - valid_count.shift(window_size).fill_null(
+            value=0, strategy=None, limit=None
+        )
+
+        result = self._with_native(
+            pc.if_else(
+                (count_in_window >= min_samples).native,
+                (rolling_sum_sq - (rolling_sum**2 / count_in_window)).native,
+                None,
+            )
+        ) / self._with_native(pc.max_element_wise((count_in_window - ddof).native, 0))
+
+        return result._gather_slice(slice(offset, None, None))
+
+    def rolling_std(
+        self, window_size: int, *, min_samples: int, center: bool, ddof: int
+    ) -> Self:
+        # std = sqrt(var).
+        return (
+            self.rolling_var(
+                window_size=window_size, min_samples=min_samples, center=center, ddof=ddof
+            )
+            ** 0.5
+        )
+
+ def rank(self, method: RankMethod, *, descending: bool) -> Self:
+ if method == "average":
+ msg = (
+ "`rank` with `method='average' is not supported for pyarrow backend. "
+ "The available methods are {'min', 'max', 'dense', 'ordinal'}."
+ )
+ raise ValueError(msg)
+
+ sort_keys: Order = "descending" if descending else "ascending"
+ tiebreaker: TieBreaker = "first" if method == "ordinal" else method
+
+ native_series: ArrayOrChunkedArray
+ if self._backend_version < (14, 0, 0): # pragma: no cover
+ native_series = self.native.combine_chunks()
+ else:
+ native_series = self.native
+
+ null_mask = pc.is_null(native_series)
+
+ rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)
+
+ result = pc.if_else(null_mask, lit(None, native_series.type), rank)
+ return self._with_native(result)
+
    @requires.backend_version((13,))
    def hist(  # noqa: C901, PLR0912, PLR0915
        self,
        bins: list[float | int] | None,
        *,
        bin_count: int | None,
        include_breakpoint: bool,
    ) -> ArrowDataFrame:
        """Bin the series values into a histogram.

        Exactly one of `bins` (explicit bin edges) or `bin_count` (number of
        equal-width bins spanning the data range) must be provided; the caller
        guarantees this. Returns an `ArrowDataFrame` with a `count` column and,
        when `include_breakpoint` is true, a `breakpoint` column holding the
        right edge of each bin.
        """
        import numpy as np  # ignore-banned-import

        from narwhals._arrow.dataframe import ArrowDataFrame

        def _hist_from_bin_count(bin_count: int):  # type: ignore[no-untyped-def] # noqa: ANN202
            # Equal-width bins over [min, max]; widen a degenerate range by 0.5
            # on each side so a single distinct value still gets a bin.
            d = pc.min_max(self.native)
            lower, upper = d["min"].as_py(), d["max"].as_py()
            if lower == upper:
                lower -= 0.5
                upper += 0.5
            bins = np.linspace(lower, upper, bin_count + 1)
            return _hist_from_bins(bins)

        def _hist_from_bins(bins: Sequence[int | float]):  # type: ignore[no-untyped-def] # noqa: ANN202
            # Right-closed bins: searchsorted(side="left") assigns each value
            # to the bin whose right edge is >= value.
            bin_indices = np.searchsorted(bins, self.native, side="left")
            bin_indices = pc.if_else(  # lowest bin is inclusive
                pc.equal(self.native, lit(bins[0])), 1, bin_indices
            )

            # align unique categories and counts appropriately
            obs_cats, obs_counts = np.unique(bin_indices, return_counts=True)
            obj_cats = np.arange(1, len(bins))
            counts = np.zeros_like(obj_cats)
            counts[np.isin(obj_cats, obs_cats)] = obs_counts[np.isin(obs_cats, obj_cats)]

            bin_right = bins[1:]
            return counts, bin_right

        counts: Sequence[int | float | pa.Scalar[Any]] | np.typing.ArrayLike
        bin_right: Sequence[int | float | pa.Scalar[Any]] | np.typing.ArrayLike

        # Number of values that are neither null nor NaN.
        data_count = pc.sum(
            pc.invert(pc.or_(pc.is_nan(self.native), pc.is_null(self.native))).cast(
                pa.uint8()
            ),
            min_count=0,
        )
        if bins is not None:
            if len(bins) < 2:
                # Fewer than two edges defines no bin at all.
                counts, bin_right = [], []

            elif data_count == pa.scalar(0, type=pa.uint64()):  # type:ignore[comparison-overlap]
                # No valid data: every bin is empty.
                counts = np.zeros(len(bins) - 1)
                bin_right = bins[1:]

            elif len(bins) == 2:
                # Single bin: count values inside the closed interval.
                counts = [
                    pc.sum(
                        pc.and_(
                            pc.greater_equal(self.native, lit(float(bins[0]))),
                            pc.less_equal(self.native, lit(float(bins[1]))),
                        ).cast(pa.uint8())
                    )
                ]
                bin_right = [bins[-1]]
            else:
                counts, bin_right = _hist_from_bins(bins)

        elif bin_count is not None:
            if bin_count == 0:
                counts, bin_right = [], []
            elif data_count == pa.scalar(0, type=pa.uint64()):  # type:ignore[comparison-overlap]
                # No valid data: emit `bin_count` empty bins over [0, 1].
                counts, bin_right = (
                    np.zeros(bin_count),
                    np.linspace(0, 1, bin_count + 1)[1:],
                )
            elif bin_count == 1:
                d = pc.min_max(self.native)
                lower, upper = d["min"], d["max"]
                if lower == upper:
                    counts, bin_right = [data_count], [pc.add(upper, pa.scalar(0.5))]
                else:
                    counts, bin_right = [data_count], [upper]
            else:
                counts, bin_right = _hist_from_bin_count(bin_count)

        else:  # pragma: no cover
            # caller guarantees that either bins or bin_count is specified
            msg = "must provide one of `bin_count` or `bins`"
            raise InvalidOperationError(msg)

        data: dict[str, Any] = {}
        if include_breakpoint:
            data["breakpoint"] = bin_right
        data["count"] = counts

        return ArrowDataFrame(
            pa.Table.from_pydict(data),
            backend_version=self._backend_version,
            version=self._version,
            validate_column_names=True,
        )
+
+ def __iter__(self) -> Iterator[Any]:
+ for x in self.native:
+ yield maybe_extract_py_scalar(x, return_py_scalar=True)
+
+ def __contains__(self, other: Any) -> bool:
+ from pyarrow import (
+ ArrowInvalid, # ignore-banned-imports
+ ArrowNotImplementedError, # ignore-banned-imports
+ ArrowTypeError, # ignore-banned-imports
+ )
+
+ try:
+ other_ = lit(other) if other is not None else lit(None, type=self._type)
+ return maybe_extract_py_scalar(
+ pc.is_in(other_, self.native), return_py_scalar=True
+ )
+ except (ArrowInvalid, ArrowNotImplementedError, ArrowTypeError) as exc:
+ from narwhals.exceptions import InvalidOperationError
+
+ msg = f"Unable to compare other of type {type(other)} with series of type {self.dtype}."
+ raise InvalidOperationError(msg) from exc
+
+ def log(self, base: float) -> Self:
+ return self._with_native(pc.logb(self.native, lit(base)))
+
+ def exp(self) -> Self:
+ return self._with_native(pc.exp(self.native))
+
    @property
    def dt(self) -> ArrowSeriesDateTimeNamespace:
        """Datetime methods accessor (`Series.dt`)."""
        return ArrowSeriesDateTimeNamespace(self)
+
    @property
    def cat(self) -> ArrowSeriesCatNamespace:
        """Categorical methods accessor (`Series.cat`)."""
        return ArrowSeriesCatNamespace(self)
+
    @property
    def str(self) -> ArrowSeriesStringNamespace:
        """String methods accessor (`Series.str`)."""
        return ArrowSeriesStringNamespace(self)
+
    @property
    def list(self) -> ArrowSeriesListNamespace:
        """List methods accessor (`Series.list`)."""
        return ArrowSeriesListNamespace(self)
+
    @property
    def struct(self) -> ArrowSeriesStructNamespace:
        """Struct methods accessor (`Series.struct`)."""
        return ArrowSeriesStructNamespace(self)
+
+ ewm_mean = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/series_cat.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_cat.py
new file mode 100644
index 0000000..944f339
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_cat.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pyarrow as pa
+
+from narwhals._arrow.utils import ArrowSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._arrow.series import ArrowSeries
+ from narwhals._arrow.typing import Incomplete
+
+
class ArrowSeriesCatNamespace(ArrowSeriesNamespace):
    """Categorical accessor for an Arrow-backed series (`Series.cat`)."""

    def get_categories(self) -> ArrowSeries:
        """Return the distinct dictionary values across all chunks."""
        # NOTE: Should be `list[pa.DictionaryArray]`, but `DictionaryArray` has no attributes
        chunks: Incomplete = self.native.chunks
        return self.with_native(pa.concat_arrays(x.dictionary for x in chunks).unique())
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/series_dt.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_dt.py
new file mode 100644
index 0000000..75aaec5
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_dt.py
@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, cast
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._arrow.utils import UNITS_DICT, ArrowSeriesNamespace, floordiv_compat, lit
+from narwhals._duration import parse_interval_string
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeAlias
+
+ from narwhals._arrow.series import ArrowSeries
+ from narwhals._arrow.typing import ChunkedArrayAny, ScalarAny
+ from narwhals.dtypes import Datetime
+ from narwhals.typing import TimeUnit
+
+ UnitCurrent: TypeAlias = TimeUnit
+ UnitTarget: TypeAlias = TimeUnit
+ BinOpBroadcast: TypeAlias = Callable[[ChunkedArrayAny, ScalarAny], ChunkedArrayAny]
+ IntoRhs: TypeAlias = int
+
+
class ArrowSeriesDateTimeNamespace(ArrowSeriesNamespace):
    """Datetime accessor for an Arrow-backed series (`Series.dt`)."""

    # Factor converting whole days (Date input) into the target timestamp unit.
    _TIMESTAMP_DATE_FACTOR: ClassVar[Mapping[TimeUnit, int]] = {
        "ns": 1_000_000_000,
        "us": 1_000_000,
        "ms": 1_000,
        "s": 1,
    }
    # (current unit, target unit) -> (binary op, factor) for Datetime rescaling.
    # Downscaling uses floor division; upscaling uses multiplication.
    _TIMESTAMP_DATETIME_OP_FACTOR: ClassVar[
        Mapping[tuple[UnitCurrent, UnitTarget], tuple[BinOpBroadcast, IntoRhs]]
    ] = {
        ("ns", "us"): (floordiv_compat, 1_000),
        ("ns", "ms"): (floordiv_compat, 1_000_000),
        ("us", "ns"): (pc.multiply, 1_000),
        ("us", "ms"): (floordiv_compat, 1_000),
        ("ms", "ns"): (pc.multiply, 1_000_000),
        ("ms", "us"): (pc.multiply, 1_000),
        ("s", "ns"): (pc.multiply, 1_000_000_000),
        ("s", "us"): (pc.multiply, 1_000_000),
        ("s", "ms"): (pc.multiply, 1_000),
    }

    @property
    def unit(self) -> TimeUnit:  # NOTE: Unsafe (native).
        """Time unit of the underlying timestamp type (assumes timestamp data)."""
        return cast("pa.TimestampType[TimeUnit, Any]", self.native.type).unit

    @property
    def time_zone(self) -> str | None:  # NOTE: Unsafe (narwhals).
        """Time zone of the narwhals Datetime dtype (assumes Datetime data)."""
        return cast("Datetime", self.compliant.dtype).time_zone

    def to_string(self, format: str) -> ArrowSeries:
        """Format datetimes as strings using `format`."""
        # PyArrow differs from other libraries in that %S also prints out
        # the fractional part of the second...:'(
        # https://arrow.apache.org/docs/python/generated/pyarrow.compute.strftime.html
        format = format.replace("%S.%f", "%S").replace("%S%.f", "%S")
        return self.with_native(pc.strftime(self.native, format))

    def replace_time_zone(self, time_zone: str | None) -> ArrowSeries:
        """Set (or clear, when `None`) the time zone without converting the wall time."""
        if time_zone is not None:
            result = pc.assume_timezone(pc.local_timestamp(self.native), time_zone)
        else:
            result = pc.local_timestamp(self.native)
        return self.with_native(result)

    def convert_time_zone(self, time_zone: str) -> ArrowSeries:
        """Convert to `time_zone`; naive input is first treated as UTC."""
        ser = self.replace_time_zone("UTC") if self.time_zone is None else self.compliant
        return self.with_native(ser.native.cast(pa.timestamp(self.unit, time_zone)))

    def timestamp(self, time_unit: TimeUnit) -> ArrowSeries:
        """Return the integer timestamp in `time_unit` for Date or Datetime input.

        Raises `TypeError` for any other dtype.
        """
        ser = self.compliant
        dtypes = ser._version.dtypes
        if isinstance(ser.dtype, dtypes.Datetime):
            current = ser.dtype.time_unit
            s_cast = self.native.cast(pa.int64())
            if current == time_unit:
                result = s_cast
            elif item := self._TIMESTAMP_DATETIME_OP_FACTOR.get((current, time_unit)):
                fn, factor = item
                result = fn(s_cast, lit(factor))
            else:  # pragma: no cover
                msg = f"unexpected time unit {current}, please report an issue at https://github.com/narwhals-dev/narwhals"
                raise AssertionError(msg)
            return self.with_native(result)
        elif isinstance(ser.dtype, dtypes.Date):
            # Days since epoch -> seconds, then scale to the requested unit.
            time_s = pc.multiply(self.native.cast(pa.int32()), lit(86_400))
            factor = self._TIMESTAMP_DATE_FACTOR[time_unit]
            return self.with_native(pc.multiply(time_s, lit(factor)))
        else:
            msg = "Input should be either of Date or Datetime type"
            raise TypeError(msg)

    def date(self) -> ArrowSeries:
        """Truncate to the calendar date (`date32`)."""
        return self.with_native(self.native.cast(pa.date32()))

    def year(self) -> ArrowSeries:
        """Extract the year component."""
        return self.with_native(pc.year(self.native))

    def month(self) -> ArrowSeries:
        """Extract the month component."""
        return self.with_native(pc.month(self.native))

    def day(self) -> ArrowSeries:
        """Extract the day-of-month component."""
        return self.with_native(pc.day(self.native))

    def hour(self) -> ArrowSeries:
        """Extract the hour component."""
        return self.with_native(pc.hour(self.native))

    def minute(self) -> ArrowSeries:
        """Extract the minute component."""
        return self.with_native(pc.minute(self.native))

    def second(self) -> ArrowSeries:
        """Extract the second component."""
        return self.with_native(pc.second(self.native))

    def millisecond(self) -> ArrowSeries:
        """Extract the millisecond component."""
        return self.with_native(pc.millisecond(self.native))

    def microsecond(self) -> ArrowSeries:
        """Extract microseconds within the second (includes the millisecond part)."""
        arr = self.native
        result = pc.add(pc.multiply(pc.millisecond(arr), lit(1000)), pc.microsecond(arr))
        return self.with_native(result)

    def nanosecond(self) -> ArrowSeries:
        """Extract nanoseconds within the second (includes micro/millisecond parts)."""
        result = pc.add(
            pc.multiply(self.microsecond().native, lit(1000)), pc.nanosecond(self.native)
        )
        return self.with_native(result)

    def ordinal_day(self) -> ArrowSeries:
        """Extract the day of the year."""
        return self.with_native(pc.day_of_year(self.native))

    def weekday(self) -> ArrowSeries:
        """Extract the day of the week, counted from 1."""
        return self.with_native(pc.day_of_week(self.native, count_from_zero=False))

    def total_minutes(self) -> ArrowSeries:
        """Total duration in whole minutes (truncated)."""
        unit_to_minutes_factor = {
            "s": 60,  # seconds
            "ms": 60 * 1e3,  # milli
            "us": 60 * 1e6,  # micro
            "ns": 60 * 1e9,  # nano
        }
        factor = lit(unit_to_minutes_factor[self.unit], type=pa.int64())
        return self.with_native(pc.divide(self.native, factor).cast(pa.int64()))

    def total_seconds(self) -> ArrowSeries:
        """Total duration in whole seconds (truncated)."""
        unit_to_seconds_factor = {
            "s": 1,  # seconds
            "ms": 1e3,  # milli
            "us": 1e6,  # micro
            "ns": 1e9,  # nano
        }
        factor = lit(unit_to_seconds_factor[self.unit], type=pa.int64())
        return self.with_native(pc.divide(self.native, factor).cast(pa.int64()))

    def total_milliseconds(self) -> ArrowSeries:
        """Total duration in milliseconds; multiplies for coarser units."""
        unit_to_milli_factor = {
            "s": 1e3,  # seconds
            "ms": 1,  # milli
            "us": 1e3,  # micro
            "ns": 1e6,  # nano
        }
        factor = lit(unit_to_milli_factor[self.unit], type=pa.int64())
        if self.unit == "s":
            return self.with_native(pc.multiply(self.native, factor).cast(pa.int64()))
        return self.with_native(pc.divide(self.native, factor).cast(pa.int64()))

    def total_microseconds(self) -> ArrowSeries:
        """Total duration in microseconds; multiplies for coarser units."""
        unit_to_micro_factor = {
            "s": 1e6,  # seconds
            "ms": 1e3,  # milli
            "us": 1,  # micro
            "ns": 1e3,  # nano
        }
        factor = lit(unit_to_micro_factor[self.unit], type=pa.int64())
        if self.unit in {"s", "ms"}:
            return self.with_native(pc.multiply(self.native, factor).cast(pa.int64()))
        return self.with_native(pc.divide(self.native, factor).cast(pa.int64()))

    def total_nanoseconds(self) -> ArrowSeries:
        """Total duration in nanoseconds."""
        unit_to_nano_factor = {
            "s": 1e9,  # seconds
            "ms": 1e6,  # milli
            "us": 1e3,  # micro
            "ns": 1,  # nano
        }
        factor = lit(unit_to_nano_factor[self.unit], type=pa.int64())
        return self.with_native(pc.multiply(self.native, factor).cast(pa.int64()))

    def truncate(self, every: str) -> ArrowSeries:
        """Floor datetimes to the interval described by `every` (e.g. `'1mo'`)."""
        multiple, unit = parse_interval_string(every)
        return self.with_native(
            pc.floor_temporal(self.native, multiple=multiple, unit=UNITS_DICT[unit])
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/series_list.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_list.py
new file mode 100644
index 0000000..aeb4315
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_list.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._arrow.utils import ArrowSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._arrow.series import ArrowSeries
+
+
class ArrowSeriesListNamespace(ArrowSeriesNamespace):
    """List accessor for an Arrow-backed series (`Series.list`)."""

    def len(self) -> ArrowSeries:
        """Return the length of each list element as `uint32`."""
        return self.with_native(pc.list_value_length(self.native).cast(pa.uint32()))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/series_str.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_str.py
new file mode 100644
index 0000000..64dcce8
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_str.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+import string
+from typing import TYPE_CHECKING
+
+import pyarrow.compute as pc
+
+from narwhals._arrow.utils import ArrowSeriesNamespace, lit, parse_datetime_format
+
+if TYPE_CHECKING:
+ from narwhals._arrow.series import ArrowSeries
+
+
class ArrowSeriesStringNamespace(ArrowSeriesNamespace):
    """String accessor for an Arrow-backed series (`Series.str`)."""

    def len_chars(self) -> ArrowSeries:
        """Return the number of characters of each string."""
        return self.with_native(pc.utf8_length(self.native))

    def replace(self, pattern: str, value: str, *, literal: bool, n: int) -> ArrowSeries:
        """Replace up to `n` occurrences of `pattern` (regex unless `literal`)."""
        fn = pc.replace_substring if literal else pc.replace_substring_regex
        arr = fn(self.native, pattern, replacement=value, max_replacements=n)
        return self.with_native(arr)

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> ArrowSeries:
        """Replace all occurrences of `pattern` (delegates with `n=-1`)."""
        return self.replace(pattern, value, literal=literal, n=-1)

    def strip_chars(self, characters: str | None) -> ArrowSeries:
        """Strip `characters` (whitespace when `None`) from both ends."""
        return self.with_native(
            pc.utf8_trim(self.native, characters or string.whitespace)
        )

    def starts_with(self, prefix: str) -> ArrowSeries:
        """Test whether each string starts with `prefix`."""
        return self.with_native(pc.equal(self.slice(0, len(prefix)).native, lit(prefix)))

    def ends_with(self, suffix: str) -> ArrowSeries:
        """Test whether each string ends with `suffix`."""
        return self.with_native(
            pc.equal(self.slice(-len(suffix), None).native, lit(suffix))
        )

    def contains(self, pattern: str, *, literal: bool) -> ArrowSeries:
        """Test whether each string contains `pattern` (regex unless `literal`)."""
        check_func = pc.match_substring if literal else pc.match_substring_regex
        return self.with_native(check_func(self.native, pattern))

    def slice(self, offset: int, length: int | None) -> ArrowSeries:
        """Take a substring of `length` characters starting at `offset`."""
        stop = offset + length if length is not None else None
        return self.with_native(
            pc.utf8_slice_codeunits(self.native, start=offset, stop=stop)
        )

    def split(self, by: str) -> ArrowSeries:
        """Split each string on the literal separator `by`."""
        split_series = pc.split_pattern(self.native, by)  # type: ignore[call-overload]
        return self.with_native(split_series)

    def to_datetime(self, format: str | None) -> ArrowSeries:
        """Parse strings to microsecond timestamps, inferring `format` when `None`."""
        format = parse_datetime_format(self.native) if format is None else format
        timestamp_array = pc.strptime(self.native, format=format, unit="us")
        return self.with_native(timestamp_array)

    def to_uppercase(self) -> ArrowSeries:
        """Convert each string to upper case."""
        return self.with_native(pc.utf8_upper(self.native))

    def to_lowercase(self) -> ArrowSeries:
        """Convert each string to lower case."""
        return self.with_native(pc.utf8_lower(self.native))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/series_struct.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_struct.py
new file mode 100644
index 0000000..be5aa4b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/series_struct.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pyarrow.compute as pc
+
+from narwhals._arrow.utils import ArrowSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._arrow.series import ArrowSeries
+
+
class ArrowSeriesStructNamespace(ArrowSeriesNamespace):
    """Struct accessor for an Arrow-backed series (`Series.struct`)."""

    def field(self, name: str) -> ArrowSeries:
        """Extract struct field `name` as a series aliased to that name."""
        return self.with_native(pc.struct_field(self.native, name)).alias(name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/typing.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/typing.py
new file mode 100644
index 0000000..3f79fec
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/typing.py
@@ -0,0 +1,72 @@
from __future__ import annotations  # pragma: no cover

from typing import (
    TYPE_CHECKING,  # pragma: no cover
    Any,  # pragma: no cover
    TypeVar,  # pragma: no cover
)

if TYPE_CHECKING:
    import sys
    from typing import Generic, Literal

    if sys.version_info >= (3, 10):
        from typing import TypeAlias
    else:
        from typing_extensions import TypeAlias

    import pyarrow as pa
    from pyarrow.__lib_pxi.table import (
        AggregateOptions,  # noqa: F401
        Aggregation,  # noqa: F401
    )
    from pyarrow._stubs_typing import (  # pyright: ignore[reportMissingModuleSource]
        Indices,  # noqa: F401
        Mask,  # noqa: F401
        Order,  # noqa: F401
    )

    from narwhals._arrow.expr import ArrowExpr
    from narwhals._arrow.series import ArrowSeries

    # Aliases shared across the `_arrow` backend modules.
    IntoArrowExpr: TypeAlias = "ArrowExpr | ArrowSeries"
    TieBreaker: TypeAlias = Literal["min", "max", "first", "dense"]
    NullPlacement: TypeAlias = Literal["at_start", "at_end"]
    NativeIntervalUnit: TypeAlias = Literal[
        "year",
        "quarter",
        "month",
        "week",
        "day",
        "hour",
        "minute",
        "second",
        "millisecond",
        "microsecond",
        "nanosecond",
    ]

    ChunkedArrayAny: TypeAlias = pa.ChunkedArray[Any]
    ArrayAny: TypeAlias = pa.Array[Any]
    ArrayOrChunkedArray: TypeAlias = "ArrayAny | ChunkedArrayAny"
    ScalarAny: TypeAlias = pa.Scalar[Any]
    ArrayOrScalar: TypeAlias = "ArrayOrChunkedArray | ScalarAny"
    ArrayOrScalarT1 = TypeVar("ArrayOrScalarT1", ArrayAny, ChunkedArrayAny, ScalarAny)
    ArrayOrScalarT2 = TypeVar("ArrayOrScalarT2", ArrayAny, ChunkedArrayAny, ScalarAny)
    _AsPyType = TypeVar("_AsPyType")

    # Type-checking-only stand-in for a pyarrow DataType with a known Python type.
    class _BasicDataType(pa.DataType, Generic[_AsPyType]): ...


Incomplete: TypeAlias = Any  # pragma: no cover
"""
Marker for working code that fails on the stubs.

Common issues:
- Annotated for `Array`, but not `ChunkedArray`
- Relies on typing information that the stubs don't provide statically
- Missing attributes
- Incorrect return types
- Inconsistent use of generic/concrete types
- `_clone_signature` used on signatures that are not identical
"""
diff --git a/venv/lib/python3.8/site-packages/narwhals/_arrow/utils.py b/venv/lib/python3.8/site-packages/narwhals/_arrow/utils.py
new file mode 100644
index 0000000..d100448
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_arrow/utils.py
@@ -0,0 +1,470 @@
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, Sequence, cast
+
+import pyarrow as pa
+import pyarrow.compute as pc
+
+from narwhals._compliant.series import _SeriesNamespace
+from narwhals._utils import isinstance_or_issubclass
+from narwhals.exceptions import ShapeError
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeAlias, TypeIs
+
+ from narwhals._arrow.series import ArrowSeries
+ from narwhals._arrow.typing import (
+ ArrayAny,
+ ArrayOrScalar,
+ ArrayOrScalarT1,
+ ArrayOrScalarT2,
+ ChunkedArrayAny,
+ NativeIntervalUnit,
+ ScalarAny,
+ )
+ from narwhals._duration import IntervalUnit
+ from narwhals._utils import Version
+ from narwhals.dtypes import DType
+ from narwhals.typing import IntoDType, PythonLiteral
+
+ # NOTE: stubs don't allow for `ChunkedArray[StructArray]`
+ # Intended to represent the `.chunks` property storing `list[pa.StructArray]`
+ ChunkedArrayStructArray: TypeAlias = ChunkedArrayAny
+
+ def is_timestamp(t: Any) -> TypeIs[pa.TimestampType[Any, Any]]: ...
+ def is_duration(t: Any) -> TypeIs[pa.DurationType[Any]]: ...
+ def is_list(t: Any) -> TypeIs[pa.ListType[Any]]: ...
+ def is_large_list(t: Any) -> TypeIs[pa.LargeListType[Any]]: ...
+ def is_fixed_size_list(t: Any) -> TypeIs[pa.FixedSizeListType[Any, Any]]: ...
+ def is_dictionary(t: Any) -> TypeIs[pa.DictionaryType[Any, Any, Any]]: ...
+ def extract_regex(
+ strings: ChunkedArrayAny,
+ /,
+ pattern: str,
+ *,
+ options: Any = None,
+ memory_pool: Any = None,
+ ) -> ChunkedArrayStructArray: ...
+else:
+ from pyarrow.compute import extract_regex
+ from pyarrow.types import (
+ is_dictionary, # noqa: F401
+ is_duration,
+ is_fixed_size_list,
+ is_large_list,
+ is_list,
+ is_timestamp,
+ )
+
+UNITS_DICT: Mapping[IntervalUnit, NativeIntervalUnit] = {
+ "y": "year",
+ "q": "quarter",
+ "mo": "month",
+ "d": "day",
+ "h": "hour",
+ "m": "minute",
+ "s": "second",
+ "ms": "millisecond",
+ "us": "microsecond",
+ "ns": "nanosecond",
+}
+
+lit = pa.scalar
+"""Alias for `pyarrow.scalar`."""
+
+
def extract_py_scalar(value: Any, /) -> Any:
    """Unwrap `value` to a plain Python object via `maybe_extract_py_scalar`."""
    # Local import to avoid a circular dependency with `series`.
    from narwhals._arrow.series import maybe_extract_py_scalar

    return maybe_extract_py_scalar(value, return_py_scalar=True)
+
+
def chunked_array(
    arr: ArrayOrScalar | list[Iterable[Any]], dtype: pa.DataType | None = None, /
) -> ChunkedArrayAny:
    """Coerce `arr` into a `pa.ChunkedArray`, passing existing ones through.

    A `list` is treated as a sequence of chunks (typed by `dtype`);
    anything else becomes a single chunk typed by its own `.type`.
    """
    if isinstance(arr, pa.ChunkedArray):
        return arr
    chunks, chunk_type = (arr, dtype) if isinstance(arr, list) else ([arr], arr.type)
    return pa.chunked_array(chunks, chunk_type)
+
+
def nulls_like(n: int, series: ArrowSeries) -> ArrayAny:
    """Create a strongly-typed Array instance with all elements null.

    Uses the type of `series`, without upsetting `mypy`.
    """
    return pa.nulls(n, series.native.type)
+
+
@lru_cache(maxsize=16)
def native_to_narwhals_dtype(dtype: pa.DataType, version: Version) -> DType:  # noqa: C901, PLR0912
    """Map a native pyarrow `DataType` to the corresponding narwhals `DType`.

    Nested types (struct, list, fixed-size list) are converted recursively.
    Unrecognized types fall through to `Unknown`. Results are memoized via
    `lru_cache`.
    """
    dtypes = version.dtypes
    if pa.types.is_int64(dtype):
        return dtypes.Int64()
    if pa.types.is_int32(dtype):
        return dtypes.Int32()
    if pa.types.is_int16(dtype):
        return dtypes.Int16()
    if pa.types.is_int8(dtype):
        return dtypes.Int8()
    if pa.types.is_uint64(dtype):
        return dtypes.UInt64()
    if pa.types.is_uint32(dtype):
        return dtypes.UInt32()
    if pa.types.is_uint16(dtype):
        return dtypes.UInt16()
    if pa.types.is_uint8(dtype):
        return dtypes.UInt8()
    if pa.types.is_boolean(dtype):
        return dtypes.Boolean()
    if pa.types.is_float64(dtype):
        return dtypes.Float64()
    if pa.types.is_float32(dtype):
        return dtypes.Float32()
    # bug in coverage? it shows `31->exit` (where `31` is currently the line number of
    # the next line), even though both when the if condition is true and false are covered
    if (  # pragma: no cover
        pa.types.is_string(dtype)
        or pa.types.is_large_string(dtype)
        # `is_string_view` only exists on newer pyarrow versions.
        or getattr(pa.types, "is_string_view", lambda _: False)(dtype)
    ):
        return dtypes.String()
    if pa.types.is_date32(dtype):
        return dtypes.Date()
    if is_timestamp(dtype):
        return dtypes.Datetime(time_unit=dtype.unit, time_zone=dtype.tz)
    if is_duration(dtype):
        return dtypes.Duration(time_unit=dtype.unit)
    if pa.types.is_dictionary(dtype):
        return dtypes.Categorical()
    if pa.types.is_struct(dtype):
        return dtypes.Struct(
            [
                dtypes.Field(
                    dtype.field(i).name,
                    native_to_narwhals_dtype(dtype.field(i).type, version),
                )
                for i in range(dtype.num_fields)
            ]
        )
    if is_list(dtype) or is_large_list(dtype):
        return dtypes.List(native_to_narwhals_dtype(dtype.value_type, version))
    if is_fixed_size_list(dtype):
        return dtypes.Array(
            native_to_narwhals_dtype(dtype.value_type, version), dtype.list_size
        )
    if pa.types.is_decimal(dtype):
        return dtypes.Decimal()
    if pa.types.is_time32(dtype) or pa.types.is_time64(dtype):
        return dtypes.Time()
    if pa.types.is_binary(dtype):
        return dtypes.Binary()
    return dtypes.Unknown()  # pragma: no cover
+
+
def narwhals_to_native_dtype(dtype: IntoDType, version: Version) -> pa.DataType:  # noqa: C901, PLR0912
    """Map a narwhals dtype to the corresponding native pyarrow `DataType`.

    Nested dtypes (List, Struct, Array) are converted recursively.
    Raises `NotImplementedError` for Decimal and `AssertionError` for any
    dtype without a mapping.
    """
    dtypes = version.dtypes
    if isinstance_or_issubclass(dtype, dtypes.Decimal):
        msg = "Casting to Decimal is not supported yet."
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Float64):
        return pa.float64()
    if isinstance_or_issubclass(dtype, dtypes.Float32):
        return pa.float32()
    if isinstance_or_issubclass(dtype, dtypes.Int64):
        return pa.int64()
    if isinstance_or_issubclass(dtype, dtypes.Int32):
        return pa.int32()
    if isinstance_or_issubclass(dtype, dtypes.Int16):
        return pa.int16()
    if isinstance_or_issubclass(dtype, dtypes.Int8):
        return pa.int8()
    if isinstance_or_issubclass(dtype, dtypes.UInt64):
        return pa.uint64()
    if isinstance_or_issubclass(dtype, dtypes.UInt32):
        return pa.uint32()
    if isinstance_or_issubclass(dtype, dtypes.UInt16):
        return pa.uint16()
    if isinstance_or_issubclass(dtype, dtypes.UInt8):
        return pa.uint8()
    if isinstance_or_issubclass(dtype, dtypes.String):
        return pa.string()
    if isinstance_or_issubclass(dtype, dtypes.Boolean):
        return pa.bool_()
    if isinstance_or_issubclass(dtype, dtypes.Categorical):
        return pa.dictionary(pa.uint32(), pa.string())
    if isinstance_or_issubclass(dtype, dtypes.Datetime):
        unit = dtype.time_unit
        return pa.timestamp(unit, tz) if (tz := dtype.time_zone) else pa.timestamp(unit)
    if isinstance_or_issubclass(dtype, dtypes.Duration):
        return pa.duration(dtype.time_unit)
    if isinstance_or_issubclass(dtype, dtypes.Date):
        return pa.date32()
    if isinstance_or_issubclass(dtype, dtypes.List):
        return pa.list_(value_type=narwhals_to_native_dtype(dtype.inner, version=version))
    if isinstance_or_issubclass(dtype, dtypes.Struct):
        return pa.struct(
            [
                (field.name, narwhals_to_native_dtype(field.dtype, version=version))
                for field in dtype.fields
            ]
        )
    if isinstance_or_issubclass(dtype, dtypes.Array):  # pragma: no cover
        inner = narwhals_to_native_dtype(dtype.inner, version=version)
        list_size = dtype.size
        return pa.list_(inner, list_size=list_size)
    if isinstance_or_issubclass(dtype, dtypes.Time):
        return pa.time64("ns")
    if isinstance_or_issubclass(dtype, dtypes.Binary):
        return pa.binary()

    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
    raise AssertionError(msg)
+
+
def extract_native(
    lhs: ArrowSeries, rhs: ArrowSeries | PythonLiteral | ScalarAny
) -> tuple[ChunkedArrayAny | ScalarAny, ChunkedArrayAny | ScalarAny]:
    """Extract native objects in binary operation.

    If the comparison isn't supported, return `NotImplemented` so that the
    "right-hand-side" operation (e.g. `__radd__`) can be tried.

    If one of the two sides has a `_broadcast` flag, then extract the scalar
    underneath it so that PyArrow can do its own broadcasting.
    """
    # Local imports avoid circular dependencies between backend modules.
    from narwhals._arrow.dataframe import ArrowDataFrame
    from narwhals._arrow.series import ArrowSeries

    if rhs is None:  # pragma: no cover
        # Pair the data with a typed null scalar matching `lhs`.
        return lhs.native, lit(None, type=lhs._type)

    if isinstance(rhs, ArrowDataFrame):
        return NotImplemented

    if isinstance(rhs, ArrowSeries):
        if lhs._broadcast and not rhs._broadcast:
            return lhs.native[0], rhs.native
        if rhs._broadcast:
            return lhs.native, rhs.native[0]
        return lhs.native, rhs.native

    if isinstance(rhs, list):
        msg = "Expected Series or scalar, got list."
        raise TypeError(msg)

    # Plain Python literals are wrapped as pyarrow scalars.
    return lhs.native, rhs if isinstance(rhs, pa.Scalar) else lit(rhs)
+
+
def align_series_full_broadcast(*series: ArrowSeries) -> Sequence[ArrowSeries]:
    """Broadcast all `series` to the common (maximum) length.

    Series flagged `_broadcast` are materialized by repeating their single
    value; any other length mismatch raises `ShapeError`.
    """
    # Ensure all of `series` are of the same length.
    lengths = [len(s) for s in series]
    max_length = max(lengths)
    fast_path = all(_len == max_length for _len in lengths)

    if fast_path:
        return series

    reshaped = []
    for s in series:
        if s._broadcast:
            value = s.native[0]
            # Older pyarrow can't repeat a pyarrow scalar directly; use its
            # Python value instead.
            if s._backend_version < (13,) and hasattr(value, "as_py"):
                value = value.as_py()
            reshaped.append(s._with_native(pa.array([value] * max_length, type=s._type)))
        else:
            if (actual_len := len(s)) != max_length:
                msg = f"Expected object of length {max_length}, got {actual_len}."
                raise ShapeError(msg)
            reshaped.append(s)

    return reshaped
+
+
def floordiv_compat(left: ArrayOrScalar, right: ArrayOrScalar, /) -> Any:
    """Floor division with Python semantics (rounding toward negative infinity).

    pyarrow's integer division truncates toward zero; for signed integers with
    a non-zero remainder and operands of opposite sign the quotient is
    adjusted down by one.
    """
    # The following lines are adapted from pandas' pyarrow implementation.
    # Ref: https://github.com/pandas-dev/pandas/blob/262fcfbffcee5c3116e86a951d8b693f90411e68/pandas/core/arrays/arrow/array.py#L124-L154

    if pa.types.is_integer(left.type) and pa.types.is_integer(right.type):
        divided = pc.divide_checked(left, right)
        # TODO @dangotbanned: Use a `TypeVar` in guards
        # Narrowing to a `Union` isn't interacting well with the rest of the stubs
        # https://github.com/zen-xu/pyarrow-stubs/pull/215
        if pa.types.is_signed_integer(divided.type):
            div_type = cast("pa._lib.Int64Type", divided.type)
            has_remainder = pc.not_equal(pc.multiply(divided, right), left)
            # Sign bit of XOR is set exactly when the operands' signs differ.
            has_one_negative_operand = pc.less(
                pc.bit_wise_xor(left, right), lit(0, div_type)
            )
            result = pc.if_else(
                pc.and_(has_remainder, has_one_negative_operand),
                pc.subtract(divided, lit(1, div_type)),
                divided,
            )
        else:
            result = divided  # pragma: no cover
        result = result.cast(left.type)
    else:
        divided = pc.divide(left, right)
        result = pc.floor(divided)
    return result
+
+
def cast_for_truediv(
    arrow_array: ArrayOrScalarT1, pa_object: ArrayOrScalarT2
) -> tuple[ArrayOrScalarT1, ArrayOrScalarT2]:
    """Cast integer operands to float64 so true division yields floats."""
    # Lifted from:
    # https://github.com/pandas-dev/pandas/blob/262fcfbffcee5c3116e86a951d8b693f90411e68/pandas/core/arrays/arrow/array.py#L108-L122
    # Ensure int / int -> float mirroring Python/Numpy behavior
    # as pc.divide_checked(int, int) -> int
    if pa.types.is_integer(arrow_array.type) and pa.types.is_integer(pa_object.type):
        # GH: 56645.  # noqa: ERA001
        # https://github.com/apache/arrow/issues/35563
        # NOTE: `pyarrow==11.*` doesn't allow keywords in `Array.cast`
        return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast(
            pa_object, pa.float64(), safe=False
        )

    return arrow_array, pa_object
+
+
# Regex for date, time, separator and timezone components
# DATE_RE matches either a separated date (1-4 digit groups split by -, / or .)
# or an 8-digit run with no separator.
DATE_RE = r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4}|\d{8})"
# Date/time separator: whitespace or a literal 'T' (ISO-8601 style).
SEP_RE = r"(?P<sep>\s|T)"
TIME_RE = r"(?P<time>\d{2}:\d{2}(?::\d{2})?|\d{6}?)"  # \s*(?P<period>[AP]M)?)?
# Anchored time shapes used by `_parse_time_format` to pick a strftime code.
HMS_RE = r"^(?P<hms>\d{2}:\d{2}:\d{2})$"
HM_RE = r"^(?P<hm>\d{2}:\d{2})$"
HMS_RE_NO_SEP = r"^(?P<hms_no_sep>\d{6})$"
TZ_RE = r"(?P<tz>Z|[+-]\d{2}:?\d{2})"  # Matches 'Z', '+02:00', '+0200', '+02', etc.
# Full datetime shape: date, then optional separator/time/timezone, end of string.
FULL_RE = rf"{DATE_RE}{SEP_RE}?{TIME_RE}?{TZ_RE}?$"

# Separate regexes for different date formats.
# Each captures year/month/day plus the two separators so `_parse_date_format`
# can verify the separators are consistent before substituting them into the
# strftime pattern.
YMD_RE = r"^(?P<year>(?:[12][0-9])?[0-9]{2})(?P<sep1>[-/.])(?P<month>0[1-9]|1[0-2])(?P<sep2>[-/.])(?P<day>0[1-9]|[12][0-9]|3[01])$"
DMY_RE = r"^(?P<day>0[1-9]|[12][0-9]|3[01])(?P<sep1>[-/.])(?P<month>0[1-9]|1[0-2])(?P<sep2>[-/.])(?P<year>(?:[12][0-9])?[0-9]{2})$"
MDY_RE = r"^(?P<month>0[1-9]|1[0-2])(?P<sep1>[-/.])(?P<day>0[1-9]|[12][0-9]|3[01])(?P<sep2>[-/.])(?P<year>(?:[12][0-9])?[0-9]{2})$"
YMD_RE_NO_SEP = r"^(?P<year>(?:[12][0-9])?[0-9]{2})(?P<month>0[1-9]|1[0-2])(?P<day>0[1-9]|[12][0-9]|3[01])$"

# Candidate (regex, strftime) pairs, tried in order. The '-' in the strftime
# templates is replaced by the detected separator in `_parse_date_format`.
DATE_FORMATS = (
    (YMD_RE_NO_SEP, "%Y%m%d"),
    (YMD_RE, "%Y-%m-%d"),
    (DMY_RE, "%d-%m-%Y"),
    (MDY_RE, "%m-%d-%Y"),
)
TIME_FORMATS = ((HMS_RE, "%H:%M:%S"), (HM_RE, "%H:%M"), (HMS_RE_NO_SEP, "%H%M%S"))
+
+
def _extract_regex_concat_arrays(
    strings: ChunkedArrayAny,
    /,
    pattern: str,
    *,
    options: Any = None,
    memory_pool: Any = None,
) -> pa.StructArray:
    """Run `extract_regex` over `strings` and flatten the result's chunks into one StructArray."""
    extracted = extract_regex(strings, pattern, options=options, memory_pool=memory_pool)
    combined = pa.concat_arrays(extracted.chunks)
    return cast("pa.StructArray", combined)
+
+
def parse_datetime_format(arr: ChunkedArrayAny) -> str:
    """Try to infer datetime format from StringArray."""
    # Only the first 10 non-null values are inspected.
    sample = arr.drop_null().slice(0, 10)
    matches = _extract_regex_concat_arrays(sample, pattern=FULL_RE)
    if not pc.all(matches.is_valid()).as_py():
        msg = (
            "Unable to infer datetime format, provided format is not supported. "
            "Please report a bug to https://github.com/narwhals-dev/narwhals/issues"
        )
        raise NotImplementedError(msg)

    separators = matches.field("sep")
    tz = matches.field("tz")

    # separators and time zones must be unique across the sample
    if pc.count(pc.unique(separators)).as_py() > 1:
        msg = "Found multiple separator values while inferring datetime format."
        raise ValueError(msg)
    if pc.count(pc.unique(tz)).as_py() > 1:
        msg = "Found multiple timezone values while inferring datetime format."
        raise ValueError(msg)

    date_fmt = _parse_date_format(cast("pc.StringArray", matches.field("date")))
    time_fmt = _parse_time_format(cast("pc.StringArray", matches.field("time")))
    sep_value = separators[0].as_py()
    tz_fmt = "%z" if tz[0].as_py() else ""
    return f"{date_fmt}{sep_value}{time_fmt}{tz_fmt}"
+
+
def _parse_date_format(arr: pc.StringArray) -> str:
    """Return the strftime date format matching every element of `arr`.

    Raises:
        ValueError: If no candidate in `DATE_FORMATS` matches all values, or
            the detected separators are inconsistent.
    """
    for date_rgx, date_fmt in DATE_FORMATS:
        matched = pc.extract_regex(arr, pattern=date_rgx)
        if not pc.all(matched.is_valid()).as_py():
            continue
        if date_fmt == "%Y%m%d":
            # Separator-less format: nothing further to validate.
            return date_fmt
        sep1 = matched.field("sep1")
        sep2 = matched.field("sep2")
        if (
            pc.count(pc.unique(sep1)).as_py() == 1
            and pc.count(pc.unique(sep2)).as_py() == 1
            and (sep_value := sep1[0].as_py()) == sep2[0].as_py()
        ):
            # Substitute the detected separator into the strftime template.
            return date_fmt.replace("-", sep_value)

    msg = (
        "Unable to infer datetime format. "
        "Please report a bug to https://github.com/narwhals-dev/narwhals/issues"
    )
    raise ValueError(msg)
+
+
def _parse_time_format(arr: pc.StringArray) -> str:
    """Return the strftime time format matching every element of `arr`, or '' if none does."""
    for time_rgx, time_fmt in TIME_FORMATS:
        if pc.all(pc.extract_regex(arr, pattern=time_rgx).is_valid()).as_py():
            return time_fmt
    return ""
+
+
def pad_series(
    series: ArrowSeries, *, window_size: int, center: bool
) -> tuple[ArrowSeries, int]:
    """Pad series with None values on the left and/or right side, depending on the specified parameters.

    Arguments:
        series: The input ArrowSeries to be padded.
        window_size: The desired size of the window.
        center: Specifies whether to center the padding or not.

    Returns:
        A tuple containing the padded ArrowSeries and the offset value.
    """
    if not center:
        return series, 0
    left_pad = window_size // 2
    # For even window sizes the right pad is one element shorter.
    right_pad = left_pad - 1 if window_size % 2 == 0 else left_pad
    nulls_left = pa.array([None] * left_pad, type=series._type)
    nulls_right = pa.array([None] * right_pad, type=series._type)
    padded = pa.concat_arrays([nulls_left, *series.native.chunks, nulls_right])
    return series._with_native(padded), left_pad + right_pad
+
+
def cast_to_comparable_string_types(
    *chunked_arrays: ChunkedArrayAny, separator: str
) -> tuple[Iterator[ChunkedArrayAny], ScalarAny]:
    """Cast all arrays to one common string type and wrap `separator` as a scalar of it.

    Ensures `chunked_arrays` are either all `string` or all `large_string`.
    """
    if any(pa.types.is_large_string(ca.type) for ca in chunked_arrays):
        dtype = pa.large_string()
    else:
        dtype = pa.string()  # (PyArrow default)
    casted = (ca.cast(dtype) for ca in chunked_arrays)
    return casted, lit(separator, dtype)
+
+
class ArrowSeriesNamespace(_SeriesNamespace["ArrowSeries", "ChunkedArrayAny"]):
    """Base class for `ArrowSeries` accessor namespaces; stores the wrapped series."""

    def __init__(self, series: ArrowSeries, /) -> None:
        self._compliant_series = series
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py
new file mode 100644
index 0000000..cebafbd
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/__init__.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from narwhals._compliant.dataframe import (
+ CompliantDataFrame,
+ CompliantLazyFrame,
+ EagerDataFrame,
+)
+from narwhals._compliant.expr import CompliantExpr, EagerExpr, LazyExpr
+from narwhals._compliant.group_by import (
+ CompliantGroupBy,
+ DepthTrackingGroupBy,
+ EagerGroupBy,
+ LazyGroupBy,
+)
+from narwhals._compliant.namespace import (
+ CompliantNamespace,
+ EagerNamespace,
+ LazyNamespace,
+)
+from narwhals._compliant.selectors import (
+ CompliantSelector,
+ CompliantSelectorNamespace,
+ EagerSelectorNamespace,
+ LazySelectorNamespace,
+)
+from narwhals._compliant.series import CompliantSeries, EagerSeries
+from narwhals._compliant.typing import (
+ CompliantExprT,
+ CompliantFrameT,
+ CompliantSeriesOrNativeExprT_co,
+ CompliantSeriesT,
+ EagerDataFrameT,
+ EagerSeriesT,
+ EvalNames,
+ EvalSeries,
+ IntoCompliantExpr,
+ NativeFrameT_co,
+ NativeSeriesT_co,
+)
+from narwhals._compliant.when_then import (
+ CompliantThen,
+ CompliantWhen,
+ EagerWhen,
+ LazyThen,
+ LazyWhen,
+)
+
# Public API of `narwhals._compliant`: re-exports of the protocol/base classes
# and typing aliases imported above, kept alphabetically sorted.
__all__ = [
    "CompliantDataFrame",
    "CompliantExpr",
    "CompliantExprT",
    "CompliantFrameT",
    "CompliantGroupBy",
    "CompliantLazyFrame",
    "CompliantNamespace",
    "CompliantSelector",
    "CompliantSelectorNamespace",
    "CompliantSeries",
    "CompliantSeriesOrNativeExprT_co",
    "CompliantSeriesT",
    "CompliantThen",
    "CompliantWhen",
    "DepthTrackingGroupBy",
    "EagerDataFrame",
    "EagerDataFrameT",
    "EagerExpr",
    "EagerGroupBy",
    "EagerNamespace",
    "EagerSelectorNamespace",
    "EagerSeries",
    "EagerSeriesT",
    "EagerWhen",
    "EvalNames",
    "EvalSeries",
    "IntoCompliantExpr",
    "LazyExpr",
    "LazyGroupBy",
    "LazyNamespace",
    "LazySelectorNamespace",
    "LazyThen",
    "LazyWhen",
    "NativeFrameT_co",
    "NativeSeriesT_co",
]
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py
new file mode 100644
index 0000000..3365d25
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/any_namespace.py
@@ -0,0 +1,85 @@
+"""`Expr` and `Series` namespace accessor protocols."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Protocol
+
+from narwhals._utils import CompliantT_co, _StoresCompliant
+
+if TYPE_CHECKING:
+ from typing import Callable
+
+ from narwhals.typing import TimeUnit
+
# Public accessor-namespace protocols defined in this module.
__all__ = [
    "CatNamespace",
    "DateTimeNamespace",
    "ListNamespace",
    "NameNamespace",
    "StringNamespace",
    "StructNamespace",
]
+
+
class CatNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
    """Protocol for the `.cat` (categorical) accessor namespace."""

    def get_categories(self) -> CompliantT_co: ...
+
+
class DateTimeNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
    """Protocol for the `.dt` (datetime) accessor namespace."""

    def to_string(self, format: str) -> CompliantT_co: ...
    def replace_time_zone(self, time_zone: str | None) -> CompliantT_co: ...
    def convert_time_zone(self, time_zone: str) -> CompliantT_co: ...
    def timestamp(self, time_unit: TimeUnit) -> CompliantT_co: ...
    # Component extractors.
    def date(self) -> CompliantT_co: ...
    def year(self) -> CompliantT_co: ...
    def month(self) -> CompliantT_co: ...
    def day(self) -> CompliantT_co: ...
    def hour(self) -> CompliantT_co: ...
    def minute(self) -> CompliantT_co: ...
    def second(self) -> CompliantT_co: ...
    def millisecond(self) -> CompliantT_co: ...
    def microsecond(self) -> CompliantT_co: ...
    def nanosecond(self) -> CompliantT_co: ...
    def ordinal_day(self) -> CompliantT_co: ...
    def weekday(self) -> CompliantT_co: ...
    # Duration totals.
    def total_minutes(self) -> CompliantT_co: ...
    def total_seconds(self) -> CompliantT_co: ...
    def total_milliseconds(self) -> CompliantT_co: ...
    def total_microseconds(self) -> CompliantT_co: ...
    def total_nanoseconds(self) -> CompliantT_co: ...
+
+
class ListNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
    """Protocol for the `.list` accessor namespace."""

    def len(self) -> CompliantT_co: ...
+
+
class NameNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
    """Protocol for the `.name` accessor namespace (output-name transformations)."""

    def keep(self) -> CompliantT_co: ...
    def map(self, function: Callable[[str], str]) -> CompliantT_co: ...
    def prefix(self, prefix: str) -> CompliantT_co: ...
    def suffix(self, suffix: str) -> CompliantT_co: ...
    def to_lowercase(self) -> CompliantT_co: ...
    def to_uppercase(self) -> CompliantT_co: ...
+
+
class StringNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
    """Protocol for the `.str` accessor namespace."""

    def len_chars(self) -> CompliantT_co: ...
    def replace(
        self, pattern: str, value: str, *, literal: bool, n: int
    ) -> CompliantT_co: ...
    def replace_all(
        self, pattern: str, value: str, *, literal: bool
    ) -> CompliantT_co: ...
    def strip_chars(self, characters: str | None) -> CompliantT_co: ...
    def starts_with(self, prefix: str) -> CompliantT_co: ...
    def ends_with(self, suffix: str) -> CompliantT_co: ...
    def contains(self, pattern: str, *, literal: bool) -> CompliantT_co: ...
    def slice(self, offset: int, length: int | None) -> CompliantT_co: ...
    def split(self, by: str) -> CompliantT_co: ...
    def to_datetime(self, format: str | None) -> CompliantT_co: ...
    def to_lowercase(self) -> CompliantT_co: ...
    def to_uppercase(self) -> CompliantT_co: ...
+
+
class StructNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]):
    """Protocol for the `.struct` accessor namespace."""

    def field(self, name: str) -> CompliantT_co: ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py
new file mode 100644
index 0000000..5f21055
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/dataframe.py
@@ -0,0 +1,500 @@
+from __future__ import annotations
+
+from itertools import chain
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterator,
+ Literal,
+ Mapping,
+ Protocol,
+ Sequence,
+ Sized,
+ TypeVar,
+ overload,
+)
+
+from narwhals._compliant.typing import (
+ CompliantDataFrameAny,
+ CompliantExprT_contra,
+ CompliantLazyFrameAny,
+ CompliantSeriesT,
+ EagerExprT,
+ EagerSeriesT,
+ NativeExprT,
+ NativeFrameT,
+)
+from narwhals._translate import (
+ ArrowConvertible,
+ DictConvertible,
+ FromNative,
+ NumpyConvertible,
+ ToNarwhals,
+ ToNarwhalsT_co,
+)
+from narwhals._typing_compat import deprecated
+from narwhals._utils import (
+ Version,
+ _StoresNative,
+ check_columns_exist,
+ is_compliant_series,
+ is_index_selector,
+ is_range,
+ is_sequence_like,
+ is_sized_multi_index_selector,
+ is_slice_index,
+ is_slice_none,
+)
+
+if TYPE_CHECKING:
+ from io import BytesIO
+ from pathlib import Path
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import Self, TypeAlias
+
+ from narwhals._compliant.expr import LazyExpr
+ from narwhals._compliant.group_by import CompliantGroupBy, DataFrameGroupBy
+ from narwhals._compliant.namespace import EagerNamespace
+ from narwhals._compliant.window import WindowInputs
+ from narwhals._translate import IntoArrowTable
+ from narwhals._utils import Implementation, _FullContext
+ from narwhals.dataframe import DataFrame
+ from narwhals.dtypes import DType
+ from narwhals.exceptions import ColumnNotFoundError
+ from narwhals.schema import Schema
+ from narwhals.typing import (
+ AsofJoinStrategy,
+ JoinStrategy,
+ LazyUniqueKeepStrategy,
+ MultiColSelector,
+ MultiIndexSelector,
+ PivotAgg,
+ SingleIndexSelector,
+ SizedMultiIndexSelector,
+ SizedMultiNameSelector,
+ SizeUnit,
+ UniqueKeepStrategy,
+ _2DArray,
+ _SliceIndex,
+ _SliceName,
+ )
+
+ Incomplete: TypeAlias = Any
+
__all__ = ["CompliantDataFrame", "CompliantLazyFrame", "EagerDataFrame"]

T = TypeVar("T")

# Return type of `to_dict`: series-valued when `as_series=True`, lists otherwise.
_ToDict: TypeAlias = "dict[str, CompliantSeriesT] | dict[str, list[Any]]"  # noqa: PYI047
+
+
class CompliantDataFrame(
    NumpyConvertible["_2DArray", "_2DArray"],
    DictConvertible["_ToDict[CompliantSeriesT]", Mapping[str, Any]],
    ArrowConvertible["pa.Table", "IntoArrowTable"],
    _StoresNative[NativeFrameT],
    FromNative[NativeFrameT],
    ToNarwhals[ToNarwhalsT_co],
    Sized,
    Protocol[CompliantSeriesT, CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co],
):
    """Protocol for an eager, backend-compliant dataframe wrapper.

    Mostly declares the surface a backend must implement; the few concrete
    methods (`aggregate`, `native`, `_evaluate_aliases`, `_check_columns_exist`)
    are shared defaults.
    """

    # Wrapped backend-native frame object.
    _native_frame: NativeFrameT
    # Backend identity and version, used for capability dispatch.
    _implementation: Implementation
    _backend_version: tuple[int, ...]
    # Narwhals API version this wrapper targets.
    _version: Version

    def __narwhals_dataframe__(self) -> Self: ...
    def __narwhals_namespace__(self) -> Any: ...
    @classmethod
    def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self: ...
    @classmethod
    def from_dict(
        cls,
        data: Mapping[str, Any],
        /,
        *,
        context: _FullContext,
        schema: Mapping[str, DType] | Schema | None,
    ) -> Self: ...
    @classmethod
    def from_native(cls, data: NativeFrameT, /, *, context: _FullContext) -> Self: ...
    @classmethod
    def from_numpy(
        cls,
        data: _2DArray,
        /,
        *,
        context: _FullContext,
        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
    ) -> Self: ...

    def __array__(self, dtype: Any, *, copy: bool | None) -> _2DArray: ...
    def __getitem__(
        self,
        item: tuple[
            SingleIndexSelector | MultiIndexSelector[CompliantSeriesT],
            MultiColSelector[CompliantSeriesT],
        ],
    ) -> Self: ...
    def simple_select(self, *column_names: str) -> Self:
        """`select` where all args are column names."""
        ...

    def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
        """`select` where all args are aggregations or literals.

        (so, no broadcasting is necessary).
        """
        # NOTE: Ignore is to avoid an intermittent false positive
        return self.select(*exprs)  # pyright: ignore[reportArgumentType]

    def _with_version(self, version: Version) -> Self: ...

    @property
    def native(self) -> NativeFrameT:
        # Expose the wrapped backend-native frame.
        return self._native_frame

    @property
    def columns(self) -> Sequence[str]: ...
    @property
    def schema(self) -> Mapping[str, DType]: ...
    @property
    def shape(self) -> tuple[int, int]: ...
    def clone(self) -> Self: ...
    def collect(
        self, backend: Implementation | None, **kwargs: Any
    ) -> CompliantDataFrameAny: ...
    def collect_schema(self) -> Mapping[str, DType]: ...
    def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
    def drop_nulls(self, subset: Sequence[str] | None) -> Self: ...
    def estimated_size(self, unit: SizeUnit) -> int | float: ...
    def explode(self, columns: Sequence[str]) -> Self: ...
    def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ...
    def gather_every(self, n: int, offset: int) -> Self: ...
    def get_column(self, name: str) -> CompliantSeriesT: ...
    def group_by(
        self,
        keys: Sequence[str] | Sequence[CompliantExprT_contra],
        *,
        drop_null_keys: bool,
    ) -> DataFrameGroupBy[Self, Any]: ...
    def head(self, n: int) -> Self: ...
    def item(self, row: int | None, column: int | str | None) -> Any: ...
    def iter_columns(self) -> Iterator[CompliantSeriesT]: ...
    def iter_rows(
        self, *, named: bool, buffer_size: int
    ) -> Iterator[tuple[Any, ...]] | Iterator[Mapping[str, Any]]: ...
    def is_unique(self) -> CompliantSeriesT: ...
    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self: ...
    def join_asof(
        self,
        other: Self,
        *,
        left_on: str,
        right_on: str,
        by_left: Sequence[str] | None,
        by_right: Sequence[str] | None,
        strategy: AsofJoinStrategy,
        suffix: str,
    ) -> Self: ...
    def lazy(self, *, backend: Implementation | None) -> CompliantLazyFrameAny: ...
    def pivot(
        self,
        on: Sequence[str],
        *,
        index: Sequence[str] | None,
        values: Sequence[str] | None,
        aggregate_function: PivotAgg | None,
        sort_columns: bool,
        separator: str,
    ) -> Self: ...
    def rename(self, mapping: Mapping[str, str]) -> Self: ...
    def row(self, index: int) -> tuple[Any, ...]: ...
    def rows(
        self, *, named: bool
    ) -> Sequence[tuple[Any, ...]] | Sequence[Mapping[str, Any]]: ...
    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self: ...
    def select(self, *exprs: CompliantExprT_contra) -> Self: ...
    def sort(
        self, *by: str, descending: bool | Sequence[bool], nulls_last: bool
    ) -> Self: ...
    def tail(self, n: int) -> Self: ...
    def to_arrow(self) -> pa.Table: ...
    def to_pandas(self) -> pd.DataFrame: ...
    def to_polars(self) -> pl.DataFrame: ...
    @overload
    def to_dict(self, *, as_series: Literal[True]) -> dict[str, CompliantSeriesT]: ...
    @overload
    def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
    def to_dict(
        self, *, as_series: bool
    ) -> dict[str, CompliantSeriesT] | dict[str, list[Any]]: ...
    def unique(
        self,
        subset: Sequence[str] | None,
        *,
        keep: UniqueKeepStrategy,
        maintain_order: bool | None = None,
    ) -> Self: ...
    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self: ...
    def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ...
    def with_row_index(self, name: str) -> Self: ...
    @overload
    def write_csv(self, file: None) -> str: ...
    @overload
    def write_csv(self, file: str | Path | BytesIO) -> None: ...
    def write_csv(self, file: str | Path | BytesIO | None) -> str | None: ...
    def write_parquet(self, file: str | Path | BytesIO) -> None: ...

    def _evaluate_aliases(self, *exprs: CompliantExprT_contra) -> list[str]:
        # Flatten the output aliases of each expression, in order.
        it = (expr._evaluate_aliases(self) for expr in exprs)
        return list(chain.from_iterable(it))

    def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None:
        # Returns an error object (rather than raising) when columns are missing.
        return check_columns_exist(subset, available=self.columns)
+
+
class CompliantLazyFrame(
    _StoresNative[NativeFrameT],
    FromNative[NativeFrameT],
    ToNarwhals[ToNarwhalsT_co],
    Protocol[CompliantExprT_contra, NativeFrameT, ToNarwhalsT_co],
):
    """Protocol for a lazy, backend-compliant frame wrapper.

    Narrower surface than `CompliantDataFrame`: no row access, no eager
    conversions. Concrete helpers at the bottom are shared defaults.
    """

    # Wrapped backend-native frame object.
    _native_frame: NativeFrameT
    # Backend identity and version, used for capability dispatch.
    _implementation: Implementation
    _backend_version: tuple[int, ...]
    # Narwhals API version this wrapper targets.
    _version: Version

    def __narwhals_lazyframe__(self) -> Self: ...
    def __narwhals_namespace__(self) -> Any: ...

    @classmethod
    def from_native(cls, data: NativeFrameT, /, *, context: _FullContext) -> Self: ...

    def simple_select(self, *column_names: str) -> Self:
        """`select` where all args are column names."""
        ...

    def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
        """`select` where all args are aggregations or literals.

        (so, no broadcasting is necessary).
        """
        ...

    def _with_version(self, version: Version) -> Self: ...

    @property
    def native(self) -> NativeFrameT:
        # Expose the wrapped backend-native frame.
        return self._native_frame

    @property
    def columns(self) -> Sequence[str]: ...
    @property
    def schema(self) -> Mapping[str, DType]: ...
    def _iter_columns(self) -> Iterator[Any]: ...
    def collect(
        self, backend: Implementation | None, **kwargs: Any
    ) -> CompliantDataFrameAny: ...
    def collect_schema(self) -> Mapping[str, DType]: ...
    def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
    def drop_nulls(self, subset: Sequence[str] | None) -> Self: ...
    def explode(self, columns: Sequence[str]) -> Self: ...
    def filter(self, predicate: CompliantExprT_contra | Incomplete) -> Self: ...
    @deprecated(
        "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
    )
    def gather_every(self, n: int, offset: int) -> Self: ...
    def group_by(
        self,
        keys: Sequence[str] | Sequence[CompliantExprT_contra],
        *,
        drop_null_keys: bool,
    ) -> CompliantGroupBy[Self, CompliantExprT_contra]: ...
    def head(self, n: int) -> Self: ...
    def join(
        self,
        other: Self,
        *,
        how: Literal["left", "inner", "cross", "anti", "semi"],
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self: ...
    def join_asof(
        self,
        other: Self,
        *,
        left_on: str,
        right_on: str,
        by_left: Sequence[str] | None,
        by_right: Sequence[str] | None,
        strategy: AsofJoinStrategy,
        suffix: str,
    ) -> Self: ...
    def rename(self, mapping: Mapping[str, str]) -> Self: ...
    def select(self, *exprs: CompliantExprT_contra) -> Self: ...
    def sort(
        self, *by: str, descending: bool | Sequence[bool], nulls_last: bool
    ) -> Self: ...
    @deprecated("`LazyFrame.tail` is deprecated and will be removed in a future version.")
    def tail(self, n: int) -> Self: ...
    def unique(
        self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
    ) -> Self: ...
    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self: ...
    def with_columns(self, *exprs: CompliantExprT_contra) -> Self: ...
    def with_row_index(self, name: str) -> Self: ...
    def _evaluate_expr(self, expr: CompliantExprT_contra, /) -> Any:
        """Evaluate `expr` against `self`, asserting a single output."""
        result = expr(self)
        assert len(result) == 1  # debug assertion # noqa: S101
        return result[0]

    def _evaluate_window_expr(
        self,
        expr: LazyExpr[Self, NativeExprT],
        /,
        window_inputs: WindowInputs[NativeExprT],
    ) -> NativeExprT:
        """Evaluate `expr`'s window function against `self`, asserting a single output."""
        result = expr.window_function(self, window_inputs)
        assert len(result) == 1  # debug assertion # noqa: S101
        return result[0]

    def _evaluate_aliases(self, *exprs: CompliantExprT_contra) -> list[str]:
        # Flatten the output aliases of each expression, in order.
        it = (expr._evaluate_aliases(self) for expr in exprs)
        return list(chain.from_iterable(it))

    def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None:
        # Returns an error object (rather than raising) when columns are missing.
        return check_columns_exist(subset, available=self.columns)
+
+
class EagerDataFrame(
    CompliantDataFrame[EagerSeriesT, EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"],
    CompliantLazyFrame[EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"],
    Protocol[EagerSeriesT, EagerExprT, NativeFrameT],
):
    """Protocol for eager backends, combining the DataFrame and LazyFrame surfaces.

    Provides concrete expression-evaluation helpers and the shared
    `__getitem__` row/column dispatch.
    """

    def __narwhals_namespace__(
        self,
    ) -> EagerNamespace[Self, EagerSeriesT, EagerExprT, NativeFrameT]: ...

    def to_narwhals(self) -> DataFrame[NativeFrameT]:
        # Wrap in the user-facing narwhals DataFrame for this API version.
        return self._version.dataframe(self, level="full")

    def _evaluate_expr(self, expr: EagerExprT, /) -> EagerSeriesT:
        """Evaluate `expr` and ensure it has a **single** output."""
        result: Sequence[EagerSeriesT] = expr(self)
        assert len(result) == 1  # debug assertion # noqa: S101
        return result[0]

    def _evaluate_into_exprs(self, *exprs: EagerExprT) -> Sequence[EagerSeriesT]:
        # NOTE: Ignore is to avoid an intermittent false positive
        return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs))  # pyright: ignore[reportArgumentType]

    def _evaluate_into_expr(self, expr: EagerExprT, /) -> Sequence[EagerSeriesT]:
        """Return list of raw columns.

        For eager backends we alias operations at each step.

        As a safety precaution, here we can check that the expected result names match those
        we were expecting from the various `evaluate_output_names` / `alias_output_names` calls.

        Note that for PySpark / DuckDB, we are less free to liberally set aliases whenever we want.
        """
        aliases = expr._evaluate_aliases(self)
        result = expr(self)
        if list(aliases) != (
            result_aliases := [s.name for s in result]
        ):  # pragma: no cover
            msg = f"Safety assertion failed, expected {aliases}, got {result_aliases}"
            raise AssertionError(msg)
        return result

    def _extract_comparand(self, other: EagerSeriesT, /) -> Any:
        """Extract native Series, broadcasting to `len(self)` if necessary."""
        ...

    @staticmethod
    def _numpy_column_names(
        data: _2DArray, columns: Sequence[str] | None, /
    ) -> list[str]:
        # Default to `column_0`, `column_1`, ... when no names are given.
        return list(columns or (f"column_{x}" for x in range(data.shape[1])))

    # Backend hooks for the `__getitem__` dispatch below.
    def _gather(self, rows: SizedMultiIndexSelector[Any]) -> Self: ...
    def _gather_slice(self, rows: _SliceIndex | range) -> Self: ...
    def _select_multi_index(self, columns: SizedMultiIndexSelector[Any]) -> Self: ...
    def _select_multi_name(self, columns: SizedMultiNameSelector[Any]) -> Self: ...
    def _select_slice_index(self, columns: _SliceIndex | range) -> Self: ...
    def _select_slice_name(self, columns: _SliceName) -> Self: ...
    def __getitem__(  # noqa: C901, PLR0912
        self,
        item: tuple[
            SingleIndexSelector | MultiIndexSelector[EagerSeriesT],
            MultiColSelector[EagerSeriesT],
        ],
    ) -> Self:
        """Dispatch `(rows, columns)` selection to the backend hooks above.

        Columns are narrowed first, then rows; `slice(None)` on either axis is
        a no-op for that axis.
        """
        rows, columns = item
        compliant = self
        if not is_slice_none(columns):
            # An empty sized selector selects no columns at all.
            if isinstance(columns, Sized) and len(columns) == 0:
                return compliant.select()
            if is_index_selector(columns):
                if is_slice_index(columns) or is_range(columns):
                    compliant = compliant._select_slice_index(columns)
                elif is_compliant_series(columns):
                    compliant = self._select_multi_index(columns.native)
                else:
                    compliant = compliant._select_multi_index(columns)
            elif isinstance(columns, slice):
                compliant = compliant._select_slice_name(columns)
            elif is_compliant_series(columns):
                compliant = self._select_multi_name(columns.native)
            elif is_sequence_like(columns):
                compliant = self._select_multi_name(columns)
            else:  # pragma: no cover
                msg = f"Unreachable code, got unexpected type: {type(columns)}"
                raise AssertionError(msg)

        if not is_slice_none(rows):
            # A single int row index is normalized to a one-element gather.
            if isinstance(rows, int):
                compliant = compliant._gather([rows])
            elif isinstance(rows, (slice, range)):
                compliant = compliant._gather_slice(rows)
            elif is_compliant_series(rows):
                compliant = compliant._gather(rows.native)
            elif is_sized_multi_index_selector(rows):
                compliant = compliant._gather(rows)
            else:  # pragma: no cover
                msg = f"Unreachable code, got unexpected type: {type(rows)}"
                raise AssertionError(msg)

        return compliant
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py
new file mode 100644
index 0000000..965469e
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/expr.py
@@ -0,0 +1,1140 @@
+from __future__ import annotations
+
+from functools import partial
+from operator import methodcaller
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Generic,
+ Literal,
+ Mapping,
+ Protocol,
+ Sequence,
+)
+
+from narwhals._compliant.any_namespace import (
+ CatNamespace,
+ DateTimeNamespace,
+ ListNamespace,
+ NameNamespace,
+ StringNamespace,
+ StructNamespace,
+)
+from narwhals._compliant.namespace import CompliantNamespace
+from narwhals._compliant.typing import (
+ AliasName,
+ AliasNames,
+ CompliantExprT_co,
+ CompliantFrameT,
+ CompliantLazyFrameT,
+ CompliantSeriesOrNativeExprT_co,
+ EagerDataFrameT,
+ EagerExprT,
+ EagerSeriesT,
+ LazyExprT,
+ NativeExprT,
+)
+from narwhals._typing_compat import Protocol38, deprecated
+from narwhals._utils import _StoresCompliant, not_implemented
+from narwhals.dependencies import get_numpy, is_numpy_array
+
+if TYPE_CHECKING:
+ from typing import Mapping
+
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
+ from narwhals._compliant.series import CompliantSeries
+ from narwhals._compliant.typing import (
+ AliasNames,
+ EvalNames,
+ EvalSeries,
+ ScalarKwargs,
+ WindowFunction,
+ )
+ from narwhals._expression_parsing import ExprKind, ExprMetadata
+ from narwhals._utils import Implementation, Version, _FullContext
+ from narwhals.typing import (
+ FillNullStrategy,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ TemporalLiteral,
+ TimeUnit,
+ )
+
# Public names of this module.
__all__ = ["CompliantExpr", "EagerExpr", "LazyExpr", "NativeExpr"]
+
+
class NativeExpr(Protocol):
    """An `Expr`-like object from a package with [Lazy-only support](https://narwhals-dev.github.io/narwhals/extending/#levels-of-support).

    Protocol members are chosen *purely* for matching statically - as they
    are common to all currently supported packages.
    """

    # Deliberately loose signatures: these exist only so `isinstance`-style
    # structural checks can identify a native expression object.
    def between(self, *args: Any, **kwds: Any) -> Any: ...
    def isin(self, *args: Any, **kwds: Any) -> Any: ...
+
+
+class CompliantExpr(Protocol38[CompliantFrameT, CompliantSeriesOrNativeExprT_co]):
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+ _evaluate_output_names: EvalNames[CompliantFrameT]
+ _alias_output_names: AliasNames | None
+ _metadata: ExprMetadata | None
+
+ def __call__(
+ self, df: CompliantFrameT
+ ) -> Sequence[CompliantSeriesOrNativeExprT_co]: ...
+ def __narwhals_expr__(self) -> None: ...
+ def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Self]: ...
+ @classmethod
+ def from_column_names(
+ cls,
+ evaluate_column_names: EvalNames[CompliantFrameT],
+ /,
+ *,
+ context: _FullContext,
+ ) -> Self: ...
+ @classmethod
+ def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self: ...
+ @staticmethod
+ def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[CompliantFrameT]:
+ def fn(df: CompliantFrameT) -> Sequence[str]:
+ column_names = df.columns
+ return [column_names[i] for i in indices]
+
+ return fn
+
+ def is_null(self) -> Self: ...
+ def abs(self) -> Self: ...
+ def all(self) -> Self: ...
+ def any(self) -> Self: ...
+ def alias(self, name: str) -> Self: ...
+ def cast(self, dtype: IntoDType) -> Self: ...
+ def count(self) -> Self: ...
+ def min(self) -> Self: ...
+ def max(self) -> Self: ...
+ def arg_min(self) -> Self: ...
+ def arg_max(self) -> Self: ...
+ def arg_true(self) -> Self: ...
+ def mean(self) -> Self: ...
+ def sum(self) -> Self: ...
+ def median(self) -> Self: ...
+ def skew(self) -> Self: ...
+ def std(self, *, ddof: int) -> Self: ...
+ def var(self, *, ddof: int) -> Self: ...
+ def n_unique(self) -> Self: ...
+ def null_count(self) -> Self: ...
+ def drop_nulls(self) -> Self: ...
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self: ...
+ def diff(self) -> Self: ...
+ def exp(self) -> Self: ...
+ def unique(self) -> Self: ...
+ def len(self) -> Self: ...
+ def log(self, base: float) -> Self: ...
+ def round(self, decimals: int) -> Self: ...
+ def mode(self) -> Self: ...
+ def head(self, n: int) -> Self: ...
    # --- Positional / element-wise protocol stubs (implemented per backend) ---
    def tail(self, n: int) -> Self: ...
    def shift(self, n: int) -> Self: ...
    def is_finite(self) -> Self: ...
    def is_nan(self) -> Self: ...
    def is_unique(self) -> Self: ...
    def is_first_distinct(self) -> Self: ...
    def is_last_distinct(self) -> Self: ...
    # Cumulative aggregations; `reverse` flips the accumulation direction.
    def cum_sum(self, *, reverse: bool) -> Self: ...
    def cum_count(self, *, reverse: bool) -> Self: ...
    def cum_min(self, *, reverse: bool) -> Self: ...
    def cum_max(self, *, reverse: bool) -> Self: ...
    def cum_prod(self, *, reverse: bool) -> Self: ...
    def is_in(self, other: Any) -> Self: ...
    def sort(self, *, descending: bool, nulls_last: bool) -> Self: ...
    def rank(self, method: RankMethod, *, descending: bool) -> Self: ...
    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self: ...
    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self: ...
    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self: ...
    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self: ...
    def map_batches(
        self,
        function: Callable[[CompliantSeries[Any]], CompliantExpr[Any, Any]],
        return_dtype: IntoDType | None,
    ) -> Self: ...

    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self: ...

    # Exponentially-weighted / rolling-window stubs.
    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> Self: ...

    def rolling_sum(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...

    def rolling_mean(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...

    @deprecated("Since `1.22.0`")
    def gather_every(self, n: int, offset: int) -> Self: ...
    # --- Binary / unary operator stubs ---
    def __and__(self, other: Any) -> Self: ...
    def __or__(self, other: Any) -> Self: ...
    def __add__(self, other: Any) -> Self: ...
    def __sub__(self, other: Any) -> Self: ...
    def __mul__(self, other: Any) -> Self: ...
    def __floordiv__(self, other: Any) -> Self: ...
    def __truediv__(self, other: Any) -> Self: ...
    def __mod__(self, other: Any) -> Self: ...
    def __pow__(self, other: Any) -> Self: ...
    def __gt__(self, other: Any) -> Self: ...
    def __ge__(self, other: Any) -> Self: ...
    def __lt__(self, other: Any) -> Self: ...
    def __le__(self, other: Any) -> Self: ...
    def __invert__(self) -> Self: ...
    def broadcast(
        self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]
    ) -> Self: ...
    def _is_multi_output_unnamed(self) -> bool:
        """Return `True` for multi-output aggregations without names.

        For example, column `'a'` only appears in the output as a grouping key:

            df.group_by('a').agg(nw.all().sum())

        It does not get included in:

            nw.all().sum().
        """
        # Metadata is attached before this is queried; a missing value is a bug,
        # not a user error, hence the assert (deliberately kept, see noqa).
        assert self._metadata is not None  # noqa: S101
        return self._metadata.expansion_kind.is_multi_unnamed()
+
+ def _evaluate_aliases(
+ self: CompliantExpr[CompliantFrameT, Any], frame: CompliantFrameT, /
+ ) -> Sequence[str]:
+ names = self._evaluate_output_names(frame)
+ return alias(names) if (alias := self._alias_output_names) else names
+
    # Accessor namespaces (`expr.str.*`, `expr.dt.*`, ...); concrete return
    # types are provided by the Eager/Lazy subclasses below.
    @property
    def str(self) -> Any: ...
    @property
    def name(self) -> Any: ...
    @property
    def dt(self) -> Any: ...
    @property
    def cat(self) -> Any: ...
    @property
    def list(self) -> Any: ...
    @property
    def struct(self) -> Any: ...
+
+
class DepthTrackingExpr(
    CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
    Protocol38[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
):
    """`CompliantExpr` that records how many expression nodes deep it is."""

    # Number of chained operations (0 for e.g. `nw.len()`, 1 for `nw.col('a').mean()`).
    _depth: int
    # Chained record of applied functions, e.g. "col->mean".
    _function_name: str

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[CompliantFrameT],
        /,
        *,
        context: _FullContext,
        function_name: str = "",
    ) -> Self: ...

    def _is_elementary(self) -> bool:
        """Check if expr is elementary.

        Examples:
            - nw.col('a').mean()  # depth 1
            - nw.mean('a')  # depth 1
            - nw.len()  # depth 0

        as opposed to, say

            - nw.col('a').filter(nw.col('b')>nw.col('c')).max()

        Elementary expressions are the only ones supported properly in
        pandas, PyArrow, and Dask.
        """
        return self._depth < 2

    def __repr__(self) -> str:  # pragma: no cover
        return f"{type(self).__name__}(depth={self._depth}, function_name={self._function_name})"
+
+
class EagerExpr(
    DepthTrackingExpr[EagerDataFrameT, EagerSeriesT],
    Protocol38[EagerDataFrameT, EagerSeriesT],
):
    """Expression whose evaluation produces concrete (eager) Series."""

    # Callable that evaluates this expression against a frame, yielding Series.
    _call: EvalSeries[EagerDataFrameT, EagerSeriesT]
    # Non-expressifiable kwargs (e.g. `ddof`) replayed in `agg`/`over` contexts.
    _scalar_kwargs: ScalarKwargs

    def __init__(
        self,
        call: EvalSeries[EagerDataFrameT, EagerSeriesT],
        *,
        depth: int,
        function_name: str,
        evaluate_output_names: EvalNames[EagerDataFrameT],
        alias_output_names: AliasNames | None,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        version: Version,
        scalar_kwargs: ScalarKwargs | None = None,
    ) -> None: ...

    def __call__(self, df: EagerDataFrameT) -> Sequence[EagerSeriesT]:
        # Evaluating an expression delegates to the stored callable.
        return self._call(df)

    def __narwhals_namespace__(
        self,
    ) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Self, Any]: ...
    def __narwhals_expr__(self) -> None: ...

    @classmethod
    def _from_callable(
        cls,
        func: EvalSeries[EagerDataFrameT, EagerSeriesT],
        *,
        depth: int,
        function_name: str,
        evaluate_output_names: EvalNames[EagerDataFrameT],
        alias_output_names: AliasNames | None,
        context: _FullContext,
        scalar_kwargs: ScalarKwargs | None = None,
    ) -> Self:
        # Alternate constructor: pull implementation/version info from `context`.
        return cls(
            func,
            depth=depth,
            function_name=function_name,
            evaluate_output_names=evaluate_output_names,
            alias_output_names=alias_output_names,
            implementation=context._implementation,
            backend_version=context._backend_version,
            version=context._version,
            scalar_kwargs=scalar_kwargs,
        )

    @classmethod
    def _from_series(cls, series: EagerSeriesT) -> Self:
        # Wrap a concrete Series as a depth-0 expression that ignores its input frame.
        return cls(
            lambda _df: [series],
            depth=0,
            function_name="series",
            evaluate_output_names=lambda _df: [series.name],
            alias_output_names=None,
            implementation=series._implementation,
            backend_version=series._backend_version,
            version=series._version,
        )
+
    def _reuse_series(
        self,
        method_name: str,
        *,
        returns_scalar: bool = False,
        scalar_kwargs: ScalarKwargs | None = None,
        **expressifiable_args: Any,
    ) -> Self:
        """Reuse Series implementation for expression.

        If Series.foo is already defined, and we'd like Expr.foo to be the same, we can
        leverage this method to do that for us.

        Arguments:
            method_name: name of method.
            returns_scalar: whether the Series version returns a scalar. In this case,
                the expression version should return a 1-row Series.
            scalar_kwargs: non-expressifiable args which we may need to reuse in `agg` or `over`,
                such as `ddof` for `std` and `var`.
            expressifiable_args: keyword arguments to pass to function, which may
                be expressifiable (e.g. `nw.col('a').is_between(3, nw.col('b')))`).
        """
        # Defer the actual Series call to evaluation time via a partial.
        func = partial(
            self._reuse_series_inner,
            method_name=method_name,
            returns_scalar=returns_scalar,
            scalar_kwargs=scalar_kwargs or {},
            expressifiable_args=expressifiable_args,
        )
        return self._from_callable(
            func,
            depth=self._depth + 1,
            function_name=f"{self._function_name}->{method_name}",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            scalar_kwargs=scalar_kwargs,
            context=self,
        )

    # For PyArrow.Series, we return Python Scalars (like Polars does) instead of PyArrow Scalars.
    # However, when working with expressions, we keep everything PyArrow-native.
    def _reuse_series_extra_kwargs(
        self, *, returns_scalar: bool = False
    ) -> dict[str, Any]:
        # Hook for subclasses; the default adds nothing.
        return {}

    @classmethod
    def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]:
        # Duck-typed check: anything exposing `__narwhals_expr__` counts as an expression.
        return hasattr(obj, "__narwhals_expr__")
+
    def _reuse_series_inner(
        self,
        df: EagerDataFrameT,
        *,
        method_name: str,
        returns_scalar: bool,
        scalar_kwargs: ScalarKwargs,
        expressifiable_args: dict[str, Any],
    ) -> Sequence[EagerSeriesT]:
        """Evaluate `self` on `df`, then call `method_name` on each resulting Series."""
        # Expression-valued kwargs are evaluated against `df`; plain values pass through.
        kwargs = {
            **scalar_kwargs,
            **{
                name: df._evaluate_expr(value) if self._is_expr(value) else value
                for name, value in expressifiable_args.items()
            },
        }
        method = methodcaller(
            method_name,
            **self._reuse_series_extra_kwargs(returns_scalar=returns_scalar),
            **kwargs,
        )
        # Scalar-returning methods are re-wrapped into 1-row Series.
        out: Sequence[EagerSeriesT] = [
            series._from_scalar(method(series)) if returns_scalar else method(series)
            for series in self(df)
        ]
        # Safety net: output names must line up with the expression's aliases.
        aliases = self._evaluate_aliases(df)
        if [s.name for s in out] != list(aliases):  # pragma: no cover
            msg = (
                f"Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues\n"
                f"Expression aliases: {aliases}\n"
                f"Series names: {[s.name for s in out]}"
            )
            raise AssertionError(msg)
        return out
+
    def _reuse_series_namespace(
        self,
        series_namespace: Literal["cat", "dt", "list", "name", "str", "struct"],
        method_name: str,
        **kwargs: Any,
    ) -> Self:
        """Reuse Series implementation for expression.

        Just like `_reuse_series`, but for e.g. `Expr.dt.foo` instead
        of `Expr.foo`.

        Arguments:
            series_namespace: The Series namespace.
            method_name: name of method, within `series_namespace`.
            kwargs: keyword arguments to pass to function.
        """
        return self._from_callable(
            # e.g. series.dt.to_string(**kwargs) for each evaluated series.
            lambda df: [
                getattr(getattr(series, series_namespace), method_name)(**kwargs)
                for series in self(df)
            ],
            depth=self._depth + 1,
            function_name=f"{self._function_name}->{series_namespace}.{method_name}",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            scalar_kwargs=self._scalar_kwargs,
            context=self,
        )
+
+ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
+ # Mark the resulting Series with `_broadcast = True`.
+ # Then, when extracting native objects, `extract_native` will
+ # know what to do.
+ def func(df: EagerDataFrameT) -> list[EagerSeriesT]:
+ results = []
+ for result in self(df):
+ result._broadcast = True
+ results.append(result)
+ return results
+
+ return type(self)(
+ func,
+ depth=self._depth,
+ function_name=self._function_name,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ implementation=self._implementation,
+ version=self._version,
+ scalar_kwargs=self._scalar_kwargs,
+ )
+
    def cast(self, dtype: IntoDType) -> Self:
        return self._reuse_series("cast", dtype=dtype)

    # Comparison / arithmetic operators all delegate to the Series dunder of the
    # same name. Reflected variants (`__r*__`) first alias to "literal", matching
    # Polars' naming for expressions whose left operand is a plain scalar.
    def __eq__(self, other: Self | Any) -> Self:  # type: ignore[override]
        return self._reuse_series("__eq__", other=other)

    def __ne__(self, other: Self | Any) -> Self:  # type: ignore[override]
        return self._reuse_series("__ne__", other=other)

    def __ge__(self, other: Self | Any) -> Self:
        return self._reuse_series("__ge__", other=other)

    def __gt__(self, other: Self | Any) -> Self:
        return self._reuse_series("__gt__", other=other)

    def __le__(self, other: Self | Any) -> Self:
        return self._reuse_series("__le__", other=other)

    def __lt__(self, other: Self | Any) -> Self:
        return self._reuse_series("__lt__", other=other)

    def __and__(self, other: Self | bool | Any) -> Self:
        return self._reuse_series("__and__", other=other)

    def __or__(self, other: Self | bool | Any) -> Self:
        return self._reuse_series("__or__", other=other)

    def __add__(self, other: Self | Any) -> Self:
        return self._reuse_series("__add__", other=other)

    def __sub__(self, other: Self | Any) -> Self:
        return self._reuse_series("__sub__", other=other)

    def __rsub__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rsub__", other=other)

    def __mul__(self, other: Self | Any) -> Self:
        return self._reuse_series("__mul__", other=other)

    def __truediv__(self, other: Self | Any) -> Self:
        return self._reuse_series("__truediv__", other=other)

    def __rtruediv__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rtruediv__", other=other)

    def __floordiv__(self, other: Self | Any) -> Self:
        return self._reuse_series("__floordiv__", other=other)

    def __rfloordiv__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rfloordiv__", other=other)

    def __pow__(self, other: Self | Any) -> Self:
        return self._reuse_series("__pow__", other=other)

    def __rpow__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rpow__", other=other)

    def __mod__(self, other: Self | Any) -> Self:
        return self._reuse_series("__mod__", other=other)

    def __rmod__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rmod__", other=other)

    # Unary
    def __invert__(self) -> Self:
        return self._reuse_series("__invert__")
+
    # Reductions: each delegates to the Series method and, via
    # `returns_scalar=True`, re-wraps the scalar result as a 1-row Series.
    def null_count(self) -> Self:
        return self._reuse_series("null_count", returns_scalar=True)

    def n_unique(self) -> Self:
        return self._reuse_series("n_unique", returns_scalar=True)

    def sum(self) -> Self:
        return self._reuse_series("sum", returns_scalar=True)

    def count(self) -> Self:
        return self._reuse_series("count", returns_scalar=True)

    def mean(self) -> Self:
        return self._reuse_series("mean", returns_scalar=True)

    def median(self) -> Self:
        return self._reuse_series("median", returns_scalar=True)

    def std(self, *, ddof: int) -> Self:
        # `ddof` travels as a scalar kwarg so `agg`/`over` can replay it.
        return self._reuse_series(
            "std", returns_scalar=True, scalar_kwargs={"ddof": ddof}
        )

    def var(self, *, ddof: int) -> Self:
        return self._reuse_series(
            "var", returns_scalar=True, scalar_kwargs={"ddof": ddof}
        )

    def skew(self) -> Self:
        return self._reuse_series("skew", returns_scalar=True)

    def any(self) -> Self:
        return self._reuse_series("any", returns_scalar=True)

    def all(self) -> Self:
        return self._reuse_series("all", returns_scalar=True)

    def max(self) -> Self:
        return self._reuse_series("max", returns_scalar=True)

    def min(self) -> Self:
        return self._reuse_series("min", returns_scalar=True)

    def arg_min(self) -> Self:
        return self._reuse_series("arg_min", returns_scalar=True)

    def arg_max(self) -> Self:
        return self._reuse_series("arg_max", returns_scalar=True)
+
    # Other element-wise / length-changing operations, all delegated to Series.

    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self:
        return self._reuse_series(
            "clip", lower_bound=lower_bound, upper_bound=upper_bound
        )

    def is_null(self) -> Self:
        return self._reuse_series("is_null")

    def is_nan(self) -> Self:
        return self._reuse_series("is_nan")

    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self:
        return self._reuse_series(
            "fill_null", value=value, strategy=strategy, limit=limit
        )

    def is_in(self, other: Any) -> Self:
        return self._reuse_series("is_in", other=other)

    def arg_true(self) -> Self:
        return self._reuse_series("arg_true")

    def filter(self, *predicates: Self) -> Self:
        # Multiple predicates are AND-ed together before filtering.
        plx = self.__narwhals_namespace__()
        predicate = plx.all_horizontal(*predicates)
        return self._reuse_series("filter", predicate=predicate)

    def drop_nulls(self) -> Self:
        return self._reuse_series("drop_nulls")

    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self:
        return self._reuse_series(
            "replace_strict", old=old, new=new, return_dtype=return_dtype
        )

    def sort(self, *, descending: bool, nulls_last: bool) -> Self:
        return self._reuse_series("sort", descending=descending, nulls_last=nulls_last)

    def abs(self) -> Self:
        return self._reuse_series("abs")

    def unique(self) -> Self:
        return self._reuse_series("unique", maintain_order=False)

    def diff(self) -> Self:
        return self._reuse_series("diff")

    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self:
        return self._reuse_series(
            "sample", n=n, fraction=fraction, with_replacement=with_replacement, seed=seed
        )
+
+ def alias(self, name: str) -> Self:
+ def alias_output_names(names: Sequence[str]) -> Sequence[str]:
+ if len(names) != 1:
+ msg = f"Expected function with single output, found output names: {names}"
+ raise ValueError(msg)
+ return [name]
+
+ # Define this one manually, so that we can
+ # override `output_names` and not increase depth
+ return type(self)(
+ lambda df: [series.alias(name) for series in self(df)],
+ depth=self._depth,
+ function_name=self._function_name,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=alias_output_names,
+ backend_version=self._backend_version,
+ implementation=self._implementation,
+ version=self._version,
+ scalar_kwargs=self._scalar_kwargs,
+ )
+
    def is_unique(self) -> Self:
        return self._reuse_series("is_unique")

    def is_first_distinct(self) -> Self:
        return self._reuse_series("is_first_distinct")

    def is_last_distinct(self) -> Self:
        return self._reuse_series("is_last_distinct")

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self:
        return self._reuse_series(
            "quantile",
            quantile=quantile,
            interpolation=interpolation,
            returns_scalar=True,
        )

    def head(self, n: int) -> Self:
        return self._reuse_series("head", n=n)

    def tail(self, n: int) -> Self:
        return self._reuse_series("tail", n=n)

    def round(self, decimals: int) -> Self:
        return self._reuse_series("round", decimals=decimals)

    def len(self) -> Self:
        return self._reuse_series("len", returns_scalar=True)

    def gather_every(self, n: int, offset: int) -> Self:
        return self._reuse_series("gather_every", n=n, offset=offset)

    def mode(self) -> Self:
        return self._reuse_series("mode")

    def is_finite(self) -> Self:
        return self._reuse_series("is_finite")

    # Rolling-window aggregations; kwargs forwarded verbatim to the Series impl.
    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._reuse_series(
            "rolling_mean",
            window_size=window_size,
            min_samples=min_samples,
            center=center,
        )

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._reuse_series(
            "rolling_std",
            window_size=window_size,
            min_samples=min_samples,
            center=center,
            ddof=ddof,
        )

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._reuse_series(
            "rolling_sum", window_size=window_size, min_samples=min_samples, center=center
        )

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._reuse_series(
            "rolling_var",
            window_size=window_size,
            min_samples=min_samples,
            center=center,
            ddof=ddof,
        )
+
    def map_batches(
        self, function: Callable[[Any], Any], return_dtype: IntoDType | None
    ) -> Self:
        """Apply a user function to each evaluated Series batch.

        If `function` returns a numpy array (or a numpy scalar), the result is
        converted back into a backend-native Series, keeping the input's name.
        """
        def func(df: EagerDataFrameT) -> Sequence[EagerSeriesT]:
            input_series_list = self(df)
            # Names are captured up front so numpy results can be re-labelled.
            output_names = [input_series.name for input_series in input_series_list]
            result = [function(series) for series in input_series_list]
            if is_numpy_array(result[0]) or (
                (np := get_numpy()) is not None and np.isscalar(result[0])
            ):
                from_numpy = partial(
                    self.__narwhals_namespace__()._series.from_numpy, context=self
                )
                result = [
                    from_numpy(array).alias(output_name)
                    for array, output_name in zip(result, output_names)
                ]
            if return_dtype is not None:
                result = [series.cast(return_dtype) for series in result]
            return result

        return self._from_callable(
            func,
            depth=self._depth + 1,
            function_name=self._function_name + "->map_batches",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            context=self,
        )
+
    # Accessor namespaces, each a thin wrapper delegating to the Series namespaces.
    @property
    def cat(self) -> EagerExprCatNamespace[Self]:
        return EagerExprCatNamespace(self)

    @property
    def dt(self) -> EagerExprDateTimeNamespace[Self]:
        return EagerExprDateTimeNamespace(self)

    @property
    def list(self) -> EagerExprListNamespace[Self]:
        return EagerExprListNamespace(self)

    @property
    def name(self) -> EagerExprNameNamespace[Self]:
        return EagerExprNameNamespace(self)

    @property
    def str(self) -> EagerExprStringNamespace[Self]:
        return EagerExprStringNamespace(self)

    @property
    def struct(self) -> EagerExprStructNamespace[Self]:
        return EagerExprStructNamespace(self)
+
+
class LazyExpr(
    CompliantExpr[CompliantLazyFrameT, NativeExprT],
    Protocol38[CompliantLazyFrameT, NativeExprT],
):
    """Expression over lazy frames, backed by a native expression object."""

    # Operations unavailable for lazy backends are explicitly disabled.
    arg_min: not_implemented = not_implemented()
    arg_max: not_implemented = not_implemented()
    arg_true: not_implemented = not_implemented()
    head: not_implemented = not_implemented()
    tail: not_implemented = not_implemented()
    mode: not_implemented = not_implemented()
    sort: not_implemented = not_implemented()
    sample: not_implemented = not_implemented()
    map_batches: not_implemented = not_implemented()
    ewm_mean: not_implemented = not_implemented()
    gather_every: not_implemented = not_implemented()
    replace_strict: not_implemented = not_implemented()
    cat: not_implemented = not_implemented()  # pyright: ignore[reportAssignmentType]

    @property
    def window_function(self) -> WindowFunction[CompliantLazyFrameT, NativeExprT]: ...

    @classmethod
    def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]:
        # Duck-typed check, mirroring `EagerExpr._is_expr`.
        return hasattr(obj, "__narwhals_expr__")

    def _with_callable(self, call: Callable[..., Any], /) -> Self: ...
    def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: ...
    def alias(self, name: str) -> Self:
        # Single-output rename; multi-output expressions cannot be aliased.
        def fn(names: Sequence[str]) -> Sequence[str]:
            if len(names) != 1:
                msg = f"Expected function with single output, found output names: {names}"
                raise ValueError(msg)
            return [name]

        return self._with_alias_output_names(fn)

    @classmethod
    def _alias_native(cls, expr: NativeExprT, name: str, /) -> NativeExprT: ...

    @property
    def name(self) -> LazyExprNameNamespace[Self]:
        return LazyExprNameNamespace(self)
+
+
class _ExprNamespace(  # type: ignore[misc]
    _StoresCompliant[CompliantExprT_co], Protocol[CompliantExprT_co]
):
    """Base for accessor namespaces: stores and exposes the parent expression."""

    _compliant_expr: CompliantExprT_co

    @property
    def compliant(self) -> CompliantExprT_co:
        return self._compliant_expr
+
+
class EagerExprNamespace(_ExprNamespace[EagerExprT], Generic[EagerExprT]):
    """Concrete accessor-namespace base for eager expressions."""

    def __init__(self, expr: EagerExprT, /) -> None:
        self._compliant_expr = expr
+
+
class LazyExprNamespace(_ExprNamespace[LazyExprT], Generic[LazyExprT]):
    """Concrete accessor-namespace base for lazy expressions."""

    def __init__(self, expr: LazyExprT, /) -> None:
        self._compliant_expr = expr
+
+
class EagerExprCatNamespace(
    EagerExprNamespace[EagerExprT], CatNamespace[EagerExprT], Generic[EagerExprT]
):
    """`expr.cat.*`: delegates to the Series `cat` namespace."""

    def get_categories(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("cat", "get_categories")
+
+
class EagerExprDateTimeNamespace(
    EagerExprNamespace[EagerExprT], DateTimeNamespace[EagerExprT], Generic[EagerExprT]
):
    """`expr.dt.*`: every method delegates to the Series `dt` namespace."""

    def to_string(self, format: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "to_string", format=format)

    def replace_time_zone(self, time_zone: str | None) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "dt", "replace_time_zone", time_zone=time_zone
        )

    def convert_time_zone(self, time_zone: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "dt", "convert_time_zone", time_zone=time_zone
        )

    def timestamp(self, time_unit: TimeUnit) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "dt", "timestamp", time_unit=time_unit
        )

    def date(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "date")

    def year(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "year")

    def month(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "month")

    def day(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "day")

    def hour(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "hour")

    def minute(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "minute")

    def second(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "second")

    def millisecond(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "millisecond")

    def microsecond(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "microsecond")

    def nanosecond(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "nanosecond")

    def ordinal_day(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "ordinal_day")

    def weekday(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "weekday")

    def total_minutes(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_minutes")

    def total_seconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_seconds")

    def total_milliseconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_milliseconds")

    def total_microseconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_microseconds")

    def total_nanoseconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_nanoseconds")

    def truncate(self, every: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "truncate", every=every)
+
+
class EagerExprListNamespace(
    EagerExprNamespace[EagerExprT], ListNamespace[EagerExprT], Generic[EagerExprT]
):
    """`expr.list.*`: delegates to the Series `list` namespace."""

    def len(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("list", "len")
+
+
class CompliantExprNameNamespace(  # type: ignore[misc]
    _ExprNamespace[CompliantExprT_co],
    NameNamespace[CompliantExprT_co],
    Protocol[CompliantExprT_co],
):
    """`expr.name.*`: rename outputs via a string -> string mapping function."""

    def keep(self) -> CompliantExprT_co:
        # Identity mapping with `alias=False`: drops any previously set aliases.
        return self._from_callable(lambda name: name, alias=False)

    def map(self, function: AliasName) -> CompliantExprT_co:
        return self._from_callable(function)

    def prefix(self, prefix: str) -> CompliantExprT_co:
        return self._from_callable(lambda name: f"{prefix}{name}")

    def suffix(self, suffix: str) -> CompliantExprT_co:
        return self._from_callable(lambda name: f"{name}{suffix}")

    def to_lowercase(self) -> CompliantExprT_co:
        return self._from_callable(str.lower)

    def to_uppercase(self) -> CompliantExprT_co:
        return self._from_callable(str.upper)

    @staticmethod
    def _alias_output_names(func: AliasName, /) -> AliasNames:
        # Lift a per-name function to operate on a whole sequence of names.
        def fn(output_names: Sequence[str], /) -> Sequence[str]:
            return [func(name) for name in output_names]

        return fn

    def _from_callable(
        self, func: AliasName, /, *, alias: bool = True
    ) -> CompliantExprT_co: ...
+
+
class EagerExprNameNamespace(
    EagerExprNamespace[EagerExprT],
    CompliantExprNameNamespace[EagerExprT],
    Generic[EagerExprT],
):
    """Eager implementation of `expr.name.*`."""

    def _from_callable(self, func: AliasName, /, *, alias: bool = True) -> EagerExprT:
        expr = self.compliant
        # Rebuild the expression: rename each evaluated Series with `func`,
        # and (unless `alias=False`) record the mapping in `alias_output_names`.
        return type(expr)(
            lambda df: [
                series.alias(func(name))
                for series, name in zip(expr(df), expr._evaluate_output_names(df))
            ],
            depth=expr._depth,
            function_name=expr._function_name,
            evaluate_output_names=expr._evaluate_output_names,
            alias_output_names=self._alias_output_names(func) if alias else None,
            backend_version=expr._backend_version,
            implementation=expr._implementation,
            version=expr._version,
            scalar_kwargs=expr._scalar_kwargs,
        )
+
+
class LazyExprNameNamespace(
    LazyExprNamespace[LazyExprT],
    CompliantExprNameNamespace[LazyExprT],
    Generic[LazyExprT],
):
    """Lazy implementation of `expr.name.*`."""

    def _from_callable(self, func: AliasName, /, *, alias: bool = True) -> LazyExprT:
        expr = self.compliant
        output_names = self._alias_output_names(func) if alias else None
        return expr._with_alias_output_names(output_names)
+
+
class EagerExprStringNamespace(
    EagerExprNamespace[EagerExprT], StringNamespace[EagerExprT], Generic[EagerExprT]
):
    """`expr.str.*`: every method delegates to the Series `str` namespace."""

    def len_chars(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "len_chars")

    def replace(self, pattern: str, value: str, *, literal: bool, n: int) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "str", "replace", pattern=pattern, value=value, literal=literal, n=n
        )

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "str", "replace_all", pattern=pattern, value=value, literal=literal
        )

    def strip_chars(self, characters: str | None) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "str", "strip_chars", characters=characters
        )

    def starts_with(self, prefix: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "starts_with", prefix=prefix)

    def ends_with(self, suffix: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "ends_with", suffix=suffix)

    def contains(self, pattern: str, *, literal: bool) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "str", "contains", pattern=pattern, literal=literal
        )

    def slice(self, offset: int, length: int | None) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "str", "slice", offset=offset, length=length
        )

    def split(self, by: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "split", by=by)

    def to_datetime(self, format: str | None) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "to_datetime", format=format)

    def to_lowercase(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "to_lowercase")

    def to_uppercase(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("str", "to_uppercase")
+
+
class EagerExprStructNamespace(
    EagerExprNamespace[EagerExprT], StructNamespace[EagerExprT], Generic[EagerExprT]
):
    """`expr.struct.*`: delegates to the Series `struct` namespace."""

    def field(self, name: str) -> EagerExprT:
        # The extracted field is re-aliased to its own name, matching Polars.
        return self.compliant._reuse_series_namespace("struct", "field", name=name).alias(
            name
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py
new file mode 100644
index 0000000..778e9bc
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/group_by.py
@@ -0,0 +1,233 @@
+from __future__ import annotations
+
+import re
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ ClassVar,
+ Iterable,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ TypeVar,
+)
+
+from narwhals._compliant.typing import (
+ CompliantDataFrameAny,
+ CompliantDataFrameT,
+ CompliantDataFrameT_co,
+ CompliantExprT_contra,
+ CompliantFrameT,
+ CompliantFrameT_co,
+ CompliantLazyFrameAny,
+ CompliantLazyFrameT,
+ DepthTrackingExprAny,
+ DepthTrackingExprT_contra,
+ EagerExprT_contra,
+ LazyExprT_contra,
+ NativeExprT_co,
+)
+from narwhals._typing_compat import Protocol38
+from narwhals._utils import is_sequence_of
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeAlias
+
+ _SameFrameT = TypeVar("_SameFrameT", CompliantDataFrameAny, CompliantLazyFrameAny)
+
+
__all__ = [
    "CompliantGroupBy",
    "DepthTrackingGroupBy",
    "EagerGroupBy",
    "LazyGroupBy",
    "NarwhalsAggregation",
]

# Native aggregation spec: either the name of a native method, or a callable
# (Dask may supply the latter — see `DepthTrackingGroupBy._REMAP_AGGS`).
NativeAggregationT_co = TypeVar(
    "NativeAggregationT_co", bound="str | Callable[..., Any]", covariant=True
)
# Aggregations that narwhals can remap onto a native group-by implementation.
NarwhalsAggregation: TypeAlias = Literal[
    "sum", "mean", "median", "max", "min", "std", "var", "len", "n_unique", "count"
]


# Matches "name->" segments of chained function names (e.g. "col->mean");
# presumably used to recover the leaf aggregation name — usage is outside this
# view, confirm against the rest of the module.
_RE_LEAF_NAME: re.Pattern[str] = re.compile(r"(\w+->)")
+
+
class CompliantGroupBy(Protocol38[CompliantFrameT_co, CompliantExprT_contra]):
    """Protocol for a backend group-by: construct with keys, then call `.agg(...)`."""

    # The wrapped compliant frame; typed `Any` so implementations may narrow it.
    _compliant_frame: Any

    @property
    def compliant(self) -> CompliantFrameT_co:
        return self._compliant_frame  # type: ignore[no-any-return]

    def __init__(
        self,
        compliant_frame: CompliantFrameT_co,
        keys: Sequence[CompliantExprT_contra] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None: ...

    def agg(self, *exprs: CompliantExprT_contra) -> CompliantFrameT_co: ...
+
+
class DataFrameGroupBy(
    CompliantGroupBy[CompliantDataFrameT_co, CompliantExprT_contra],
    Protocol38[CompliantDataFrameT_co, CompliantExprT_contra],
):
    """Eager group-by protocol: additionally iterable as (key, sub-frame) pairs."""

    def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ...
+
+
class ParseKeysGroupBy(
    CompliantGroupBy[CompliantFrameT, CompliantExprT_contra],
    Protocol38[CompliantFrameT, CompliantExprT_contra],
):
    """Group-by mixin that normalizes keys given either as names or expressions."""

    def _parse_keys(
        self,
        compliant_frame: CompliantFrameT,
        keys: Sequence[CompliantExprT_contra] | Sequence[str],
    ) -> tuple[CompliantFrameT, list[str], list[str]]:
        # Returns (frame, evaluated key column names, output key names).
        if is_sequence_of(keys, str):
            keys_str = list(keys)
            # Plain string keys need no remapping: evaluated == output names.
            return compliant_frame, keys_str, keys_str.copy()
        else:
            return self._parse_expr_keys(compliant_frame, keys=keys)

    @staticmethod
    def _parse_expr_keys(
        compliant_frame: _SameFrameT, keys: Sequence[CompliantExprT_contra]
    ) -> tuple[_SameFrameT, list[str], list[str]]:
        """Parses key expressions to set up `.agg` operation with correct information.

        Since keys are expressions, it's possible to alias any such key to match
        other dataframe column names.

        In order to match polars behavior and not overwrite columns when evaluating keys:

        - We evaluate what the output key names should be, in order to remap temporary column
          names to the expected ones, and to exclude those from unnamed expressions in
          `.agg(...)` context (see https://github.com/narwhals-dev/narwhals/pull/2325#issuecomment-2800004520)
        - Create temporary names for evaluated key expressions that are guaranteed to have
          no overlap with any existing column name.
        - Add these temporary columns to the compliant dataframe.
        """
        # Make temp names strictly longer than every existing column name, so
        # they cannot collide with any column already in the frame.
        tmp_name_length = max(len(str(c)) for c in compliant_frame.columns) + 1

        def _temporary_name(key: str) -> str:
            # 5 is the length of `__tmp`
            key_str = str(key)  # pandas allows non-string column names :sob:
            return f"_{key_str}_tmp{'_' * (tmp_name_length - len(key_str) - 5)}"

        output_names = compliant_frame._evaluate_aliases(*keys)

        safe_keys = [
            # multi-output expression cannot have duplicate names, hence it's safe to suffix
            key.name.map(_temporary_name)
            if (metadata := key._metadata) and metadata.expansion_kind.is_multi_output()
            # otherwise it's single named and we can use Expr.alias
            else key.alias(_temporary_name(new_name))
            for key, new_name in zip(keys, output_names)
        ]
        return (
            compliant_frame.with_columns(*safe_keys),
            compliant_frame._evaluate_aliases(*safe_keys),
            output_names,
        )
+
+
+class DepthTrackingGroupBy(
+ ParseKeysGroupBy[CompliantFrameT, DepthTrackingExprT_contra],
+ Protocol38[CompliantFrameT, DepthTrackingExprT_contra, NativeAggregationT_co],
+):
+ """`CompliantGroupBy` variant, deals with `Eager` and other backends that utilize `CompliantExpr._depth`."""
+
+ _REMAP_AGGS: ClassVar[Mapping[NarwhalsAggregation, Any]]
+ """Mapping from `narwhals` to native representation.
+
+ Note:
+ - `Dask` *may* return a `Callable` instead of a `str` referring to one.
+ """
+
+ def _ensure_all_simple(self, exprs: Sequence[DepthTrackingExprT_contra]) -> None:
+ for expr in exprs:
+ if not self._is_simple(expr):
+ name = self.compliant._implementation.name.lower()
+ msg = (
+ f"Non-trivial complex aggregation found.\n\n"
+ f"Hint: you were probably trying to apply a non-elementary aggregation with a"
+ f"{name!r} table.\n"
+ "Please rewrite your query such that group-by aggregations "
+ "are elementary. For example, instead of:\n\n"
+ " df.group_by('a').agg(nw.col('b').round(2).mean())\n\n"
+ "use:\n\n"
+ " df.with_columns(nw.col('b').round(2)).group_by('a').agg(nw.col('b').mean())\n\n"
+ )
+ raise ValueError(msg)
+
+ @classmethod
+ def _is_simple(cls, expr: DepthTrackingExprAny, /) -> bool:
+ """Return `True` is we can efficiently use `expr` in a native `group_by` context."""
+ return expr._is_elementary() and cls._leaf_name(expr) in cls._REMAP_AGGS
+
+ @classmethod
+ def _remap_expr_name(
+ cls, name: NarwhalsAggregation | Any, /
+ ) -> NativeAggregationT_co:
+ """Replace `name`, with some native representation.
+
+ Arguments:
+ name: Name of a `nw.Expr` aggregation method.
+
+ Returns:
+ A native compatible representation.
+ """
+ return cls._REMAP_AGGS.get(name, name)
+
+ @classmethod
+ def _leaf_name(cls, expr: DepthTrackingExprAny, /) -> NarwhalsAggregation | Any:
+ """Return the last function name in the chain defined by `expr`."""
+ return _RE_LEAF_NAME.sub("", expr._function_name)
+
+
+class EagerGroupBy(
+ DepthTrackingGroupBy[CompliantDataFrameT, EagerExprT_contra, NativeAggregationT_co],
+ DataFrameGroupBy[CompliantDataFrameT, EagerExprT_contra],
+ Protocol38[CompliantDataFrameT, EagerExprT_contra, NativeAggregationT_co],
+): ...
+
+
+class LazyGroupBy(
+ ParseKeysGroupBy[CompliantLazyFrameT, LazyExprT_contra],
+ CompliantGroupBy[CompliantLazyFrameT, LazyExprT_contra],
+ Protocol38[CompliantLazyFrameT, LazyExprT_contra, NativeExprT_co],
+):
+ _keys: list[str]
+ _output_key_names: list[str]
+
+ def _evaluate_expr(self, expr: LazyExprT_contra, /) -> Iterator[NativeExprT_co]:
+ output_names = expr._evaluate_output_names(self.compliant)
+ aliases = (
+ expr._alias_output_names(output_names)
+ if expr._alias_output_names
+ else output_names
+ )
+ native_exprs = expr(self.compliant)
+ if expr._is_multi_output_unnamed():
+ exclude = {*self._keys, *self._output_key_names}
+ for native_expr, name, alias in zip(native_exprs, output_names, aliases):
+ if name not in exclude:
+ yield expr._alias_native(native_expr, alias)
+ else:
+ for native_expr, alias in zip(native_exprs, aliases):
+ yield expr._alias_native(native_expr, alias)
+
+ def _evaluate_exprs(
+ self, exprs: Iterable[LazyExprT_contra], /
+ ) -> Iterator[NativeExprT_co]:
+ for expr in exprs:
+ yield from self._evaluate_expr(expr)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py
new file mode 100644
index 0000000..e73ccc2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/namespace.py
@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+from functools import partial
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Container,
+ Iterable,
+ Mapping,
+ Protocol,
+ Sequence,
+ overload,
+)
+
+from narwhals._compliant.typing import (
+ CompliantExprT,
+ CompliantFrameT,
+ CompliantLazyFrameT,
+ DepthTrackingExprT,
+ EagerDataFrameT,
+ EagerExprT,
+ EagerSeriesT,
+ LazyExprT,
+ NativeFrameT,
+ NativeFrameT_co,
+)
+from narwhals._utils import (
+ exclude_column_names,
+ get_column_names,
+ passthrough_column_names,
+)
+from narwhals.dependencies import is_numpy_array_2d
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeAlias
+
+ from narwhals._compliant.selectors import CompliantSelectorNamespace
+ from narwhals._compliant.when_then import CompliantWhen, EagerWhen
+ from narwhals._utils import Implementation, Version
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.typing import (
+ ConcatMethod,
+ Into1DArray,
+ IntoDType,
+ NonNestedLiteral,
+ _2DArray,
+ )
+
+ Incomplete: TypeAlias = Any
+
+__all__ = ["CompliantNamespace", "EagerNamespace"]
+
+
+class CompliantNamespace(Protocol[CompliantFrameT, CompliantExprT]):
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+
+ def all(self) -> CompliantExprT:
+ return self._expr.from_column_names(get_column_names, context=self)
+
+ def col(self, *column_names: str) -> CompliantExprT:
+ return self._expr.from_column_names(
+ passthrough_column_names(column_names), context=self
+ )
+
+ def exclude(self, excluded_names: Container[str]) -> CompliantExprT:
+ return self._expr.from_column_names(
+ partial(exclude_column_names, names=excluded_names), context=self
+ )
+
+ def nth(self, *column_indices: int) -> CompliantExprT:
+ return self._expr.from_column_indices(*column_indices, context=self)
+
+ def len(self) -> CompliantExprT: ...
+ def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> CompliantExprT: ...
+ def all_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ...
+ def any_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ...
+ def sum_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ...
+ def mean_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ...
+ def min_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ...
+ def max_horizontal(self, *exprs: CompliantExprT) -> CompliantExprT: ...
+ def concat(
+ self, items: Iterable[CompliantFrameT], *, how: ConcatMethod
+ ) -> CompliantFrameT: ...
+ def when(
+ self, predicate: CompliantExprT
+ ) -> CompliantWhen[CompliantFrameT, Incomplete, CompliantExprT]: ...
+ def concat_str(
+ self, *exprs: CompliantExprT, separator: str, ignore_nulls: bool
+ ) -> CompliantExprT: ...
+ @property
+ def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
+ @property
+ def _expr(self) -> type[CompliantExprT]: ...
+
+
+class DepthTrackingNamespace(
+ CompliantNamespace[CompliantFrameT, DepthTrackingExprT],
+ Protocol[CompliantFrameT, DepthTrackingExprT],
+):
+ def all(self) -> DepthTrackingExprT:
+ return self._expr.from_column_names(
+ get_column_names, function_name="all", context=self
+ )
+
+ def col(self, *column_names: str) -> DepthTrackingExprT:
+ return self._expr.from_column_names(
+ passthrough_column_names(column_names), function_name="col", context=self
+ )
+
+ def exclude(self, excluded_names: Container[str]) -> DepthTrackingExprT:
+ return self._expr.from_column_names(
+ partial(exclude_column_names, names=excluded_names),
+ function_name="exclude",
+ context=self,
+ )
+
+
+class LazyNamespace(
+ CompliantNamespace[CompliantLazyFrameT, LazyExprT],
+ Protocol[CompliantLazyFrameT, LazyExprT, NativeFrameT_co],
+):
+ @property
+ def _lazyframe(self) -> type[CompliantLazyFrameT]: ...
+
+ def from_native(self, data: NativeFrameT_co | Any, /) -> CompliantLazyFrameT:
+ if self._lazyframe._is_native(data):
+ return self._lazyframe.from_native(data, context=self)
+ else: # pragma: no cover
+ msg = f"Unsupported type: {type(data).__name__!r}"
+ raise TypeError(msg)
+
+
+class EagerNamespace(
+ DepthTrackingNamespace[EagerDataFrameT, EagerExprT],
+ Protocol[EagerDataFrameT, EagerSeriesT, EagerExprT, NativeFrameT],
+):
+ @property
+ def _dataframe(self) -> type[EagerDataFrameT]: ...
+ @property
+ def _series(self) -> type[EagerSeriesT]: ...
+ def when(
+ self, predicate: EagerExprT
+ ) -> EagerWhen[EagerDataFrameT, EagerSeriesT, EagerExprT]: ...
+
+ def from_native(self, data: Any, /) -> EagerDataFrameT | EagerSeriesT:
+ if self._dataframe._is_native(data):
+ return self._dataframe.from_native(data, context=self)
+ elif self._series._is_native(data):
+ return self._series.from_native(data, context=self)
+ msg = f"Unsupported type: {type(data).__name__!r}"
+ raise TypeError(msg)
+
+ @overload
+ def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT: ...
+
+ @overload
+ def from_numpy(
+ self,
+ data: _2DArray,
+ /,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+ ) -> EagerDataFrameT: ...
+
+ def from_numpy(
+ self,
+ data: Into1DArray | _2DArray,
+ /,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+ ) -> EagerDataFrameT | EagerSeriesT:
+ if is_numpy_array_2d(data):
+ return self._dataframe.from_numpy(data, schema=schema, context=self)
+ return self._series.from_numpy(data, context=self)
+
+ def _concat_diagonal(self, dfs: Sequence[NativeFrameT], /) -> NativeFrameT: ...
+ def _concat_horizontal(
+ self, dfs: Sequence[NativeFrameT | Any], /
+ ) -> NativeFrameT: ...
+ def _concat_vertical(self, dfs: Sequence[NativeFrameT], /) -> NativeFrameT: ...
+ def concat(
+ self, items: Iterable[EagerDataFrameT], *, how: ConcatMethod
+ ) -> EagerDataFrameT:
+ dfs = [item.native for item in items]
+ if how == "horizontal":
+ native = self._concat_horizontal(dfs)
+ elif how == "vertical":
+ native = self._concat_vertical(dfs)
+ elif how == "diagonal":
+ native = self._concat_diagonal(dfs)
+ else: # pragma: no cover
+ raise NotImplementedError
+ return self._dataframe.from_native(native, context=self)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py
new file mode 100644
index 0000000..9d4e468
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/selectors.py
@@ -0,0 +1,332 @@
+"""Almost entirely complete, generic `selectors` implementation."""
+
+from __future__ import annotations
+
+import re
+from functools import partial
+from typing import (
+ TYPE_CHECKING,
+ Collection,
+ Iterable,
+ Iterator,
+ Protocol,
+ Sequence,
+ TypeVar,
+ overload,
+)
+
+from narwhals._compliant.expr import CompliantExpr
+from narwhals._typing_compat import Protocol38
+from narwhals._utils import (
+ _parse_time_unit_and_time_zone,
+ dtype_matches_time_unit_and_time_zone,
+ get_column_names,
+ is_compliant_dataframe,
+)
+
+if TYPE_CHECKING:
+ from datetime import timezone
+
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._compliant.expr import NativeExpr
+ from narwhals._compliant.typing import (
+ CompliantDataFrameAny,
+ CompliantExprAny,
+ CompliantFrameAny,
+ CompliantLazyFrameAny,
+ CompliantSeriesAny,
+ CompliantSeriesOrNativeExprAny,
+ EvalNames,
+ EvalSeries,
+ ScalarKwargs,
+ )
+ from narwhals._utils import Implementation, Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.typing import TimeUnit
+
+__all__ = [
+ "CompliantSelector",
+ "CompliantSelectorNamespace",
+ "EagerSelectorNamespace",
+ "LazySelectorNamespace",
+]
+
+
+SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeriesOrNativeExprAny")
+SeriesT = TypeVar("SeriesT", bound="CompliantSeriesAny")
+ExprT = TypeVar("ExprT", bound="NativeExpr")
+FrameT = TypeVar("FrameT", bound="CompliantFrameAny")
+DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrameAny")
+LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrameAny")
+SelectorOrExpr: TypeAlias = (
+ "CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]"
+)
+
+
+class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]):
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+
+ @classmethod
+ def from_namespace(cls, context: _FullContext, /) -> Self:
+ obj = cls.__new__(cls)
+ obj._implementation = context._implementation
+ obj._backend_version = context._backend_version
+ obj._version = context._version
+ return obj
+
+ @property
+ def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ...
+
+ def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesOrExprT]: ...
+
+ def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]: ...
+
+ def _iter_columns_dtypes(
+ self, df: FrameT, /
+ ) -> Iterator[tuple[SeriesOrExprT, DType]]: ...
+
+ def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesOrExprT, str]]:
+ yield from zip(self._iter_columns(df), df.columns)
+
+ def _is_dtype(
+ self: CompliantSelectorNamespace[FrameT, SeriesOrExprT], dtype: type[DType], /
+ ) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ return [
+ ser for ser, tp in self._iter_columns_dtypes(df) if isinstance(tp, dtype)
+ ]
+
+ def names(df: FrameT) -> Sequence[str]:
+ return [name for name, tp in self._iter_schema(df) if isinstance(tp, dtype)]
+
+ return self._selector.from_callables(series, names, context=self)
+
+ def by_dtype(
+ self, dtypes: Collection[DType | type[DType]]
+ ) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ return [ser for ser, tp in self._iter_columns_dtypes(df) if tp in dtypes]
+
+ def names(df: FrameT) -> Sequence[str]:
+ return [name for name, tp in self._iter_schema(df) if tp in dtypes]
+
+ return self._selector.from_callables(series, names, context=self)
+
+ def matches(self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ p = re.compile(pattern)
+
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ if (
+ is_compliant_dataframe(df)
+ and not self._implementation.is_duckdb()
+ and not self._implementation.is_ibis()
+ ):
+ return [df.get_column(col) for col in df.columns if p.search(col)]
+
+ return [ser for ser, name in self._iter_columns_names(df) if p.search(name)]
+
+ def names(df: FrameT) -> Sequence[str]:
+ return [col for col in df.columns if p.search(col)]
+
+ return self._selector.from_callables(series, names, context=self)
+
+ def numeric(self) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ return [ser for ser, tp in self._iter_columns_dtypes(df) if tp.is_numeric()]
+
+ def names(df: FrameT) -> Sequence[str]:
+ return [name for name, tp in self._iter_schema(df) if tp.is_numeric()]
+
+ return self._selector.from_callables(series, names, context=self)
+
+ def categorical(self) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ return self._is_dtype(self._version.dtypes.Categorical)
+
+ def string(self) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ return self._is_dtype(self._version.dtypes.String)
+
+ def boolean(self) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ return self._is_dtype(self._version.dtypes.Boolean)
+
+ def all(self) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ return list(self._iter_columns(df))
+
+ return self._selector.from_callables(series, get_column_names, context=self)
+
+ def datetime(
+ self,
+ time_unit: TimeUnit | Iterable[TimeUnit] | None,
+ time_zone: str | timezone | Iterable[str | timezone | None] | None,
+ ) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ time_units, time_zones = _parse_time_unit_and_time_zone(time_unit, time_zone)
+ matches = partial(
+ dtype_matches_time_unit_and_time_zone,
+ dtypes=self._version.dtypes,
+ time_units=time_units,
+ time_zones=time_zones,
+ )
+
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ return [ser for ser, tp in self._iter_columns_dtypes(df) if matches(tp)]
+
+ def names(df: FrameT) -> Sequence[str]:
+ return [name for name, tp in self._iter_schema(df) if matches(tp)]
+
+ return self._selector.from_callables(series, names, context=self)
+
+
+class EagerSelectorNamespace(
+ CompliantSelectorNamespace[DataFrameT, SeriesT], Protocol[DataFrameT, SeriesT]
+):
+ def _iter_schema(self, df: DataFrameT, /) -> Iterator[tuple[str, DType]]:
+ for ser in self._iter_columns(df):
+ yield ser.name, ser.dtype
+
+ def _iter_columns(self, df: DataFrameT, /) -> Iterator[SeriesT]:
+ yield from df.iter_columns()
+
+ def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DType]]:
+ for ser in self._iter_columns(df):
+ yield ser, ser.dtype
+
+
+class LazySelectorNamespace(
+ CompliantSelectorNamespace[LazyFrameT, ExprT], Protocol[LazyFrameT, ExprT]
+):
+ def _iter_schema(self, df: LazyFrameT) -> Iterator[tuple[str, DType]]:
+ yield from df.schema.items()
+
+ def _iter_columns(self, df: LazyFrameT) -> Iterator[ExprT]:
+ yield from df._iter_columns()
+
+ def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[ExprT, DType]]:
+ yield from zip(self._iter_columns(df), df.schema.values())
+
+
+class CompliantSelector(
+ CompliantExpr[FrameT, SeriesOrExprT], Protocol38[FrameT, SeriesOrExprT]
+):
+ _call: EvalSeries[FrameT, SeriesOrExprT]
+ _window_function: None
+ _function_name: str
+ _depth: int
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+ _scalar_kwargs: ScalarKwargs
+
+ @classmethod
+ def from_callables(
+ cls,
+ call: EvalSeries[FrameT, SeriesOrExprT],
+ evaluate_output_names: EvalNames[FrameT],
+ *,
+ context: _FullContext,
+ ) -> Self:
+ obj = cls.__new__(cls)
+ obj._call = call
+ obj._window_function = None
+ obj._depth = 0
+ obj._function_name = "selector"
+ obj._evaluate_output_names = evaluate_output_names
+ obj._alias_output_names = None
+ obj._implementation = context._implementation
+ obj._backend_version = context._backend_version
+ obj._version = context._version
+ obj._scalar_kwargs = {}
+ return obj
+
+ @property
+ def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesOrExprT]:
+ return self.__narwhals_namespace__().selectors
+
+ def _to_expr(self) -> CompliantExpr[FrameT, SeriesOrExprT]: ...
+
+ def _is_selector(
+ self, other: Self | CompliantExpr[FrameT, SeriesOrExprT]
+ ) -> TypeIs[CompliantSelector[FrameT, SeriesOrExprT]]:
+ return isinstance(other, type(self))
+
+ @overload
+ def __sub__(self, other: Self) -> Self: ...
+ @overload
+ def __sub__(
+ self, other: CompliantExpr[FrameT, SeriesOrExprT]
+ ) -> CompliantExpr[FrameT, SeriesOrExprT]: ...
+ def __sub__(
+ self, other: SelectorOrExpr[FrameT, SeriesOrExprT]
+ ) -> SelectorOrExpr[FrameT, SeriesOrExprT]:
+ if self._is_selector(other):
+
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+ return [
+ x for x, name in zip(self(df), lhs_names) if name not in rhs_names
+ ]
+
+ def names(df: FrameT) -> Sequence[str]:
+ lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+ return [x for x in lhs_names if x not in rhs_names]
+
+ return self.selectors._selector.from_callables(series, names, context=self)
+ return self._to_expr() - other
+
+ @overload
+ def __or__(self, other: Self) -> Self: ...
+ @overload
+ def __or__(
+ self, other: CompliantExpr[FrameT, SeriesOrExprT]
+ ) -> CompliantExpr[FrameT, SeriesOrExprT]: ...
+ def __or__(
+ self, other: SelectorOrExpr[FrameT, SeriesOrExprT]
+ ) -> SelectorOrExpr[FrameT, SeriesOrExprT]:
+ if self._is_selector(other):
+
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+ return [
+ *(x for x, name in zip(self(df), lhs_names) if name not in rhs_names),
+ *other(df),
+ ]
+
+ def names(df: FrameT) -> Sequence[str]:
+ lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+ return [*(x for x in lhs_names if x not in rhs_names), *rhs_names]
+
+ return self.selectors._selector.from_callables(series, names, context=self)
+ return self._to_expr() | other
+
+ @overload
+ def __and__(self, other: Self) -> Self: ...
+ @overload
+ def __and__(
+ self, other: CompliantExpr[FrameT, SeriesOrExprT]
+ ) -> CompliantExpr[FrameT, SeriesOrExprT]: ...
+ def __and__(
+ self, other: SelectorOrExpr[FrameT, SeriesOrExprT]
+ ) -> SelectorOrExpr[FrameT, SeriesOrExprT]:
+ if self._is_selector(other):
+
+ def series(df: FrameT) -> Sequence[SeriesOrExprT]:
+ lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+ return [x for x, name in zip(self(df), lhs_names) if name in rhs_names]
+
+ def names(df: FrameT) -> Sequence[str]:
+ lhs_names, rhs_names = _eval_lhs_rhs(df, self, other)
+ return [x for x in lhs_names if x in rhs_names]
+
+ return self.selectors._selector.from_callables(series, names, context=self)
+ return self._to_expr() & other
+
+ def __invert__(self) -> CompliantSelector[FrameT, SeriesOrExprT]:
+ return self.selectors.all() - self
+
+
+def _eval_lhs_rhs(
+ df: CompliantFrameAny, lhs: CompliantExprAny, rhs: CompliantExprAny
+) -> tuple[Sequence[str], Sequence[str]]:
+ return lhs._evaluate_output_names(df), rhs._evaluate_output_names(df)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/series.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/series.py
new file mode 100644
index 0000000..706fd2b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/series.py
@@ -0,0 +1,396 @@
+from __future__ import annotations
+
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Generic,
+ Iterable,
+ Iterator,
+ Mapping,
+ Protocol,
+ Sequence,
+)
+
+from narwhals._compliant.any_namespace import (
+ CatNamespace,
+ DateTimeNamespace,
+ ListNamespace,
+ StringNamespace,
+ StructNamespace,
+)
+from narwhals._compliant.typing import (
+ CompliantSeriesT_co,
+ EagerSeriesT_co,
+ NativeSeriesT,
+ NativeSeriesT_co,
+)
+from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals
+from narwhals._utils import (
+ _StoresCompliant,
+ _StoresNative,
+ is_compliant_series,
+ is_sized_multi_index_selector,
+ unstable,
+)
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import Self
+
+ from narwhals._compliant.dataframe import CompliantDataFrame
+ from narwhals._compliant.expr import CompliantExpr, EagerExpr
+ from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
+ from narwhals._utils import Implementation, Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.series import Series
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ Into1DArray,
+ IntoDType,
+ MultiIndexSelector,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ SizedMultiIndexSelector,
+ TemporalLiteral,
+ _1DArray,
+ _SliceIndex,
+ )
+
+__all__ = ["CompliantSeries", "EagerSeries"]
+
+
+class CompliantSeries(
+ NumpyConvertible["_1DArray", "Into1DArray"],
+ FromIterable,
+ FromNative[NativeSeriesT],
+ ToNarwhals["Series[NativeSeriesT]"],
+ Protocol[NativeSeriesT],
+):
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+
+ @property
+ def dtype(self) -> DType: ...
+ @property
+ def name(self) -> str: ...
+ @property
+ def native(self) -> NativeSeriesT: ...
+ def __narwhals_series__(self) -> Self:
+ return self
+
+ def __narwhals_namespace__(self) -> CompliantNamespace[Any, Any]: ...
+ def __native_namespace__(self) -> ModuleType: ...
+ def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ...
+ def __contains__(self, other: Any) -> bool: ...
+ def __getitem__(self, item: MultiIndexSelector[Self]) -> Any: ...
+ def __iter__(self) -> Iterator[Any]: ...
+ def __len__(self) -> int:
+ return len(self.native)
+
+ def _with_native(self, series: Any) -> Self: ...
+ def _with_version(self, version: Version) -> Self: ...
+ def _to_expr(self) -> CompliantExpr[Any, Self]: ...
+ @classmethod
+ def from_native(cls, data: NativeSeriesT, /, *, context: _FullContext) -> Self: ...
+ @classmethod
+ def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self: ...
+ @classmethod
+ def from_iterable(
+ cls,
+ data: Iterable[Any],
+ /,
+ *,
+ context: _FullContext,
+ name: str = "",
+ dtype: IntoDType | None = None,
+ ) -> Self: ...
+ def to_narwhals(self) -> Series[NativeSeriesT]:
+ return self._version.series(self, level="full")
+
+ # Operators
+ def __add__(self, other: Any) -> Self: ...
+ def __and__(self, other: Any) -> Self: ...
+ def __eq__(self, other: object) -> Self: ... # type: ignore[override]
+ def __floordiv__(self, other: Any) -> Self: ...
+ def __ge__(self, other: Any) -> Self: ...
+ def __gt__(self, other: Any) -> Self: ...
+ def __invert__(self) -> Self: ...
+ def __le__(self, other: Any) -> Self: ...
+ def __lt__(self, other: Any) -> Self: ...
+ def __mod__(self, other: Any) -> Self: ...
+ def __mul__(self, other: Any) -> Self: ...
+ def __ne__(self, other: object) -> Self: ... # type: ignore[override]
+ def __or__(self, other: Any) -> Self: ...
+ def __pow__(self, other: Any) -> Self: ...
+ def __radd__(self, other: Any) -> Self: ...
+ def __rand__(self, other: Any) -> Self: ...
+ def __rfloordiv__(self, other: Any) -> Self: ...
+ def __rmod__(self, other: Any) -> Self: ...
+ def __rmul__(self, other: Any) -> Self: ...
+ def __ror__(self, other: Any) -> Self: ...
+ def __rpow__(self, other: Any) -> Self: ...
+ def __rsub__(self, other: Any) -> Self: ...
+ def __rtruediv__(self, other: Any) -> Self: ...
+ def __sub__(self, other: Any) -> Self: ...
+ def __truediv__(self, other: Any) -> Self: ...
+
+ def abs(self) -> Self: ...
+ def alias(self, name: str) -> Self: ...
+ def all(self) -> bool: ...
+ def any(self) -> bool: ...
+ def arg_max(self) -> int: ...
+ def arg_min(self) -> int: ...
+ def arg_true(self) -> Self: ...
+ def cast(self, dtype: IntoDType) -> Self: ...
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None,
+ ) -> Self: ...
+ def count(self) -> int: ...
+ def cum_count(self, *, reverse: bool) -> Self: ...
+ def cum_max(self, *, reverse: bool) -> Self: ...
+ def cum_min(self, *, reverse: bool) -> Self: ...
+ def cum_prod(self, *, reverse: bool) -> Self: ...
+ def cum_sum(self, *, reverse: bool) -> Self: ...
+ def diff(self) -> Self: ...
+ def drop_nulls(self) -> Self: ...
+ def ewm_mean(
+ self,
+ *,
+ com: float | None,
+ span: float | None,
+ half_life: float | None,
+ alpha: float | None,
+ adjust: bool,
+ min_samples: int,
+ ignore_nulls: bool,
+ ) -> Self: ...
+ def exp(self) -> Self: ...
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self: ...
+ def filter(self, predicate: Any) -> Self: ...
+ def gather_every(self, n: int, offset: int) -> Self: ...
+ @unstable
+ def hist(
+ self,
+ bins: list[float | int] | None,
+ *,
+ bin_count: int | None,
+ include_breakpoint: bool,
+ ) -> CompliantDataFrame[Self, Any, Any, Any]: ...
+ def head(self, n: int) -> Self: ...
+ def is_between(
+ self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
+ ) -> Self: ...
+ def is_finite(self) -> Self: ...
+ def is_first_distinct(self) -> Self: ...
+ def is_in(self, other: Any) -> Self: ...
+ def is_last_distinct(self) -> Self: ...
+ def is_nan(self) -> Self: ...
+ def is_null(self) -> Self: ...
+ def is_sorted(self, *, descending: bool) -> bool: ...
+ def is_unique(self) -> Self: ...
+ def item(self, index: int | None) -> Any: ...
+ def len(self) -> int: ...
+ def log(self, base: float) -> Self: ...
+ def max(self) -> Any: ...
+ def mean(self) -> float: ...
+ def median(self) -> float: ...
+ def min(self) -> Any: ...
+ def mode(self) -> Self: ...
+ def n_unique(self) -> int: ...
+ def null_count(self) -> int: ...
+ def quantile(
+ self, quantile: float, interpolation: RollingInterpolationMethod
+ ) -> float: ...
+ def rank(self, method: RankMethod, *, descending: bool) -> Self: ...
+ def replace_strict(
+ self,
+ old: Sequence[Any] | Mapping[Any, Any],
+ new: Sequence[Any],
+ *,
+ return_dtype: IntoDType | None,
+ ) -> Self: ...
+ def rolling_mean(
+ self, window_size: int, *, min_samples: int, center: bool
+ ) -> Self: ...
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self: ...
+ def rolling_sum(
+ self, window_size: int, *, min_samples: int, center: bool
+ ) -> Self: ...
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self: ...
+ def round(self, decimals: int) -> Self: ...
+ def sample(
+ self,
+ n: int | None,
+ *,
+ fraction: float | None,
+ with_replacement: bool,
+ seed: int | None,
+ ) -> Self: ...
+ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: ...
+ def shift(self, n: int) -> Self: ...
+ def skew(self) -> float | None: ...
+ def sort(self, *, descending: bool, nulls_last: bool) -> Self: ...
+ def std(self, *, ddof: int) -> float: ...
+ def sum(self) -> float: ...
+ def tail(self, n: int) -> Self: ...
+ def to_arrow(self) -> pa.Array[Any]: ...
+ def to_dummies(
+ self, *, separator: str, drop_first: bool
+ ) -> CompliantDataFrame[Self, Any, Any, Any]: ...
+ def to_frame(self) -> CompliantDataFrame[Self, Any, Any, Any]: ...
+ def to_list(self) -> list[Any]: ...
+ def to_pandas(self) -> pd.Series[Any]: ...
+ def to_polars(self) -> pl.Series: ...
+ def unique(self, *, maintain_order: bool) -> Self: ...
+ def value_counts(
+ self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
+ ) -> CompliantDataFrame[Self, Any, Any, Any]: ...
+ def var(self, *, ddof: int) -> float: ...
+ def zip_with(self, mask: Any, other: Any) -> Self: ...
+
+ @property
+ def str(self) -> Any: ...
+ @property
+ def dt(self) -> Any: ...
+ @property
+ def cat(self) -> Any: ...
+ @property
+ def list(self) -> Any: ...
+ @property
+ def struct(self) -> Any: ...
+
+
+class EagerSeries(CompliantSeries[NativeSeriesT], Protocol[NativeSeriesT]):
+ _native_series: Any
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+ _broadcast: bool
+
+ def _from_scalar(self, value: Any) -> Self:
+ return self.from_iterable([value], name=self.name, context=self)
+
+ def _with_native(
+ self, series: NativeSeriesT, *, preserve_broadcast: bool = False
+ ) -> Self:
+ """Return a new `CompliantSeries`, wrapping the native `series`.
+
+ In cases when operations are known to not affect whether a result should
+ be broadcast, we can pass `preserve_broadcast=True`.
+ Set this with care - it should only be set for unary expressions which don't
+ change length or order, such as `.alias` or `.fill_null`. If in doubt, don't
+ set it, you probably don't need it.
+ """
+ ...
+
+ def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any, Any]: ...
+
+ def _to_expr(self) -> EagerExpr[Any, Any]:
+ return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return]
+
+ def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ...
+ def _gather_slice(self, rows: _SliceIndex | range) -> Self: ...
+ def __getitem__(self, item: MultiIndexSelector[Self]) -> Self:
+ if isinstance(item, (slice, range)):
+ return self._gather_slice(item)
+ elif is_compliant_series(item):
+ return self._gather(item.native)
+ elif is_sized_multi_index_selector(item):
+ return self._gather(item)
+ else: # pragma: no cover
+ msg = f"Unreachable code, got unexpected type: {type(item)}"
+ raise AssertionError(msg)
+
+ @property
+ def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ...
+ @property
+ def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT]: ...
+ @property
+ def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT]: ...
+ @property
+ def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT]: ...
+ @property
+ def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT]: ...
+
+
+class _SeriesNamespace( # type: ignore[misc]
+ _StoresCompliant[CompliantSeriesT_co],
+ _StoresNative[NativeSeriesT_co],
+ Protocol[CompliantSeriesT_co, NativeSeriesT_co],
+):
+ _compliant_series: CompliantSeriesT_co
+
+ @property
+ def compliant(self) -> CompliantSeriesT_co:
+ return self._compliant_series
+
+ @property
+ def native(self) -> NativeSeriesT_co:
+ return self._compliant_series.native # type: ignore[no-any-return]
+
+ def with_native(self, series: Any, /) -> CompliantSeriesT_co:
+ return self.compliant._with_native(series)
+
+
+class EagerSeriesNamespace(
+ _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
+ Generic[EagerSeriesT_co, NativeSeriesT_co],
+):
+ _compliant_series: EagerSeriesT_co
+
+ def __init__(self, series: EagerSeriesT_co, /) -> None:
+ self._compliant_series = series
+
+
+class EagerSeriesCatNamespace( # type: ignore[misc]
+ _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
+ CatNamespace[EagerSeriesT_co],
+ Protocol[EagerSeriesT_co, NativeSeriesT_co],
+): ...
+
+
+class EagerSeriesDateTimeNamespace( # type: ignore[misc]
+ _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
+ DateTimeNamespace[EagerSeriesT_co],
+ Protocol[EagerSeriesT_co, NativeSeriesT_co],
+): ...
+
+
+class EagerSeriesListNamespace( # type: ignore[misc]
+ _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
+ ListNamespace[EagerSeriesT_co],
+ Protocol[EagerSeriesT_co, NativeSeriesT_co],
+): ...
+
+
+class EagerSeriesStringNamespace( # type: ignore[misc]
+ _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
+ StringNamespace[EagerSeriesT_co],
+ Protocol[EagerSeriesT_co, NativeSeriesT_co],
+): ...
+
+
+class EagerSeriesStructNamespace( # type: ignore[misc]
+ _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
+ StructNamespace[EagerSeriesT_co],
+ Protocol[EagerSeriesT_co, NativeSeriesT_co],
+): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py
new file mode 100644
index 0000000..4c3685b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/typing.py
@@ -0,0 +1,154 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable, Sequence, TypedDict, TypeVar
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeAlias
+
+ from narwhals._compliant.dataframe import (
+ CompliantDataFrame,
+ CompliantLazyFrame,
+ EagerDataFrame,
+ )
+ from narwhals._compliant.expr import (
+ CompliantExpr,
+ DepthTrackingExpr,
+ EagerExpr,
+ LazyExpr,
+ NativeExpr,
+ )
+ from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
+ from narwhals._compliant.series import CompliantSeries, EagerSeries
+ from narwhals._compliant.window import WindowInputs
+ from narwhals.typing import FillNullStrategy, NativeFrame, NativeSeries, RankMethod
+
+ class ScalarKwargs(TypedDict, total=False):
+ """Non-expressifiable args which we may need to reuse in `agg` or `over`."""
+
+ center: int
+ ddof: int
+ descending: bool
+ limit: int | None
+ method: RankMethod
+ min_samples: int
+ n: int
+ reverse: bool
+ strategy: FillNullStrategy | None
+ window_size: int
+
+
+__all__ = [
+ "AliasName",
+ "AliasNames",
+ "CompliantDataFrameT",
+ "CompliantFrameT",
+ "CompliantLazyFrameT",
+ "CompliantSeriesT",
+ "EvalNames",
+ "EvalSeries",
+ "IntoCompliantExpr",
+ "NativeFrameT_co",
+ "NativeSeriesT_co",
+]
+CompliantExprAny: TypeAlias = "CompliantExpr[Any, Any]"
+CompliantSeriesAny: TypeAlias = "CompliantSeries[Any]"
+CompliantSeriesOrNativeExprAny: TypeAlias = "CompliantSeriesAny | NativeExpr"
+CompliantDataFrameAny: TypeAlias = "CompliantDataFrame[Any, Any, Any, Any]"
+CompliantLazyFrameAny: TypeAlias = "CompliantLazyFrame[Any, Any, Any]"
+CompliantFrameAny: TypeAlias = "CompliantDataFrameAny | CompliantLazyFrameAny"
+CompliantNamespaceAny: TypeAlias = "CompliantNamespace[Any, Any]"
+
+DepthTrackingExprAny: TypeAlias = "DepthTrackingExpr[Any, Any]"
+
+EagerDataFrameAny: TypeAlias = "EagerDataFrame[Any, Any, Any]"
+EagerSeriesAny: TypeAlias = "EagerSeries[Any]"
+EagerExprAny: TypeAlias = "EagerExpr[Any, Any]"
+EagerNamespaceAny: TypeAlias = (
+ "EagerNamespace[EagerDataFrameAny, EagerSeriesAny, EagerExprAny, NativeFrame]"
+)
+
+LazyExprAny: TypeAlias = "LazyExpr[Any, Any]"
+
+NativeExprT = TypeVar("NativeExprT", bound="NativeExpr")
+NativeExprT_co = TypeVar("NativeExprT_co", bound="NativeExpr", covariant=True)
+NativeSeriesT = TypeVar("NativeSeriesT", bound="NativeSeries")
+NativeSeriesT_co = TypeVar("NativeSeriesT_co", bound="NativeSeries", covariant=True)
+NativeFrameT = TypeVar("NativeFrameT", bound="NativeFrame")
+NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True)
+NativeFrameT_contra = TypeVar(
+ "NativeFrameT_contra", bound="NativeFrame", contravariant=True
+)
+
+CompliantExprT = TypeVar("CompliantExprT", bound=CompliantExprAny)
+CompliantExprT_co = TypeVar("CompliantExprT_co", bound=CompliantExprAny, covariant=True)
+CompliantExprT_contra = TypeVar(
+ "CompliantExprT_contra", bound=CompliantExprAny, contravariant=True
+)
+CompliantSeriesT = TypeVar("CompliantSeriesT", bound=CompliantSeriesAny)
+CompliantSeriesT_co = TypeVar(
+ "CompliantSeriesT_co", bound=CompliantSeriesAny, covariant=True
+)
+CompliantSeriesOrNativeExprT = TypeVar(
+ "CompliantSeriesOrNativeExprT", bound=CompliantSeriesOrNativeExprAny
+)
+CompliantSeriesOrNativeExprT_co = TypeVar(
+ "CompliantSeriesOrNativeExprT_co",
+ bound=CompliantSeriesOrNativeExprAny,
+ covariant=True,
+)
+CompliantFrameT = TypeVar("CompliantFrameT", bound=CompliantFrameAny)
+CompliantFrameT_co = TypeVar(
+ "CompliantFrameT_co", bound=CompliantFrameAny, covariant=True
+)
+CompliantDataFrameT = TypeVar("CompliantDataFrameT", bound=CompliantDataFrameAny)
+CompliantDataFrameT_co = TypeVar(
+ "CompliantDataFrameT_co", bound=CompliantDataFrameAny, covariant=True
+)
+CompliantLazyFrameT = TypeVar("CompliantLazyFrameT", bound=CompliantLazyFrameAny)
+CompliantLazyFrameT_co = TypeVar(
+ "CompliantLazyFrameT_co", bound=CompliantLazyFrameAny, covariant=True
+)
+CompliantNamespaceT = TypeVar("CompliantNamespaceT", bound=CompliantNamespaceAny)
+CompliantNamespaceT_co = TypeVar(
+ "CompliantNamespaceT_co", bound=CompliantNamespaceAny, covariant=True
+)
+
+IntoCompliantExpr: TypeAlias = "CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | CompliantSeriesOrNativeExprT_co"
+
+DepthTrackingExprT = TypeVar("DepthTrackingExprT", bound=DepthTrackingExprAny)
+DepthTrackingExprT_contra = TypeVar(
+ "DepthTrackingExprT_contra", bound=DepthTrackingExprAny, contravariant=True
+)
+
+EagerExprT = TypeVar("EagerExprT", bound=EagerExprAny)
+EagerExprT_contra = TypeVar("EagerExprT_contra", bound=EagerExprAny, contravariant=True)
+EagerSeriesT = TypeVar("EagerSeriesT", bound=EagerSeriesAny)
+EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound=EagerSeriesAny, covariant=True)
+
+# NOTE: `pyright` gives false (8) positives if this uses `EagerDataFrameAny`?
+EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any]")
+
+LazyExprT = TypeVar("LazyExprT", bound=LazyExprAny)
+LazyExprT_contra = TypeVar("LazyExprT_contra", bound=LazyExprAny, contravariant=True)
+
+AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]]
+"""A function aliasing a *sequence* of column names."""
+
+AliasName: TypeAlias = Callable[[str], str]
+"""A function aliasing a *single* column name."""
+
+EvalSeries: TypeAlias = Callable[
+ [CompliantFrameT], Sequence[CompliantSeriesOrNativeExprT]
+]
+"""A function from a `Frame` to a sequence of `Series`*.
+
+See [underwater unicorn magic](https://narwhals-dev.github.io/narwhals/how_it_works/).
+"""
+
+EvalNames: TypeAlias = Callable[[CompliantFrameT], Sequence[str]]
+"""A function from a `Frame` to a sequence of columns names *before* any aliasing takes place."""
+
+WindowFunction: TypeAlias = (
+ "Callable[[CompliantFrameT, WindowInputs[NativeExprT]], Sequence[NativeExprT]]"
+)
+"""A function evaluated with `over(partition_by=..., order_by=...)`."""
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py
new file mode 100644
index 0000000..1de91f9
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/when_then.py
@@ -0,0 +1,232 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, cast
+
+from narwhals._compliant.expr import CompliantExpr
+from narwhals._compliant.typing import (
+ CompliantExprAny,
+ CompliantFrameAny,
+ CompliantLazyFrameT,
+ CompliantSeriesOrNativeExprAny,
+ EagerDataFrameT,
+ EagerExprT,
+ EagerSeriesT,
+ LazyExprAny,
+ NativeExprT,
+ WindowFunction,
+)
+from narwhals._typing_compat import Protocol38
+
+if TYPE_CHECKING:
+ from typing_extensions import Self, TypeAlias
+
+ from narwhals._compliant.typing import EvalSeries, ScalarKwargs
+ from narwhals._compliant.window import WindowInputs
+ from narwhals._utils import Implementation, Version, _FullContext
+ from narwhals.typing import NonNestedLiteral
+
+
+__all__ = ["CompliantThen", "CompliantWhen", "EagerWhen", "LazyThen", "LazyWhen"]
+
+ExprT = TypeVar("ExprT", bound=CompliantExprAny)
+LazyExprT = TypeVar("LazyExprT", bound=LazyExprAny)
+SeriesT = TypeVar("SeriesT", bound=CompliantSeriesOrNativeExprAny)
+FrameT = TypeVar("FrameT", bound=CompliantFrameAny)
+
+Scalar: TypeAlias = Any
+"""A native literal value."""
+
+IntoExpr: TypeAlias = "SeriesT | ExprT | NonNestedLiteral | Scalar"
+"""Anything that is convertible into a `CompliantExpr`."""
+
+
+class CompliantWhen(Protocol38[FrameT, SeriesT, ExprT]):
+ _condition: ExprT
+ _then_value: IntoExpr[SeriesT, ExprT]
+ _otherwise_value: IntoExpr[SeriesT, ExprT] | None
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+
+ @property
+ def _then(self) -> type[CompliantThen[FrameT, SeriesT, ExprT]]: ...
+ def __call__(self, compliant_frame: FrameT, /) -> Sequence[SeriesT]: ...
+ def _window_function(
+ self, compliant_frame: FrameT, window_inputs: WindowInputs[Any]
+ ) -> Sequence[SeriesT]: ...
+
+ def then(
+ self, value: IntoExpr[SeriesT, ExprT], /
+ ) -> CompliantThen[FrameT, SeriesT, ExprT]:
+ return self._then.from_when(self, value)
+
+ @classmethod
+ def from_expr(cls, condition: ExprT, /, *, context: _FullContext) -> Self:
+ obj = cls.__new__(cls)
+ obj._condition = condition
+ obj._then_value = None
+ obj._otherwise_value = None
+ obj._implementation = context._implementation
+ obj._backend_version = context._backend_version
+ obj._version = context._version
+ return obj
+
+
+class CompliantThen(CompliantExpr[FrameT, SeriesT], Protocol38[FrameT, SeriesT, ExprT]):
+ _call: EvalSeries[FrameT, SeriesT]
+ _when_value: CompliantWhen[FrameT, SeriesT, ExprT]
+ _function_name: str
+ _depth: int
+ _implementation: Implementation
+ _backend_version: tuple[int, ...]
+ _version: Version
+ _scalar_kwargs: ScalarKwargs
+
+ @classmethod
+ def from_when(
+ cls,
+ when: CompliantWhen[FrameT, SeriesT, ExprT],
+ then: IntoExpr[SeriesT, ExprT],
+ /,
+ ) -> Self:
+ when._then_value = then
+ obj = cls.__new__(cls)
+ obj._call = when
+ obj._when_value = when
+ obj._depth = 0
+ obj._function_name = "whenthen"
+ obj._evaluate_output_names = getattr(
+ then, "_evaluate_output_names", lambda _df: ["literal"]
+ )
+ obj._alias_output_names = getattr(then, "_alias_output_names", None)
+ obj._implementation = when._implementation
+ obj._backend_version = when._backend_version
+ obj._version = when._version
+ obj._scalar_kwargs = {}
+ return obj
+
+ def otherwise(self, otherwise: IntoExpr[SeriesT, ExprT], /) -> ExprT:
+ self._when_value._otherwise_value = otherwise
+ self._function_name = "whenotherwise"
+ return cast("ExprT", self)
+
+
+class LazyThen(
+ CompliantThen[CompliantLazyFrameT, NativeExprT, LazyExprT],
+ Protocol38[CompliantLazyFrameT, NativeExprT, LazyExprT],
+):
+ _window_function: WindowFunction[CompliantLazyFrameT, NativeExprT] | None
+
+ @classmethod
+ def from_when(
+ cls,
+ when: CompliantWhen[CompliantLazyFrameT, NativeExprT, LazyExprT],
+ then: IntoExpr[NativeExprT, LazyExprT],
+ /,
+ ) -> Self:
+ when._then_value = then
+ obj = cls.__new__(cls)
+ obj._call = when
+
+ obj._window_function = when._window_function
+
+ obj._when_value = when
+ obj._depth = 0
+ obj._function_name = "whenthen"
+ obj._evaluate_output_names = getattr(
+ then, "_evaluate_output_names", lambda _df: ["literal"]
+ )
+ obj._alias_output_names = getattr(then, "_alias_output_names", None)
+ obj._implementation = when._implementation
+ obj._backend_version = when._backend_version
+ obj._version = when._version
+ obj._scalar_kwargs = {}
+ return obj
+
+
+class EagerWhen(
+ CompliantWhen[EagerDataFrameT, EagerSeriesT, EagerExprT],
+ Protocol38[EagerDataFrameT, EagerSeriesT, EagerExprT],
+):
+ def _if_then_else(
+ self, when: EagerSeriesT, then: EagerSeriesT, otherwise: EagerSeriesT | None, /
+ ) -> EagerSeriesT: ...
+
+ def __call__(self, df: EagerDataFrameT, /) -> Sequence[EagerSeriesT]:
+ is_expr = self._condition._is_expr
+ when: EagerSeriesT = self._condition(df)[0]
+ then: EagerSeriesT
+
+ if is_expr(self._then_value):
+ then = self._then_value(df)[0]
+ else:
+ then = when.alias("literal")._from_scalar(self._then_value)
+ then._broadcast = True
+
+ if is_expr(self._otherwise_value):
+ otherwise = self._otherwise_value(df)[0]
+ elif self._otherwise_value is not None:
+ otherwise = when._from_scalar(self._otherwise_value)
+ otherwise._broadcast = True
+ else:
+ otherwise = self._otherwise_value
+ return [self._if_then_else(when, then, otherwise)]
+
+
+class LazyWhen(
+ CompliantWhen[CompliantLazyFrameT, NativeExprT, LazyExprT],
+ Protocol38[CompliantLazyFrameT, NativeExprT, LazyExprT],
+):
+ when: Callable[..., NativeExprT]
+ lit: Callable[..., NativeExprT]
+
+ def __call__(self, df: CompliantLazyFrameT) -> Sequence[NativeExprT]:
+ is_expr = self._condition._is_expr
+ when = self.when
+ lit = self.lit
+ condition = df._evaluate_expr(self._condition)
+ then_ = self._then_value
+ then = df._evaluate_expr(then_) if is_expr(then_) else lit(then_)
+ other_ = self._otherwise_value
+ if other_ is None:
+ result = when(condition, then)
+ else:
+ otherwise = df._evaluate_expr(other_) if is_expr(other_) else lit(other_)
+ result = when(condition, then).otherwise(otherwise) # type: ignore # noqa: PGH003
+ return [result]
+
+ @classmethod
+ def from_expr(cls, condition: LazyExprT, /, *, context: _FullContext) -> Self:
+ obj = cls.__new__(cls)
+ obj._condition = condition
+
+ obj._then_value = None
+ obj._otherwise_value = None
+ obj._implementation = context._implementation
+ obj._backend_version = context._backend_version
+ obj._version = context._version
+ return obj
+
+ def _window_function(
+ self, df: CompliantLazyFrameT, window_inputs: WindowInputs[NativeExprT]
+ ) -> Sequence[NativeExprT]:
+ is_expr = self._condition._is_expr
+ condition = self._condition.window_function(df, window_inputs)[0]
+ then_ = self._then_value
+ then = (
+ then_.window_function(df, window_inputs)[0]
+ if is_expr(then_)
+ else self.lit(then_)
+ )
+
+ other_ = self._otherwise_value
+ if other_ is None:
+ result = self.when(condition, then)
+ else:
+ other = (
+ other_.window_function(df, window_inputs)[0]
+ if is_expr(other_)
+ else self.lit(other_)
+ )
+ result = self.when(condition, then).otherwise(other) # type: ignore # noqa: PGH003
+ return [result]
diff --git a/venv/lib/python3.8/site-packages/narwhals/_compliant/window.py b/venv/lib/python3.8/site-packages/narwhals/_compliant/window.py
new file mode 100644
index 0000000..07d37cc
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_compliant/window.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from typing import Generic, Sequence
+
+from narwhals._compliant.typing import NativeExprT_co
+
+
+class WindowInputs(Generic[NativeExprT_co]):
+ __slots__ = ("order_by", "partition_by")
+
+ def __init__(
+ self, partition_by: Sequence[str | NativeExprT_co], order_by: Sequence[str]
+ ) -> None:
+ self.partition_by = partition_by
+ self.order_by = order_by
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_dask/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_dask/dataframe.py
new file mode 100644
index 0000000..f03c763
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/dataframe.py
@@ -0,0 +1,443 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Iterator, Mapping, Sequence
+
+import dask.dataframe as dd
+import pandas as pd
+
+from narwhals._dask.utils import add_row_index, evaluate_exprs
+from narwhals._pandas_like.utils import native_to_narwhals_dtype, select_columns_by_name
+from narwhals._utils import (
+ Implementation,
+ _remap_full_join_keys,
+ check_column_names_are_unique,
+ generate_temporary_column_name,
+ not_implemented,
+ parse_columns_to_drop,
+ parse_version,
+ validate_backend_version,
+)
+from narwhals.typing import CompliantLazyFrame
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import dask.dataframe.dask_expr as dx
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny
+ from narwhals._dask.expr import DaskExpr
+ from narwhals._dask.group_by import DaskLazyGroupBy
+ from narwhals._dask.namespace import DaskNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dataframe import LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy
+
+
+class DaskLazyFrame(
+ CompliantLazyFrame["DaskExpr", "dd.DataFrame", "LazyFrame[dd.DataFrame]"]
+):
+ def __init__(
+ self,
+ native_dataframe: dd.DataFrame,
+ *,
+ backend_version: tuple[int, ...],
+ version: Version,
+ ) -> None:
+ self._native_frame: dd.DataFrame = native_dataframe
+ self._backend_version = backend_version
+ self._implementation = Implementation.DASK
+ self._version = version
+ self._cached_schema: dict[str, DType] | None = None
+ self._cached_columns: list[str] | None = None
+ validate_backend_version(self._implementation, self._backend_version)
+
+ @staticmethod
+ def _is_native(obj: dd.DataFrame | Any) -> TypeIs[dd.DataFrame]:
+ return isinstance(obj, dd.DataFrame)
+
+ @classmethod
+ def from_native(cls, data: dd.DataFrame, /, *, context: _FullContext) -> Self:
+ return cls(
+ data, backend_version=context._backend_version, version=context._version
+ )
+
+ def to_narwhals(self) -> LazyFrame[dd.DataFrame]:
+ return self._version.lazyframe(self, level="lazy")
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation is Implementation.DASK:
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected dask, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def __narwhals_namespace__(self) -> DaskNamespace:
+ from narwhals._dask.namespace import DaskNamespace
+
+ return DaskNamespace(backend_version=self._backend_version, version=self._version)
+
+ def __narwhals_lazyframe__(self) -> Self:
+ return self
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native, backend_version=self._backend_version, version=version
+ )
+
+ def _with_native(self, df: Any) -> Self:
+ return self.__class__(
+ df, backend_version=self._backend_version, version=self._version
+ )
+
+ def _iter_columns(self) -> Iterator[dx.Series]:
+ for _col, ser in self.native.items(): # noqa: PERF102
+ yield ser
+
+ def with_columns(self, *exprs: DaskExpr) -> Self:
+ new_series = evaluate_exprs(self, *exprs)
+ return self._with_native(self.native.assign(**dict(new_series)))
+
+ def collect(
+ self, backend: Implementation | None, **kwargs: Any
+ ) -> CompliantDataFrameAny:
+ result = self.native.compute(**kwargs)
+
+ if backend is None or backend is Implementation.PANDAS:
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ return PandasLikeDataFrame(
+ result,
+ implementation=Implementation.PANDAS,
+ backend_version=parse_version(pd),
+ version=self._version,
+ validate_column_names=True,
+ )
+
+ if backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame(
+ pl.from_pandas(result),
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+
+ if backend is Implementation.PYARROW:
+ import pyarrow as pa # ignore-banned-import
+
+ from narwhals._arrow.dataframe import ArrowDataFrame
+
+ return ArrowDataFrame(
+ pa.Table.from_pandas(result),
+ backend_version=parse_version(pa),
+ version=self._version,
+ validate_column_names=True,
+ )
+
+ msg = f"Unsupported `backend` value: {backend}" # pragma: no cover
+ raise ValueError(msg) # pragma: no cover
+
+ @property
+ def columns(self) -> list[str]:
+ if self._cached_columns is None:
+ self._cached_columns = (
+ list(self.schema)
+ if self._cached_schema is not None
+ else self.native.columns.tolist()
+ )
+ return self._cached_columns
+
+ def filter(self, predicate: DaskExpr) -> Self:
+ # `[0]` is safe as the predicate's expression only returns a single column
+ mask = predicate(self)[0]
+ return self._with_native(self.native.loc[mask])
+
+ def simple_select(self, *column_names: str) -> Self:
+ native = select_columns_by_name(
+ self.native, list(column_names), self._backend_version, self._implementation
+ )
+ return self._with_native(native)
+
+ def aggregate(self, *exprs: DaskExpr) -> Self:
+ new_series = evaluate_exprs(self, *exprs)
+ df = dd.concat([val.rename(name) for name, val in new_series], axis=1)
+ return self._with_native(df)
+
+ def select(self, *exprs: DaskExpr) -> Self:
+ new_series = evaluate_exprs(self, *exprs)
+ df = select_columns_by_name(
+ self.native.assign(**dict(new_series)),
+ [s[0] for s in new_series],
+ self._backend_version,
+ self._implementation,
+ )
+ return self._with_native(df)
+
+ def drop_nulls(self, subset: Sequence[str] | None) -> Self:
+ if subset is None:
+ return self._with_native(self.native.dropna())
+ plx = self.__narwhals_namespace__()
+ return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))
+
+ @property
+ def schema(self) -> dict[str, DType]:
+ if self._cached_schema is None:
+ native_dtypes = self.native.dtypes
+ self._cached_schema = {
+ col: native_to_narwhals_dtype(
+ native_dtypes[col], self._version, self._implementation
+ )
+ for col in self.native.columns
+ }
+ return self._cached_schema
+
+ def collect_schema(self) -> dict[str, DType]:
+ return self.schema
+
+ def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
+ to_drop = parse_columns_to_drop(self, columns, strict=strict)
+
+ return self._with_native(self.native.drop(columns=to_drop))
+
+ def with_row_index(self, name: str) -> Self:
+ # Implementation is based on the following StackOverflow reply:
+ # https://stackoverflow.com/questions/60831518/in-dask-how-does-one-add-a-range-of-integersauto-increment-to-a-new-column/60852409#60852409
+ return self._with_native(
+ add_row_index(self.native, name, self._backend_version, self._implementation)
+ )
+
+ def rename(self, mapping: Mapping[str, str]) -> Self:
+ return self._with_native(self.native.rename(columns=mapping))
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.head(n=n, compute=False, npartitions=-1))
+
+ def unique(
+ self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
+ ) -> Self:
+ if subset and (error := self._check_columns_exist(subset)):
+ raise error
+ if keep == "none":
+ subset = subset or self.columns
+ token = generate_temporary_column_name(n_bytes=8, columns=subset)
+ ser = self.native.groupby(subset).size().rename(token)
+ ser = ser[ser == 1]
+ unique = ser.reset_index().drop(columns=token)
+ result = self.native.merge(unique, on=subset, how="inner")
+ else:
+ mapped_keep = {"any": "first"}.get(keep, keep)
+ result = self.native.drop_duplicates(subset=subset, keep=mapped_keep)
+ return self._with_native(result)
+
+ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
+ if isinstance(descending, bool):
+ ascending: bool | list[bool] = not descending
+ else:
+ ascending = [not d for d in descending]
+ position = "last" if nulls_last else "first"
+ return self._with_native(
+ self.native.sort_values(list(by), ascending=ascending, na_position=position)
+ )
+
+ def join( # noqa: C901
+ self,
+ other: Self,
+ *,
+ how: JoinStrategy,
+ left_on: Sequence[str] | None,
+ right_on: Sequence[str] | None,
+ suffix: str,
+ ) -> Self:
+ if how == "cross":
+ key_token = generate_temporary_column_name(
+ n_bytes=8, columns=[*self.columns, *other.columns]
+ )
+
+ return self._with_native(
+ self.native.assign(**{key_token: 0})
+ .merge(
+ other.native.assign(**{key_token: 0}),
+ how="inner",
+ left_on=key_token,
+ right_on=key_token,
+ suffixes=("", suffix),
+ )
+ .drop(columns=key_token)
+ )
+
+ if how == "anti":
+ indicator_token = generate_temporary_column_name(
+ n_bytes=8, columns=[*self.columns, *other.columns]
+ )
+
+ if right_on is None: # pragma: no cover
+ msg = "`right_on` cannot be `None` in anti-join"
+ raise TypeError(msg)
+ other_native = (
+ select_columns_by_name(
+ other.native,
+ list(right_on),
+ self._backend_version,
+ self._implementation,
+ )
+ .rename( # rename to avoid creating extra columns in join
+ columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
+ )
+ .drop_duplicates()
+ )
+ df = self.native.merge(
+ other_native,
+ how="outer",
+ indicator=indicator_token, # pyright: ignore[reportArgumentType]
+ left_on=left_on,
+ right_on=left_on,
+ )
+ return self._with_native(
+ df[df[indicator_token] == "left_only"].drop(columns=[indicator_token])
+ )
+
+ if how == "semi":
+ if right_on is None: # pragma: no cover
+ msg = "`right_on` cannot be `None` in semi-join"
+ raise TypeError(msg)
+ other_native = (
+ select_columns_by_name(
+ other.native,
+ list(right_on),
+ self._backend_version,
+ self._implementation,
+ )
+ .rename( # rename to avoid creating extra columns in join
+ columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
+ )
+ .drop_duplicates() # avoids potential rows duplication from inner join
+ )
+ return self._with_native(
+ self.native.merge(
+ other_native, how="inner", left_on=left_on, right_on=left_on
+ )
+ )
+
+ if how == "left":
+ result_native = self.native.merge(
+ other.native,
+ how="left",
+ left_on=left_on,
+ right_on=right_on,
+ suffixes=("", suffix),
+ )
+ extra = []
+ for left_key, right_key in zip(left_on, right_on): # type: ignore[arg-type]
+ if right_key != left_key and right_key not in self.columns:
+ extra.append(right_key)
+ elif right_key != left_key:
+ extra.append(f"{right_key}_right")
+ return self._with_native(result_native.drop(columns=extra))
+
+ if how == "full":
+ # dask does not retain keys post-join
+ # we must append the suffix to each key before-hand
+
+ # help mypy
+ assert left_on is not None # noqa: S101
+ assert right_on is not None # noqa: S101
+
+ right_on_mapper = _remap_full_join_keys(left_on, right_on, suffix)
+ other_native = other.native.rename(columns=right_on_mapper)
+ check_column_names_are_unique(other_native.columns)
+ right_on = list(right_on_mapper.values()) # we now have the suffixed keys
+ return self._with_native(
+ self.native.merge(
+ other_native,
+ left_on=left_on,
+ right_on=right_on,
+ how="outer",
+ suffixes=("", suffix),
+ )
+ )
+
+ return self._with_native(
+ self.native.merge(
+ other.native,
+ left_on=left_on,
+ right_on=right_on,
+ how=how,
+ suffixes=("", suffix),
+ )
+ )
+
+ def join_asof(
+ self,
+ other: Self,
+ *,
+ left_on: str,
+ right_on: str,
+ by_left: Sequence[str] | None,
+ by_right: Sequence[str] | None,
+ strategy: AsofJoinStrategy,
+ suffix: str,
+ ) -> Self:
+ plx = self.__native_namespace__()
+ return self._with_native(
+ plx.merge_asof(
+ self.native,
+ other.native,
+ left_on=left_on,
+ right_on=right_on,
+ left_by=by_left,
+ right_by=by_right,
+ direction=strategy,
+ suffixes=("", suffix),
+ )
+ )
+
+ def group_by(
+ self, keys: Sequence[str] | Sequence[DaskExpr], *, drop_null_keys: bool
+ ) -> DaskLazyGroupBy:
+ from narwhals._dask.group_by import DaskLazyGroupBy
+
+ return DaskLazyGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
+ def tail(self, n: int) -> Self: # pragma: no cover
+ native_frame = self.native
+ n_partitions = native_frame.npartitions
+
+ if n_partitions == 1:
+ return self._with_native(self.native.tail(n=n, compute=False))
+ else:
+ msg = "`LazyFrame.tail` is not supported for Dask backend with multiple partitions."
+ raise NotImplementedError(msg)
+
+ def gather_every(self, n: int, offset: int) -> Self:
+ row_index_token = generate_temporary_column_name(n_bytes=8, columns=self.columns)
+ plx = self.__narwhals_namespace__()
+ return (
+ self.with_row_index(row_index_token)
+ .filter(
+ (plx.col(row_index_token) >= offset)
+ & ((plx.col(row_index_token) - offset) % n == 0)
+ )
+ .drop([row_index_token], strict=False)
+ )
+
+ def unpivot(
+ self,
+ on: Sequence[str] | None,
+ index: Sequence[str] | None,
+ variable_name: str,
+ value_name: str,
+ ) -> Self:
+ return self._with_native(
+ self.native.melt(
+ id_vars=index,
+ value_vars=on,
+ var_name=variable_name,
+ value_name=value_name,
+ )
+ )
+
+ explode = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/expr.py b/venv/lib/python3.8/site-packages/narwhals/_dask/expr.py
new file mode 100644
index 0000000..aa51997
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/expr.py
@@ -0,0 +1,675 @@
+from __future__ import annotations
+
+import warnings
+from typing import TYPE_CHECKING, Any, Callable, Literal, Sequence
+
+from narwhals._compliant import LazyExpr
+from narwhals._compliant.expr import DepthTrackingExpr
+from narwhals._dask.expr_dt import DaskExprDateTimeNamespace
+from narwhals._dask.expr_str import DaskExprStringNamespace
+from narwhals._dask.utils import (
+ add_row_index,
+ maybe_evaluate_expr,
+ narwhals_to_native_dtype,
+)
+from narwhals._expression_parsing import ExprKind, evaluate_output_names_and_aliases
+from narwhals._pandas_like.utils import native_to_narwhals_dtype
+from narwhals._utils import (
+ Implementation,
+ generate_temporary_column_name,
+ not_implemented,
+)
+from narwhals.exceptions import InvalidOperationError
+
+if TYPE_CHECKING:
+ import dask.dataframe.dask_expr as dx
+ from typing_extensions import Self
+
+ from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries, ScalarKwargs
+ from narwhals._dask.dataframe import DaskLazyFrame
+ from narwhals._dask.namespace import DaskNamespace
+ from narwhals._expression_parsing import ExprKind, ExprMetadata
+ from narwhals._utils import Version, _FullContext
+ from narwhals.typing import (
+ FillNullStrategy,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RollingInterpolationMethod,
+ TemporalLiteral,
+ )
+
+
+class DaskExpr(
+ LazyExpr["DaskLazyFrame", "dx.Series"],
+ DepthTrackingExpr["DaskLazyFrame", "dx.Series"],
+):
+ _implementation: Implementation = Implementation.DASK
+
+ def __init__(
+ self,
+ call: EvalSeries[DaskLazyFrame, dx.Series],
+ *,
+ depth: int,
+ function_name: str,
+ evaluate_output_names: EvalNames[DaskLazyFrame],
+ alias_output_names: AliasNames | None,
+ backend_version: tuple[int, ...],
+ version: Version,
+ scalar_kwargs: ScalarKwargs | None = None,
+ ) -> None:
+ self._call = call
+ self._depth = depth
+ self._function_name = function_name
+ self._evaluate_output_names = evaluate_output_names
+ self._alias_output_names = alias_output_names
+ self._backend_version = backend_version
+ self._version = version
+ self._scalar_kwargs = scalar_kwargs or {}
+ self._metadata: ExprMetadata | None = None
+
+ def __call__(self, df: DaskLazyFrame) -> Sequence[dx.Series]:
+ return self._call(df)
+
+ def __narwhals_expr__(self) -> None: ...
+
+ def __narwhals_namespace__(self) -> DaskNamespace: # pragma: no cover
+ # Unused, just for compatibility with PandasLikeExpr
+ from narwhals._dask.namespace import DaskNamespace
+
+ return DaskNamespace(backend_version=self._backend_version, version=self._version)
+
+ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ # result.loc[0][0] is a workaround for dask~<=2024.10.0/dask_expr~<=1.1.16
+ # that raised a KeyErrror for result[0] during collection.
+ return [result.loc[0][0] for result in self(df)]
+
+ return self.__class__(
+ func,
+ depth=self._depth,
+ function_name=self._function_name,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ scalar_kwargs=self._scalar_kwargs,
+ )
+
+ @classmethod
+ def from_column_names(
+ cls: type[Self],
+ evaluate_column_names: EvalNames[DaskLazyFrame],
+ /,
+ *,
+ context: _FullContext,
+ function_name: str = "",
+ ) -> Self:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ try:
+ return [
+ df._native_frame[column_name]
+ for column_name in evaluate_column_names(df)
+ ]
+ except KeyError as e:
+ if error := df._check_columns_exist(evaluate_column_names(df)):
+ raise error from e
+ raise
+
+ return cls(
+ func,
+ depth=0,
+ function_name=function_name,
+ evaluate_output_names=evaluate_column_names,
+ alias_output_names=None,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ @classmethod
+ def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ return [df.native.iloc[:, i] for i in column_indices]
+
+ return cls(
+ func,
+ depth=0,
+ function_name="nth",
+ evaluate_output_names=cls._eval_names_indices(column_indices),
+ alias_output_names=None,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ def _with_callable(
+ self,
+ # First argument to `call` should be `dx.Series`
+ call: Callable[..., dx.Series],
+ /,
+ expr_name: str = "",
+ scalar_kwargs: ScalarKwargs | None = None,
+ **expressifiable_args: Self | Any,
+ ) -> Self:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ native_results: list[dx.Series] = []
+ native_series_list = self._call(df)
+ other_native_series = {
+ key: maybe_evaluate_expr(df, value)
+ for key, value in expressifiable_args.items()
+ }
+ for native_series in native_series_list:
+ result_native = call(native_series, **other_native_series)
+ native_results.append(result_native)
+ return native_results
+
+ return self.__class__(
+ func,
+ depth=self._depth + 1,
+ function_name=f"{self._function_name}->{expr_name}",
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ scalar_kwargs=scalar_kwargs,
+ )
+
+ def _with_alias_output_names(self, func: AliasNames | None, /) -> Self:
+ return type(self)(
+ call=self._call,
+ depth=self._depth,
+ function_name=self._function_name,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=func,
+ backend_version=self._backend_version,
+ version=self._version,
+ scalar_kwargs=self._scalar_kwargs,
+ )
+
+ def __add__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__add__(other), "__add__", other=other
+ )
+
+ def __sub__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__sub__(other), "__sub__", other=other
+ )
+
+ def __rsub__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: other - expr, "__rsub__", other=other
+ ).alias("literal")
+
+ def __mul__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__mul__(other), "__mul__", other=other
+ )
+
+ def __truediv__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__truediv__(other), "__truediv__", other=other
+ )
+
+ def __rtruediv__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: other / expr, "__rtruediv__", other=other
+ ).alias("literal")
+
+ def __floordiv__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__floordiv__(other), "__floordiv__", other=other
+ )
+
+ def __rfloordiv__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: other // expr, "__rfloordiv__", other=other
+ ).alias("literal")
+
+ def __pow__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__pow__(other), "__pow__", other=other
+ )
+
+ def __rpow__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: other**expr, "__rpow__", other=other
+ ).alias("literal")
+
+ def __mod__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__mod__(other), "__mod__", other=other
+ )
+
+ def __rmod__(self, other: Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: other % expr, "__rmod__", other=other
+ ).alias("literal")
+
+ def __eq__(self, other: DaskExpr) -> Self: # type: ignore[override]
+ return self._with_callable(
+ lambda expr, other: expr.__eq__(other), "__eq__", other=other
+ )
+
+ def __ne__(self, other: DaskExpr) -> Self: # type: ignore[override]
+ return self._with_callable(
+ lambda expr, other: expr.__ne__(other), "__ne__", other=other
+ )
+
+ def __ge__(self, other: DaskExpr | Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__ge__(other), "__ge__", other=other
+ )
+
+ def __gt__(self, other: DaskExpr) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__gt__(other), "__gt__", other=other
+ )
+
+ def __le__(self, other: DaskExpr) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__le__(other), "__le__", other=other
+ )
+
+ def __lt__(self, other: DaskExpr) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__lt__(other), "__lt__", other=other
+ )
+
+ def __and__(self, other: DaskExpr | Any) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__and__(other), "__and__", other=other
+ )
+
+ def __or__(self, other: DaskExpr) -> Self:
+ return self._with_callable(
+ lambda expr, other: expr.__or__(other), "__or__", other=other
+ )
+
+ def __invert__(self) -> Self:
+ return self._with_callable(lambda expr: expr.__invert__(), "__invert__")
+
+ def mean(self) -> Self:
+ return self._with_callable(lambda expr: expr.mean().to_series(), "mean")
+
+ def median(self) -> Self:
+ from narwhals.exceptions import InvalidOperationError
+
+ def func(s: dx.Series) -> dx.Series:
+ dtype = native_to_narwhals_dtype(s.dtype, self._version, Implementation.DASK)
+ if not dtype.is_numeric():
+ msg = "`median` operation not supported for non-numeric input type."
+ raise InvalidOperationError(msg)
+ return s.median_approximate().to_series()
+
+ return self._with_callable(func, "median")
+
+ def min(self) -> Self:
+ return self._with_callable(lambda expr: expr.min().to_series(), "min")
+
+ def max(self) -> Self:
+ return self._with_callable(lambda expr: expr.max().to_series(), "max")
+
+ def std(self, ddof: int) -> Self:
+ return self._with_callable(
+ lambda expr: expr.std(ddof=ddof).to_series(),
+ "std",
+ scalar_kwargs={"ddof": ddof},
+ )
+
+ def var(self, ddof: int) -> Self:
+ return self._with_callable(
+ lambda expr: expr.var(ddof=ddof).to_series(),
+ "var",
+ scalar_kwargs={"ddof": ddof},
+ )
+
+ def skew(self) -> Self:
+ return self._with_callable(lambda expr: expr.skew().to_series(), "skew")
+
+ def shift(self, n: int) -> Self:
+ return self._with_callable(lambda expr: expr.shift(n), "shift")
+
+ def cum_sum(self, *, reverse: bool) -> Self:
+ if reverse: # pragma: no cover
+ # https://github.com/dask/dask/issues/11802
+ msg = "`cum_sum(reverse=True)` is not supported with Dask backend"
+ raise NotImplementedError(msg)
+
+ return self._with_callable(lambda expr: expr.cumsum(), "cum_sum")
+
+ def cum_count(self, *, reverse: bool) -> Self:
+ if reverse: # pragma: no cover
+ msg = "`cum_count(reverse=True)` is not supported with Dask backend"
+ raise NotImplementedError(msg)
+
+ return self._with_callable(
+ lambda expr: (~expr.isna()).astype(int).cumsum(), "cum_count"
+ )
+
+ def cum_min(self, *, reverse: bool) -> Self:
+ if reverse: # pragma: no cover
+ msg = "`cum_min(reverse=True)` is not supported with Dask backend"
+ raise NotImplementedError(msg)
+
+ return self._with_callable(lambda expr: expr.cummin(), "cum_min")
+
+ def cum_max(self, *, reverse: bool) -> Self:
+ if reverse: # pragma: no cover
+ msg = "`cum_max(reverse=True)` is not supported with Dask backend"
+ raise NotImplementedError(msg)
+
+ return self._with_callable(lambda expr: expr.cummax(), "cum_max")
+
+ def cum_prod(self, *, reverse: bool) -> Self:
+ if reverse: # pragma: no cover
+ msg = "`cum_prod(reverse=True)` is not supported with Dask backend"
+ raise NotImplementedError(msg)
+
+ return self._with_callable(lambda expr: expr.cumprod(), "cum_prod")
+
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._with_callable(
+ lambda expr: expr.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).sum(),
+ "rolling_sum",
+ )
+
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._with_callable(
+ lambda expr: expr.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).mean(),
+ "rolling_mean",
+ )
+
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ if ddof == 1:
+ return self._with_callable(
+ lambda expr: expr.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).var(),
+ "rolling_var",
+ )
+ else:
+ msg = "Dask backend only supports `ddof=1` for `rolling_var`"
+ raise NotImplementedError(msg)
+
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ if ddof == 1:
+ return self._with_callable(
+ lambda expr: expr.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).std(),
+ "rolling_std",
+ )
+ else:
+ msg = "Dask backend only supports `ddof=1` for `rolling_std`"
+ raise NotImplementedError(msg)
+
+ def sum(self) -> Self:
+ return self._with_callable(lambda expr: expr.sum().to_series(), "sum")
+
+ def count(self) -> Self:
+ return self._with_callable(lambda expr: expr.count().to_series(), "count")
+
+ def round(self, decimals: int) -> Self:
+ return self._with_callable(lambda expr: expr.round(decimals), "round")
+
+ def unique(self) -> Self:
+ return self._with_callable(lambda expr: expr.unique(), "unique")
+
+ def drop_nulls(self) -> Self:
+ return self._with_callable(lambda expr: expr.dropna(), "drop_nulls")
+
+ def abs(self) -> Self:
+ return self._with_callable(lambda expr: expr.abs(), "abs")
+
+ def all(self) -> Self:
+ return self._with_callable(
+ lambda expr: expr.all(
+ axis=None, skipna=True, split_every=False, out=None
+ ).to_series(),
+ "all",
+ )
+
+ def any(self) -> Self:
+ return self._with_callable(
+ lambda expr: expr.any(axis=0, skipna=True, split_every=False).to_series(),
+ "any",
+ )
+
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self:
+ def func(expr: dx.Series) -> dx.Series:
+ if value is not None:
+ res_ser = expr.fillna(value)
+ else:
+ res_ser = (
+ expr.ffill(limit=limit)
+ if strategy == "forward"
+ else expr.bfill(limit=limit)
+ )
+ return res_ser
+
+ return self._with_callable(func, "fillna")
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None,
+ ) -> Self:
+ return self._with_callable(
+ lambda expr, lower_bound, upper_bound: expr.clip(
+ lower=lower_bound, upper=upper_bound
+ ),
+ "clip",
+ lower_bound=lower_bound,
+ upper_bound=upper_bound,
+ )
+
+ def diff(self) -> Self:
+ return self._with_callable(lambda expr: expr.diff(), "diff")
+
+ def n_unique(self) -> Self:
+ return self._with_callable(
+ lambda expr: expr.nunique(dropna=False).to_series(), "n_unique"
+ )
+
+ def is_null(self) -> Self:
+ return self._with_callable(lambda expr: expr.isna(), "is_null")
+
+ def is_nan(self) -> Self:
+ def func(expr: dx.Series) -> dx.Series:
+ dtype = native_to_narwhals_dtype(
+ expr.dtype, self._version, self._implementation
+ )
+ if dtype.is_numeric():
+ return expr != expr # pyright: ignore[reportReturnType] # noqa: PLR0124
+ msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?"
+ raise InvalidOperationError(msg)
+
+ return self._with_callable(func, "is_null")
+
+ def len(self) -> Self:
+ return self._with_callable(lambda expr: expr.size.to_series(), "len")
+
+ def quantile(
+ self, quantile: float, interpolation: RollingInterpolationMethod
+ ) -> Self:
+ if interpolation == "linear":
+
+ def func(expr: dx.Series, quantile: float) -> dx.Series:
+ if expr.npartitions > 1:
+ msg = "`Expr.quantile` is not supported for Dask backend with multiple partitions."
+ raise NotImplementedError(msg)
+ return expr.quantile(
+ q=quantile, method="dask"
+ ).to_series() # pragma: no cover
+
+ return self._with_callable(func, "quantile", quantile=quantile)
+ else:
+ msg = "`higher`, `lower`, `midpoint`, `nearest` - interpolation methods are not supported by Dask. Please use `linear` instead."
+ raise NotImplementedError(msg)
+
+ def is_first_distinct(self) -> Self:
+ def func(expr: dx.Series) -> dx.Series:
+ _name = expr.name
+ col_token = generate_temporary_column_name(n_bytes=8, columns=[_name])
+ frame = add_row_index(
+ expr.to_frame(), col_token, self._backend_version, self._implementation
+ )
+ first_distinct_index = frame.groupby(_name).agg({col_token: "min"})[col_token]
+ return frame[col_token].isin(first_distinct_index)
+
+ return self._with_callable(func, "is_first_distinct")
+
+ def is_last_distinct(self) -> Self:
+ def func(expr: dx.Series) -> dx.Series:
+ _name = expr.name
+ col_token = generate_temporary_column_name(n_bytes=8, columns=[_name])
+ frame = add_row_index(
+ expr.to_frame(), col_token, self._backend_version, self._implementation
+ )
+ last_distinct_index = frame.groupby(_name).agg({col_token: "max"})[col_token]
+ return frame[col_token].isin(last_distinct_index)
+
+ return self._with_callable(func, "is_last_distinct")
+
+ def is_unique(self) -> Self:
+ def func(expr: dx.Series) -> dx.Series:
+ _name = expr.name
+ return (
+ expr.to_frame()
+ .groupby(_name, dropna=False)
+ .transform("size", meta=(_name, int))
+ == 1
+ )
+
+ return self._with_callable(func, "is_unique")
+
+ def is_in(self, other: Any) -> Self:
+ return self._with_callable(lambda expr: expr.isin(other), "is_in")
+
+ def null_count(self) -> Self:
+ return self._with_callable(
+ lambda expr: expr.isna().sum().to_series(), "null_count"
+ )
+
+ def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
+ # pandas is a required dependency of dask so it's safe to import this
+ from narwhals._pandas_like.group_by import PandasLikeGroupBy
+
+ if not partition_by:
+ assert order_by # noqa: S101
+
+ # This is something like `nw.col('a').cum_sum().order_by(key)`
+ # which we can always easily support, as it doesn't require grouping.
+ def func(df: DaskLazyFrame) -> Sequence[dx.Series]:
+ return self(df.sort(*order_by, descending=False, nulls_last=False))
+ elif not self._is_elementary(): # pragma: no cover
+ msg = (
+ "Only elementary expressions are supported for `.over` in dask.\n\n"
+ "Please see: "
+ "https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/"
+ )
+ raise NotImplementedError(msg)
+ elif order_by:
+ # Wrong results https://github.com/dask/dask/issues/11806.
+ msg = "`over` with `order_by` is not yet supported in Dask."
+ raise NotImplementedError(msg)
+ else:
+ function_name = PandasLikeGroupBy._leaf_name(self)
+ try:
+ dask_function_name = PandasLikeGroupBy._REMAP_AGGS[function_name]
+ except KeyError:
+ # window functions are unsupported: https://github.com/dask/dask/issues/11806
+ msg = (
+ f"Unsupported function: {function_name} in `over` context.\n\n"
+ f"Supported functions are {', '.join(PandasLikeGroupBy._REMAP_AGGS)}\n"
+ )
+ raise NotImplementedError(msg) from None
+
+ def func(df: DaskLazyFrame) -> Sequence[dx.Series]:
+ output_names, aliases = evaluate_output_names_and_aliases(self, df, [])
+
+ with warnings.catch_warnings():
+ # https://github.com/dask/dask/issues/11804
+ warnings.filterwarnings(
+ "ignore",
+ message=".*`meta` is not specified",
+ category=UserWarning,
+ )
+ grouped = df.native.groupby(partition_by)
+ if dask_function_name == "size":
+ if len(output_names) != 1: # pragma: no cover
+ msg = "Safety check failed, please report a bug."
+ raise AssertionError(msg)
+ res_native = grouped.transform(
+ dask_function_name, **self._scalar_kwargs
+ ).to_frame(output_names[0])
+ else:
+ res_native = grouped[list(output_names)].transform(
+ dask_function_name, **self._scalar_kwargs
+ )
+ result_frame = df._with_native(
+ res_native.rename(columns=dict(zip(output_names, aliases)))
+ ).native
+ return [result_frame[name] for name in aliases]
+
+ return self.__class__(
+ func,
+ depth=self._depth + 1,
+ function_name=self._function_name + "->over",
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def cast(self, dtype: IntoDType) -> Self:
+ def func(expr: dx.Series) -> dx.Series:
+ native_dtype = narwhals_to_native_dtype(dtype, self._version)
+ return expr.astype(native_dtype)
+
+ return self._with_callable(func, "cast")
+
+ def is_finite(self) -> Self:
+ import dask.array as da
+
+ return self._with_callable(da.isfinite, "is_finite")
+
+ def log(self, base: float) -> Self:
+ import dask.array as da
+
+ def _log(expr: dx.Series) -> dx.Series:
+ return da.log(expr) / da.log(base)
+
+ return self._with_callable(_log, "log")
+
+ def exp(self) -> Self:
+ import dask.array as da
+
+ return self._with_callable(da.exp, "exp")
+
+ @property
+ def str(self) -> DaskExprStringNamespace:
+ return DaskExprStringNamespace(self)
+
+ @property
+ def dt(self) -> DaskExprDateTimeNamespace:
+ return DaskExprDateTimeNamespace(self)
+
+ list = not_implemented() # pyright: ignore[reportAssignmentType]
+ struct = not_implemented() # pyright: ignore[reportAssignmentType]
+ rank = not_implemented() # pyright: ignore[reportAssignmentType]
+ _alias_native = not_implemented()
+ window_function = not_implemented() # pyright: ignore[reportAssignmentType]
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/expr_dt.py b/venv/lib/python3.8/site-packages/narwhals/_dask/expr_dt.py
new file mode 100644
index 0000000..14481a3
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/expr_dt.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._duration import parse_interval_string
+from narwhals._pandas_like.utils import (
+ UNIT_DICT,
+ calculate_timestamp_date,
+ calculate_timestamp_datetime,
+ native_to_narwhals_dtype,
+)
+from narwhals._utils import Implementation
+
+if TYPE_CHECKING:
+ import dask.dataframe.dask_expr as dx
+
+ from narwhals._dask.expr import DaskExpr
+ from narwhals.typing import TimeUnit
+
+
+class DaskExprDateTimeNamespace:
+ def __init__(self, expr: DaskExpr) -> None:
+ self._compliant_expr = expr
+
+ def date(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.date, "date")
+
+ def year(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.year, "year")
+
+ def month(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.month, "month")
+
+ def day(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.day, "day")
+
+ def hour(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.hour, "hour")
+
+ def minute(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.minute, "minute")
+
+ def second(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.dt.second, "second")
+
+ def millisecond(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.microsecond // 1000, "millisecond"
+ )
+
+ def microsecond(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.microsecond, "microsecond"
+ )
+
+ def nanosecond(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.microsecond * 1000 + expr.dt.nanosecond, "nanosecond"
+ )
+
+ def ordinal_day(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.dayofyear, "ordinal_day"
+ )
+
+ def weekday(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.weekday + 1, # Dask is 0-6
+ "weekday",
+ )
+
+ def to_string(self, format: str) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, format: expr.dt.strftime(format.replace("%.f", ".%f")),
+ "strftime",
+ format=format,
+ )
+
+ def replace_time_zone(self, time_zone: str | None) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, time_zone: expr.dt.tz_localize(None).dt.tz_localize(time_zone)
+ if time_zone is not None
+ else expr.dt.tz_localize(None),
+ "tz_localize",
+ time_zone=time_zone,
+ )
+
+ def convert_time_zone(self, time_zone: str) -> DaskExpr:
+ def func(s: dx.Series, time_zone: str) -> dx.Series:
+ dtype = native_to_narwhals_dtype(
+ s.dtype, self._compliant_expr._version, Implementation.DASK
+ )
+ if dtype.time_zone is None: # type: ignore[attr-defined]
+ return s.dt.tz_localize("UTC").dt.tz_convert(time_zone) # pyright: ignore[reportAttributeAccessIssue]
+ else:
+ return s.dt.tz_convert(time_zone) # pyright: ignore[reportAttributeAccessIssue]
+
+ return self._compliant_expr._with_callable(
+ func, "tz_convert", time_zone=time_zone
+ )
+
+ def timestamp(self, time_unit: TimeUnit) -> DaskExpr:
+ def func(s: dx.Series, time_unit: TimeUnit) -> dx.Series:
+ dtype = native_to_narwhals_dtype(
+ s.dtype, self._compliant_expr._version, Implementation.DASK
+ )
+ is_pyarrow_dtype = "pyarrow" in str(dtype)
+ mask_na = s.isna()
+ dtypes = self._compliant_expr._version.dtypes
+ if dtype == dtypes.Date:
+ # Date is only supported in pandas dtypes if pyarrow-backed
+ s_cast = s.astype("Int32[pyarrow]")
+ result = calculate_timestamp_date(s_cast, time_unit)
+ elif isinstance(dtype, dtypes.Datetime):
+ original_time_unit = dtype.time_unit
+ s_cast = (
+ s.astype("Int64[pyarrow]") if is_pyarrow_dtype else s.astype("int64")
+ )
+ result = calculate_timestamp_datetime(
+ s_cast, original_time_unit, time_unit
+ )
+ else:
+ msg = "Input should be either of Date or Datetime type"
+ raise TypeError(msg)
+ return result.where(~mask_na) # pyright: ignore[reportReturnType]
+
+ return self._compliant_expr._with_callable(func, "datetime", time_unit=time_unit)
+
+ def total_minutes(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.total_seconds() // 60, "total_minutes"
+ )
+
+ def total_seconds(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.total_seconds() // 1, "total_seconds"
+ )
+
+ def total_milliseconds(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.total_seconds() * 1000 // 1, "total_milliseconds"
+ )
+
+ def total_microseconds(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.total_seconds() * 1_000_000 // 1, "total_microseconds"
+ )
+
+ def total_nanoseconds(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.total_seconds() * 1_000_000_000 // 1, "total_nanoseconds"
+ )
+
+ def truncate(self, every: str) -> DaskExpr:
+ multiple, unit = parse_interval_string(every)
+ if unit in {"mo", "q", "y"}:
+ msg = f"Truncating to {unit} is not supported yet for dask."
+ raise NotImplementedError(msg)
+ freq = f"{multiple}{UNIT_DICT.get(unit, unit)}"
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.dt.floor(freq), "truncate"
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/expr_str.py b/venv/lib/python3.8/site-packages/narwhals/_dask/expr_str.py
new file mode 100644
index 0000000..b770b53
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/expr_str.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import dask.dataframe as dd
+
+if TYPE_CHECKING:
+ from narwhals._dask.expr import DaskExpr
+
+
+class DaskExprStringNamespace:
+ def __init__(self, expr: DaskExpr) -> None:
+ self._compliant_expr = expr
+
+ def len_chars(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(lambda expr: expr.str.len(), "len")
+
+ def replace(self, pattern: str, value: str, *, literal: bool, n: int) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, pattern, value, literal, n: expr.str.replace(
+ pattern, value, regex=not literal, n=n
+ ),
+ "replace",
+ pattern=pattern,
+ value=value,
+ literal=literal,
+ n=n,
+ )
+
+ def replace_all(self, pattern: str, value: str, *, literal: bool) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, pattern, value, literal: expr.str.replace(
+ pattern, value, n=-1, regex=not literal
+ ),
+ "replace",
+ pattern=pattern,
+ value=value,
+ literal=literal,
+ )
+
+ def strip_chars(self, characters: str | None) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, characters: expr.str.strip(characters),
+ "strip",
+ characters=characters,
+ )
+
+ def starts_with(self, prefix: str) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, prefix: expr.str.startswith(prefix), "starts_with", prefix=prefix
+ )
+
+ def ends_with(self, suffix: str) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, suffix: expr.str.endswith(suffix), "ends_with", suffix=suffix
+ )
+
+ def contains(self, pattern: str, *, literal: bool) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, pattern, literal: expr.str.contains(
+ pat=pattern, regex=not literal
+ ),
+ "contains",
+ pattern=pattern,
+ literal=literal,
+ )
+
+ def slice(self, offset: int, length: int | None) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, offset, length: expr.str.slice(
+ start=offset, stop=offset + length if length else None
+ ),
+ "slice",
+ offset=offset,
+ length=length,
+ )
+
+ def split(self, by: str) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, by: expr.str.split(pat=by), "split", by=by
+ )
+
+ def to_datetime(self, format: str | None) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr, format: dd.to_datetime(expr, format=format),
+ "to_datetime",
+ format=format,
+ )
+
+ def to_uppercase(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.str.upper(), "to_uppercase"
+ )
+
+ def to_lowercase(self) -> DaskExpr:
+ return self._compliant_expr._with_callable(
+ lambda expr: expr.str.lower(), "to_lowercase"
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_dask/group_by.py
new file mode 100644
index 0000000..d71c3fa
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/group_by.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence
+
+import dask.dataframe as dd
+
+from narwhals._compliant import DepthTrackingGroupBy
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+
+if TYPE_CHECKING:
+ import pandas as pd
+ from dask.dataframe.api import GroupBy as _DaskGroupBy
+ from pandas.core.groupby import SeriesGroupBy as _PandasSeriesGroupBy
+ from typing_extensions import TypeAlias
+
+ from narwhals._compliant.group_by import NarwhalsAggregation
+ from narwhals._dask.dataframe import DaskLazyFrame
+ from narwhals._dask.expr import DaskExpr
+
+ PandasSeriesGroupBy: TypeAlias = _PandasSeriesGroupBy[Any, Any]
+ _AggFn: TypeAlias = Callable[..., Any]
+
+else:
+ try:
+ import dask.dataframe.dask_expr as dx
+ except ModuleNotFoundError: # pragma: no cover
+ import dask_expr as dx
+ _DaskGroupBy = dx._groupby.GroupBy
+
+Aggregation: TypeAlias = "str | _AggFn"
+"""The name of an aggregation function, or the function itself."""
+
+
+def n_unique() -> dd.Aggregation:
+ def chunk(s: PandasSeriesGroupBy) -> pd.Series[Any]:
+ return s.nunique(dropna=False)
+
+ def agg(s0: PandasSeriesGroupBy) -> pd.Series[Any]:
+ return s0.sum()
+
+ return dd.Aggregation(name="nunique", chunk=chunk, agg=agg)
+
+
+def var(ddof: int) -> _AggFn:
+ return partial(_DaskGroupBy.var, ddof=ddof)
+
+
+def std(ddof: int) -> _AggFn:
+ return partial(_DaskGroupBy.std, ddof=ddof)
+
+
+class DaskLazyGroupBy(DepthTrackingGroupBy["DaskLazyFrame", "DaskExpr", Aggregation]):
+ _REMAP_AGGS: ClassVar[Mapping[NarwhalsAggregation, Aggregation]] = {
+ "sum": "sum",
+ "mean": "mean",
+ "median": "median",
+ "max": "max",
+ "min": "min",
+ "std": std,
+ "var": var,
+ "len": "size",
+ "n_unique": n_unique,
+ "count": "count",
+ }
+
+ def __init__(
+ self,
+ df: DaskLazyFrame,
+ keys: Sequence[DaskExpr] | Sequence[str],
+ /,
+ *,
+ drop_null_keys: bool,
+ ) -> None:
+ self._compliant_frame, self._keys, self._output_key_names = self._parse_keys(
+ df, keys=keys
+ )
+ self._grouped = self.compliant.native.groupby(
+ self._keys, dropna=drop_null_keys, observed=True
+ )
+
+ def agg(self, *exprs: DaskExpr) -> DaskLazyFrame:
+ from narwhals._dask.dataframe import DaskLazyFrame
+
+ if not exprs:
+ # No aggregation provided
+ return (
+ self.compliant.simple_select(*self._keys)
+ .unique(self._keys, keep="any")
+ .rename(dict(zip(self._keys, self._output_key_names)))
+ )
+
+ self._ensure_all_simple(exprs)
+ # This should be the fastpath, but cuDF is too far behind to use it.
+ # - https://github.com/rapidsai/cudf/issues/15118
+ # - https://github.com/rapidsai/cudf/issues/15084
+ simple_aggregations: dict[str, tuple[str, Aggregation]] = {}
+ exclude = (*self._keys, *self._output_key_names)
+ for expr in exprs:
+ output_names, aliases = evaluate_output_names_and_aliases(
+ expr, self.compliant, exclude
+ )
+ if expr._depth == 0:
+ # e.g. `agg(nw.len())`
+ column = self._keys[0]
+ agg_fn = self._remap_expr_name(expr._function_name)
+ simple_aggregations.update(dict.fromkeys(aliases, (column, agg_fn)))
+ continue
+
+ # e.g. `agg(nw.mean('a'))`
+ agg_fn = self._remap_expr_name(self._leaf_name(expr))
+ # deal with n_unique case in a "lazy" mode to not depend on dask globally
+ agg_fn = agg_fn(**expr._scalar_kwargs) if callable(agg_fn) else agg_fn
+ simple_aggregations.update(
+ (alias, (output_name, agg_fn))
+ for alias, output_name in zip(aliases, output_names)
+ )
+ return DaskLazyFrame(
+ self._grouped.agg(**simple_aggregations).reset_index(),
+ backend_version=self.compliant._backend_version,
+ version=self.compliant._version,
+ ).rename(dict(zip(self._keys, self._output_key_names)))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_dask/namespace.py
new file mode 100644
index 0000000..3e0506d
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/namespace.py
@@ -0,0 +1,320 @@
+from __future__ import annotations
+
+import operator
+from functools import reduce
+from typing import TYPE_CHECKING, Iterable, Sequence, cast
+
+import dask.dataframe as dd
+import pandas as pd
+
+from narwhals._compliant import CompliantThen, CompliantWhen, LazyNamespace
+from narwhals._compliant.namespace import DepthTrackingNamespace
+from narwhals._dask.dataframe import DaskLazyFrame
+from narwhals._dask.expr import DaskExpr
+from narwhals._dask.selectors import DaskSelectorNamespace
+from narwhals._dask.utils import (
+ align_series_full_broadcast,
+ narwhals_to_native_dtype,
+ validate_comparand,
+)
+from narwhals._expression_parsing import (
+ ExprKind,
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._utils import Implementation
+
+if TYPE_CHECKING:
+ import dask.dataframe.dask_expr as dx
+
+ from narwhals._utils import Version
+ from narwhals.typing import ConcatMethod, IntoDType, NonNestedLiteral
+
+
+class DaskNamespace(
+ LazyNamespace[DaskLazyFrame, DaskExpr, dd.DataFrame],
+ DepthTrackingNamespace[DaskLazyFrame, DaskExpr],
+):
+ _implementation: Implementation = Implementation.DASK
+
+ @property
+ def selectors(self) -> DaskSelectorNamespace:
+ return DaskSelectorNamespace.from_namespace(self)
+
+ @property
+ def _expr(self) -> type[DaskExpr]:
+ return DaskExpr
+
+ @property
+ def _lazyframe(self) -> type[DaskLazyFrame]:
+ return DaskLazyFrame
+
+ def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
+ self._backend_version = backend_version
+ self._version = version
+
+ def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ if dtype is not None:
+ native_dtype = narwhals_to_native_dtype(dtype, self._version)
+ native_pd_series = pd.Series([value], dtype=native_dtype, name="literal")
+ else:
+ native_pd_series = pd.Series([value], name="literal")
+ npartitions = df._native_frame.npartitions
+ dask_series = dd.from_pandas(native_pd_series, npartitions=npartitions)
+ return [dask_series[0].to_series()]
+
+ return self._expr(
+ func,
+ depth=0,
+ function_name="lit",
+ evaluate_output_names=lambda _df: ["literal"],
+ alias_output_names=None,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def len(self) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ # We don't allow dataframes with 0 columns, so `[0]` is safe.
+ return [df._native_frame[df.columns[0]].size.to_series()]
+
+ return self._expr(
+ func,
+ depth=0,
+ function_name="len",
+ evaluate_output_names=lambda _df: ["len"],
+ alias_output_names=None,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def all_horizontal(self, *exprs: DaskExpr) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ series = align_series_full_broadcast(
+ df, *(s for _expr in exprs for s in _expr(df))
+ )
+ return [reduce(operator.and_, series)]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="all_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def any_horizontal(self, *exprs: DaskExpr) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ series = align_series_full_broadcast(
+ df, *(s for _expr in exprs for s in _expr(df))
+ )
+ return [reduce(operator.or_, series)]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="any_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def sum_horizontal(self, *exprs: DaskExpr) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ series = align_series_full_broadcast(
+ df, *(s for _expr in exprs for s in _expr(df))
+ )
+ return [dd.concat(series, axis=1).sum(axis=1)]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="sum_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def concat(
+ self, items: Iterable[DaskLazyFrame], *, how: ConcatMethod
+ ) -> DaskLazyFrame:
+ if not items:
+ msg = "No items to concatenate" # pragma: no cover
+ raise AssertionError(msg)
+ dfs = [i._native_frame for i in items]
+ cols_0 = dfs[0].columns
+ if how == "vertical":
+ for i, df in enumerate(dfs[1:], start=1):
+ cols_current = df.columns
+ if not (
+ (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()
+ ):
+ msg = (
+ "unable to vstack, column names don't match:\n"
+ f" - dataframe 0: {cols_0.to_list()}\n"
+ f" - dataframe {i}: {cols_current.to_list()}\n"
+ )
+ raise TypeError(msg)
+ return DaskLazyFrame(
+ dd.concat(dfs, axis=0, join="inner"),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+ if how == "diagonal":
+ return DaskLazyFrame(
+ dd.concat(dfs, axis=0, join="outer"),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ raise NotImplementedError
+
+ def mean_horizontal(self, *exprs: DaskExpr) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ expr_results = [s for _expr in exprs for s in _expr(df)]
+ series = align_series_full_broadcast(df, *(s.fillna(0) for s in expr_results))
+ non_na = align_series_full_broadcast(
+ df, *(1 - s.isna() for s in expr_results)
+ )
+ num = reduce(lambda x, y: x + y, series) # pyright: ignore[reportOperatorIssue]
+ den = reduce(lambda x, y: x + y, non_na) # pyright: ignore[reportOperatorIssue]
+ return [cast("dx.Series", num / den)] # pyright: ignore[reportOperatorIssue]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="mean_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def min_horizontal(self, *exprs: DaskExpr) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ series = align_series_full_broadcast(
+ df, *(s for _expr in exprs for s in _expr(df))
+ )
+
+ return [dd.concat(series, axis=1).min(axis=1)]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="min_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def max_horizontal(self, *exprs: DaskExpr) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ series = align_series_full_broadcast(
+ df, *(s for _expr in exprs for s in _expr(df))
+ )
+
+ return [dd.concat(series, axis=1).max(axis=1)]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="max_horizontal",
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def when(self, predicate: DaskExpr) -> DaskWhen:
+ return DaskWhen.from_expr(predicate, context=self)
+
+ def concat_str(
+ self, *exprs: DaskExpr, separator: str, ignore_nulls: bool
+ ) -> DaskExpr:
+ def func(df: DaskLazyFrame) -> list[dx.Series]:
+ expr_results = [s for _expr in exprs for s in _expr(df)]
+ series = (
+ s.astype(str) for s in align_series_full_broadcast(df, *expr_results)
+ )
+ null_mask = [s.isna() for s in align_series_full_broadcast(df, *expr_results)]
+
+ if not ignore_nulls:
+ null_mask_result = reduce(operator.or_, null_mask)
+ result = reduce(lambda x, y: x + separator + y, series).where(
+ ~null_mask_result, None
+ )
+ else:
+ init_value, *values = [
+ s.where(~nm, "") for s, nm in zip(series, null_mask)
+ ]
+
+ separators = (
+ nm.map({True: "", False: separator}, meta=str)
+ for nm in null_mask[:-1]
+ )
+ result = reduce(
+ operator.add, (s + v for s, v in zip(separators, values)), init_value
+ )
+
+ return [result]
+
+ return self._expr(
+ call=func,
+ depth=max(x._depth for x in exprs) + 1,
+ function_name="concat_str",
+ evaluate_output_names=getattr(
+ exprs[0], "_evaluate_output_names", lambda _df: ["literal"]
+ ),
+ alias_output_names=getattr(exprs[0], "_alias_output_names", None),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+
+class DaskWhen(CompliantWhen[DaskLazyFrame, "dx.Series", DaskExpr]):
+ @property
+ def _then(self) -> type[DaskThen]:
+ return DaskThen
+
+ def __call__(self, df: DaskLazyFrame) -> Sequence[dx.Series]:
+ then_value = (
+ self._then_value(df)[0]
+ if isinstance(self._then_value, DaskExpr)
+ else self._then_value
+ )
+ otherwise_value = (
+ self._otherwise_value(df)[0]
+ if isinstance(self._otherwise_value, DaskExpr)
+ else self._otherwise_value
+ )
+
+ condition = self._condition(df)[0]
+ # re-evaluate DataFrame if the condition aggregates to force
+ # then/otherwise to be evaluated against the aggregated frame
+ assert self._condition._metadata is not None # noqa: S101
+ if self._condition._metadata.is_scalar_like:
+ new_df = df._with_native(condition.to_frame())
+ condition = self._condition.broadcast(ExprKind.AGGREGATION)(df)[0]
+ df = new_df
+
+ if self._otherwise_value is None:
+ (condition, then_series) = align_series_full_broadcast(
+ df, condition, then_value
+ )
+ validate_comparand(condition, then_series)
+ return [then_series.where(condition)] # pyright: ignore[reportArgumentType]
+ (condition, then_series, otherwise_series) = align_series_full_broadcast(
+ df, condition, then_value, otherwise_value
+ )
+ validate_comparand(condition, then_series)
+ validate_comparand(condition, otherwise_series)
+ return [then_series.where(condition, otherwise_series)] # pyright: ignore[reportArgumentType]
+
+
+class DaskThen(CompliantThen[DaskLazyFrame, "dx.Series", DaskExpr], DaskExpr): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_dask/selectors.py
new file mode 100644
index 0000000..218b1e3
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/selectors.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, LazySelectorNamespace
+from narwhals._dask.expr import DaskExpr
+
+if TYPE_CHECKING:
+ import dask.dataframe.dask_expr as dx # noqa: F401
+
+ from narwhals._dask.dataframe import DaskLazyFrame # noqa: F401
+
+
+class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]):
+ @property
+ def _selector(self) -> type[DaskSelector]:
+ return DaskSelector
+
+
+class DaskSelector(CompliantSelector["DaskLazyFrame", "dx.Series"], DaskExpr): # type: ignore[misc]
+ def _to_expr(self) -> DaskExpr:
+ return DaskExpr(
+ self._call,
+ depth=self._depth,
+ function_name=self._function_name,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_dask/utils.py b/venv/lib/python3.8/site-packages/narwhals/_dask/utils.py
new file mode 100644
index 0000000..fa2a2b0
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_dask/utils.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Sequence
+
+from narwhals._pandas_like.utils import select_columns_by_name
+from narwhals._utils import (
+ Implementation,
+ Version,
+ isinstance_or_issubclass,
+ parse_version,
+)
+from narwhals.dependencies import get_pandas, get_pyarrow
+
+if TYPE_CHECKING:
+ import dask.dataframe as dd
+ import dask.dataframe.dask_expr as dx
+
+ from narwhals._dask.dataframe import DaskLazyFrame
+ from narwhals._dask.expr import DaskExpr
+ from narwhals.typing import IntoDType
+else:
+ try:
+ import dask.dataframe.dask_expr as dx
+ except ModuleNotFoundError: # pragma: no cover
+ import dask_expr as dx
+
+
+def maybe_evaluate_expr(df: DaskLazyFrame, obj: DaskExpr | object) -> dx.Series | object:
+ from narwhals._dask.expr import DaskExpr
+
+ if isinstance(obj, DaskExpr):
+ results = obj._call(df)
+ assert len(results) == 1 # debug assertion # noqa: S101
+ return results[0]
+ return obj
+
+
+def evaluate_exprs(df: DaskLazyFrame, /, *exprs: DaskExpr) -> list[tuple[str, dx.Series]]:
+ native_results: list[tuple[str, dx.Series]] = []
+ for expr in exprs:
+ native_series_list = expr(df)
+ aliases = expr._evaluate_aliases(df)
+ if len(aliases) != len(native_series_list): # pragma: no cover
+ msg = f"Internal error: got aliases {aliases}, but only got {len(native_series_list)} results"
+ raise AssertionError(msg)
+ native_results.extend(zip(aliases, native_series_list))
+ return native_results
+
+
+def align_series_full_broadcast(
+ df: DaskLazyFrame, *series: dx.Series | object
+) -> Sequence[dx.Series]:
+ return [
+ s if isinstance(s, dx.Series) else df._native_frame.assign(_tmp=s)["_tmp"]
+ for s in series
+ ] # pyright: ignore[reportReturnType]
+
+
+def add_row_index(
+ frame: dd.DataFrame,
+ name: str,
+ backend_version: tuple[int, ...],
+ implementation: Implementation,
+) -> dd.DataFrame:
+ original_cols = frame.columns
+ frame = frame.assign(**{name: 1})
+ return select_columns_by_name(
+ frame.assign(**{name: frame[name].cumsum(method="blelloch") - 1}),
+ [name, *original_cols],
+ backend_version,
+ implementation,
+ )
+
+
+def validate_comparand(lhs: dx.Series, rhs: dx.Series) -> None:
+ if not dx.expr.are_co_aligned(lhs._expr, rhs._expr): # pragma: no cover
+ # are_co_aligned is a method which cheaply checks if two Dask expressions
+ # have the same index, and therefore don't require index alignment.
+ # If someone only operates on a Dask DataFrame via expressions, then this
+ # should always be the case: expression outputs (by definition) all come from the
+ # same input dataframe, and Dask Series does not have any operations which
+ # change the index. Nonetheless, we perform this safety check anyway.
+
+ # However, we still need to carefully vet which methods we support for Dask, to
+ # avoid issues where `are_co_aligned` doesn't do what we want it to do:
+ # https://github.com/dask/dask-expr/issues/1112.
+ msg = "Objects are not co-aligned, so this operation is not supported for Dask backend"
+ raise RuntimeError(msg)
+
+
+def narwhals_to_native_dtype(dtype: IntoDType, version: Version) -> Any: # noqa: C901, PLR0912
+ dtypes = version.dtypes
+ if isinstance_or_issubclass(dtype, dtypes.Float64):
+ return "float64"
+ if isinstance_or_issubclass(dtype, dtypes.Float32):
+ return "float32"
+ if isinstance_or_issubclass(dtype, dtypes.Int64):
+ return "int64"
+ if isinstance_or_issubclass(dtype, dtypes.Int32):
+ return "int32"
+ if isinstance_or_issubclass(dtype, dtypes.Int16):
+ return "int16"
+ if isinstance_or_issubclass(dtype, dtypes.Int8):
+ return "int8"
+ if isinstance_or_issubclass(dtype, dtypes.UInt64):
+ return "uint64"
+ if isinstance_or_issubclass(dtype, dtypes.UInt32):
+ return "uint32"
+ if isinstance_or_issubclass(dtype, dtypes.UInt16):
+ return "uint16"
+ if isinstance_or_issubclass(dtype, dtypes.UInt8):
+ return "uint8"
+ if isinstance_or_issubclass(dtype, dtypes.String):
+ if (pd := get_pandas()) is not None and parse_version(pd) >= (2, 0, 0):
+ if get_pyarrow() is not None:
+ return "string[pyarrow]"
+ return "string[python]" # pragma: no cover
+ return "object" # pragma: no cover
+ if isinstance_or_issubclass(dtype, dtypes.Boolean):
+ return "bool"
+ if isinstance_or_issubclass(dtype, dtypes.Enum):
+ if version is Version.V1:
+ msg = "Converting to Enum is not supported in narwhals.stable.v1"
+ raise NotImplementedError(msg)
+ if isinstance(dtype, dtypes.Enum):
+ import pandas as pd
+
+ # NOTE: `pandas-stubs.core.dtypes.dtypes.CategoricalDtype.categories` is too narrow
+ # Should be one of the `ListLike*` types
+ # https://github.com/pandas-dev/pandas-stubs/blob/8434bde95460b996323cc8c0fea7b0a8bb00ea26/pandas-stubs/_typing.pyi#L497-L505
+ return pd.CategoricalDtype(dtype.categories, ordered=True) # pyright: ignore[reportArgumentType]
+ msg = "Can not cast / initialize Enum without categories present"
+ raise ValueError(msg)
+
+ if isinstance_or_issubclass(dtype, dtypes.Categorical):
+ return "category"
+ if isinstance_or_issubclass(dtype, dtypes.Datetime):
+ return "datetime64[us]"
+ if isinstance_or_issubclass(dtype, dtypes.Date):
+ return "date32[day][pyarrow]"
+ if isinstance_or_issubclass(dtype, dtypes.Duration):
+ return "timedelta64[ns]"
+ if isinstance_or_issubclass(dtype, dtypes.List): # pragma: no cover
+ msg = "Converting to List dtype is not supported yet"
+ return NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Struct): # pragma: no cover
+ msg = "Converting to Struct dtype is not supported yet"
+ return NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Array): # pragma: no cover
+ msg = "Converting to Array dtype is not supported yet"
+ return NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Time): # pragma: no cover
+ msg = "Converting to Time dtype is not supported yet"
+ return NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Binary): # pragma: no cover
+ msg = "Converting to Binary dtype is not supported yet"
+ return NotImplementedError(msg)
+
+ msg = f"Unknown dtype: {dtype}" # pragma: no cover
+ raise AssertionError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/dataframe.py
new file mode 100644
index 0000000..6b4b197
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/dataframe.py
@@ -0,0 +1,512 @@
+from __future__ import annotations
+
+import contextlib
+from functools import reduce
+from operator import and_
+from typing import TYPE_CHECKING, Any, Iterator, Mapping, Sequence
+
+import duckdb
+from duckdb import FunctionExpression, StarExpression
+
+from narwhals._duckdb.utils import (
+ DeferredTimeZone,
+ col,
+ evaluate_exprs,
+ generate_partition_by_sql,
+ lit,
+ native_to_narwhals_dtype,
+)
+from narwhals._utils import (
+ Implementation,
+ Version,
+ generate_temporary_column_name,
+ not_implemented,
+ parse_columns_to_drop,
+ parse_version,
+ validate_backend_version,
+)
+from narwhals.dependencies import get_duckdb
+from narwhals.exceptions import InvalidOperationError
+from narwhals.typing import CompliantLazyFrame
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import pandas as pd
+ import pyarrow as pa
+ from duckdb import Expression
+ from duckdb.typing import DuckDBPyType
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny
+ from narwhals._duckdb.expr import DuckDBExpr
+ from narwhals._duckdb.group_by import DuckDBGroupBy
+ from narwhals._duckdb.namespace import DuckDBNamespace
+ from narwhals._duckdb.series import DuckDBInterchangeSeries
+ from narwhals._utils import _FullContext
+ from narwhals.dataframe import LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.stable.v1 import DataFrame as DataFrameV1
+ from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy
+
+with contextlib.suppress(ImportError): # requires duckdb>=1.3.0
+ from duckdb import SQLExpression
+
+
+class DuckDBLazyFrame(
+ CompliantLazyFrame[
+ "DuckDBExpr",
+ "duckdb.DuckDBPyRelation",
+ "LazyFrame[duckdb.DuckDBPyRelation] | DataFrameV1[duckdb.DuckDBPyRelation]",
+ ]
+):
+ _implementation = Implementation.DUCKDB
+
+ def __init__(
+ self,
+ df: duckdb.DuckDBPyRelation,
+ *,
+ backend_version: tuple[int, ...],
+ version: Version,
+ ) -> None:
+ self._native_frame: duckdb.DuckDBPyRelation = df
+ self._version = version
+ self._backend_version = backend_version
+ self._cached_native_schema: dict[str, DuckDBPyType] | None = None
+ self._cached_columns: list[str] | None = None
+ validate_backend_version(self._implementation, self._backend_version)
+
+ @staticmethod
+ def _is_native(obj: duckdb.DuckDBPyRelation | Any) -> TypeIs[duckdb.DuckDBPyRelation]:
+ return isinstance(obj, duckdb.DuckDBPyRelation)
+
+ @classmethod
+ def from_native(
+ cls, data: duckdb.DuckDBPyRelation, /, *, context: _FullContext
+ ) -> Self:
+ return cls(
+ data, backend_version=context._backend_version, version=context._version
+ )
+
+ def to_narwhals(
+ self, *args: Any, **kwds: Any
+ ) -> LazyFrame[duckdb.DuckDBPyRelation] | DataFrameV1[duckdb.DuckDBPyRelation]:
+ if self._version is Version.MAIN:
+ return self._version.lazyframe(self, level="lazy")
+
+ from narwhals.stable.v1 import DataFrame as DataFrameV1
+
+ return DataFrameV1(self, level="interchange") # type: ignore[no-any-return]
+
+ def __narwhals_dataframe__(self) -> Self: # pragma: no cover
+ # Keep around for backcompat.
+ if self._version is not Version.V1:
+ msg = "__narwhals_dataframe__ is not implemented for DuckDBLazyFrame"
+ raise AttributeError(msg)
+ return self
+
+ def __narwhals_lazyframe__(self) -> Self:
+ return self
+
+ def __native_namespace__(self) -> ModuleType:
+ return get_duckdb() # type: ignore[no-any-return]
+
+ def __narwhals_namespace__(self) -> DuckDBNamespace:
+ from narwhals._duckdb.namespace import DuckDBNamespace
+
+ return DuckDBNamespace(
+ backend_version=self._backend_version, version=self._version
+ )
+
+ def get_column(self, name: str) -> DuckDBInterchangeSeries:
+ from narwhals._duckdb.series import DuckDBInterchangeSeries
+
+ return DuckDBInterchangeSeries(self.native.select(name), version=self._version)
+
+ def _iter_columns(self) -> Iterator[Expression]:
+ for name in self.columns:
+ yield col(name)
+
+ def collect(
+ self, backend: ModuleType | Implementation | str | None, **kwargs: Any
+ ) -> CompliantDataFrameAny:
+ if backend is None or backend is Implementation.PYARROW:
+ import pyarrow as pa # ignore-banned-import
+
+ from narwhals._arrow.dataframe import ArrowDataFrame
+
+ return ArrowDataFrame(
+ self.native.arrow(),
+ backend_version=parse_version(pa),
+ version=self._version,
+ validate_column_names=True,
+ )
+
+ if backend is Implementation.PANDAS:
+ import pandas as pd # ignore-banned-import
+
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ return PandasLikeDataFrame(
+ self.native.df(),
+ implementation=Implementation.PANDAS,
+ backend_version=parse_version(pd),
+ version=self._version,
+ validate_column_names=True,
+ )
+
+ if backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame(
+ self.native.pl(), backend_version=parse_version(pl), version=self._version
+ )
+
+ msg = f"Unsupported `backend` value: {backend}" # pragma: no cover
+ raise ValueError(msg) # pragma: no cover
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.limit(n))
+
+ def simple_select(self, *column_names: str) -> Self:
+ return self._with_native(self.native.select(*column_names))
+
+ def aggregate(self, *exprs: DuckDBExpr) -> Self:
+ selection = [val.alias(name) for name, val in evaluate_exprs(self, *exprs)]
+ return self._with_native(self.native.aggregate(selection)) # type: ignore[arg-type]
+
+ def select(self, *exprs: DuckDBExpr) -> Self:
+ selection = (val.alias(name) for name, val in evaluate_exprs(self, *exprs))
+ return self._with_native(self.native.select(*selection))
+
+ def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
+ columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
+ selection = (name for name in self.columns if name not in columns_to_drop)
+ return self._with_native(self.native.select(*selection))
+
+ def lazy(self, *, backend: Implementation | None = None) -> Self:
+ # The `backend`` argument has no effect but we keep it here for
+ # backwards compatibility because in `narwhals.stable.v1`
+ # function `.from_native()` will return a DataFrame for DuckDB.
+
+ if backend is not None: # pragma: no cover
+ msg = "`backend` argument is not supported for DuckDB"
+ raise ValueError(msg)
+ return self
+
+ def with_columns(self, *exprs: DuckDBExpr) -> Self:
+ new_columns_map = dict(evaluate_exprs(self, *exprs))
+ result = [
+ new_columns_map.pop(name).alias(name)
+ if name in new_columns_map
+ else col(name)
+ for name in self.columns
+ ]
+ result.extend(value.alias(name) for name, value in new_columns_map.items())
+ return self._with_native(self.native.select(*result))
+
+ def filter(self, predicate: DuckDBExpr) -> Self:
+ # `[0]` is safe as the predicate's expression only returns a single column
+ mask = predicate(self)[0]
+ return self._with_native(self.native.filter(mask))
+
+ @property
+ def schema(self) -> dict[str, DType]:
+ if self._cached_native_schema is None:
+ # Note: prefer `self._cached_native_schema` over `functools.cached_property`
+ # due to Python3.13 failures.
+ self._cached_native_schema = dict(zip(self.columns, self.native.types))
+
+ deferred_time_zone = DeferredTimeZone(self.native)
+ return {
+ column_name: native_to_narwhals_dtype(
+ duckdb_dtype, self._version, deferred_time_zone
+ )
+ for column_name, duckdb_dtype in zip(self.native.columns, self.native.types)
+ }
+
+ @property
+ def columns(self) -> list[str]:
+ if self._cached_columns is None:
+ self._cached_columns = (
+ list(self.schema)
+ if self._cached_native_schema is not None
+ else self.native.columns
+ )
+ return self._cached_columns
+
+ def to_pandas(self) -> pd.DataFrame:
+ # only if version is v1, keep around for backcompat
+ import pandas as pd # ignore-banned-import()
+
+ if parse_version(pd) >= (1, 0, 0):
+ return self.native.df()
+ else: # pragma: no cover
+ msg = f"Conversion to pandas requires 'pandas>=1.0.0', found {pd.__version__}"
+ raise NotImplementedError(msg)
+
+ def to_arrow(self) -> pa.Table:
+ # only if version is v1, keep around for backcompat
+ return self.native.arrow()
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native, version=version, backend_version=self._backend_version
+ )
+
+ def _with_native(self, df: duckdb.DuckDBPyRelation) -> Self:
+ return self.__class__(
+ df, backend_version=self._backend_version, version=self._version
+ )
+
+ def group_by(
+ self, keys: Sequence[str] | Sequence[DuckDBExpr], *, drop_null_keys: bool
+ ) -> DuckDBGroupBy:
+ from narwhals._duckdb.group_by import DuckDBGroupBy
+
+ return DuckDBGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
+ def rename(self, mapping: Mapping[str, str]) -> Self:
+ df = self.native
+ selection = (
+ col(name).alias(mapping[name]) if name in mapping else col(name)
+ for name in df.columns
+ )
+ return self._with_native(self.native.select(*selection))
+
+ def join(
+ self,
+ other: Self,
+ *,
+ how: JoinStrategy,
+ left_on: Sequence[str] | None,
+ right_on: Sequence[str] | None,
+ suffix: str,
+ ) -> Self:
+ native_how = "outer" if how == "full" else how
+
+ if native_how == "cross":
+ if self._backend_version < (1, 1, 4):
+ msg = f"'duckdb>=1.1.4' is required for cross-join, found version: {self._backend_version}"
+ raise NotImplementedError(msg)
+ rel = self.native.set_alias("lhs").cross(other.native.set_alias("rhs"))
+ else:
+ # help mypy
+ assert left_on is not None # noqa: S101
+ assert right_on is not None # noqa: S101
+ it = (
+ col(f'lhs."{left}"') == col(f'rhs."{right}"')
+ for left, right in zip(left_on, right_on)
+ )
+ condition: Expression = reduce(and_, it)
+ rel = self.native.set_alias("lhs").join(
+ other.native.set_alias("rhs"),
+ # NOTE: Fixed in `--pre` https://github.com/duckdb/duckdb/pull/16933
+ condition=condition, # type: ignore[arg-type, unused-ignore]
+ how=native_how,
+ )
+
+ if native_how in {"inner", "left", "cross", "outer"}:
+ select = [col(f'lhs."{x}"') for x in self.columns]
+ for name in other.columns:
+ col_in_lhs: bool = name in self.columns
+ if native_how == "outer" and not col_in_lhs:
+ select.append(col(f'rhs."{name}"'))
+ elif (native_how == "outer") or (
+ col_in_lhs and (right_on is None or name not in right_on)
+ ):
+ select.append(col(f'rhs."{name}"').alias(f"{name}{suffix}"))
+ elif right_on is None or name not in right_on:
+ select.append(col(name))
+ res = rel.select(*select).set_alias(self.native.alias)
+ else: # semi, anti
+ res = rel.select("lhs.*").set_alias(self.native.alias)
+
+ return self._with_native(res)
+
+ def join_asof(
+ self,
+ other: Self,
+ *,
+ left_on: str,
+ right_on: str,
+ by_left: Sequence[str] | None,
+ by_right: Sequence[str] | None,
+ strategy: AsofJoinStrategy,
+ suffix: str,
+ ) -> Self:
+ lhs = self.native
+ rhs = other.native
+ conditions: list[Expression] = []
+ if by_left is not None and by_right is not None:
+ conditions.extend(
+ col(f'lhs."{left}"') == col(f'rhs."{right}"')
+ for left, right in zip(by_left, by_right)
+ )
+ else:
+ by_left = by_right = []
+ if strategy == "backward":
+ conditions.append(col(f'lhs."{left_on}"') >= col(f'rhs."{right_on}"'))
+ elif strategy == "forward":
+ conditions.append(col(f'lhs."{left_on}"') <= col(f'rhs."{right_on}"'))
+ else:
+ msg = "Only 'backward' and 'forward' strategies are currently supported for DuckDB"
+ raise NotImplementedError(msg)
+ condition: Expression = reduce(and_, conditions)
+ select = ["lhs.*"]
+ for name in rhs.columns:
+ if name in lhs.columns and (
+ right_on is None or name not in {right_on, *by_right}
+ ):
+ select.append(f'rhs."{name}" as "{name}{suffix}"')
+ elif right_on is None or name not in {right_on, *by_right}:
+ select.append(str(col(name)))
+ # Replace with Python API call once
+ # https://github.com/duckdb/duckdb/discussions/16947 is addressed.
+ query = f"""
+ SELECT {",".join(select)}
+ FROM lhs
+ ASOF LEFT JOIN rhs
+ ON {condition}
+ """ # noqa: S608
+ return self._with_native(duckdb.sql(query))
+
+ def collect_schema(self) -> dict[str, DType]:
+ return self.schema
+
+ def unique(
+ self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
+ ) -> Self:
+ if subset_ := subset if keep == "any" else (subset or self.columns):
+ if self._backend_version < (1, 3):
+ msg = (
+ "At least version 1.3 of DuckDB is required for `unique` operation\n"
+ "with `subset` specified."
+ )
+ raise NotImplementedError(msg)
+ # Sanitise input
+ if error := self._check_columns_exist(subset_):
+ raise error
+ idx_name = generate_temporary_column_name(8, self.columns)
+ count_name = generate_temporary_column_name(8, [*self.columns, idx_name])
+ partition_by_sql = generate_partition_by_sql(*(subset_))
+ name = count_name if keep == "none" else idx_name
+ idx_expr = SQLExpression(
+ f"{FunctionExpression('row_number')} over ({partition_by_sql})"
+ ).alias(idx_name)
+ count_expr = SQLExpression(
+ f"{FunctionExpression('count', StarExpression())} over ({partition_by_sql})"
+ ).alias(count_name)
+ return self._with_native(
+ self.native.select(StarExpression(), idx_expr, count_expr)
+ .filter(col(name) == lit(1))
+ .select(StarExpression(exclude=[count_name, idx_name]))
+ )
+ return self._with_native(self.native.unique(", ".join(self.columns)))
+
+ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
+ if isinstance(descending, bool):
+ descending = [descending] * len(by)
+ if nulls_last:
+ it = (
+ col(name).nulls_last() if not desc else col(name).desc().nulls_last()
+ for name, desc in zip(by, descending)
+ )
+ else:
+ it = (
+ col(name).nulls_first() if not desc else col(name).desc().nulls_first()
+ for name, desc in zip(by, descending)
+ )
+ return self._with_native(self.native.sort(*it))
+
+ def drop_nulls(self, subset: Sequence[str] | None) -> Self:
+ subset_ = subset if subset is not None else self.columns
+ keep_condition = reduce(and_, (col(name).isnotnull() for name in subset_))
+ return self._with_native(self.native.filter(keep_condition))
+
+ def explode(self, columns: Sequence[str]) -> Self:
+ dtypes = self._version.dtypes
+ schema = self.collect_schema()
+ for name in columns:
+ dtype = schema[name]
+ if dtype != dtypes.List:
+ msg = (
+ f"`explode` operation not supported for dtype `{dtype}`, "
+ "expected List type"
+ )
+ raise InvalidOperationError(msg)
+
+ if len(columns) != 1:
+ msg = (
+ "Exploding on multiple columns is not supported with DuckDB backend since "
+ "we cannot guarantee that the exploded columns have matching element counts."
+ )
+ raise NotImplementedError(msg)
+
+ col_to_explode = col(columns[0])
+ rel = self.native
+ original_columns = self.columns
+
+ not_null_condition = col_to_explode.isnotnull() & FunctionExpression(
+ "len", col_to_explode
+ ) > lit(0)
+ non_null_rel = rel.filter(not_null_condition).select(
+ *(
+ FunctionExpression("unnest", col_to_explode).alias(name)
+ if name in columns
+ else name
+ for name in original_columns
+ )
+ )
+
+ null_rel = rel.filter(~not_null_condition).select(
+ *(
+ lit(None).alias(name) if name in columns else name
+ for name in original_columns
+ )
+ )
+
+ return self._with_native(non_null_rel.union(null_rel))
+
+ def unpivot(
+ self,
+ on: Sequence[str] | None,
+ index: Sequence[str] | None,
+ variable_name: str,
+ value_name: str,
+ ) -> Self:
+ index_ = [] if index is None else index
+ on_ = [c for c in self.columns if c not in index_] if on is None else on
+
+ if variable_name == "":
+ msg = "`variable_name` cannot be empty string for duckdb backend."
+ raise NotImplementedError(msg)
+
+ if value_name == "":
+ msg = "`value_name` cannot be empty string for duckdb backend."
+ raise NotImplementedError(msg)
+
+ unpivot_on = ", ".join(str(col(name)) for name in on_)
+ rel = self.native # noqa: F841
+ # Replace with Python API once
+ # https://github.com/duckdb/duckdb/discussions/16980 is addressed.
+ query = f"""
+ unpivot rel
+ on {unpivot_on}
+ into
+ name "{variable_name}"
+ value "{value_name}"
+ """
+ return self._with_native(
+ duckdb.sql(query).select(*[*index_, variable_name, value_name])
+ )
+
+ gather_every = not_implemented.deprecated(
+ "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
+ )
+ tail = not_implemented.deprecated(
+ "`LazyFrame.tail` is deprecated and will be removed in a future version."
+ )
+ with_row_index = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr.py
new file mode 100644
index 0000000..b3d55f3
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr.py
@@ -0,0 +1,898 @@
+from __future__ import annotations
+
+import contextlib
+import operator
+from typing import TYPE_CHECKING, Any, Callable, Literal, Sequence, cast
+
+from duckdb import CoalesceOperator, FunctionExpression, StarExpression
+from duckdb.typing import DuckDBPyType
+
+from narwhals._compliant import LazyExpr
+from narwhals._compliant.window import WindowInputs
+from narwhals._duckdb.expr_dt import DuckDBExprDateTimeNamespace
+from narwhals._duckdb.expr_list import DuckDBExprListNamespace
+from narwhals._duckdb.expr_str import DuckDBExprStringNamespace
+from narwhals._duckdb.expr_struct import DuckDBExprStructNamespace
+from narwhals._duckdb.utils import (
+ col,
+ generate_order_by_sql,
+ generate_partition_by_sql,
+ lit,
+ narwhals_to_native_dtype,
+ when,
+)
+from narwhals._expression_parsing import ExprKind
+from narwhals._utils import Implementation, not_implemented, requires
+
+if TYPE_CHECKING:
+ from duckdb import Expression
+ from typing_extensions import Self
+
+ from narwhals._compliant.typing import (
+ AliasNames,
+ EvalNames,
+ EvalSeries,
+ WindowFunction,
+ )
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+ from narwhals._duckdb.namespace import DuckDBNamespace
+ from narwhals._expression_parsing import ExprMetadata
+ from narwhals._utils import Version, _FullContext
+ from narwhals.typing import (
+ FillNullStrategy,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ TemporalLiteral,
+ )
+
+ DuckDBWindowFunction = WindowFunction[DuckDBLazyFrame, Expression]
+ DuckDBWindowInputs = WindowInputs[Expression]
+
+
+with contextlib.suppress(ImportError): # requires duckdb>=1.3.0
+ from duckdb import SQLExpression
+
+
+class DuckDBExpr(LazyExpr["DuckDBLazyFrame", "Expression"]):
+ _implementation = Implementation.DUCKDB
+
+ def __init__(
+ self,
+ call: EvalSeries[DuckDBLazyFrame, Expression],
+ window_function: DuckDBWindowFunction | None = None,
+ *,
+ evaluate_output_names: EvalNames[DuckDBLazyFrame],
+ alias_output_names: AliasNames | None,
+ backend_version: tuple[int, ...],
+ version: Version,
+ ) -> None:
+ self._call = call
+ self._evaluate_output_names = evaluate_output_names
+ self._alias_output_names = alias_output_names
+ self._backend_version = backend_version
+ self._version = version
+ self._metadata: ExprMetadata | None = None
+ self._window_function: DuckDBWindowFunction | None = window_function
+
+ @property
+ def window_function(self) -> DuckDBWindowFunction:
+ def default_window_func(
+ df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
+ ) -> list[Expression]:
+ assert not window_inputs.order_by # noqa: S101
+ partition_by_sql = generate_partition_by_sql(*window_inputs.partition_by)
+ template = f"{{expr}} over ({partition_by_sql})"
+ return [SQLExpression(template.format(expr=expr)) for expr in self(df)]
+
+ return self._window_function or default_window_func
+
+ def __call__(self, df: DuckDBLazyFrame) -> Sequence[Expression]:
+ return self._call(df)
+
+ def __narwhals_expr__(self) -> None: ...
+
+ def __narwhals_namespace__(self) -> DuckDBNamespace: # pragma: no cover
+ # Unused, just for compatibility with PandasLikeExpr
+ from narwhals._duckdb.namespace import DuckDBNamespace
+
+ return DuckDBNamespace(
+ backend_version=self._backend_version, version=self._version
+ )
+
+ def _cum_window_func(
+ self,
+ *,
+ reverse: bool,
+ func_name: Literal["sum", "max", "min", "count", "product"],
+ ) -> DuckDBWindowFunction:
+ def func(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> list[Expression]:
+ order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=not reverse)
+ partition_by_sql = generate_partition_by_sql(*inputs.partition_by)
+ sql = (
+ f"{func_name} ({{expr}}) over ({partition_by_sql} {order_by_sql} "
+ "rows between unbounded preceding and current row)"
+ )
+ return [SQLExpression(sql.format(expr=expr)) for expr in self(df)]
+
+ return func
+
+ def _rolling_window_func(
+ self,
+ *,
+ func_name: Literal["sum", "mean", "std", "var"],
+ center: bool,
+ window_size: int,
+ min_samples: int,
+ ddof: int | None = None,
+ ) -> DuckDBWindowFunction:
+ supported_funcs = ["sum", "mean", "std", "var"]
+ if center:
+ half = (window_size - 1) // 2
+ remainder = (window_size - 1) % 2
+ start = f"{half + remainder} preceding"
+ end = f"{half} following"
+ else:
+ start = f"{window_size - 1} preceding"
+ end = "current row"
+
+ def func(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> list[Expression]:
+ order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=True)
+ partition_by_sql = generate_partition_by_sql(*inputs.partition_by)
+ window = f"({partition_by_sql} {order_by_sql} rows between {start} and {end})"
+ if func_name in {"sum", "mean"}:
+ func_: str = func_name
+ elif func_name == "var" and ddof == 0:
+ func_ = "var_pop"
+ elif func_name in "var" and ddof == 1:
+ func_ = "var_samp"
+ elif func_name == "std" and ddof == 0:
+ func_ = "stddev_pop"
+ elif func_name == "std" and ddof == 1:
+ func_ = "stddev_samp"
+ elif func_name in {"var", "std"}: # pragma: no cover
+ msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}."
+ raise ValueError(msg)
+ else: # pragma: no cover
+ msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}."
+ raise ValueError(msg)
+ condition_sql = f"count({{expr}}) over {window} >= {min_samples}"
+ value_sql = f"{func_}({{expr}}) over {window}"
+ return [
+ when(
+ SQLExpression(condition_sql.format(expr=expr)),
+ SQLExpression(value_sql.format(expr=expr)),
+ )
+ for expr in self(df)
+ ]
+
+ return func
+
+ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
+ if kind is ExprKind.LITERAL:
+ return self
+ if self._backend_version < (1, 3):
+ msg = "At least version 1.3 of DuckDB is required for binary operations between aggregates and columns."
+ raise NotImplementedError(msg)
+ return self.over([lit(1)], [])
+
+ @classmethod
+ def from_column_names(
+ cls,
+ evaluate_column_names: EvalNames[DuckDBLazyFrame],
+ /,
+ *,
+ context: _FullContext,
+ ) -> Self:
+ def func(df: DuckDBLazyFrame) -> list[Expression]:
+ return [col(name) for name in evaluate_column_names(df)]
+
+ return cls(
+ func,
+ evaluate_output_names=evaluate_column_names,
+ alias_output_names=None,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ @classmethod
+ def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
+ def func(df: DuckDBLazyFrame) -> list[Expression]:
+ columns = df.columns
+ return [col(columns[i]) for i in column_indices]
+
+ return cls(
+ func,
+ evaluate_output_names=cls._eval_names_indices(column_indices),
+ alias_output_names=None,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ def _callable_to_eval_series(
+ self, call: Callable[..., Expression], /, **expressifiable_args: Self | Any
+ ) -> EvalSeries[DuckDBLazyFrame, Expression]:
+ def func(df: DuckDBLazyFrame) -> list[Expression]:
+ native_series_list = self(df)
+ other_native_series = {
+ key: df._evaluate_expr(value) if self._is_expr(value) else lit(value)
+ for key, value in expressifiable_args.items()
+ }
+ return [
+ call(native_series, **other_native_series)
+ for native_series in native_series_list
+ ]
+
+ return func
+
    def _push_down_window_function(
        self, call: Callable[..., Expression], /, **expressifiable_args: Self | Any
    ) -> DuckDBWindowFunction:
        """Wrap `call` so it composes with this expression's window function.

        If a function `f` is elementwise, and `g` is another function, then
        - `f(g) over (window)`
        - `f(g over (window))`
        are equivalent, so `f` can be applied after the window is evaluated.
        Make sure to only use this if `call` is elementwise!
        """

        def window_f(
            df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
        ) -> list[Expression]:
            # Evaluate this expression's own window function first, then
            # evaluate expression-valued arguments under the same window;
            # plain values become literals.
            native_series_list = self.window_function(df, window_inputs)
            other_native_series = {
                key: df._evaluate_window_expr(value, window_inputs)
                if self._is_expr(value)
                else lit(value)
                for key, value in expressifiable_args.items()
            }
            return [
                call(native_series, **other_native_series)
                for native_series in native_series_list
            ]

        return window_f
+
    def _with_callable(
        self, call: Callable[..., Expression], /, **expressifiable_args: Self | Any
    ) -> Self:
        """Create expression from callable.

        Arguments:
            call: Callable from a native Expression to a native Expression.
            expressifiable_args: arguments passed to the expression which should
                be parsed as expressions (e.g. in `nw.col('a').is_between('b', 'c')`).
        """
        return self.__class__(
            self._callable_to_eval_series(call, **expressifiable_args),
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )
+
+ def _with_elementwise(
+ self, call: Callable[..., Expression], /, **expressifiable_args: Self | Any
+ ) -> Self:
+ return self.__class__(
+ self._callable_to_eval_series(call, **expressifiable_args),
+ self._push_down_window_function(call, **expressifiable_args),
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def _with_binary(self, op: Callable[..., Expression], other: Self | Any) -> Self:
+ return self.__class__(
+ self._callable_to_eval_series(op, other=other),
+ self._push_down_window_function(op, other=other),
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def _with_alias_output_names(self, func: AliasNames | None, /) -> Self:
+ return type(self)(
+ self._call,
+ self._window_function,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=func,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def _with_window_function(self, window_function: DuckDBWindowFunction) -> Self:
+ return self.__class__(
+ self._call,
+ window_function,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ @classmethod
+ def _alias_native(cls, expr: Expression, name: str) -> Expression:
+ return expr.alias(name)
+
+ def __and__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr & other, other)
+
+ def __or__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr | other, other)
+
+ def __add__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr + other, other)
+
+ def __truediv__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr / other, other)
+
+ def __rtruediv__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(
+ lambda expr, other: other.__truediv__(expr), other
+ ).alias("literal")
+
+ def __floordiv__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr // other, other)
+
+ def __rfloordiv__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(
+ lambda expr, other: other.__floordiv__(expr), other
+ ).alias("literal")
+
+ def __mod__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr % other, other)
+
+ def __rmod__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: other.__mod__(expr), other).alias(
+ "literal"
+ )
+
+ def __sub__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr - other, other)
+
+ def __rsub__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: other.__sub__(expr), other).alias(
+ "literal"
+ )
+
+ def __mul__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr * other, other)
+
+ def __pow__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr**other, other)
+
+ def __rpow__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: other.__pow__(expr), other).alias(
+ "literal"
+ )
+
+ def __lt__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr < other, other)
+
+ def __gt__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr > other, other)
+
+ def __le__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr <= other, other)
+
+ def __ge__(self, other: DuckDBExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr >= other, other)
+
+ def __eq__(self, other: DuckDBExpr) -> Self: # type: ignore[override]
+ return self._with_binary(lambda expr, other: expr == other, other)
+
+ def __ne__(self, other: DuckDBExpr) -> Self: # type: ignore[override]
+ return self._with_binary(lambda expr, other: expr != other, other)
+
+ def __invert__(self) -> Self:
+ invert = cast("Callable[..., Expression]", operator.invert)
+ return self._with_elementwise(invert)
+
+ def abs(self) -> Self:
+ return self._with_elementwise(lambda expr: FunctionExpression("abs", expr))
+
+ def mean(self) -> Self:
+ return self._with_callable(lambda expr: FunctionExpression("mean", expr))
+
    def skew(self) -> Self:
        """Aggregate to the skewness of the expression.

        Edge cases are handled explicitly: null for 0 rows, NaN for 1 row,
        0.0 for 2 rows; otherwise the corrected skewness below.
        """

        def func(expr: Expression) -> Expression:
            count = FunctionExpression("count", expr)
            # Adjust population skewness by correction factor to get sample
            # skewness (presumably DuckDB's `skewness` differs from the target
            # definition by exactly this factor — TODO confirm vs DuckDB docs).
            sample_skewness = (
                FunctionExpression("skewness", expr)
                * (count - lit(2))
                / FunctionExpression("sqrt", count * (count - lit(1)))
            )
            return when(count == lit(0), lit(None)).otherwise(
                when(count == lit(1), lit(float("nan"))).otherwise(
                    when(count == lit(2), lit(0.0)).otherwise(sample_skewness)
                )
            )

        return self._with_callable(func)
+
+ def median(self) -> Self:
+ return self._with_callable(lambda expr: FunctionExpression("median", expr))
+
+ def all(self) -> Self:
+ def f(expr: Expression) -> Expression:
+ return CoalesceOperator(FunctionExpression("bool_and", expr), lit(True)) # noqa: FBT003
+
+ def window_f(
+ df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
+ ) -> list[Expression]:
+ pb = generate_partition_by_sql(*window_inputs.partition_by)
+ return [
+ CoalesceOperator(
+ SQLExpression(f"{FunctionExpression('bool_and', expr)} over ({pb})"),
+ lit(True), # noqa: FBT003
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_callable(f)._with_window_function(window_f)
+
+ def any(self) -> Self:
+ def f(expr: Expression) -> Expression:
+ return CoalesceOperator(FunctionExpression("bool_or", expr), lit(False)) # noqa: FBT003
+
+ def window_f(
+ df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
+ ) -> list[Expression]:
+ pb = generate_partition_by_sql(*window_inputs.partition_by)
+ return [
+ CoalesceOperator(
+ SQLExpression(f"{FunctionExpression('bool_or', expr)} over ({pb})"),
+ lit(False), # noqa: FBT003
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_callable(f)._with_window_function(window_f)
+
+ def quantile(
+ self, quantile: float, interpolation: RollingInterpolationMethod
+ ) -> Self:
+ def func(expr: Expression) -> Expression:
+ if interpolation == "linear":
+ return FunctionExpression("quantile_cont", expr, lit(quantile))
+ msg = "Only linear interpolation methods are supported for DuckDB quantile."
+ raise NotImplementedError(msg)
+
+ return self._with_callable(func)
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None,
+ ) -> Self:
+ def _clip_lower(expr: Expression, lower_bound: Any) -> Expression:
+ return FunctionExpression("greatest", expr, lower_bound)
+
+ def _clip_upper(expr: Expression, upper_bound: Any) -> Expression:
+ return FunctionExpression("least", expr, upper_bound)
+
+ def _clip_both(
+ expr: Expression, lower_bound: Any, upper_bound: Any
+ ) -> Expression:
+ return FunctionExpression(
+ "greatest", FunctionExpression("least", expr, upper_bound), lower_bound
+ )
+
+ if lower_bound is None:
+ return self._with_elementwise(_clip_upper, upper_bound=upper_bound)
+ if upper_bound is None:
+ return self._with_elementwise(_clip_lower, lower_bound=lower_bound)
+ return self._with_elementwise(
+ _clip_both, lower_bound=lower_bound, upper_bound=upper_bound
+ )
+
+ def sum(self) -> Self:
+ def f(expr: Expression) -> Expression:
+ return CoalesceOperator(FunctionExpression("sum", expr), lit(0))
+
+ def window_f(
+ df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
+ ) -> list[Expression]:
+ pb = generate_partition_by_sql(*window_inputs.partition_by)
+ return [
+ CoalesceOperator(
+ SQLExpression(f"{FunctionExpression('sum', expr)} over ({pb})"),
+ lit(0),
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_callable(f)._with_window_function(window_f)
+
+ def n_unique(self) -> Self:
+ def func(expr: Expression) -> Expression:
+ # https://stackoverflow.com/a/79338887/4451315
+ return FunctionExpression(
+ "array_unique", FunctionExpression("array_agg", expr)
+ ) + FunctionExpression(
+ "max", when(expr.isnotnull(), lit(0)).otherwise(lit(1))
+ )
+
+ return self._with_callable(func)
+
+ def count(self) -> Self:
+ return self._with_callable(lambda expr: FunctionExpression("count", expr))
+
+ def len(self) -> Self:
+ return self._with_callable(lambda _expr: FunctionExpression("count"))
+
    def std(self, ddof: int) -> Self:
        """Standard deviation with `ddof` delta degrees of freedom.

        ddof=0 and ddof=1 map directly to DuckDB's `stddev_pop` /
        `stddev_samp`; other values are derived by rescaling the population
        standard deviation: stddev_pop * sqrt(n) / sqrt(n - ddof).
        """
        if ddof == 0:
            return self._with_callable(
                lambda expr: FunctionExpression("stddev_pop", expr)
            )
        if ddof == 1:
            return self._with_callable(
                lambda expr: FunctionExpression("stddev_samp", expr)
            )

        def _std(expr: Expression) -> Expression:
            # Generic ddof: rescale the population standard deviation.
            n_samples = FunctionExpression("count", expr)
            return (
                FunctionExpression("stddev_pop", expr)
                * FunctionExpression("sqrt", n_samples)
                / (FunctionExpression("sqrt", (n_samples - lit(ddof))))
            )

        return self._with_callable(_std)
+
    def var(self, ddof: int) -> Self:
        """Variance with `ddof` delta degrees of freedom.

        ddof=0 and ddof=1 map directly to DuckDB's `var_pop` / `var_samp`;
        other values rescale the population variance by n / (n - ddof).
        """
        if ddof == 0:
            return self._with_callable(lambda expr: FunctionExpression("var_pop", expr))
        if ddof == 1:
            return self._with_callable(lambda expr: FunctionExpression("var_samp", expr))

        def _var(expr: Expression) -> Expression:
            # Generic ddof: rescale the population variance.
            n_samples = FunctionExpression("count", expr)
            return (
                FunctionExpression("var_pop", expr) * n_samples / (n_samples - lit(ddof))
            )

        return self._with_callable(_var)
+
+ def max(self) -> Self:
+ return self._with_callable(lambda expr: FunctionExpression("max", expr))
+
+ def min(self) -> Self:
+ return self._with_callable(lambda expr: FunctionExpression("min", expr))
+
+ def null_count(self) -> Self:
+ return self._with_callable(
+ lambda expr: FunctionExpression("sum", expr.isnull().cast("int"))
+ )
+
+ @requires.backend_version((1, 3))
+ def over(
+ self, partition_by: Sequence[str | Expression], order_by: Sequence[str]
+ ) -> Self:
+ def func(df: DuckDBLazyFrame) -> Sequence[Expression]:
+ return self.window_function(df, WindowInputs(partition_by, order_by))
+
+ return self.__class__(
+ func,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def is_null(self) -> Self:
+ return self._with_elementwise(lambda expr: expr.isnull())
+
+ def is_nan(self) -> Self:
+ return self._with_elementwise(lambda expr: FunctionExpression("isnan", expr))
+
+ def is_finite(self) -> Self:
+ return self._with_elementwise(lambda expr: FunctionExpression("isfinite", expr))
+
+ def is_in(self, other: Sequence[Any]) -> Self:
+ return self._with_elementwise(
+ lambda expr: FunctionExpression("contains", lit(other), expr)
+ )
+
+ def round(self, decimals: int) -> Self:
+ return self._with_elementwise(
+ lambda expr: FunctionExpression("round", expr, lit(decimals))
+ )
+
+ @requires.backend_version((1, 3))
+ def shift(self, n: int) -> Self:
+ def func(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> Sequence[Expression]:
+ order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=True)
+ partition_by_sql = generate_partition_by_sql(*inputs.partition_by)
+ sql = f"lag({{expr}}, {n}) over ({partition_by_sql} {order_by_sql})"
+ return [SQLExpression(sql.format(expr=expr)) for expr in self(df)]
+
+ return self._with_window_function(func)
+
+ @requires.backend_version((1, 3))
+ def is_first_distinct(self) -> Self:
+ def func(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> Sequence[Expression]:
+ order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=True)
+ if inputs.partition_by:
+ partition_by_sql = (
+ generate_partition_by_sql(*inputs.partition_by) + ", {expr}"
+ )
+ else:
+ partition_by_sql = "partition by {expr}"
+ sql = (
+ f"{FunctionExpression('row_number')} "
+ f"over({partition_by_sql} {order_by_sql})"
+ )
+ return [SQLExpression(sql.format(expr=expr)) == lit(1) for expr in self(df)]
+
+ return self._with_window_function(func)
+
+ @requires.backend_version((1, 3))
+ def is_last_distinct(self) -> Self:
+ def func(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> Sequence[Expression]:
+ order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=False)
+ if inputs.partition_by:
+ partition_by_sql = (
+ generate_partition_by_sql(*inputs.partition_by) + ", {expr}"
+ )
+ else:
+ partition_by_sql = "partition by {expr}"
+ sql = (
+ f"{FunctionExpression('row_number')} "
+ f"over({partition_by_sql} {order_by_sql})"
+ )
+ return [SQLExpression(sql.format(expr=expr)) == lit(1) for expr in self(df)]
+
+ return self._with_window_function(func)
+
+ @requires.backend_version((1, 3))
+ def diff(self) -> Self:
+ def func(df: DuckDBLazyFrame, inputs: DuckDBWindowInputs) -> list[Expression]:
+ order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=True)
+ partition_by_sql = generate_partition_by_sql(*inputs.partition_by)
+ sql = f"lag({{expr}}) over ({partition_by_sql} {order_by_sql})"
+ return [expr - SQLExpression(sql.format(expr=expr)) for expr in self(df)]
+
+ return self._with_window_function(func)
+
+ @requires.backend_version((1, 3))
+ def cum_sum(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="sum")
+ )
+
+ @requires.backend_version((1, 3))
+ def cum_max(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="max")
+ )
+
+ @requires.backend_version((1, 3))
+ def cum_min(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="min")
+ )
+
+ @requires.backend_version((1, 3))
+ def cum_count(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="count")
+ )
+
+ @requires.backend_version((1, 3))
+ def cum_prod(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="product")
+ )
+
+ @requires.backend_version((1, 3))
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="sum",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ )
+ )
+
+ @requires.backend_version((1, 3))
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="mean",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ )
+ )
+
+ @requires.backend_version((1, 3))
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="var",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ ddof=ddof,
+ )
+ )
+
+ @requires.backend_version((1, 3))
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="std",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ ddof=ddof,
+ )
+ )
+
    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self:
        """Fill nulls with a constant, or by a forward/backward strategy.

        Arguments:
            value: Replacement value (or expression), used when `strategy` is None.
            strategy: "forward" fills from the previous non-null value,
                otherwise fills from the next one.
            limit: Maximum fill distance for strategy-based fills; None is unbounded.

        Raises:
            NotImplementedError: Strategy-based fill requires duckdb>=1.3.0.
        """
        if strategy is not None:
            if self._backend_version < (1, 3):  # pragma: no cover
                msg = f"`fill_null` with `strategy={strategy}` is only available in 'duckdb>=1.3.0'."
                raise NotImplementedError(msg)

            def _fill_with_strategy(
                df: DuckDBLazyFrame, inputs: DuckDBWindowInputs
            ) -> Sequence[Expression]:
                order_by_sql = generate_order_by_sql(*inputs.order_by, ascending=True)
                partition_by_sql = generate_partition_by_sql(*inputs.partition_by)

                # forward: last non-null value looking backwards over the frame;
                # backward: first non-null value looking forwards.
                fill_func = "last_value" if strategy == "forward" else "first_value"
                _limit = "unbounded" if limit is None else limit
                rows_between = (
                    f"{_limit} preceding and current row"
                    if strategy == "forward"
                    else f"current row and {_limit} following"
                )
                sql = (
                    f"{fill_func}({{expr}} ignore nulls) over "
                    f"({partition_by_sql} {order_by_sql} rows between {rows_between})"
                )
                return [SQLExpression(sql.format(expr=expr)) for expr in self(df)]

            return self._with_window_function(_fill_with_strategy)

        def _fill_constant(expr: Expression, value: Any) -> Expression:
            # Coalesce keeps the original value and substitutes `value` for nulls.
            return CoalesceOperator(expr, value)

        return self._with_elementwise(_fill_constant, value=value)
+
+ def cast(self, dtype: IntoDType) -> Self:
+ def func(expr: Expression) -> Expression:
+ native_dtype = narwhals_to_native_dtype(dtype, self._version)
+ return expr.cast(DuckDBPyType(native_dtype))
+
+ return self._with_elementwise(func)
+
+ @requires.backend_version((1, 3))
+ def is_unique(self) -> Self:
+ def _is_unique(expr: Expression, *partition_by: str | Expression) -> Expression:
+ pb = generate_partition_by_sql(expr, *partition_by)
+ sql = f"{FunctionExpression('count', col('*'))} over ({pb})"
+ return SQLExpression(sql) == lit(1)
+
+ def _unpartitioned_is_unique(expr: Expression) -> Expression:
+ return _is_unique(expr)
+
+ def _partitioned_is_unique(
+ df: DuckDBLazyFrame, inputs: DuckDBWindowInputs
+ ) -> Sequence[Expression]:
+ assert not inputs.order_by # noqa: S101
+ return [_is_unique(expr, *inputs.partition_by) for expr in self(df)]
+
+ return self._with_callable(_unpartitioned_is_unique)._with_window_function(
+ _partitioned_is_unique
+ )
+
+ @requires.backend_version((1, 3))
+ def rank(self, method: RankMethod, *, descending: bool) -> Self:
+ if method in {"min", "max", "average"}:
+ func = FunctionExpression("rank")
+ elif method == "dense":
+ func = FunctionExpression("dense_rank")
+ else: # method == "ordinal"
+ func = FunctionExpression("row_number")
+
+ def _rank(
+ expr: Expression,
+ *,
+ descending: bool,
+ partition_by: Sequence[str | Expression] | None = None,
+ ) -> Expression:
+ order_by_sql = (
+ f"order by {expr} desc nulls last"
+ if descending
+ else f"order by {expr} asc nulls last"
+ )
+ count_expr = FunctionExpression("count", StarExpression())
+ if partition_by is not None:
+ window = f"{generate_partition_by_sql(*partition_by)} {order_by_sql}"
+ count_window = f"{generate_partition_by_sql(*partition_by, expr)}"
+ else:
+ window = order_by_sql
+ count_window = generate_partition_by_sql(expr)
+ if method == "max":
+ rank_expr = (
+ SQLExpression(f"{func} OVER ({window})")
+ + SQLExpression(f"{count_expr} over ({count_window})")
+ - lit(1)
+ )
+ elif method == "average":
+ rank_expr = SQLExpression(f"{func} OVER ({window})") + (
+ SQLExpression(f"{count_expr} over ({count_window})") - lit(1)
+ ) / lit(2.0)
+ else:
+ rank_expr = SQLExpression(f"{func} OVER ({window})")
+ return when(expr.isnotnull(), rank_expr)
+
+ def _unpartitioned_rank(expr: Expression) -> Expression:
+ return _rank(expr, descending=descending)
+
+ def _partitioned_rank(
+ df: DuckDBLazyFrame, inputs: DuckDBWindowInputs
+ ) -> Sequence[Expression]:
+ assert not inputs.order_by # noqa: S101
+ return [
+ _rank(expr, descending=descending, partition_by=inputs.partition_by)
+ for expr in self(df)
+ ]
+
+ return self._with_callable(_unpartitioned_rank)._with_window_function(
+ _partitioned_rank
+ )
+
+ def log(self, base: float) -> Self:
+ def _log(expr: Expression) -> Expression:
+ log = FunctionExpression("log", expr)
+ return (
+ when(expr < lit(0), lit(float("nan")))
+ .when(expr == lit(0), lit(float("-inf")))
+ .otherwise(log / FunctionExpression("log", lit(base)))
+ )
+
+ return self._with_elementwise(_log)
+
+ def exp(self) -> Self:
+ def _exp(expr: Expression) -> Expression:
+ return FunctionExpression("exp", expr)
+
+ return self._with_elementwise(_exp)
+
+ @property
+ def str(self) -> DuckDBExprStringNamespace:
+ return DuckDBExprStringNamespace(self)
+
+ @property
+ def dt(self) -> DuckDBExprDateTimeNamespace:
+ return DuckDBExprDateTimeNamespace(self)
+
+ @property
+ def list(self) -> DuckDBExprListNamespace:
+ return DuckDBExprListNamespace(self)
+
+ @property
+ def struct(self) -> DuckDBExprStructNamespace:
+ return DuckDBExprStructNamespace(self)
+
+ drop_nulls = not_implemented()
+ unique = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_dt.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_dt.py
new file mode 100644
index 0000000..68f0f9b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_dt.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from duckdb import FunctionExpression
+
+from narwhals._duckdb.utils import UNITS_DICT, fetch_rel_time_zone, lit
+from narwhals._duration import parse_interval_string
+from narwhals._utils import not_implemented
+
+if TYPE_CHECKING:
+ from duckdb import Expression
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+ from narwhals._duckdb.expr import DuckDBExpr
+
+
class DuckDBExprDateTimeNamespace:
    """Datetime accessor namespace (`Expr.dt`) for the DuckDB backend.

    Each method wraps the underlying expression in the corresponding
    DuckDB date/time function and returns a new `DuckDBExpr`.
    """

    def __init__(self, expr: DuckDBExpr) -> None:
        self._compliant_expr = expr

    def year(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("year", expr)
        )

    def month(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("month", expr)
        )

    def day(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("day", expr)
        )

    def hour(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("hour", expr)
        )

    def minute(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("minute", expr)
        )

    def second(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("second", expr)
        )

    def millisecond(self) -> DuckDBExpr:
        # DuckDB's `millisecond` part includes whole seconds; subtract them
        # to get only the fractional (0-999) component.
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("millisecond", expr)
            - FunctionExpression("second", expr) * lit(1_000)
        )

    def microsecond(self) -> DuckDBExpr:
        # Same pattern as `millisecond`: strip the whole-second part.
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("microsecond", expr)
            - FunctionExpression("second", expr) * lit(1_000_000)
        )

    def nanosecond(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("nanosecond", expr)
            - FunctionExpression("second", expr) * lit(1_000_000_000)
        )

    def to_string(self, format: str) -> DuckDBExpr:
        # `format` shadows the builtin, but renaming would break keyword callers.
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("strftime", expr, lit(format))
        )

    def weekday(self) -> DuckDBExpr:
        # `isodow`: ISO day of week, Monday=1 .. Sunday=7.
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("isodow", expr)
        )

    def ordinal_day(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("dayofyear", expr)
        )

    def date(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.cast("date"))

    def total_minutes(self) -> DuckDBExpr:
        # NOTE(review): extracts only the `minute` component of the interval —
        # assumes larger components are normalized upstream; TODO confirm.
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("datepart", lit("minute"), expr)
        )

    def total_seconds(self) -> DuckDBExpr:
        # Combines minute and second components only (see note in total_minutes).
        return self._compliant_expr._with_callable(
            lambda expr: lit(60) * FunctionExpression("datepart", lit("minute"), expr)
            + FunctionExpression("datepart", lit("second"), expr)
        )

    def total_milliseconds(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: lit(60_000) * FunctionExpression("datepart", lit("minute"), expr)
            + FunctionExpression("datepart", lit("millisecond"), expr)
        )

    def total_microseconds(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: lit(60_000_000)
            * FunctionExpression("datepart", lit("minute"), expr)
            + FunctionExpression("datepart", lit("microsecond"), expr)
        )

    def truncate(self, every: str) -> DuckDBExpr:
        """Truncate datetimes down to the window given by `every` (e.g. "1mo")."""
        multiple, unit = parse_interval_string(every)
        if multiple != 1:
            # https://github.com/duckdb/duckdb/issues/17554
            msg = f"Only multiple 1 is currently supported for DuckDB.\nGot {multiple!s}."
            raise ValueError(msg)
        if unit == "ns":
            msg = "Truncating to nanoseconds is not yet supported for DuckDB."
            raise NotImplementedError(msg)
        format = lit(UNITS_DICT[unit])

        def _truncate(expr: Expression) -> Expression:
            return FunctionExpression("date_trunc", format, expr)

        return self._compliant_expr._with_callable(_truncate)

    def _no_op_time_zone(self, time_zone: str) -> DuckDBExpr:
        """Return the expression unchanged, but raise at evaluation time unless
        `time_zone` matches the connection's time zone (DuckDB stores the time
        zone on the connection, not on the data type)."""

        def func(df: DuckDBLazyFrame) -> Sequence[Expression]:
            native_series_list = self._compliant_expr(df)
            conn_time_zone = fetch_rel_time_zone(df.native)
            if conn_time_zone != time_zone:
                msg = (
                    "DuckDB stores the time zone in the connection, rather than in the "
                    f"data type, so changing the timezone to anything other than {conn_time_zone} "
                    " (the current connection time zone) is not supported."
                )
                raise NotImplementedError(msg)
            return native_series_list

        return self._compliant_expr.__class__(
            func,
            evaluate_output_names=self._compliant_expr._evaluate_output_names,
            alias_output_names=self._compliant_expr._alias_output_names,
            backend_version=self._compliant_expr._backend_version,
            version=self._compliant_expr._version,
        )

    def convert_time_zone(self, time_zone: str) -> DuckDBExpr:
        return self._no_op_time_zone(time_zone)

    def replace_time_zone(self, time_zone: str | None) -> DuckDBExpr:
        if time_zone is None:
            # Casting to tz-naive `timestamp` drops the time zone.
            return self._compliant_expr._with_callable(
                lambda _input: _input.cast("timestamp")
            )
        else:
            return self._no_op_time_zone(time_zone)

    # Not (yet) implemented for this backend.
    total_nanoseconds = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_list.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_list.py
new file mode 100644
index 0000000..60562e5
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_list.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from duckdb import FunctionExpression
+
+if TYPE_CHECKING:
+ from narwhals._duckdb.expr import DuckDBExpr
+
+
class DuckDBExprListNamespace:
    """Implements `Expr.list` methods for the DuckDB backend."""

    def __init__(self, expr: DuckDBExpr) -> None:
        self._compliant_expr = expr

    def len(self) -> DuckDBExpr:
        """Length of each list value, via DuckDB's `len` function."""

        def _list_len(native):
            return FunctionExpression("len", native)

        return self._compliant_expr._with_callable(_list_len)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_str.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_str.py
new file mode 100644
index 0000000..6a9d8c2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_str.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from duckdb import FunctionExpression
+
+from narwhals._duckdb.utils import lit
+from narwhals._utils import not_implemented
+
+if TYPE_CHECKING:
+ from duckdb import Expression
+
+ from narwhals._duckdb.expr import DuckDBExpr
+
+
class DuckDBExprStringNamespace:
    """Implements `Expr.str` methods for the DuckDB backend."""

    def __init__(self, expr: DuckDBExpr) -> None:
        self._compliant_expr = expr

    def starts_with(self, prefix: str) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("starts_with", expr, lit(prefix))
        )

    def ends_with(self, suffix: str) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("ends_with", expr, lit(suffix))
        )

    def contains(self, pattern: str, *, literal: bool) -> DuckDBExpr:
        def func(expr: Expression) -> Expression:
            # `literal=True` is a plain substring test; otherwise `pattern`
            # is treated as a regular expression.
            if literal:
                return FunctionExpression("contains", expr, lit(pattern))
            return FunctionExpression("regexp_matches", expr, lit(pattern))

        return self._compliant_expr._with_callable(func)

    def slice(self, offset: int, length: int | None) -> DuckDBExpr:
        # NOTE(review): annotation widened to `int | None` -- the body
        # explicitly handles `length is None` (slice to end of string).
        def func(expr: Expression) -> Expression:
            offset_lit = lit(offset)
            return FunctionExpression(
                "array_slice",
                expr,
                # DuckDB's array_slice is 1-based; negative offsets count from the end.
                lit(offset + 1)
                if offset >= 0
                else FunctionExpression("length", expr) + offset_lit + lit(1),
                FunctionExpression("length", expr)
                if length is None
                else lit(length) + offset_lit,
            )

        return self._compliant_expr._with_callable(func)

    def split(self, by: str) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("str_split", expr, lit(by))
        )

    def len_chars(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("length", expr)
        )

    def to_lowercase(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("lower", expr)
        )

    def to_uppercase(self) -> DuckDBExpr:
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("upper", expr)
        )

    def strip_chars(self, characters: str | None) -> DuckDBExpr:
        import string

        # `None` strips the `string.whitespace` character set.
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression(
                "trim", expr, lit(string.whitespace if characters is None else characters)
            )
        )

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> DuckDBExpr:
        if not literal:
            # The 'g' flag makes regexp_replace replace every match, not just the first.
            return self._compliant_expr._with_callable(
                lambda expr: FunctionExpression(
                    "regexp_replace", expr, lit(pattern), lit(value), lit("g")
                )
            )
        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("replace", expr, lit(pattern), lit(value))
        )

    def to_datetime(self, format: str | None) -> DuckDBExpr:
        if format is None:
            msg = "Cannot infer format with DuckDB backend, please specify `format` explicitly."
            raise NotImplementedError(msg)

        return self._compliant_expr._with_callable(
            lambda expr: FunctionExpression("strptime", expr, lit(format))
        )

    # `str.replace` is not implemented for the DuckDB backend.
    replace = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_struct.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_struct.py
new file mode 100644
index 0000000..3124204
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/expr_struct.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from duckdb import FunctionExpression
+
+from narwhals._duckdb.utils import lit
+
+if TYPE_CHECKING:
+ from narwhals._duckdb.expr import DuckDBExpr
+
+
class DuckDBExprStructNamespace:
    """Implements `Expr.struct` methods for the DuckDB backend."""

    def __init__(self, expr: DuckDBExpr) -> None:
        self._compliant_expr = expr

    def field(self, name: str) -> DuckDBExpr:
        """Extract struct field `name`, aliasing the result to that name."""

        def _extract(native):
            return FunctionExpression("struct_extract", native, lit(name))

        return self._compliant_expr._with_callable(_extract).alias(name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/group_by.py
new file mode 100644
index 0000000..8fa2978
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/group_by.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from itertools import chain
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._compliant import LazyGroupBy
+
+if TYPE_CHECKING:
+ from duckdb import Expression # noqa: F401
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+ from narwhals._duckdb.expr import DuckDBExpr
+
+
class DuckDBGroupBy(LazyGroupBy["DuckDBLazyFrame", "DuckDBExpr", "Expression"]):
    """Lazy group-by for the DuckDB backend."""

    def __init__(
        self,
        df: DuckDBLazyFrame,
        keys: Sequence[DuckDBExpr] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        # `_parse_keys` normalises expression keys to column names and records
        # the user-facing output names so `agg` can restore them afterwards.
        frame, self._keys, self._output_key_names = self._parse_keys(df, keys=keys)
        self._compliant_frame = frame.drop_nulls(self._keys) if drop_null_keys else frame

    def agg(self, *exprs: DuckDBExpr) -> DuckDBLazyFrame:
        """Aggregate with native DuckDB expressions, keys first, then rename keys back."""
        agg_columns = list(chain(self._keys, self._evaluate_exprs(exprs)))
        return self.compliant._with_native(
            self.compliant.native.aggregate(agg_columns)  # type: ignore[arg-type]
        ).rename(dict(zip(self._keys, self._output_key_names)))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/namespace.py
new file mode 100644
index 0000000..3616f2e
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/namespace.py
@@ -0,0 +1,207 @@
+from __future__ import annotations
+
+import operator
+from functools import reduce
+from itertools import chain
+from typing import TYPE_CHECKING, Callable, Iterable, Sequence
+
+import duckdb
+from duckdb import CoalesceOperator, Expression, FunctionExpression
+from duckdb.typing import BIGINT, VARCHAR
+
+from narwhals._compliant import LazyNamespace, LazyThen, LazyWhen
+from narwhals._duckdb.dataframe import DuckDBLazyFrame
+from narwhals._duckdb.expr import DuckDBExpr
+from narwhals._duckdb.selectors import DuckDBSelectorNamespace
+from narwhals._duckdb.utils import concat_str, lit, narwhals_to_native_dtype, when
+from narwhals._expression_parsing import (
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._utils import Implementation
+
+if TYPE_CHECKING:
+ from narwhals._duckdb.expr import DuckDBWindowInputs
+ from narwhals._utils import Version
+ from narwhals.typing import ConcatMethod, IntoDType, NonNestedLiteral
+
+
class DuckDBNamespace(
    LazyNamespace[DuckDBLazyFrame, DuckDBExpr, duckdb.DuckDBPyRelation]
):
    """Top-level namespace for the DuckDB backend.

    Provides constructors (`lit`, `len`, `when`) and horizontal reductions
    over `DuckDBExpr` columns.
    """

    _implementation: Implementation = Implementation.DUCKDB

    def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
        self._backend_version = backend_version
        self._version = version

    @property
    def selectors(self) -> DuckDBSelectorNamespace:
        return DuckDBSelectorNamespace.from_namespace(self)

    @property
    def _expr(self) -> type[DuckDBExpr]:
        return DuckDBExpr

    @property
    def _lazyframe(self) -> type[DuckDBLazyFrame]:
        return DuckDBLazyFrame

    def _with_elementwise(
        self, func: Callable[[Iterable[Expression]], Expression], *exprs: DuckDBExpr
    ) -> DuckDBExpr:
        """Fold the columns of `exprs` into a single column with `func`.

        Builds both the plain and the windowed evaluation paths so the result
        also works inside `over(...)` contexts.
        """

        def call(df: DuckDBLazyFrame) -> list[Expression]:
            cols = (col for _expr in exprs for col in _expr(df))
            return [func(cols)]

        def window_function(
            df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
        ) -> list[Expression]:
            cols = (
                col for _expr in exprs for col in _expr.window_function(df, window_inputs)
            )
            return [func(cols)]

        return self._expr(
            call=call,
            window_function=window_function,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            backend_version=self._backend_version,
            version=self._version,
        )

    def concat(
        self, items: Iterable[DuckDBLazyFrame], *, how: ConcatMethod
    ) -> DuckDBLazyFrame:
        """Concatenate frames via DuckDB `union`.

        Raises:
            TypeError: If `how="vertical"` and the schemas don't all match.
        """
        # Materialize first: `items` may be a one-shot iterator, and it is
        # iterated more than once below. (Previously `native_items` was built
        # before `list(items)`, which exhausted generator inputs and made
        # `items[0]` fail.)
        items = list(items)
        native_items = [item._native_frame for item in items]
        first = items[0]
        schema = first.schema
        if how == "vertical" and not all(x.schema == schema for x in items[1:]):
            msg = "inputs should all have the same schema"
            raise TypeError(msg)
        res = reduce(lambda x, y: x.union(y), native_items)
        return first._with_native(res)

    def concat_str(
        self, *exprs: DuckDBExpr, separator: str, ignore_nulls: bool
    ) -> DuckDBExpr:
        """Horizontally concatenate string columns with `separator`."""

        def func(df: DuckDBLazyFrame) -> list[Expression]:
            cols = list(chain.from_iterable(expr(df) for expr in exprs))
            if not ignore_nulls:
                # Result must be NULL whenever any input is NULL, so mask the row.
                null_mask_result = reduce(operator.or_, (s.isnull() for s in cols))
                # Interleave the separator between columns (but not after the last).
                cols_separated = [
                    y
                    for x in [
                        (col.cast(VARCHAR),)
                        if i == len(cols) - 1
                        else (col.cast(VARCHAR), lit(separator))
                        for i, col in enumerate(cols)
                    ]
                    for y in x
                ]
                return [when(~null_mask_result, concat_str(*cols_separated))]
            else:
                return [concat_str(*cols, separator=separator)]

        return self._expr(
            call=func,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            backend_version=self._backend_version,
            version=self._version,
        )

    def all_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
        """Row-wise AND across `exprs`."""

        def func(cols: Iterable[Expression]) -> Expression:
            return reduce(operator.and_, cols)

        return self._with_elementwise(func, *exprs)

    def any_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
        """Row-wise OR across `exprs`."""

        def func(cols: Iterable[Expression]) -> Expression:
            return reduce(operator.or_, cols)

        return self._with_elementwise(func, *exprs)

    def max_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
        """Row-wise maximum across `exprs` (DuckDB `greatest`)."""

        def func(cols: Iterable[Expression]) -> Expression:
            return FunctionExpression("greatest", *cols)

        return self._with_elementwise(func, *exprs)

    def min_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
        """Row-wise minimum across `exprs` (DuckDB `least`)."""

        def func(cols: Iterable[Expression]) -> Expression:
            return FunctionExpression("least", *cols)

        return self._with_elementwise(func, *exprs)

    def sum_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
        """Row-wise sum across `exprs`, treating NULL as 0."""

        def func(cols: Iterable[Expression]) -> Expression:
            return reduce(operator.add, (CoalesceOperator(col, lit(0)) for col in cols))

        return self._with_elementwise(func, *exprs)

    def mean_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
        """Row-wise mean across `exprs`; NULLs contribute neither to sum nor count."""

        def func(cols: Iterable[Expression]) -> Expression:
            cols = list(cols)
            return reduce(
                operator.add, (CoalesceOperator(col, lit(0)) for col in cols)
            ) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols))

        return self._with_elementwise(func, *exprs)

    def when(self, predicate: DuckDBExpr) -> DuckDBWhen:
        return DuckDBWhen.from_expr(predicate, context=self)

    def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> DuckDBExpr:
        """Scalar literal expression named 'literal', optionally cast to `dtype`."""

        def func(_df: DuckDBLazyFrame) -> list[Expression]:
            if dtype is not None:
                return [
                    lit(value).cast(
                        narwhals_to_native_dtype(dtype, version=self._version)  # type: ignore[arg-type]
                    )
                ]
            return [lit(value)]

        return self._expr(
            func,
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            backend_version=self._backend_version,
            version=self._version,
        )

    def len(self) -> DuckDBExpr:
        """Row count expression named 'len' (DuckDB `count()`)."""

        def func(_df: DuckDBLazyFrame) -> list[Expression]:
            return [FunctionExpression("count")]

        return self._expr(
            call=func,
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            backend_version=self._backend_version,
            version=self._version,
        )
+
+
class DuckDBWhen(LazyWhen["DuckDBLazyFrame", Expression, DuckDBExpr]):
    """`when/then/otherwise` support for the DuckDB backend."""

    @property
    def _then(self) -> type[DuckDBThen]:
        return DuckDBThen

    def __call__(self, df: DuckDBLazyFrame) -> Sequence[Expression]:
        # Inject DuckDB's native case/constant builders before delegating to
        # the generic `LazyWhen` implementation.
        self.when = when
        self.lit = lit
        return super().__call__(df)

    def _window_function(
        self, df: DuckDBLazyFrame, window_inputs: DuckDBWindowInputs
    ) -> Sequence[Expression]:
        # Same injection for the windowed evaluation path.
        self.when = when
        self.lit = lit
        return super()._window_function(df, window_inputs)
+
+
+class DuckDBThen(LazyThen["DuckDBLazyFrame", Expression, DuckDBExpr], DuckDBExpr): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/selectors.py
new file mode 100644
index 0000000..ea1c6ba
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/selectors.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, LazySelectorNamespace
+from narwhals._duckdb.expr import DuckDBExpr
+
+if TYPE_CHECKING:
+ from duckdb import Expression # noqa: F401
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame # noqa: F401
+
+
class DuckDBSelectorNamespace(LazySelectorNamespace["DuckDBLazyFrame", "Expression"]):
    """Implements `narwhals.selectors` for the DuckDB backend."""

    @property
    def _selector(self) -> type[DuckDBSelector]:
        return DuckDBSelector
+
+
class DuckDBSelector(  # type: ignore[misc]
    CompliantSelector["DuckDBLazyFrame", "Expression"], DuckDBExpr
):
    """Selector expression for DuckDB; `_to_expr` converts it to a plain expression."""

    def _to_expr(self) -> DuckDBExpr:
        # Re-wrap the same call/window machinery without the selector semantics.
        return DuckDBExpr(
            self._call,
            self._window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/series.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/series.py
new file mode 100644
index 0000000..5b284b9
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/series.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._duckdb.utils import DeferredTimeZone, native_to_narwhals_dtype
+from narwhals.dependencies import get_duckdb
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import duckdb
+ from typing_extensions import Never, Self
+
+ from narwhals._utils import Version
+ from narwhals.dtypes import DType
+
+
class DuckDBInterchangeSeries:
    """Interchange-level wrapper around a DuckDB relation.

    Only `dtype` is supported; any other attribute access raises.
    """

    def __init__(self, df: duckdb.DuckDBPyRelation, version: Version) -> None:
        self._native_series = df
        self._version = version

    def __narwhals_series__(self) -> Self:
        return self

    def __native_namespace__(self) -> ModuleType:
        return get_duckdb()  # type: ignore[no-any-return]

    @property
    def dtype(self) -> DType:
        # Only the first column's type is inspected -- presumably the relation
        # holds a single column here; confirm against callers.
        return native_to_narwhals_dtype(
            self._native_series.types[0],
            self._version,
            DeferredTimeZone(self._native_series),
        )

    def __getattr__(self, attr: str) -> Never:
        msg = (  # pragma: no cover
            f"Attribute {attr} is not supported for interchange-level dataframes.\n\n"
            "If you would like to see this kind of object better supported in "
            "Narwhals, please open a feature request "
            "at https://github.com/narwhals-dev/narwhals/issues."
        )
        raise NotImplementedError(msg)  # pragma: no cover
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duckdb/utils.py b/venv/lib/python3.8/site-packages/narwhals/_duckdb/utils.py
new file mode 100644
index 0000000..c5d4872
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duckdb/utils.py
@@ -0,0 +1,287 @@
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import TYPE_CHECKING, Any
+
+import duckdb
+
+from narwhals._utils import Version, isinstance_or_issubclass
+
+if TYPE_CHECKING:
+ from duckdb import DuckDBPyRelation, Expression
+ from duckdb.typing import DuckDBPyType
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+ from narwhals._duckdb.expr import DuckDBExpr
+ from narwhals.dtypes import DType
+ from narwhals.typing import IntoDType
+
# Maps Polars duration-unit abbreviations (see `narwhals._duration`) to the
# part names accepted by DuckDB's `date_trunc`.
UNITS_DICT = {
    "y": "year",
    "q": "quarter",
    "mo": "month",
    "d": "day",
    "h": "hour",
    "m": "minute",
    "s": "second",
    "ms": "millisecond",
    "us": "microsecond",
    "ns": "nanosecond",
}

col = duckdb.ColumnExpression
"""Alias for `duckdb.ColumnExpression`."""

lit = duckdb.ConstantExpression
"""Alias for `duckdb.ConstantExpression`."""

when = duckdb.CaseExpression
"""Alias for `duckdb.CaseExpression`."""
+
+
def concat_str(*exprs: Expression, separator: str = "") -> Expression:
    """Concatenate many strings, NULL inputs are skipped.

    Wraps [concat] and [concat_ws] `FunctionExpression`(s).

    Arguments:
        exprs: Native columns.
        separator: String that will be used to separate the values of each column.

    Returns:
        A new native expression.

    [concat]: https://duckdb.org/docs/stable/sql/functions/char.html#concatstring-
    [concat_ws]: https://duckdb.org/docs/stable/sql/functions/char.html#concat_wsseparator-string-
    """
    if separator:
        return duckdb.FunctionExpression("concat_ws", lit(separator), *exprs)
    return duckdb.FunctionExpression("concat", *exprs)
+
+
def evaluate_exprs(
    df: DuckDBLazyFrame, /, *exprs: DuckDBExpr
) -> list[tuple[str, Expression]]:
    """Evaluate each expression against `df`, returning `(output_name, expression)` pairs."""
    native_results: list[tuple[str, Expression]] = []
    for expr in exprs:
        native_series_list = expr._call(df)
        output_names = expr._evaluate_output_names(df)
        if expr._alias_output_names is not None:
            output_names = expr._alias_output_names(output_names)
        # Each expression must produce exactly one native result per output name.
        if len(output_names) != len(native_series_list):  # pragma: no cover
            msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results"
            raise AssertionError(msg)
        native_results.extend(zip(output_names, native_series_list))
    return native_results
+
+
class DeferredTimeZone:
    """Object which gets passed between `native_to_narwhals_dtype` calls.

    DuckDB stores the time zone in the connection, rather than in the dtypes, so
    this ensures that when calculating the schema of a dataframe with multiple
    timezone-aware columns, that the connection's time zone is only fetched once.

    Note: we cannot make the time zone a cached `DuckDBLazyFrame` property because
    the time zone can be modified after `DuckDBLazyFrame` creation:

    ```python
    df = nw.from_native(rel)
    print(df.collect_schema())
    rel.query("set timezone = 'Asia/Kolkata'")
    print(df.collect_schema())  # should change to reflect new time zone
    ```
    """

    # Class-level default; the fetched zone is cached per instance on first access.
    _cached_time_zone: str | None = None

    def __init__(self, rel: DuckDBPyRelation) -> None:
        self._rel = rel

    @property
    def time_zone(self) -> str:
        """Fetch relation time zone (if it wasn't calculated already)."""
        if self._cached_time_zone is None:
            self._cached_time_zone = fetch_rel_time_zone(self._rel)
        return self._cached_time_zone
+
+
def native_to_narwhals_dtype(
    duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone
) -> DType:
    """Convert a DuckDB dtype to the corresponding Narwhals dtype.

    `deferred_time_zone` lazily supplies the connection time zone, which is
    only needed (and only fetched) for timezone-aware timestamps.
    """
    duckdb_dtype_id = duckdb_dtype.id
    dtypes = version.dtypes

    # Handle nested data types first
    if duckdb_dtype_id == "list":
        return dtypes.List(
            native_to_narwhals_dtype(duckdb_dtype.child, version, deferred_time_zone)
        )

    if duckdb_dtype_id == "struct":
        children = duckdb_dtype.children
        return dtypes.Struct(
            [
                dtypes.Field(
                    name=child[0],
                    dtype=native_to_narwhals_dtype(child[1], version, deferred_time_zone),
                )
                for child in children
            ]
        )

    if duckdb_dtype_id == "array":
        # Collect nested fixed-size-array dimensions into a single shape tuple.
        child, size = duckdb_dtype.children
        shape: list[int] = [size[1]]

        while child[1].id == "array":
            child, size = child[1].children
            shape.insert(0, size[1])

        inner = native_to_narwhals_dtype(child[1], version, deferred_time_zone)
        return dtypes.Array(inner=inner, shape=tuple(shape))

    if duckdb_dtype_id == "enum":
        if version is Version.V1:
            # stable.v1's `Enum` takes no categories argument.
            return dtypes.Enum()  # type: ignore[call-arg]
        categories = duckdb_dtype.children[0][1]
        return dtypes.Enum(categories=categories)

    if duckdb_dtype_id == "timestamp with time zone":
        return dtypes.Datetime(time_zone=deferred_time_zone.time_zone)

    # Everything else is a non-nested scalar type.
    return _non_nested_native_to_narwhals_dtype(duckdb_dtype_id, version)
+
+
def fetch_rel_time_zone(rel: duckdb.DuckDBPyRelation) -> str:
    """Read the connection's `TimeZone` setting by querying `duckdb_settings()`."""
    result = rel.query(
        "duckdb_settings()", "select value from duckdb_settings() where name = 'TimeZone'"
    ).fetchone()
    # The TimeZone setting always exists on a connection.
    assert result is not None  # noqa: S101
    return result[0]  # type: ignore[no-any-return]
+
+
@lru_cache(maxsize=16)
def _non_nested_native_to_narwhals_dtype(duckdb_dtype_id: str, version: Version) -> DType:
    """Map a scalar DuckDB type id to a Narwhals dtype (`Unknown` if unrecognised).

    Cached per `(duckdb_dtype_id, version)` so the mapping dict isn't rebuilt
    on every schema lookup.
    """
    dtypes = version.dtypes
    return {
        "hugeint": dtypes.Int128(),
        "bigint": dtypes.Int64(),
        "integer": dtypes.Int32(),
        "smallint": dtypes.Int16(),
        "tinyint": dtypes.Int8(),
        "uhugeint": dtypes.UInt128(),
        "ubigint": dtypes.UInt64(),
        "uinteger": dtypes.UInt32(),
        "usmallint": dtypes.UInt16(),
        "utinyint": dtypes.UInt8(),
        "double": dtypes.Float64(),
        "float": dtypes.Float32(),
        "varchar": dtypes.String(),
        "date": dtypes.Date(),
        "timestamp": dtypes.Datetime(),
        "boolean": dtypes.Boolean(),
        "interval": dtypes.Duration(),
        "decimal": dtypes.Decimal(),
        "time": dtypes.Time(),
        "blob": dtypes.Binary(),
    }.get(duckdb_dtype_id, dtypes.Unknown())
+
+
def narwhals_to_native_dtype(dtype: IntoDType, version: Version) -> str:  # noqa: C901, PLR0912, PLR0915
    """Convert a Narwhals dtype to a DuckDB type string (for use in casts).

    Raises:
        NotImplementedError: For Decimal, Categorical, Datetime/Duration (todo),
            and Enum under stable.v1.
        ValueError: For an Enum class without concrete categories.
    """
    dtypes = version.dtypes
    if isinstance_or_issubclass(dtype, dtypes.Decimal):
        msg = "Casting to Decimal is not supported yet."
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Float64):
        return "DOUBLE"
    if isinstance_or_issubclass(dtype, dtypes.Float32):
        return "FLOAT"
    if isinstance_or_issubclass(dtype, dtypes.Int128):
        return "INT128"
    if isinstance_or_issubclass(dtype, dtypes.Int64):
        return "BIGINT"
    if isinstance_or_issubclass(dtype, dtypes.Int32):
        return "INTEGER"
    if isinstance_or_issubclass(dtype, dtypes.Int16):
        return "SMALLINT"
    if isinstance_or_issubclass(dtype, dtypes.Int8):
        return "TINYINT"
    if isinstance_or_issubclass(dtype, dtypes.UInt128):
        return "UINT128"
    if isinstance_or_issubclass(dtype, dtypes.UInt64):
        return "UBIGINT"
    if isinstance_or_issubclass(dtype, dtypes.UInt32):
        return "UINTEGER"
    if isinstance_or_issubclass(dtype, dtypes.UInt16):  # pragma: no cover
        return "USMALLINT"
    if isinstance_or_issubclass(dtype, dtypes.UInt8):  # pragma: no cover
        return "UTINYINT"
    if isinstance_or_issubclass(dtype, dtypes.String):
        return "VARCHAR"
    if isinstance_or_issubclass(dtype, dtypes.Boolean):  # pragma: no cover
        return "BOOLEAN"
    if isinstance_or_issubclass(dtype, dtypes.Time):
        return "TIME"
    if isinstance_or_issubclass(dtype, dtypes.Binary):
        return "BLOB"
    if isinstance_or_issubclass(dtype, dtypes.Categorical):
        msg = "Categorical not supported by DuckDB"
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Enum):
        if version is Version.V1:
            msg = "Converting to Enum is not supported in narwhals.stable.v1"
            raise NotImplementedError(msg)
        if isinstance(dtype, dtypes.Enum):
            # Only a concrete Enum instance carries categories.
            categories = "'" + "', '".join(dtype.categories) + "'"
            return f"ENUM ({categories})"
        msg = "Can not cast / initialize Enum without categories present"
        raise ValueError(msg)

    if isinstance_or_issubclass(dtype, dtypes.Datetime):
        _time_unit = dtype.time_unit
        _time_zone = dtype.time_zone
        msg = "todo"
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Duration):  # pragma: no cover
        _time_unit = dtype.time_unit
        msg = "todo"
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Date):  # pragma: no cover
        return "DATE"
    if isinstance_or_issubclass(dtype, dtypes.List):
        inner = narwhals_to_native_dtype(dtype.inner, version)
        return f"{inner}[]"
    if isinstance_or_issubclass(dtype, dtypes.Struct):  # pragma: no cover
        inner = ", ".join(
            f'"{field.name}" {narwhals_to_native_dtype(field.dtype, version)}'
            for field in dtype.fields
        )
        return f"STRUCT({inner})"
    if isinstance_or_issubclass(dtype, dtypes.Array):  # pragma: no cover
        # Render e.g. INTEGER[2][3] by walking down `inner` once per dimension.
        shape = dtype.shape
        duckdb_shape_fmt = "".join(f"[{item}]" for item in shape)
        inner_dtype: Any = dtype
        for _ in shape:
            inner_dtype = inner_dtype.inner
        duckdb_inner = narwhals_to_native_dtype(inner_dtype, version)
        return f"{duckdb_inner}{duckdb_shape_fmt}"
    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
    raise AssertionError(msg)
+
+
def generate_partition_by_sql(*partition_by: str | Expression) -> str:
    """Render a SQL `partition by` clause; empty string when nothing to partition by."""
    if not partition_by:
        return ""
    rendered = (f"{col(part) if isinstance(part, str) else part}" for part in partition_by)
    return f"partition by {', '.join(rendered)}"
+
+
def generate_order_by_sql(*order_by: str, ascending: bool) -> str:
    """Render a SQL `order by` clause; nulls sort first when ascending, last otherwise."""
    direction = "asc nulls first" if ascending else "desc nulls last"
    by_sql = ", ".join(f"{col(key)} {direction}" for key in order_by)
    return f"order by {by_sql}"
diff --git a/venv/lib/python3.8/site-packages/narwhals/_duration.py b/venv/lib/python3.8/site-packages/narwhals/_duration.py
new file mode 100644
index 0000000..87b6d83
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_duration.py
@@ -0,0 +1,60 @@
+"""Tools for working with the Polars duration string language."""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING, Literal, cast, get_args
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeAlias
+
+__all__ = ["IntervalUnit", "parse_interval_string"]
+
IntervalUnit: TypeAlias = Literal["ns", "us", "ms", "s", "m", "h", "d", "mo", "q", "y"]
"""A Polars duration string interval unit.

- 'ns': nanosecond.
- 'us': microsecond.
- 'ms': millisecond.
- 's': second.
- 'm': minute.
- 'h': hour.
- 'd': day.
- 'mo': month.
- 'q': quarter.
- 'y': year.
"""

# 'mo' must precede 'm' in the alternation so "1mo" isn't read as minutes.
PATTERN_INTERVAL: re.Pattern[str] = re.compile(
    r"^(?P<multiple>\d+)(?P<unit>ns|us|ms|mo|m|s|h|d|q|y)\Z"
)
MONTH_MULTIPLES = frozenset([1, 2, 3, 4, 6, 12])
QUARTER_MULTIPLES = frozenset([1, 2, 4])


def parse_interval_string(every: str) -> tuple[int, IntervalUnit]:
    """Parse a string like "1d", "2h", "3m" into a tuple of (number, unit).

    Returns:
        A tuple of multiple and unit parsed from the interval string.
    """
    match = PATTERN_INTERVAL.match(every)
    if match is None:
        msg = (
            f"Invalid `every` string: {every}. Expected string of kind <number><unit>, "
            f"where 'unit' is one of: {get_args(IntervalUnit)}."
        )
        raise ValueError(msg)
    multiple = int(match["multiple"])
    unit = cast("IntervalUnit", match["unit"])
    if unit == "mo" and multiple not in MONTH_MULTIPLES:
        msg = f"Only the following multiples are supported for 'mo' unit: {MONTH_MULTIPLES}.\nGot: {multiple}."
        raise ValueError(msg)
    if unit == "q" and multiple not in QUARTER_MULTIPLES:
        msg = f"Only the following multiples are supported for 'q' unit: {QUARTER_MULTIPLES}.\nGot: {multiple}."
        raise ValueError(msg)
    if unit == "y" and multiple != 1:
        msg = f"Only multiple 1 is currently supported for 'y' unit.\nGot: {multiple}."
        raise ValueError(msg)
    return multiple, unit
diff --git a/venv/lib/python3.8/site-packages/narwhals/_enum.py b/venv/lib/python3.8/site-packages/narwhals/_enum.py
new file mode 100644
index 0000000..929eecc
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_enum.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+# ruff: noqa: ARG004
+from enum import Enum
+from typing import Any
+
+
class NoAutoEnum(Enum):
    """Enum base class that forbids `enum.auto()` for value assignment.

    Works by overriding the value-generation hook, so any member declared
    with `auto()` fails at class-creation time.

    Examples:
        >>> from enum import auto
        >>> from narwhals._enum import NoAutoEnum
        >>>
        >>> class Colors(NoAutoEnum):
        ...     RED = 1
        ...     GREEN = 2
        >>> Colors.RED
        <Colors.RED: 1>

        >>> class ColorsWithAuto(NoAutoEnum):
        ...     RED = 1
        ...     GREEN = auto()
        Traceback (most recent call last):
        ...
        ValueError: Creating values with `auto()` is not allowed. Please provide a value manually instead.

    Raises:
        ValueError: If `auto()` is attempted to be used for any enum member value.
    """

    @staticmethod
    def _generate_next_value_(
        name: str, start: int, count: int, last_values: list[Any]
    ) -> Any:
        raise ValueError(
            "Creating values with `auto()` is not allowed. "
            "Please provide a value manually instead."
        )


__all__ = ["NoAutoEnum"]
diff --git a/venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py b/venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py
new file mode 100644
index 0000000..c442c3b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_expression_parsing.py
@@ -0,0 +1,609 @@
+# Utilities for expression parsing
+# Useful for backends which don't have any concept of expressions, such
+# as pandas or PyArrow.
+from __future__ import annotations
+
+from enum import Enum, auto
+from itertools import chain
+from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar, cast
+
+from narwhals._utils import is_compliant_expr
+from narwhals.dependencies import is_narwhals_series, is_numpy_array
+from narwhals.exceptions import (
+ InvalidOperationError,
+ LengthChangingExprError,
+ MultiOutputExpressionError,
+ ShapeError,
+)
+
+if TYPE_CHECKING:
+ from typing_extensions import Never, TypeIs
+
+ from narwhals._compliant import CompliantExpr, CompliantFrameT
+ from narwhals._compliant.typing import (
+ AliasNames,
+ CompliantExprAny,
+ CompliantFrameAny,
+ CompliantNamespaceAny,
+ EagerNamespaceAny,
+ EvalNames,
+ )
+ from narwhals.expr import Expr
+ from narwhals.series import Series
+ from narwhals.typing import IntoExpr, NonNestedLiteral, _1DArray
+
+ T = TypeVar("T")
+
+
def is_expr(obj: Any) -> TypeIs[Expr]:
    """Return True if `obj` is a narwhals `Expr` instance."""
    # Imported locally to avoid a circular import at module load time.
    from narwhals.expr import Expr as _Expr

    return isinstance(obj, _Expr)
+
+
def is_series(obj: Any) -> TypeIs[Series[Any]]:
    """Check whether `obj` is a Narwhals Series."""
    # NOTE: docstring previously said "Expr" (copy-paste from `is_expr`); fixed.
    from narwhals.series import Series

    return isinstance(obj, Series)
+
+
def combine_evaluate_output_names(
    *exprs: CompliantExpr[CompliantFrameT, Any],
) -> EvalNames[CompliantFrameT]:
    """Build a name-evaluator following the left-hand rule.

    E.g. `nw.sum_horizontal(expr1, expr2)` is named after the first output
    name of `expr1`.
    """
    first = exprs[0]
    if not is_compliant_expr(first):  # pragma: no cover
        msg = f"Safety assertion failed, expected expression, got: {type(exprs[0])}. Please report a bug."
        raise AssertionError(msg)

    def evaluate_output_names(df: CompliantFrameT) -> Sequence[str]:
        # Only the first output name of the first expression matters.
        return first._evaluate_output_names(df)[:1]

    return evaluate_output_names
+
+
def combine_alias_output_names(*exprs: CompliantExprAny) -> AliasNames | None:
    """Build an alias function following the left-hand rule.

    E.g. `nw.sum_horizontal(expr1.alias(alias), expr2)` applies `expr1`'s
    aliasing function to `expr1`'s first output name. Returns None when the
    first expression has no alias function.
    """
    if exprs[0]._alias_output_names is None:
        return None

    def alias_output_names(names: Sequence[str]) -> Sequence[str]:
        aliased = exprs[0]._alias_output_names(names)  # type: ignore[misc]
        return aliased[:1]

    return alias_output_names
+
+
def extract_compliant(
    plx: CompliantNamespaceAny,
    other: IntoExpr | NonNestedLiteral | _1DArray,
    *,
    str_as_lit: bool,
) -> CompliantExprAny | NonNestedLiteral:
    """Lower `other` to a compliant expression for namespace `plx`.

    Strings become column references unless `str_as_lit` is set; Series and
    numpy arrays are wrapped as expressions; anything else is returned
    unchanged as a literal.
    """
    if is_expr(other):
        return other._to_compliant_expr(plx)
    if not str_as_lit and isinstance(other, str):
        return plx.col(other)
    if is_narwhals_series(other):
        return other._compliant_series._to_expr()
    if is_numpy_array(other):
        # Only eager namespaces can build a series from a numpy array.
        eager_ns = cast("EagerNamespaceAny", plx)
        return eager_ns._series.from_numpy(other, context=eager_ns)._to_expr()
    return other
+
+
def evaluate_output_names_and_aliases(
    expr: CompliantExprAny, df: CompliantFrameAny, exclude: Sequence[str]
) -> tuple[Sequence[str], Sequence[str]]:
    """Evaluate an expression's output names and their aliases against `df`.

    For multi-unnamed expansions (e.g. `nw.all()`), names listed in `exclude`
    are filtered out of both sequences.
    """
    output_names = expr._evaluate_output_names(df)
    alias_fn = expr._alias_output_names
    aliases = output_names if alias_fn is None else alias_fn(output_names)
    if exclude:
        assert expr._metadata is not None  # noqa: S101
        if expr._metadata.expansion_kind.is_multi_unnamed():
            kept = [
                (name, alias)
                for name, alias in zip(output_names, aliases)
                if name not in exclude
            ]
            output_names, aliases = zip(*kept)
    return output_names, aliases
+
+
class ExprKind(Enum):
    """Describe which kind of expression we are dealing with."""

    LITERAL = auto()
    """A literal value, e.g. `nw.lit(1)`."""

    AGGREGATION = auto()
    """Reduces to a single value, insensitive to row order, e.g. `nw.col('a').mean()`."""

    ORDERABLE_AGGREGATION = auto()
    """Reduces to a single value, sensitive to row order, e.g. `nw.col('a').arg_max()`."""

    ELEMENTWISE = auto()
    """Length-preserving, needs no context from surrounding rows, e.g. `nw.col('a').abs()`."""

    ORDERABLE_WINDOW = auto()
    """Depends on surrounding rows and on their order, e.g. `diff`."""

    UNORDERABLE_WINDOW = auto()
    """Depends on surrounding rows but not on their order, e.g. `rank`."""

    FILTRATION = auto()
    """Length-changing, insensitive to row order, e.g. `drop_nulls`."""

    ORDERABLE_FILTRATION = auto()
    """Length-changing, sensitive to row order, e.g. `tail`."""

    NARY = auto()
    """Results from the combination of multiple expressions."""

    OVER = auto()
    """Results from calling `.over` on expression."""

    UNKNOWN = auto()
    """Based on the information we have, we can't determine the ExprKind."""

    @property
    def is_scalar_like(self) -> bool:
        # Literals and aggregations both produce a single scalar value.
        return self is ExprKind.LITERAL or self is ExprKind.AGGREGATION

    @property
    def is_orderable_window(self) -> bool:
        return self in (ExprKind.ORDERABLE_WINDOW, ExprKind.ORDERABLE_AGGREGATION)

    @classmethod
    def from_expr(cls, obj: Expr) -> ExprKind:
        # Precedence: literal, then scalar-like, then elementwise.
        meta = obj._metadata
        if meta.is_literal:
            return cls.LITERAL
        if meta.is_scalar_like:
            return cls.AGGREGATION
        return cls.ELEMENTWISE if meta.is_elementwise else cls.UNKNOWN

    @classmethod
    def from_into_expr(
        cls, obj: IntoExpr | NonNestedLiteral | _1DArray, *, str_as_lit: bool
    ) -> ExprKind:
        if is_expr(obj):
            return cls.from_expr(obj)
        behaves_like_column = (
            is_narwhals_series(obj)
            or is_numpy_array(obj)
            or (isinstance(obj, str) and not str_as_lit)
        )
        return cls.ELEMENTWISE if behaves_like_column else cls.LITERAL
+
+
def is_scalar_like(
    obj: ExprKind,
) -> TypeIs[Literal[ExprKind.LITERAL, ExprKind.AGGREGATION]]:
    """Narrowing wrapper around `ExprKind.is_scalar_like`."""
    return bool(obj.is_scalar_like)
+
+
class ExpansionKind(Enum):
    """Describe what kind of expansion the expression performs."""

    SINGLE = auto()
    """e.g. `nw.col('a')`, `nw.sum_horizontal(nw.all())`"""

    MULTI_NAMED = auto()
    """e.g. `nw.col('a', 'b')`"""

    MULTI_UNNAMED = auto()
    """e.g. `nw.all()`, `nw.nth(0, 1)`"""

    def is_multi_unnamed(self) -> bool:
        return self is ExpansionKind.MULTI_UNNAMED

    def is_multi_output(self) -> bool:
        # Everything except SINGLE expands to multiple outputs.
        return self is not ExpansionKind.SINGLE

    def __and__(self, other: ExpansionKind) -> Literal[ExpansionKind.MULTI_UNNAMED]:
        both_unnamed = (
            self is ExpansionKind.MULTI_UNNAMED
            and other is ExpansionKind.MULTI_UNNAMED
        )
        if both_unnamed:
            # e.g. nw.selectors.all() - nw.selectors.numeric().
            return ExpansionKind.MULTI_UNNAMED
        # Don't attempt anything more complex, keep it simple and raise in the face of ambiguity.
        msg = f"Unsupported ExpansionKind combination, got {self} and {other}, please report a bug."  # pragma: no cover
        raise AssertionError(msg)  # pragma: no cover
+
+
class ExprMetadata:
    """Track what an expression pipeline has done so far.

    Each `with_*` method returns a *new* `ExprMetadata` describing the state
    after appending one more operation, raising `InvalidOperationError` for
    combinations that cannot be evaluated (e.g. aggregating a scalar-like
    expression, or nesting `over` statements). The static/class methods at
    the bottom are constructors for the metadata of leaf expressions.
    """

    __slots__ = (
        "expansion_kind",
        "has_windows",
        "is_elementwise",
        "is_literal",
        "is_scalar_like",
        "last_node",
        "n_orderable_ops",
        "preserves_length",
    )

    def __init__(
        self,
        expansion_kind: ExpansionKind,
        last_node: ExprKind,
        *,
        has_windows: bool = False,
        n_orderable_ops: int = 0,
        preserves_length: bool = True,
        is_elementwise: bool = True,
        is_scalar_like: bool = False,
        is_literal: bool = False,
    ) -> None:
        # Invariants: literals are always scalar-like, and elementwise
        # operations always preserve length.
        if is_literal:
            assert is_scalar_like  # noqa: S101  # debug assertion
        if is_elementwise:
            assert preserves_length  # noqa: S101  # debug assertion
        # How many columns the expression expands to (single / multi named / multi unnamed).
        self.expansion_kind: ExpansionKind = expansion_kind
        # The kind of the most recently appended operation.
        self.last_node: ExprKind = last_node
        # Whether an `over` window has already been applied.
        self.has_windows: bool = has_windows
        # Count of order-dependent operations not yet resolved by an ordered `over`.
        self.n_orderable_ops: int = n_orderable_ops
        self.is_elementwise: bool = is_elementwise
        self.preserves_length: bool = preserves_length
        self.is_scalar_like: bool = is_scalar_like
        self.is_literal: bool = is_literal

    def __init_subclass__(cls, /, *args: Any, **kwds: Any) -> Never:  # pragma: no cover
        # Subclassing is forbidden: this class is a closed value type.
        msg = f"Cannot subclass {cls.__name__!r}"
        raise TypeError(msg)

    def __repr__(self) -> str:  # pragma: no cover
        return (
            f"ExprMetadata(\n"
            f"  expansion_kind: {self.expansion_kind},\n"
            f"  last_node: {self.last_node},\n"
            f"  has_windows: {self.has_windows},\n"
            f"  n_orderable_ops: {self.n_orderable_ops},\n"
            f"  is_elementwise: {self.is_elementwise},\n"
            f"  preserves_length: {self.preserves_length},\n"
            f"  is_scalar_like: {self.is_scalar_like},\n"
            f"  is_literal: {self.is_literal},\n"
            ")"
        )

    @property
    def is_filtration(self) -> bool:
        # Length-changing but not reducing to a scalar (e.g. `drop_nulls`).
        return not self.preserves_length and not self.is_scalar_like

    def with_aggregation(self) -> ExprMetadata:
        """Metadata after an order-insensitive aggregation (e.g. `mean`)."""
        if self.is_scalar_like:
            msg = "Can't apply aggregations to scalar-like expressions."
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.AGGREGATION,
            has_windows=self.has_windows,
            n_orderable_ops=self.n_orderable_ops,
            preserves_length=False,
            is_elementwise=False,
            is_scalar_like=True,
            is_literal=False,
        )

    def with_orderable_aggregation(self) -> ExprMetadata:
        """Metadata after an order-sensitive aggregation (e.g. `arg_max`)."""
        if self.is_scalar_like:
            msg = "Can't apply aggregations to scalar-like expressions."
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.ORDERABLE_AGGREGATION,
            has_windows=self.has_windows,
            # One more operation now depends on row order.
            n_orderable_ops=self.n_orderable_ops + 1,
            preserves_length=False,
            is_elementwise=False,
            is_scalar_like=True,
            is_literal=False,
        )

    def with_elementwise_op(self) -> ExprMetadata:
        """Metadata after an elementwise operation (e.g. `abs`); flags carry over."""
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.ELEMENTWISE,
            has_windows=self.has_windows,
            n_orderable_ops=self.n_orderable_ops,
            preserves_length=self.preserves_length,
            is_elementwise=self.is_elementwise,
            is_scalar_like=self.is_scalar_like,
            is_literal=self.is_literal,
        )

    def with_unorderable_window(self) -> ExprMetadata:
        """Metadata after a window op insensitive to row order (e.g. `rank`)."""
        if self.is_scalar_like:
            msg = "Can't apply unorderable window (`rank`, `is_unique`) to scalar-like expression."
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.UNORDERABLE_WINDOW,
            has_windows=self.has_windows,
            n_orderable_ops=self.n_orderable_ops,
            preserves_length=self.preserves_length,
            is_elementwise=False,
            is_scalar_like=False,
            is_literal=False,
        )

    def with_orderable_window(self) -> ExprMetadata:
        """Metadata after a window op sensitive to row order (e.g. `diff`)."""
        if self.is_scalar_like:
            msg = "Can't apply orderable window (e.g. `diff`, `shift`) to scalar-like expression."
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.ORDERABLE_WINDOW,
            has_windows=self.has_windows,
            n_orderable_ops=self.n_orderable_ops + 1,
            preserves_length=self.preserves_length,
            is_elementwise=False,
            is_scalar_like=False,
            is_literal=False,
        )

    def with_ordered_over(self) -> ExprMetadata:
        """Metadata after `.over(..., order_by=...)`; resolves one orderable op."""
        if self.has_windows:
            msg = "Cannot nest `over` statements."
            raise InvalidOperationError(msg)
        if self.is_elementwise or self.is_filtration:
            msg = (
                "Cannot use `over` on expressions which are elementwise\n"
                "(e.g. `abs`) or which change length (e.g. `drop_nulls`)."
            )
            raise InvalidOperationError(msg)
        n_orderable_ops = self.n_orderable_ops
        if not n_orderable_ops:
            msg = "Cannot use `order_by` in `over` on expression which isn't orderable."
            raise InvalidOperationError(msg)
        # The ordered `over` satisfies the ordering requirement of the last
        # node, if that node was itself orderable.
        if self.last_node.is_orderable_window:
            n_orderable_ops -= 1
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.OVER,
            has_windows=True,
            n_orderable_ops=n_orderable_ops,
            preserves_length=True,
            is_elementwise=False,
            is_scalar_like=False,
            is_literal=False,
        )

    def with_partitioned_over(self) -> ExprMetadata:
        """Metadata after `.over(partition_by)` without `order_by`."""
        if self.has_windows:
            msg = "Cannot nest `over` statements."
            raise InvalidOperationError(msg)
        if self.is_elementwise or self.is_filtration:
            msg = (
                "Cannot use `over` on expressions which are elementwise\n"
                "(e.g. `abs`) or which change length (e.g. `drop_nulls`)."
            )
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.OVER,
            has_windows=True,
            n_orderable_ops=self.n_orderable_ops,
            preserves_length=True,
            is_elementwise=False,
            is_scalar_like=False,
            is_literal=False,
        )

    def with_filtration(self) -> ExprMetadata:
        """Metadata after an order-insensitive length change (e.g. `drop_nulls`)."""
        if self.is_scalar_like:
            msg = "Can't apply filtration (e.g. `drop_nulls`) to scalar-like expression."
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.FILTRATION,
            has_windows=self.has_windows,
            n_orderable_ops=self.n_orderable_ops,
            preserves_length=False,
            is_elementwise=False,
            is_scalar_like=False,
            is_literal=False,
        )

    def with_orderable_filtration(self) -> ExprMetadata:
        """Metadata after an order-sensitive length change (e.g. `tail`)."""
        if self.is_scalar_like:
            msg = "Can't apply filtration (e.g. `drop_nulls`) to scalar-like expression."
            raise InvalidOperationError(msg)
        return ExprMetadata(
            self.expansion_kind,
            ExprKind.ORDERABLE_FILTRATION,
            has_windows=self.has_windows,
            n_orderable_ops=self.n_orderable_ops + 1,
            preserves_length=False,
            is_elementwise=False,
            is_scalar_like=False,
            is_literal=False,
        )

    @staticmethod
    def aggregation() -> ExprMetadata:
        """Metadata for a bare aggregation leaf (e.g. `nw.len()`)."""
        return ExprMetadata(
            ExpansionKind.SINGLE,
            ExprKind.AGGREGATION,
            is_elementwise=False,
            preserves_length=False,
            is_scalar_like=True,
        )

    @staticmethod
    def literal() -> ExprMetadata:
        """Metadata for a literal leaf (e.g. `nw.lit(1)`)."""
        return ExprMetadata(
            ExpansionKind.SINGLE,
            ExprKind.LITERAL,
            is_elementwise=False,
            preserves_length=False,
            is_literal=True,
            is_scalar_like=True,
        )

    @staticmethod
    def selector_single() -> ExprMetadata:
        # e.g. `nw.col('a')`, `nw.nth(0)`
        return ExprMetadata(ExpansionKind.SINGLE, ExprKind.ELEMENTWISE)

    @staticmethod
    def selector_multi_named() -> ExprMetadata:
        # e.g. `nw.col('a', 'b')`
        return ExprMetadata(ExpansionKind.MULTI_NAMED, ExprKind.ELEMENTWISE)

    @staticmethod
    def selector_multi_unnamed() -> ExprMetadata:
        # e.g. `nw.all()`
        return ExprMetadata(ExpansionKind.MULTI_UNNAMED, ExprKind.ELEMENTWISE)

    @classmethod
    def from_binary_op(cls, lhs: Expr, rhs: IntoExpr, /) -> ExprMetadata:
        """Combine metadata for a binary operation such as `lhs + rhs`."""
        # We may be able to allow multi-output rhs in the future:
        # https://github.com/narwhals-dev/narwhals/issues/2244.
        return combine_metadata(
            lhs, rhs, str_as_lit=True, allow_multi_output=False, to_single_output=False
        )

    @classmethod
    def from_horizontal_op(cls, *exprs: IntoExpr) -> ExprMetadata:
        """Combine metadata for a horizontal reduction such as `nw.sum_horizontal`."""
        return combine_metadata(
            *exprs, str_as_lit=False, allow_multi_output=True, to_single_output=True
        )
+
+
def combine_metadata(  # noqa: C901, PLR0912
    *args: IntoExpr | object | None,
    str_as_lit: bool,
    allow_multi_output: bool,
    to_single_output: bool,
) -> ExprMetadata:
    """Combine metadata from `args`.

    Arguments:
        args: Arguments, maybe expressions, literals, or Series.
        str_as_lit: Whether to interpret strings as literals or as column names.
        allow_multi_output: Whether to allow multi-output inputs.
        to_single_output: Whether the result is always single-output, regardless
            of the inputs (e.g. `nw.sum_horizontal`).

    Returns:
        Metadata for the combined (n-ary) expression.

    Raises:
        MultiOutputExpressionError: If a non-leading argument is multi-output
            while `allow_multi_output` is False.
        LengthChangingExprError: If more than one input changes length.
        ShapeError: If a length-changing input is mixed with length-preserving ones.
    """
    n_filtrations = 0
    result_expansion_kind = ExpansionKind.SINGLE
    result_has_windows = False
    result_n_orderable_ops = 0
    # result preserves length if at least one input does
    result_preserves_length = False
    # result is elementwise if all inputs are elementwise
    result_is_not_elementwise = False
    # result is scalar-like if all inputs are scalar-like
    result_is_not_scalar_like = False
    # result is literal if all inputs are literal
    result_is_not_literal = False

    for i, arg in enumerate(args):  # noqa: PLR1702
        if (isinstance(arg, str) and not str_as_lit) or is_series(arg):
            # Column names and Series behave like length-preserving columns.
            result_preserves_length = True
            result_is_not_scalar_like = True
            result_is_not_literal = True
        elif is_expr(arg):
            metadata = arg._metadata
            if metadata.expansion_kind.is_multi_output():
                expansion_kind = metadata.expansion_kind
                if i > 0 and not allow_multi_output:
                    # Left-most argument is always allowed to be multi-output.
                    msg = (
                        "Multi-output expressions (e.g. nw.col('a', 'b'), nw.all()) "
                        "are not supported in this context."
                    )
                    raise MultiOutputExpressionError(msg)
                if not to_single_output:
                    if i == 0:
                        result_expansion_kind = expansion_kind
                    else:
                        # `&` raises for ambiguous combinations.
                        result_expansion_kind = result_expansion_kind & expansion_kind

            # Accumulate flags: windows/orderable ops add up; the "not"
            # flags implement all-of semantics for elementwise/scalar/literal.
            if metadata.has_windows:
                result_has_windows = True
            result_n_orderable_ops += metadata.n_orderable_ops
            if metadata.preserves_length:
                result_preserves_length = True
            if not metadata.is_elementwise:
                result_is_not_elementwise = True
            if not metadata.is_scalar_like:
                result_is_not_scalar_like = True
            if not metadata.is_literal:
                result_is_not_literal = True
            if metadata.is_filtration:
                n_filtrations += 1

    if n_filtrations > 1:
        msg = "Length-changing expressions can only be used in isolation, or followed by an aggregation"
        raise LengthChangingExprError(msg)
    if result_preserves_length and n_filtrations:
        msg = "Cannot combine length-changing expressions with length-preserving ones or aggregations"
        raise ShapeError(msg)

    return ExprMetadata(
        result_expansion_kind,
        ExprKind.NARY,
        has_windows=result_has_windows,
        n_orderable_ops=result_n_orderable_ops,
        preserves_length=result_preserves_length,
        is_elementwise=not result_is_not_elementwise,
        is_scalar_like=not result_is_not_scalar_like,
        is_literal=not result_is_not_literal,
    )
+
+
def check_expressions_preserve_length(*args: IntoExpr, function_name: str) -> None:
    """Raise ShapeError if any argument aggregates or changes length.

    Series inputs are not validated here (this runs lazily, so lengths can't
    be evaluated yet); those checks happen later.
    """
    from narwhals.series import Series

    def _preserves(x: IntoExpr) -> bool:
        # Strings (column names) and Series are treated as length-preserving.
        return (is_expr(x) and x._metadata.preserves_length) or isinstance(
            x, (str, Series)
        )

    if not all(_preserves(x) for x in args):
        msg = f"Expressions which aggregate or change length cannot be passed to '{function_name}'."
        raise ShapeError(msg)
+
+
def all_exprs_are_scalar_like(*args: IntoExpr, **kwargs: IntoExpr) -> bool:
    """Return True if every positional and keyword input is an aggregation or literal.

    Series inputs are not validated here; such checks happen later, as this
    function works lazily and so can't evaluate lengths.
    """
    for x in chain(args, kwargs.values()):
        if not (is_expr(x) and x._metadata.is_scalar_like):
            return False
    return True
+
+
def apply_n_ary_operation(
    plx: CompliantNamespaceAny,
    function: Any,
    *comparands: IntoExpr | NonNestedLiteral | _1DArray,
    str_as_lit: bool,
) -> CompliantExprAny:
    """Apply `function` across `comparands`, broadcasting scalar-like inputs.

    Scalar-like operands (literals, aggregations) are broadcast only when at
    least one operand is not scalar-like.
    """
    kinds = [
        ExprKind.from_into_expr(comparand, str_as_lit=str_as_lit)
        for comparand in comparands
    ]
    needs_broadcast = not all(kind.is_scalar_like for kind in kinds)

    arguments = []
    for comparand, kind in zip(comparands, kinds):
        compliant = extract_compliant(plx, comparand, str_as_lit=str_as_lit)
        if needs_broadcast and is_compliant_expr(compliant) and is_scalar_like(kind):
            compliant = compliant.broadcast(kind)
        arguments.append(compliant)
    return function(*arguments)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/dataframe.py
new file mode 100644
index 0000000..4e18fa6
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/dataframe.py
@@ -0,0 +1,430 @@
+from __future__ import annotations
+
+import operator
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterable,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+)
+
+import ibis
+import ibis.expr.types as ir
+
+from narwhals._ibis.utils import evaluate_exprs, native_to_narwhals_dtype
+from narwhals._utils import (
+ Implementation,
+ Version,
+ not_implemented,
+ parse_columns_to_drop,
+ parse_version,
+ validate_backend_version,
+)
+from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError
+from narwhals.typing import CompliantLazyFrame
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import pandas as pd
+ import pyarrow as pa
+ from ibis.expr.operations import Binary
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny
+ from narwhals._ibis.expr import IbisExpr
+ from narwhals._ibis.group_by import IbisGroupBy
+ from narwhals._ibis.namespace import IbisNamespace
+ from narwhals._ibis.series import IbisInterchangeSeries
+ from narwhals._utils import _FullContext
+ from narwhals.dataframe import LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.stable.v1 import DataFrame as DataFrameV1
+ from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy
+
+ JoinPredicates: TypeAlias = "Sequence[ir.BooleanColumn] | Sequence[str]"
+
+
class IbisLazyFrame(
    CompliantLazyFrame[
        "IbisExpr", "ir.Table", "LazyFrame[ir.Table] | DataFrameV1[ir.Table]"
    ]
):
    """Narwhals-compliant lazy frame backed by an Ibis `Table` expression."""

    _implementation = Implementation.IBIS

    def __init__(
        self, df: ir.Table, *, backend_version: tuple[int, ...], version: Version
    ) -> None:
        self._native_frame: ir.Table = df
        self._version = version
        self._backend_version = backend_version
        # Schema and column names are computed lazily and memoised
        # (see the `schema` / `columns` properties).
        self._cached_schema: dict[str, DType] | None = None
        self._cached_columns: list[str] | None = None
        validate_backend_version(self._implementation, self._backend_version)

    @staticmethod
    def _is_native(obj: ir.Table | Any) -> TypeIs[ir.Table]:
        """Return True if `obj` is a native Ibis `Table`."""
        return isinstance(obj, ir.Table)

    @classmethod
    def from_native(cls, data: ir.Table, /, *, context: _FullContext) -> Self:
        """Wrap a native Ibis table using versions taken from `context`."""
        return cls(
            data, backend_version=context._backend_version, version=context._version
        )

    def to_narwhals(self) -> LazyFrame[ir.Table] | DataFrameV1[ir.Table]:
        """Wrap as a user-facing narwhals object (lazy on main, interchange on v1)."""
        if self._version is Version.MAIN:
            return self._version.lazyframe(self, level="lazy")

        from narwhals.stable.v1 import DataFrame as DataFrameV1

        return DataFrameV1(self, level="interchange")

    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
        # Keep around for backcompat.
        if self._version is not Version.V1:
            msg = "__narwhals_dataframe__ is not implemented for IbisLazyFrame"
            raise AttributeError(msg)
        return self

    def __narwhals_lazyframe__(self) -> Self:
        return self

    def __native_namespace__(self) -> ModuleType:
        return ibis

    def __narwhals_namespace__(self) -> IbisNamespace:
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(backend_version=self._backend_version, version=self._version)

    def get_column(self, name: str) -> IbisInterchangeSeries:
        """Return a single column as an interchange-level series."""
        from narwhals._ibis.series import IbisInterchangeSeries

        return IbisInterchangeSeries(self.native.select(name), version=self._version)

    def _iter_columns(self) -> Iterator[ir.Expr]:
        # Yield each column as a native Ibis expression.
        for name in self.columns:
            yield self.native[name]

    def collect(
        self, backend: ModuleType | Implementation | str | None, **kwargs: Any
    ) -> CompliantDataFrameAny:
        """Materialise to an eager frame in the requested backend.

        `None` defaults to PyArrow. Raises ValueError for unsupported backends.
        """
        if backend is None or backend is Implementation.PYARROW:
            import pyarrow as pa  # ignore-banned-import

            from narwhals._arrow.dataframe import ArrowDataFrame

            return ArrowDataFrame(
                self.native.to_pyarrow(),
                backend_version=parse_version(pa),
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.PANDAS:
            import pandas as pd  # ignore-banned-import

            from narwhals._pandas_like.dataframe import PandasLikeDataFrame

            return PandasLikeDataFrame(
                self.native.to_pandas(),
                implementation=Implementation.PANDAS,
                backend_version=parse_version(pd),
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.POLARS:
            import polars as pl  # ignore-banned-import

            from narwhals._polars.dataframe import PolarsDataFrame

            return PolarsDataFrame(
                self.native.to_polars(),
                backend_version=parse_version(pl),
                version=self._version,
            )

        msg = f"Unsupported `backend` value: {backend}"  # pragma: no cover
        raise ValueError(msg)  # pragma: no cover

    def head(self, n: int) -> Self:
        return self._with_native(self.native.head(n))

    def simple_select(self, *column_names: str) -> Self:
        """Select columns by name only (no expressions)."""
        return self._with_native(self.native.select(*column_names))

    def aggregate(self, *exprs: IbisExpr) -> Self:
        """Evaluate aggregation expressions into a single-row table."""
        selection = [
            cast("ir.Scalar", val.name(name))
            for name, val in evaluate_exprs(self, *exprs)
        ]
        return self._with_native(self.native.aggregate(selection))

    def select(self, *exprs: IbisExpr) -> Self:
        """Evaluate expressions and project the results."""
        selection = [val.name(name) for name, val in evaluate_exprs(self, *exprs)]
        if not selection:
            # Ibis cannot represent a zero-column selection.
            msg = "At least one expression must be provided to `select` with the Ibis backend."
            raise ValueError(msg)

        t = self.native.select(*selection)
        return self._with_native(t)

    def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
        """Drop columns; with `strict`, missing names raise."""
        columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
        selection = (col for col in self.columns if col not in columns_to_drop)
        return self._with_native(self.native.select(*selection))

    def lazy(self, *, backend: Implementation | None = None) -> Self:
        # The `backend` argument has no effect but we keep it here for
        # backwards compatibility because in `narwhals.stable.v1`
        # function `.from_native()` will return a DataFrame for Ibis.

        if backend is not None:  # pragma: no cover
            msg = "`backend` argument is not supported for Ibis"
            raise ValueError(msg)
        return self

    def with_columns(self, *exprs: IbisExpr) -> Self:
        """Add or replace columns from the given expressions."""
        new_columns_map = dict(evaluate_exprs(self, *exprs))
        return self._with_native(self.native.mutate(**new_columns_map))

    def filter(self, predicate: IbisExpr) -> Self:
        # `[0]` is safe as the predicate's expression only returns a single column
        mask = cast("ir.BooleanValue", predicate(self)[0])
        return self._with_native(self.native.filter(mask))

    @property
    def schema(self) -> dict[str, DType]:
        """Mapping of column name to narwhals dtype (memoised)."""
        if self._cached_schema is None:
            # Note: prefer `self._cached_schema` over `functools.cached_property`
            # due to Python3.13 failures.
            self._cached_schema = {
                name: native_to_narwhals_dtype(dtype, self._version)
                for name, dtype in self.native.schema().fields.items()
            }
        return self._cached_schema

    @property
    def columns(self) -> list[str]:
        """Column names (memoised; reuses the cached schema when available)."""
        if self._cached_columns is None:
            self._cached_columns = (
                list(self.schema)
                if self._cached_schema is not None
                else list(self.native.columns)
            )
        return self._cached_columns

    def to_pandas(self) -> pd.DataFrame:
        # only if version is v1, keep around for backcompat
        import pandas as pd  # ignore-banned-import()

        if parse_version(pd) >= (1, 0, 0):
            return self.native.to_pandas()
        else:  # pragma: no cover
            msg = f"Conversion to pandas requires pandas>=1.0.0, found {pd.__version__}"
            raise NotImplementedError(msg)

    def to_arrow(self) -> pa.Table:
        # only if version is v1, keep around for backcompat
        return self.native.to_pyarrow()

    def _with_version(self, version: Version) -> Self:
        # Same native table, different narwhals API version.
        return self.__class__(
            self.native, version=version, backend_version=self._backend_version
        )

    def _with_native(self, df: ir.Table) -> Self:
        # Wrap a new native table, keeping versions.
        return self.__class__(
            df, backend_version=self._backend_version, version=self._version
        )

    def group_by(
        self, keys: Sequence[str] | Sequence[IbisExpr], *, drop_null_keys: bool
    ) -> IbisGroupBy:
        from narwhals._ibis.group_by import IbisGroupBy

        return IbisGroupBy(self, keys, drop_null_keys=drop_null_keys)

    def rename(self, mapping: Mapping[str, str]) -> Self:
        """Rename columns according to `mapping` (absent names are kept)."""
        def _rename(col: str) -> str:
            return mapping.get(col, col)

        return self._with_native(self.native.rename(_rename))

    @staticmethod
    def _join_drop_duplicate_columns(df: ir.Table, columns: Iterable[str], /) -> ir.Table:
        """Ibis adds a suffix to the right table col, even when it matches the left during a join."""
        duplicates = set(df.columns).intersection(columns)
        return df.drop(*duplicates) if duplicates else df

    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        """Join with another frame, mirroring polars' column-dropping semantics."""
        how_native = "outer" if how == "full" else how
        rname = "{name}" + suffix
        # NOTE(review): no custom __eq__ is visible here, so `other == self`
        # presumably falls back to identity — confirm against the base class.
        if other == self:
            # Ibis does not support self-references unless created as a view
            other = self._with_native(other.native.view())
        if how_native == "cross":
            joined = self.native.join(other.native, how=how_native, rname=rname)
            return self._with_native(joined)
        # help mypy
        assert left_on is not None  # noqa: S101
        assert right_on is not None  # noqa: S101
        predicates = self._convert_predicates(other, left_on, right_on)
        joined = self.native.join(other.native, predicates, how=how_native, rname=rname)
        if how_native == "left":
            right_names = (n + suffix for n in right_on)
            joined = self._join_drop_duplicate_columns(joined, right_names)
            it = (cast("Binary", p.op()) for p in predicates if not isinstance(p, str))
            to_drop = []
            for pred in it:
                right = pred.right.name
                # Mirrors how polars works.
                if right not in self.columns and pred.left.name != right:
                    to_drop.append(right)
            if to_drop:
                joined = joined.drop(*to_drop)
        return self._with_native(joined)

    def join_asof(
        self,
        other: Self,
        *,
        left_on: str,
        right_on: str,
        by_left: Sequence[str] | None,
        by_right: Sequence[str] | None,
        strategy: AsofJoinStrategy,
        suffix: str,
    ) -> Self:
        """As-of join; only `backward` and `forward` strategies are supported."""
        rname = "{name}" + suffix
        # backward: match the latest right row with key <= left key;
        # forward: the earliest right row with key >= left key.
        strategy_op = {"backward": operator.ge, "forward": operator.le}
        predicates: JoinPredicates = []
        if op := strategy_op.get(strategy):
            on: ir.BooleanColumn = op(self.native[left_on], other.native[right_on])
        else:
            msg = "Only `backward` and `forward` strategies are currently supported for Ibis"
            raise NotImplementedError(msg)
        if by_left is not None and by_right is not None:
            predicates = self._convert_predicates(other, by_left, by_right)
        joined = self.native.asof_join(other.native, on, predicates, rname=rname)
        joined = self._join_drop_duplicate_columns(joined, [right_on + suffix])
        if by_right is not None:
            right_names = (n + suffix for n in by_right)
            joined = self._join_drop_duplicate_columns(joined, right_names)
        return self._with_native(joined)

    def _convert_predicates(
        self, other: Self, left_on: Sequence[str], right_on: Sequence[str]
    ) -> JoinPredicates:
        # Ibis accepts bare column names when both sides match.
        if left_on == right_on:
            return left_on
        return [
            cast("ir.BooleanColumn", (self.native[left] == other.native[right]))
            for left, right in zip(left_on, right_on)
        ]

    def collect_schema(self) -> dict[str, DType]:
        """Compute the schema without memoisation (unlike the `schema` property)."""
        return {
            name: native_to_narwhals_dtype(dtype, self._version)
            for name, dtype in self.native.schema().fields.items()
        }

    def unique(
        self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
    ) -> Self:
        """Drop duplicate rows; `keep` is "any" (keep one) or "none" (drop all dupes)."""
        if subset_ := subset if keep == "any" else (subset or self.columns):
            # Sanitise input
            if any(x not in self.columns for x in subset_):
                msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}."
                raise ColumnNotFoundError(msg)

            # Map narwhals keep strategies onto ibis `distinct(keep=...)`.
            mapped_keep: dict[str, Literal["first"] | None] = {
                "any": "first",
                "none": None,
            }
            to_keep = mapped_keep[keep]
            return self._with_native(self.native.distinct(on=subset_, keep=to_keep))
        return self._with_native(self.native.distinct(on=subset))

    def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
        """Sort by one or more columns; a scalar `descending` applies to all."""
        if isinstance(descending, bool):
            descending = [descending for _ in range(len(by))]

        sort_cols = []

        for i in range(len(by)):
            direction_fn = ibis.desc if descending[i] else ibis.asc
            col = direction_fn(by[i], nulls_first=not nulls_last)
            sort_cols.append(cast("ir.Column", col))

        return self._with_native(self.native.order_by(*sort_cols))

    def drop_nulls(self, subset: Sequence[str] | None) -> Self:
        """Drop rows with nulls in `subset` (all columns when None)."""
        subset_ = subset if subset is not None else self.columns
        return self._with_native(self.native.drop_null(subset_))

    def explode(self, columns: Sequence[str]) -> Self:
        """Explode a single List column into one row per element."""
        dtypes = self._version.dtypes
        schema = self.collect_schema()
        for col in columns:
            dtype = schema[col]

            if dtype != dtypes.List:
                msg = (
                    f"`explode` operation not supported for dtype `{dtype}`, "
                    "expected List type"
                )
                raise InvalidOperationError(msg)

        if len(columns) != 1:
            msg = (
                "Exploding on multiple columns is not supported with Ibis backend since "
                "we cannot guarantee that the exploded columns have matching element counts."
            )
            raise NotImplementedError(msg)

        return self._with_native(self.native.unnest(columns[0], keep_empty=True))

    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self:
        """Melt `on` columns into (variable, value) pairs, keeping `index` columns."""
        import ibis.selectors as s

        index_: Sequence[str] = [] if index is None else index
        on_: Sequence[str] = (
            [c for c in self.columns if c not in index_] if on is None else on
        )

        # Discard columns not in the index
        final_columns = list(dict.fromkeys([*index_, variable_name, value_name]))

        unpivoted = self.native.pivot_longer(
            s.cols(*on_), names_to=variable_name, values_to=value_name
        )
        return self._with_native(unpivoted.select(*final_columns))

    gather_every = not_implemented.deprecated(
        "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
    )
    tail = not_implemented.deprecated(
        "`LazyFrame.tail` is deprecated and will be removed in a future version."
    )
    with_row_index = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr.py
new file mode 100644
index 0000000..4fc8c79
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr.py
@@ -0,0 +1,698 @@
+from __future__ import annotations
+
+import operator
+from functools import partial
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Iterator,
+ Literal,
+ Sequence,
+ TypeVar,
+ cast,
+)
+
+import ibis
+
+from narwhals._compliant import LazyExpr
+from narwhals._compliant.window import WindowInputs
+from narwhals._ibis.expr_dt import IbisExprDateTimeNamespace
+from narwhals._ibis.expr_list import IbisExprListNamespace
+from narwhals._ibis.expr_str import IbisExprStringNamespace
+from narwhals._ibis.expr_struct import IbisExprStructNamespace
+from narwhals._ibis.utils import is_floating, lit, narwhals_to_native_dtype
+from narwhals._utils import Implementation, not_implemented
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir
+ from typing_extensions import Self
+
+ from narwhals._compliant.typing import (
+ AliasNames,
+ EvalNames,
+ EvalSeries,
+ WindowFunction,
+ )
+ from narwhals._expression_parsing import ExprKind, ExprMetadata
+ from narwhals._ibis.dataframe import IbisLazyFrame
+ from narwhals._ibis.namespace import IbisNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.typing import IntoDType, RankMethod, RollingInterpolationMethod
+
+ ExprT = TypeVar("ExprT", bound=ir.Value)
+ IbisWindowFunction = WindowFunction[IbisLazyFrame, ir.Value]
+ IbisWindowInputs = WindowInputs[ir.Value]
+
+
class IbisExpr(LazyExpr["IbisLazyFrame", "ir.Column"]):
    """Lazy expression for the Ibis backend.

    Wraps a callable from an ``IbisLazyFrame`` to a sequence of native Ibis
    values, plus optional window-function support for order-dependent
    operations (``shift``, ``diff``, ``cum_*``, ``rolling_*``, ...).
    """

    _implementation = Implementation.IBIS

    def __init__(
        self,
        call: EvalSeries[IbisLazyFrame, ir.Value],
        window_function: IbisWindowFunction | None = None,
        *,
        evaluate_output_names: EvalNames[IbisLazyFrame],
        alias_output_names: AliasNames | None,
        backend_version: tuple[int, ...],
        version: Version,
    ) -> None:
        self._call = call
        self._evaluate_output_names = evaluate_output_names
        self._alias_output_names = alias_output_names
        self._backend_version = backend_version
        self._version = version
        self._metadata: ExprMetadata | None = None
        self._window_function: IbisWindowFunction | None = window_function

    @property
    def window_function(self) -> IbisWindowFunction:
        # Fallback used when no dedicated window function was registered:
        # apply the plain expression over a partition-only window.
        def default_window_func(
            df: IbisLazyFrame, window_inputs: IbisWindowInputs
        ) -> list[ir.Value]:
            assert not window_inputs.order_by  # noqa: S101
            return [
                expr.over(ibis.window(group_by=window_inputs.partition_by))
                for expr in self(df)
            ]

        return self._window_function or default_window_func

    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
        return self._call(df)

    def __narwhals_expr__(self) -> None: ...

    def __narwhals_namespace__(self) -> IbisNamespace:  # pragma: no cover
        # Unused, just for compatibility with PandasLikeExpr
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(backend_version=self._backend_version, version=self._version)

    def _cum_window_func(
        self, *, reverse: bool, func_name: Literal["sum", "max", "min", "count"]
    ) -> IbisWindowFunction:
        """Build a cumulative window function (unbounded preceding -> current row)."""

        def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            # For `reverse=True`, sort descending with nulls last so the
            # cumulative frame runs from the end of the column backwards.
            window = ibis.window(
                group_by=list(inputs.partition_by),
                order_by=self._sort(
                    *inputs.order_by, descending=reverse, nulls_last=reverse
                ),
                preceding=None,  # unbounded
                following=0,
            )

            return [getattr(expr, func_name)().over(window) for expr in self(df)]

        return func

    def _rolling_window_func(
        self,
        *,
        func_name: Literal["sum", "mean", "std", "var"],
        center: bool,
        window_size: int,
        min_samples: int,
        ddof: int | None = None,
    ) -> IbisWindowFunction:
        """Build a rolling-window function; results are null until `min_samples`."""
        supported_funcs = ["sum", "mean", "std", "var"]

        # Split the window around the current row when centered; otherwise the
        # window is entirely behind (and including) the current row.
        if center:
            preceding = window_size // 2
            following = window_size - preceding - 1
        else:
            preceding = window_size - 1
            following = 0

        def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            window = ibis.window(
                group_by=list(inputs.partition_by),
                order_by=self._sort(*inputs.order_by),
                preceding=preceding,
                following=following,
            )

            def inner_f(expr: ir.NumericColumn) -> ir.Value:
                if func_name in {"sum", "mean"}:
                    func_ = getattr(expr, func_name)()
                elif func_name == "var" and ddof == 0:
                    func_ = expr.var(how="pop")
                # Fixed: was `func_name in "var"` (substring containment),
                # which only worked by accident given the Literal type.
                elif func_name == "var" and ddof == 1:
                    func_ = expr.var(how="sample")
                elif func_name == "std" and ddof == 0:
                    func_ = expr.std(how="pop")
                elif func_name == "std" and ddof == 1:
                    func_ = expr.std(how="sample")
                elif func_name in {"var", "std"}:  # pragma: no cover
                    msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}."
                    raise ValueError(msg)
                else:  # pragma: no cover
                    msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}."
                    raise ValueError(msg)

                rolling_calc = func_.over(window)
                # Mask out positions where fewer than `min_samples` non-null
                # values fall inside the window.
                valid_count = expr.count().over(window)
                return ibis.cases(
                    (valid_count >= ibis.literal(min_samples), rolling_calc),
                    else_=ibis.null(),
                )

            return [inner_f(cast("ir.NumericColumn", expr)) for expr in self(df)]

        return func

    def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
        # Ibis does its own broadcasting.
        return self

    def _sort(
        self, *cols: ir.Column | str, descending: bool = False, nulls_last: bool = False
    ) -> Iterator[ir.Column]:
        """Yield sort keys with explicit direction and null placement."""
        mapping = {
            (False, False): partial(ibis.asc, nulls_first=True),
            (False, True): partial(ibis.asc, nulls_first=False),
            (True, False): partial(ibis.desc, nulls_first=True),
            (True, True): partial(ibis.desc, nulls_first=False),
        }
        sort = mapping[(descending, nulls_last)]
        yield from (cast("ir.Column", sort(col)) for col in cols)

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[IbisLazyFrame],
        /,
        *,
        context: _FullContext,
    ) -> Self:
        def func(df: IbisLazyFrame) -> list[ir.Column]:
            return [df.native[name] for name in evaluate_column_names(df)]

        return cls(
            func,
            evaluate_output_names=evaluate_column_names,
            alias_output_names=None,
            backend_version=context._backend_version,
            version=context._version,
        )

    @classmethod
    def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
        def func(df: IbisLazyFrame) -> list[ir.Column]:
            return [df.native[i] for i in column_indices]

        return cls(
            func,
            evaluate_output_names=cls._eval_names_indices(column_indices),
            alias_output_names=None,
            backend_version=context._backend_version,
            version=context._version,
        )

    def _with_callable(
        self, call: Callable[..., ir.Value], /, **expressifiable_args: Self | Any
    ) -> Self:
        """Create expression from callable.

        Arguments:
            call: Callable from compliant DataFrame to native Expression
            expressifiable_args: arguments passed to the expression which should
                be parsed as expressions (e.g. in `nw.col('a').is_between('b', 'c')`)
        """

        def func(df: IbisLazyFrame) -> list[ir.Value]:
            native_series_list = self(df)
            # Evaluate any expression-valued keyword arguments against `df`;
            # plain scalars are passed through unchanged.
            other_native_series = {
                key: df._evaluate_expr(value) if self._is_expr(value) else value
                for key, value in expressifiable_args.items()
            }
            return [
                call(native_series, **other_native_series)
                for native_series in native_series_list
            ]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )

    def _with_alias_output_names(self, func: AliasNames | None, /) -> Self:
        return type(self)(
            self._call,
            self._window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=func,
            backend_version=self._backend_version,
            version=self._version,
        )

    def _with_window_function(self, window_function: IbisWindowFunction) -> Self:
        return self.__class__(
            self._call,
            window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )

    @classmethod
    def _alias_native(cls, expr: ExprT, name: str, /) -> ExprT:
        return cast("ExprT", expr.name(name))

    # --- binary/unary operators -------------------------------------------
    # Reflected (`__r*__`) variants are aliased to "literal" to match the
    # output naming convention used across narwhals backends.

    def __and__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr & other, other=other)

    def __or__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr | other, other=other)

    def __add__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr + other, other=other)

    def __truediv__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr / other, other=other)

    def __rtruediv__(self, other: IbisExpr) -> Self:
        return self._with_callable(
            lambda expr, other: expr.__rtruediv__(other), other=other
        ).alias("literal")

    def __floordiv__(self, other: IbisExpr) -> Self:
        return self._with_callable(
            lambda expr, other: expr.__floordiv__(other), other=other
        )

    def __rfloordiv__(self, other: IbisExpr) -> Self:
        return self._with_callable(
            lambda expr, other: expr.__rfloordiv__(other), other=other
        ).alias("literal")

    def __mod__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr.__mod__(other), other=other)

    def __rmod__(self, other: IbisExpr) -> Self:
        return self._with_callable(
            lambda expr, other: expr.__rmod__(other), other=other
        ).alias("literal")

    def __sub__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr - other, other=other)

    def __rsub__(self, other: IbisExpr) -> Self:
        return self._with_callable(
            lambda expr, other: expr.__rsub__(other), other=other
        ).alias("literal")

    def __mul__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr * other, other=other)

    def __pow__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr**other, other=other)

    def __rpow__(self, other: IbisExpr) -> Self:
        return self._with_callable(
            lambda expr, other: expr.__rpow__(other), other=other
        ).alias("literal")

    def __lt__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr < other, other=other)

    def __gt__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr > other, other=other)

    def __le__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr <= other, other=other)

    def __ge__(self, other: IbisExpr) -> Self:
        return self._with_callable(lambda expr, other: expr >= other, other=other)

    def __eq__(self, other: IbisExpr) -> Self:  # type: ignore[override]
        return self._with_callable(lambda expr, other: expr == other, other=other)

    def __ne__(self, other: IbisExpr) -> Self:  # type: ignore[override]
        return self._with_callable(lambda expr, other: expr != other, other=other)

    def __invert__(self) -> Self:
        invert = cast("Callable[..., ir.Value]", operator.invert)
        return self._with_callable(invert)

    # --- reductions & element-wise ops ------------------------------------

    def abs(self) -> Self:
        return self._with_callable(lambda expr: expr.abs())

    def mean(self) -> Self:
        return self._with_callable(lambda expr: expr.mean())

    def median(self) -> Self:
        return self._with_callable(lambda expr: expr.median())

    def all(self) -> Self:
        # `all` over an empty/all-null column yields null in Ibis; polars
        # returns True, so fill accordingly.
        return self._with_callable(lambda expr: expr.all().fill_null(lit(True)))  # noqa: FBT003

    def any(self) -> Self:
        # Symmetric with `all`: polars returns False for empty input.
        return self._with_callable(lambda expr: expr.any().fill_null(lit(False)))  # noqa: FBT003

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self:
        if interpolation != "linear":
            msg = "Only linear interpolation methods are supported for Ibis quantile."
            raise NotImplementedError(msg)
        return self._with_callable(lambda expr: expr.quantile(quantile))

    def clip(self, lower_bound: Any, upper_bound: Any) -> Self:
        def _clip(expr: ir.NumericValue, lower: Any, upper: Any) -> ir.NumericValue:
            return expr.clip(lower=lower, upper=upper)

        return self._with_callable(_clip, lower=lower_bound, upper=upper_bound)

    def sum(self) -> Self:
        # polars returns 0 for the sum of an empty/all-null column.
        return self._with_callable(lambda expr: expr.sum().fill_null(lit(0)))

    def n_unique(self) -> Self:
        # `nunique` ignores nulls; add 1 if any null is present, to count null
        # as a distinct value like polars does.
        return self._with_callable(
            lambda expr: expr.nunique() + expr.isnull().any().cast("int8")
        )

    def count(self) -> Self:
        return self._with_callable(lambda expr: expr.count())

    def len(self) -> Self:
        def func(df: IbisLazyFrame) -> list[ir.IntegerScalar]:
            return [df.native.count()]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )

    def std(self, ddof: int) -> Self:
        def _std(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.std(how="pop")
            elif ddof == 1:
                return expr.std(how="sample")
            else:
                # General ddof: rescale the population std by
                # sqrt(n / (n - ddof)).
                n_samples = expr.count()
                std_pop = expr.std(how="pop")
                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
                return std_pop * n_samples.sqrt() / (n_samples - ddof_lit).sqrt()

        return self._with_callable(lambda expr: _std(expr, ddof))

    def var(self, ddof: int) -> Self:
        def _var(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.var(how="pop")
            elif ddof == 1:
                return expr.var(how="sample")
            else:
                # General ddof: rescale the population variance by
                # n / (n - ddof).
                n_samples = expr.count()
                var_pop = expr.var(how="pop")
                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
                return var_pop * n_samples / (n_samples - ddof_lit)

        return self._with_callable(lambda expr: _var(expr, ddof))

    def max(self) -> Self:
        return self._with_callable(lambda expr: expr.max())

    def min(self) -> Self:
        return self._with_callable(lambda expr: expr.min())

    def null_count(self) -> Self:
        return self._with_callable(lambda expr: expr.isnull().sum())

    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
        def func(df: IbisLazyFrame) -> Sequence[ir.Value]:
            return self.window_function(df, WindowInputs(partition_by, order_by))

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
        )

    def is_null(self) -> Self:
        return self._with_callable(lambda expr: expr.isnull())

    def is_nan(self) -> Self:
        def func(expr: ir.FloatingValue | Any) -> ir.Value:
            # Null propagates as null; non-floating dtypes are never NaN.
            otherwise = expr.isnan() if is_floating(expr.type()) else False
            return ibis.ifelse(expr.isnull(), None, otherwise)

        return self._with_callable(func)

    def is_finite(self) -> Self:
        return self._with_callable(lambda expr: ~(expr.isinf() | expr.isnan()))

    def is_in(self, other: Sequence[Any]) -> Self:
        return self._with_callable(lambda expr: expr.isin(other))

    def round(self, decimals: int) -> Self:
        return self._with_callable(lambda expr: expr.round(decimals))

    def shift(self, n: int) -> Self:
        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            return [
                expr.lag(n).over(  # type: ignore[attr-defined, unused-ignore]
                    ibis.window(
                        group_by=inputs.partition_by,
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._with_window_function(_func)

    def is_first_distinct(self) -> Self:
        def func(
            df: IbisLazyFrame, inputs: IbisWindowInputs
        ) -> Sequence[ir.BooleanValue]:
            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
            return [
                ibis.row_number().over(
                    ibis.window(
                        group_by=[*inputs.partition_by, expr],
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                == lit(0)
                for expr in self(df)
            ]

        return self._with_window_function(func)

    def is_last_distinct(self) -> Self:
        def func(
            df: IbisLazyFrame, inputs: IbisWindowInputs
        ) -> Sequence[ir.BooleanValue]:
            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
            return [
                ibis.row_number().over(
                    ibis.window(
                        group_by=[*inputs.partition_by, expr],
                        order_by=self._sort(
                            *inputs.order_by, descending=True, nulls_last=True
                        ),
                    )
                )
                == lit(0)
                for expr in self(df)
            ]

        return self._with_window_function(func)

    def diff(self) -> Self:
        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            return [
                expr
                - expr.lag().over(  # type: ignore[attr-defined, unused-ignore]
                    ibis.window(
                        following=0,
                        group_by=inputs.partition_by,
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._with_window_function(_func)

    def cum_sum(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="sum")
        )

    def cum_max(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="max")
        )

    def cum_min(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="min")
        )

    def cum_count(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="count")
        )

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="sum",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
            )
        )

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="mean",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
            )
        )

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="var",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
                ddof=ddof,
            )
        )

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="std",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
                ddof=ddof,
            )
        )

    def fill_null(self, value: Self | Any, strategy: Any, limit: int | None) -> Self:
        # Ibis doesn't yet allow ignoring nulls in first/last with window functions, which makes forward/backward
        # strategies inconsistent when there are nulls present: https://github.com/ibis-project/ibis/issues/9539
        if strategy is not None:
            msg = "`strategy` is not supported for the Ibis backend"
            raise NotImplementedError(msg)
        if limit is not None:
            msg = "`limit` is not supported for the Ibis backend"  # pragma: no cover
            raise NotImplementedError(msg)

        def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value:
            return expr.fill_null(value)

        return self._with_callable(_fill_null, value=value)

    def cast(self, dtype: IntoDType) -> Self:
        def _func(expr: ir.Column) -> ir.Value:
            native_dtype = narwhals_to_native_dtype(dtype, self._version)
            # ibis `cast` overloads do not include DataType, only literals
            return expr.cast(native_dtype)  # type: ignore[unused-ignore]

        return self._with_callable(_func)

    def is_unique(self) -> Self:
        # Count rows per distinct value (nulls form their own group); a value
        # is unique iff its group has exactly one row.
        return self._with_callable(
            lambda expr: expr.isnull().count().over(ibis.window(group_by=(expr))) == 1
        )

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        def _rank(expr: ir.Column) -> ir.Column:
            order_by = next(self._sort(expr, descending=descending, nulls_last=True))
            window = ibis.window(order_by=order_by)

            if method == "dense":
                rank_ = order_by.dense_rank()
            elif method == "ordinal":
                rank_ = cast("ir.IntegerColumn", ibis.row_number().over(window))
            else:
                rank_ = order_by.rank()

            # Ibis uses 0-based ranking. Add 1 to match polars 1-based rank.
            rank_ = rank_ + cast("ir.IntegerValue", lit(1))

            # For "max" and "average", adjust using the count of rows in the partition.
            if method == "max":
                # Define a window partitioned by expr (i.e. each distinct value)
                partition = ibis.window(group_by=[expr])
                cnt = cast("ir.IntegerValue", expr.count().over(partition))
                rank_ = rank_ + cnt - cast("ir.IntegerValue", lit(1))
            elif method == "average":
                partition = ibis.window(group_by=[expr])
                cnt = cast("ir.IntegerValue", expr.count().over(partition))
                avg = cast(
                    "ir.NumericValue", (cnt - cast("ir.IntegerScalar", lit(1))) / lit(2.0)
                )
                rank_ = rank_ + avg

            # Nulls get a null rank.
            return cast("ir.Column", ibis.cases((expr.notnull(), rank_)))

        return self._with_callable(_rank)

    def log(self, base: float) -> Self:
        def _log(expr: ir.NumericColumn) -> ir.Value:
            # Match polars: log of negative is NaN, log(0) is -inf.
            otherwise = expr.log(cast("ir.NumericValue", lit(base)))
            return ibis.cases(
                (expr < lit(0), lit(float("nan"))),
                (expr == lit(0), lit(float("-inf"))),
                else_=otherwise,
            )

        return self._with_callable(_log)

    def exp(self) -> Self:
        def _exp(expr: ir.NumericColumn) -> ir.Value:
            return expr.exp()

        return self._with_callable(_exp)

    @property
    def str(self) -> IbisExprStringNamespace:
        return IbisExprStringNamespace(self)

    @property
    def dt(self) -> IbisExprDateTimeNamespace:
        return IbisExprDateTimeNamespace(self)

    @property
    def list(self) -> IbisExprListNamespace:
        return IbisExprListNamespace(self)

    @property
    def struct(self) -> IbisExprStructNamespace:
        return IbisExprStructNamespace(self)

    # NOTE: https://github.com/ibis-project/ibis/issues/10542
    cum_prod = not_implemented()
    drop_nulls = not_implemented()

    # NOTE: https://github.com/ibis-project/ibis/issues/11176
    skew = not_implemented()
    unique = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py
new file mode 100644
index 0000000..14d9d06
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_dt.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable
+
+from narwhals._duration import parse_interval_string
+from narwhals._ibis.utils import UNITS_DICT_BUCKET, UNITS_DICT_TRUNCATE
+from narwhals._utils import not_implemented
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir
+
+ from narwhals._ibis.expr import IbisExpr
+ from narwhals._ibis.utils import BucketUnit, TruncateUnit
+
+
class IbisExprDateTimeNamespace:
    """`Expr.dt` namespace for the Ibis backend."""

    def __init__(self, expr: IbisExpr) -> None:
        self._compliant_expr = expr

    def year(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.year())

    def month(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.month())

    def day(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.day())

    def hour(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.hour())

    def minute(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.minute())

    def second(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.second())

    def millisecond(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.millisecond())

    def microsecond(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.microsecond())

    def to_string(self, format: str) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.strftime(format))

    def weekday(self) -> IbisExpr:
        # Ibis uses 0-6 for Monday-Sunday. Add 1 to match polars.
        return self._compliant_expr._with_callable(
            lambda expr: expr.day_of_week.index() + 1
        )

    def ordinal_day(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.day_of_year())

    def date(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.date())

    def _bucket(self, kwds: dict[BucketUnit, Any], /) -> Callable[..., ir.TimestampValue]:
        # Used for `truncate` with a multiple > 1 (e.g. "15m" buckets).
        def fn(expr: ir.TimestampValue) -> ir.TimestampValue:
            return expr.bucket(**kwds)

        return fn

    def _truncate(self, unit: TruncateUnit, /) -> Callable[..., ir.TimestampValue]:
        # Used for `truncate` with a multiple of exactly 1.
        def fn(expr: ir.TimestampValue) -> ir.TimestampValue:
            return expr.truncate(unit)

        return fn

    def truncate(self, every: str) -> IbisExpr:
        """Truncate datetimes down to the start of the given interval."""
        multiple, unit = parse_interval_string(every)
        # Quarters are expressed as 3-month buckets.
        if unit == "q":
            multiple, unit = 3 * multiple, "mo"
        if multiple != 1:
            if self._compliant_expr._backend_version < (7, 1):  # pragma: no cover
                msg = "Truncating datetimes with multiples of the unit is only supported in Ibis >= 7.1."
                raise NotImplementedError(msg)
            fn = self._bucket({UNITS_DICT_BUCKET[unit]: multiple})
        else:
            fn = self._truncate(UNITS_DICT_TRUNCATE[unit])
        return self._compliant_expr._with_callable(fn)

    def replace_time_zone(self, time_zone: str | None) -> IbisExpr:
        # Only dropping the timezone is supported: cast to a naive timestamp.
        if time_zone is None:
            return self._compliant_expr._with_callable(
                lambda _input: _input.cast("timestamp")
            )
        else:  # pragma: no cover
            msg = "`replace_time_zone` with non-null `time_zone` not yet implemented for Ibis"
            raise NotImplementedError(msg)

    nanosecond = not_implemented()
    total_minutes = not_implemented()
    total_seconds = not_implemented()
    total_milliseconds = not_implemented()
    total_microseconds = not_implemented()
    total_nanoseconds = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py
new file mode 100644
index 0000000..b29fc83
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_list.py
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from narwhals._ibis.expr import IbisExpr
+
+
class IbisExprListNamespace:
    """`Expr.list` namespace for the Ibis backend."""

    def __init__(self, expr: IbisExpr) -> None:
        self._compliant_expr = expr

    def len(self) -> IbisExpr:
        """Element count of each list value."""

        def _length(expr):
            return expr.length()

        return self._compliant_expr._with_callable(_length)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py
new file mode 100644
index 0000000..1c0d6e5
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_str.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable
+
+from ibis.expr.datatypes import Timestamp
+
+from narwhals._utils import _is_naive_format, not_implemented
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir
+
+ from narwhals._ibis.expr import IbisExpr
+
+
class IbisExprStringNamespace:
    """`Expr.str` namespace for the Ibis backend."""

    def __init__(self, expr: IbisExpr) -> None:
        self._compliant_expr = expr

    def starts_with(self, prefix: str) -> IbisExpr:
        def fn(expr: ir.StringColumn) -> ir.BooleanValue:
            return expr.startswith(prefix)

        return self._compliant_expr._with_callable(fn)

    def ends_with(self, suffix: str) -> IbisExpr:
        def fn(expr: ir.StringColumn) -> ir.BooleanValue:
            return expr.endswith(suffix)

        return self._compliant_expr._with_callable(fn)

    def contains(self, pattern: str, *, literal: bool) -> IbisExpr:
        # literal=True does plain substring search; otherwise regex search.
        def fn(expr: ir.StringColumn) -> ir.BooleanValue:
            return expr.contains(pattern) if literal else expr.re_search(pattern)

        return self._compliant_expr._with_callable(fn)

    def slice(self, offset: int, length: int) -> IbisExpr:
        def fn(expr: ir.StringColumn) -> ir.StringValue:
            return expr.substr(start=offset, length=length)

        return self._compliant_expr._with_callable(fn)

    def split(self, by: str) -> IbisExpr:
        def fn(expr: ir.StringColumn) -> ir.ArrayValue:
            return expr.split(by)

        return self._compliant_expr._with_callable(fn)

    def len_chars(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.length())

    def to_lowercase(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.lower())

    def to_uppercase(self) -> IbisExpr:
        return self._compliant_expr._with_callable(lambda expr: expr.upper())

    def strip_chars(self, characters: str | None) -> IbisExpr:
        if characters is not None:
            msg = "Ibis does not support `characters` argument in `str.strip_chars`"
            raise NotImplementedError(msg)

        return self._compliant_expr._with_callable(lambda expr: expr.strip())

    def _replace_all(self, pattern: str, value: str) -> Callable[..., ir.StringValue]:
        # Regex replacement (literal=False).
        def fn(expr: ir.StringColumn) -> ir.StringValue:
            return expr.re_replace(pattern, value)

        return fn

    def _replace_all_literal(
        self, pattern: str, value: str
    ) -> Callable[..., ir.StringValue]:
        # Plain substring replacement (literal=True).
        def fn(expr: ir.StringColumn) -> ir.StringValue:
            return expr.replace(pattern, value)  # pyright: ignore[reportArgumentType]

        return fn

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> IbisExpr:
        fn = self._replace_all_literal if literal else self._replace_all
        return self._compliant_expr._with_callable(fn(pattern, value))

    def _to_datetime(self, format: str) -> Callable[..., ir.TimestampValue]:
        def fn(expr: ir.StringColumn) -> ir.TimestampValue:
            return expr.as_timestamp(format)

        return fn

    def _to_datetime_naive(self, format: str) -> Callable[..., ir.TimestampValue]:
        # Strip the timezone that some backends attach when the format itself
        # carries no timezone information.
        def fn(expr: ir.StringColumn) -> ir.TimestampValue:
            dtype: Any = Timestamp(timezone=None)
            return expr.as_timestamp(format).cast(dtype)

        return fn

    def to_datetime(self, format: str | None) -> IbisExpr:
        """Parse strings to timestamps; `format` is required for Ibis."""
        if format is None:
            msg = "Cannot infer format with Ibis backend"
            raise NotImplementedError(msg)
        fn = self._to_datetime_naive if _is_naive_format(format) else self._to_datetime
        return self._compliant_expr._with_callable(fn(format))

    replace = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py
new file mode 100644
index 0000000..f268281
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/expr_struct.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir
+
+ from narwhals._ibis.expr import IbisExpr
+
+
class IbisExprStructNamespace:
    """`Expr.struct` namespace for the Ibis backend."""

    def __init__(self, expr: IbisExpr) -> None:
        self._compliant_expr = expr

    def field(self, name: str) -> IbisExpr:
        """Extract struct field `name`; the output column is named `name`."""
        return self._compliant_expr._with_callable(lambda expr: expr[name]).alias(name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py
new file mode 100644
index 0000000..54fa037
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/group_by.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._compliant import LazyGroupBy
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir # noqa: F401
+
+ from narwhals._ibis.dataframe import IbisLazyFrame
+ from narwhals._ibis.expr import IbisExpr
+
+
class IbisGroupBy(LazyGroupBy["IbisLazyFrame", "IbisExpr", "ir.Value"]):
    """Lazy group-by for the Ibis backend."""

    def __init__(
        self,
        df: IbisLazyFrame,
        keys: Sequence[str] | Sequence[IbisExpr],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        # `_parse_keys` resolves expression keys to column names and may add
        # temporary columns to `frame`.
        frame, self._keys, self._output_key_names = self._parse_keys(df, keys=keys)
        self._compliant_frame = frame.drop_nulls(self._keys) if drop_null_keys else frame

    def agg(self, *exprs: IbisExpr) -> IbisLazyFrame:
        """Aggregate with `exprs`, restoring the user-facing key names."""
        native = self.compliant.native
        return self.compliant._with_native(
            native.group_by(self._keys).aggregate(*self._evaluate_exprs(exprs))
        ).rename(dict(zip(self._keys, self._output_key_names)))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py
new file mode 100644
index 0000000..25393cf
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/namespace.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+import operator
+from functools import reduce
+from itertools import chain
+from typing import TYPE_CHECKING, Any, Iterable, Sequence, cast
+
+import ibis
+import ibis.expr.types as ir
+
+from narwhals._compliant import LazyNamespace, LazyThen, LazyWhen
+from narwhals._expression_parsing import (
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._ibis.dataframe import IbisLazyFrame
+from narwhals._ibis.expr import IbisExpr
+from narwhals._ibis.selectors import IbisSelectorNamespace
+from narwhals._ibis.utils import lit, narwhals_to_native_dtype
+from narwhals._utils import Implementation, requires
+
+if TYPE_CHECKING:
+ from narwhals._utils import Version
+ from narwhals.typing import ConcatMethod, IntoDType
+
+
+class IbisNamespace(LazyNamespace[IbisLazyFrame, IbisExpr, "ir.Table"]):
+ _implementation: Implementation = Implementation.IBIS
+
+ def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
+ self._backend_version = backend_version
+ self._version = version
+
+ @property
+ def selectors(self) -> IbisSelectorNamespace:
+ return IbisSelectorNamespace.from_namespace(self)
+
+ @property
+ def _expr(self) -> type[IbisExpr]:
+ return IbisExpr
+
+ @property
+ def _lazyframe(self) -> type[IbisLazyFrame]:
+ return IbisLazyFrame
+
+ def concat(
+ self, items: Iterable[IbisLazyFrame], *, how: ConcatMethod
+ ) -> IbisLazyFrame:
+ if how == "diagonal":
+ msg = "diagonal concat not supported for Ibis. Please join instead."
+ raise NotImplementedError(msg)
+
+ items = list(items)
+ native_items = [item.native for item in items]
+ schema = items[0].schema
+ if not all(x.schema == schema for x in items[1:]):
+ msg = "inputs should all have the same schema"
+ raise TypeError(msg)
+ return self._lazyframe.from_native(ibis.union(*native_items), context=self)
+
+ def concat_str(
+ self, *exprs: IbisExpr, separator: str, ignore_nulls: bool
+ ) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ cols = list(chain.from_iterable(expr(df) for expr in exprs))
+ cols_casted = [s.cast("string") for s in cols]
+
+ if not ignore_nulls:
+ result = cols_casted[0]
+ for col in cols_casted[1:]:
+ result = result + separator + col
+ else:
+ sep = cast("ir.StringValue", lit(separator))
+ result = sep.join(cols_casted)
+
+ return [result]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def all_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ cols = chain.from_iterable(expr(df) for expr in exprs)
+ return [reduce(operator.and_, cols)]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def any_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ cols = chain.from_iterable(expr(df) for expr in exprs)
+ return [reduce(operator.or_, cols)]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def max_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ cols = chain.from_iterable(expr(df) for expr in exprs)
+ return [ibis.greatest(*cols)]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def min_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ cols = chain.from_iterable(expr(df) for expr in exprs)
+ return [ibis.least(*cols)]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def sum_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ cols = [e.fill_null(lit(0)) for _expr in exprs for e in _expr(df)]
+ return [reduce(operator.add, cols)]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
+ def func(df: IbisLazyFrame) -> list[ir.Value]:
+ expr = (
+ cast("ir.NumericColumn", e.fill_null(lit(0)))
+ for _expr in exprs
+ for e in _expr(df)
+ )
+ non_null = (
+ cast("ir.NumericColumn", e.isnull().ifelse(lit(0), lit(1)))
+ for _expr in exprs
+ for e in _expr(df)
+ )
+
+ return [
+ (reduce(lambda x, y: x + y, expr) / reduce(lambda x, y: x + y, non_null))
+ ]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=combine_evaluate_output_names(*exprs),
+ alias_output_names=combine_alias_output_names(*exprs),
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ @requires.backend_version((10, 0))
+ def when(self, predicate: IbisExpr) -> IbisWhen:
+ return IbisWhen.from_expr(predicate, context=self)
+
+ def lit(self, value: Any, dtype: IntoDType | None) -> IbisExpr:
+ def func(_df: IbisLazyFrame) -> list[ir.Value]:
+ ibis_dtype = narwhals_to_native_dtype(dtype, self._version) if dtype else None
+ return [lit(value, ibis_dtype)]
+
+ return self._expr(
+ func,
+ evaluate_output_names=lambda _df: ["literal"],
+ alias_output_names=None,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+ def len(self) -> IbisExpr:
+ def func(_df: IbisLazyFrame) -> list[ir.Value]:
+ return [_df.native.count()]
+
+ return self._expr(
+ call=func,
+ evaluate_output_names=lambda _df: ["len"],
+ alias_output_names=None,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+
+
+class IbisWhen(LazyWhen["IbisLazyFrame", "ir.Value", IbisExpr]):
+ lit = lit
+
+ @property
+ def _then(self) -> type[IbisThen]:
+ return IbisThen
+
+ def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
+ is_expr = self._condition._is_expr
+ condition = df._evaluate_expr(self._condition)
+ then_ = self._then_value
+ then = df._evaluate_expr(then_) if is_expr(then_) else lit(then_)
+ other_ = self._otherwise_value
+ if other_ is None:
+ result = ibis.cases((condition, then))
+ else:
+ otherwise = df._evaluate_expr(other_) if is_expr(other_) else lit(other_)
+ result = ibis.cases((condition, then), else_=otherwise)
+ return [result]
+
+
+class IbisThen(LazyThen["IbisLazyFrame", "ir.Value", IbisExpr], IbisExpr): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py
new file mode 100644
index 0000000..f96243b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/selectors.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, LazySelectorNamespace
+from narwhals._ibis.expr import IbisExpr
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir # noqa: F401
+
+ from narwhals._ibis.dataframe import IbisLazyFrame # noqa: F401
+
+
+class IbisSelectorNamespace(LazySelectorNamespace["IbisLazyFrame", "ir.Value"]):
+ @property
+ def _selector(self) -> type[IbisSelector]:
+ return IbisSelector
+
+
+class IbisSelector( # type: ignore[misc]
+ CompliantSelector["IbisLazyFrame", "ir.Value"], IbisExpr
+):
+ def _to_expr(self) -> IbisExpr:
+ return IbisExpr(
+ self._call,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/series.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/series.py
new file mode 100644
index 0000000..3c55d3c
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/series.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, NoReturn
+
+from narwhals._ibis.utils import native_to_narwhals_dtype
+from narwhals.dependencies import get_ibis
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ from typing_extensions import Self
+
+ from narwhals._utils import Version
+ from narwhals.dtypes import DType
+
+
+class IbisInterchangeSeries:
+ def __init__(self, df: Any, version: Version) -> None:
+ self._native_series = df
+ self._version = version
+
+ def __narwhals_series__(self) -> Self:
+ return self
+
+ def __native_namespace__(self) -> ModuleType:
+ return get_ibis()
+
+ @property
+ def dtype(self) -> DType:
+ return native_to_narwhals_dtype(
+ self._native_series.schema().types[0], self._version
+ )
+
+ def __getattr__(self, attr: str) -> NoReturn:
+ msg = (
+ f"Attribute {attr} is not supported for interchange-level dataframes.\n\n"
+ "If you would like to see this kind of object better supported in "
+ "Narwhals, please open a feature request "
+ "at https://github.com/narwhals-dev/narwhals/issues."
+ )
+ raise NotImplementedError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py b/venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py
new file mode 100644
index 0000000..477781f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_ibis/utils.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import TYPE_CHECKING, Any, Literal, Mapping
+
+import ibis
+import ibis.expr.datatypes as ibis_dtypes
+
+from narwhals._utils import isinstance_or_issubclass
+
+if TYPE_CHECKING:
+ import ibis.expr.types as ir
+ from ibis.expr.datatypes import DataType as IbisDataType
+ from typing_extensions import TypeAlias, TypeIs
+
+ from narwhals._duration import IntervalUnit
+ from narwhals._ibis.dataframe import IbisLazyFrame
+ from narwhals._ibis.expr import IbisExpr
+ from narwhals._utils import Version
+ from narwhals.dtypes import DType
+ from narwhals.typing import IntoDType
+
+lit = ibis.literal
+"""Alias for `ibis.literal`."""
+
+BucketUnit: TypeAlias = Literal[
+ "years",
+ "quarters",
+ "months",
+ "days",
+ "hours",
+ "minutes",
+ "seconds",
+ "milliseconds",
+ "microseconds",
+ "nanoseconds",
+]
+TruncateUnit: TypeAlias = Literal[
+ "Y", "Q", "M", "W", "D", "h", "m", "s", "ms", "us", "ns"
+]
+
+UNITS_DICT_BUCKET: Mapping[IntervalUnit, BucketUnit] = {
+ "y": "years",
+ "q": "quarters",
+ "mo": "months",
+ "d": "days",
+ "h": "hours",
+ "m": "minutes",
+ "s": "seconds",
+ "ms": "milliseconds",
+ "us": "microseconds",
+ "ns": "nanoseconds",
+}
+
+UNITS_DICT_TRUNCATE: Mapping[IntervalUnit, TruncateUnit] = {
+ "y": "Y",
+ "q": "Q",
+ "mo": "M",
+ "d": "D",
+ "h": "h",
+ "m": "m",
+ "s": "s",
+ "ms": "ms",
+ "us": "us",
+ "ns": "ns",
+}
+
+
+def evaluate_exprs(df: IbisLazyFrame, /, *exprs: IbisExpr) -> list[tuple[str, ir.Value]]:
+ native_results: list[tuple[str, ir.Value]] = []
+ for expr in exprs:
+ native_series_list = expr(df)
+ output_names = expr._evaluate_output_names(df)
+ if expr._alias_output_names is not None:
+ output_names = expr._alias_output_names(output_names)
+ if len(output_names) != len(native_series_list): # pragma: no cover
+ msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results"
+ raise AssertionError(msg)
+ native_results.extend(zip(output_names, native_series_list))
+ return native_results
+
+
+@lru_cache(maxsize=16)
+def native_to_narwhals_dtype(ibis_dtype: IbisDataType, version: Version) -> DType: # noqa: C901, PLR0912
+ dtypes = version.dtypes
+ if ibis_dtype.is_int64():
+ return dtypes.Int64()
+ if ibis_dtype.is_int32():
+ return dtypes.Int32()
+ if ibis_dtype.is_int16():
+ return dtypes.Int16()
+ if ibis_dtype.is_int8():
+ return dtypes.Int8()
+ if ibis_dtype.is_uint64():
+ return dtypes.UInt64()
+ if ibis_dtype.is_uint32():
+ return dtypes.UInt32()
+ if ibis_dtype.is_uint16():
+ return dtypes.UInt16()
+ if ibis_dtype.is_uint8():
+ return dtypes.UInt8()
+ if ibis_dtype.is_boolean():
+ return dtypes.Boolean()
+ if ibis_dtype.is_float64():
+ return dtypes.Float64()
+ if ibis_dtype.is_float32():
+ return dtypes.Float32()
+ if ibis_dtype.is_string():
+ return dtypes.String()
+ if ibis_dtype.is_date():
+ return dtypes.Date()
+ if ibis_dtype.is_timestamp():
+ return dtypes.Datetime()
+ if is_interval(ibis_dtype):
+ _time_unit = ibis_dtype.unit.value
+ if _time_unit not in {"ns", "us", "ms", "s"}: # pragma: no cover
+ msg = f"Unsupported interval unit: {_time_unit}"
+ raise NotImplementedError(msg)
+ return dtypes.Duration(_time_unit)
+ if is_array(ibis_dtype):
+ if ibis_dtype.length:
+ return dtypes.Array(
+ native_to_narwhals_dtype(ibis_dtype.value_type, version),
+ ibis_dtype.length,
+ )
+ else:
+ return dtypes.List(native_to_narwhals_dtype(ibis_dtype.value_type, version))
+ if is_struct(ibis_dtype):
+ return dtypes.Struct(
+ [
+ dtypes.Field(name, native_to_narwhals_dtype(dtype, version))
+ for name, dtype in ibis_dtype.items()
+ ]
+ )
+ if ibis_dtype.is_decimal(): # pragma: no cover
+ return dtypes.Decimal()
+ if ibis_dtype.is_time():
+ return dtypes.Time()
+ if ibis_dtype.is_binary():
+ return dtypes.Binary()
+ return dtypes.Unknown() # pragma: no cover
+
+
+def is_interval(obj: IbisDataType) -> TypeIs[ibis_dtypes.Interval]:
+ return obj.is_interval()
+
+
+def is_array(obj: IbisDataType) -> TypeIs[ibis_dtypes.Array[Any]]:
+ return obj.is_array()
+
+
+def is_struct(obj: IbisDataType) -> TypeIs[ibis_dtypes.Struct]:
+ return obj.is_struct()
+
+
+def is_floating(obj: IbisDataType) -> TypeIs[ibis_dtypes.Floating]:
+ return obj.is_floating()
+
+
+def narwhals_to_native_dtype( # noqa: C901, PLR0912
+ dtype: IntoDType, version: Version
+) -> IbisDataType:
+ dtypes = version.dtypes
+
+ if isinstance_or_issubclass(dtype, dtypes.Decimal): # pragma: no cover
+ return ibis_dtypes.Decimal()
+ if isinstance_or_issubclass(dtype, dtypes.Float64):
+ return ibis_dtypes.Float64()
+ if isinstance_or_issubclass(dtype, dtypes.Float32):
+ return ibis_dtypes.Float32()
+ if isinstance_or_issubclass(dtype, dtypes.Int128): # pragma: no cover
+ msg = "Int128 not supported by Ibis"
+ raise NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Int64):
+ return ibis_dtypes.Int64()
+ if isinstance_or_issubclass(dtype, dtypes.Int32):
+ return ibis_dtypes.Int32()
+ if isinstance_or_issubclass(dtype, dtypes.Int16):
+ return ibis_dtypes.Int16()
+ if isinstance_or_issubclass(dtype, dtypes.Int8):
+ return ibis_dtypes.Int8()
+ if isinstance_or_issubclass(dtype, dtypes.UInt128): # pragma: no cover
+ msg = "UInt128 not supported by Ibis"
+ raise NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.UInt64):
+ return ibis_dtypes.UInt64()
+ if isinstance_or_issubclass(dtype, dtypes.UInt32):
+ return ibis_dtypes.UInt32()
+ if isinstance_or_issubclass(dtype, dtypes.UInt16):
+ return ibis_dtypes.UInt16()
+ if isinstance_or_issubclass(dtype, dtypes.UInt8):
+ return ibis_dtypes.UInt8()
+ if isinstance_or_issubclass(dtype, dtypes.String):
+ return ibis_dtypes.String()
+ if isinstance_or_issubclass(dtype, dtypes.Boolean):
+ return ibis_dtypes.Boolean()
+ if isinstance_or_issubclass(dtype, dtypes.Categorical):
+ msg = "Categorical not supported by Ibis"
+ raise NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Datetime):
+ return ibis_dtypes.Timestamp()
+ if isinstance_or_issubclass(dtype, dtypes.Duration):
+ return ibis_dtypes.Interval(unit=dtype.time_unit) # pyright: ignore[reportArgumentType]
+ if isinstance_or_issubclass(dtype, dtypes.Date):
+ return ibis_dtypes.Date()
+ if isinstance_or_issubclass(dtype, dtypes.Time):
+ return ibis_dtypes.Time()
+ if isinstance_or_issubclass(dtype, dtypes.List):
+ inner = narwhals_to_native_dtype(dtype.inner, version)
+ return ibis_dtypes.Array(value_type=inner)
+ if isinstance_or_issubclass(dtype, dtypes.Struct):
+ fields = [
+ (field.name, narwhals_to_native_dtype(field.dtype, version))
+ for field in dtype.fields
+ ]
+ return ibis_dtypes.Struct.from_tuples(fields)
+ if isinstance_or_issubclass(dtype, dtypes.Array):
+ inner = narwhals_to_native_dtype(dtype.inner, version)
+ return ibis_dtypes.Array(value_type=inner, length=dtype.size)
+ if isinstance_or_issubclass(dtype, dtypes.Binary):
+ return ibis_dtypes.Binary()
+ if isinstance_or_issubclass(dtype, dtypes.Enum):
+ # Ibis does not support: https://github.com/ibis-project/ibis/issues/10991
+ msg = "Enum not supported by Ibis"
+ raise NotImplementedError(msg)
+ msg = f"Unknown dtype: {dtype}" # pragma: no cover
+ raise AssertionError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_interchange/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_interchange/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_interchange/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_interchange/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_interchange/dataframe.py
new file mode 100644
index 0000000..f508ed8
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_interchange/dataframe.py
@@ -0,0 +1,156 @@
+from __future__ import annotations
+
+import enum
+from typing import TYPE_CHECKING, Any, NoReturn
+
+from narwhals._utils import Version, parse_version
+
+if TYPE_CHECKING:
+ import pandas as pd
+ import pyarrow as pa
+ from typing_extensions import Self
+
+ from narwhals._interchange.series import InterchangeSeries
+ from narwhals.dtypes import DType
+ from narwhals.typing import DataFrameLike
+
+
+class DtypeKind(enum.IntEnum):
+ # https://data-apis.org/dataframe-protocol/latest/API.html
+ INT = 0
+ UINT = 1
+ FLOAT = 2
+ BOOL = 20
+ STRING = 21 # UTF-8
+ DATETIME = 22
+ CATEGORICAL = 23
+
+
+def map_interchange_dtype_to_narwhals_dtype( # noqa: C901, PLR0911, PLR0912
+ interchange_dtype: tuple[DtypeKind, int, Any, Any],
+) -> DType:
+ dtypes = Version.V1.dtypes
+ if interchange_dtype[0] == DtypeKind.INT:
+ if interchange_dtype[1] == 64:
+ return dtypes.Int64()
+ if interchange_dtype[1] == 32:
+ return dtypes.Int32()
+ if interchange_dtype[1] == 16:
+ return dtypes.Int16()
+ if interchange_dtype[1] == 8:
+ return dtypes.Int8()
+ msg = "Invalid bit width for INT" # pragma: no cover
+ raise AssertionError(msg)
+ if interchange_dtype[0] == DtypeKind.UINT:
+ if interchange_dtype[1] == 64:
+ return dtypes.UInt64()
+ if interchange_dtype[1] == 32:
+ return dtypes.UInt32()
+ if interchange_dtype[1] == 16:
+ return dtypes.UInt16()
+ if interchange_dtype[1] == 8:
+ return dtypes.UInt8()
+ msg = "Invalid bit width for UINT" # pragma: no cover
+ raise AssertionError(msg)
+ if interchange_dtype[0] == DtypeKind.FLOAT:
+ if interchange_dtype[1] == 64:
+ return dtypes.Float64()
+ if interchange_dtype[1] == 32:
+ return dtypes.Float32()
+ msg = "Invalid bit width for FLOAT" # pragma: no cover
+ raise AssertionError(msg)
+ if interchange_dtype[0] == DtypeKind.BOOL:
+ return dtypes.Boolean()
+ if interchange_dtype[0] == DtypeKind.STRING:
+ return dtypes.String()
+ if interchange_dtype[0] == DtypeKind.DATETIME:
+ return dtypes.Datetime()
+ if interchange_dtype[0] == DtypeKind.CATEGORICAL: # pragma: no cover
+ # upstream issue: https://github.com/ibis-project/ibis/issues/9570
+ return dtypes.Categorical()
+ msg = f"Invalid dtype, got: {interchange_dtype}" # pragma: no cover
+ raise AssertionError(msg)
+
+
+class InterchangeFrame:
+ _version = Version.V1
+
+ def __init__(self, df: DataFrameLike) -> None:
+ self._interchange_frame = df.__dataframe__()
+
+ def __narwhals_dataframe__(self) -> Self:
+ return self
+
+ def __native_namespace__(self) -> NoReturn:
+ msg = (
+ "Cannot access native namespace for interchange-level dataframes with unknown backend."
+ "If you would like to see this kind of object supported in Narwhals, please "
+ "open a feature request at https://github.com/narwhals-dev/narwhals/issues."
+ )
+ raise NotImplementedError(msg)
+
+ def get_column(self, name: str) -> InterchangeSeries:
+ from narwhals._interchange.series import InterchangeSeries
+
+ return InterchangeSeries(self._interchange_frame.get_column_by_name(name))
+
+ def to_pandas(self) -> pd.DataFrame:
+ import pandas as pd # ignore-banned-import()
+
+ if parse_version(pd) >= (1, 5, 0):
+ return pd.api.interchange.from_dataframe(self._interchange_frame)
+ else: # pragma: no cover
+ msg = (
+ "Conversion to pandas is achieved via interchange protocol which requires"
+ f" 'pandas>=1.5.0' to be installed, found {pd.__version__}"
+ )
+ raise NotImplementedError(msg)
+
+ def to_arrow(self) -> pa.Table:
+ from pyarrow.interchange.from_dataframe import ( # ignore-banned-import()
+ from_dataframe,
+ )
+
+ return from_dataframe(self._interchange_frame)
+
+ @property
+ def schema(self) -> dict[str, DType]:
+ return {
+ column_name: map_interchange_dtype_to_narwhals_dtype(
+ self._interchange_frame.get_column_by_name(column_name).dtype
+ )
+ for column_name in self._interchange_frame.column_names()
+ }
+
+ @property
+ def columns(self) -> list[str]:
+ return list(self._interchange_frame.column_names())
+
+ def __getattr__(self, attr: str) -> NoReturn:
+ msg = (
+ f"Attribute {attr} is not supported for interchange-level dataframes.\n\n"
+ "Hint: you probably called `nw.from_native` on an object which isn't fully "
+ "supported by Narwhals, yet implements `__dataframe__`. If you would like to "
+ "see this kind of object supported in Narwhals, please open a feature request "
+ "at https://github.com/narwhals-dev/narwhals/issues."
+ )
+ raise NotImplementedError(msg)
+
+ def simple_select(self, *column_names: str) -> Self:
+ frame = self._interchange_frame.select_columns_by_name(list(column_names))
+ if not hasattr(frame, "_df"): # pragma: no cover
+ msg = (
+ "Expected interchange object to implement `_df` property to allow for recovering original object.\n"
+ "See https://github.com/data-apis/dataframe-api/issues/360."
+ )
+ raise NotImplementedError(msg)
+ return self.__class__(frame._df)
+
+ def select(self, *exprs: str) -> Self: # pragma: no cover
+ msg = (
+ "`select`-ing not by name is not supported for interchange-only level.\n\n"
+ "If you would like to see this kind of object better supported in "
+ "Narwhals, please open a feature request "
+ "at https://github.com/narwhals-dev/narwhals/issues."
+ )
+ raise NotImplementedError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_interchange/series.py b/venv/lib/python3.8/site-packages/narwhals/_interchange/series.py
new file mode 100644
index 0000000..67f062b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_interchange/series.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, NoReturn
+
+from narwhals._interchange.dataframe import map_interchange_dtype_to_narwhals_dtype
+from narwhals._utils import Version
+
+if TYPE_CHECKING:
+ from typing_extensions import Self
+
+ from narwhals.dtypes import DType
+
+
+class InterchangeSeries:
+ _version = Version.V1
+
+ def __init__(self, df: Any) -> None:
+ self._native_series = df
+
+ def __narwhals_series__(self) -> Self:
+ return self
+
+ def __native_namespace__(self) -> NoReturn:
+ msg = (
+ "Cannot access native namespace for interchange-level series with unknown backend. "
+ "If you would like to see this kind of object supported in Narwhals, please "
+ "open a feature request at https://github.com/narwhals-dev/narwhals/issues."
+ )
+ raise NotImplementedError(msg)
+
+ @property
+ def dtype(self) -> DType:
+ return map_interchange_dtype_to_narwhals_dtype(self._native_series.dtype)
+
+ @property
+ def native(self) -> Any:
+ return self._native_series
+
+ def __getattr__(self, attr: str) -> NoReturn:
+ msg = ( # pragma: no cover
+ f"Attribute {attr} is not supported for interchange-level dataframes.\n\n"
+ "Hint: you probably called `nw.from_native` on an object which isn't fully "
+ "supported by Narwhals, yet implements `__dataframe__`. If you would like to "
+ "see this kind of object supported in Narwhals, please open a feature request "
+ "at https://github.com/narwhals-dev/narwhals/issues."
+ )
+ raise NotImplementedError(msg)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_namespace.py b/venv/lib/python3.8/site-packages/narwhals/_namespace.py
new file mode 100644
index 0000000..2d52a16
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_namespace.py
@@ -0,0 +1,397 @@
+"""Narwhals-level equivalent of `CompliantNamespace`."""
+
+from __future__ import annotations
+
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Generic,
+ Literal,
+ Protocol,
+ TypeVar,
+ overload,
+)
+
+from narwhals._compliant.typing import CompliantNamespaceAny, CompliantNamespaceT_co
+from narwhals._utils import Implementation, Version
+from narwhals.dependencies import (
+ get_cudf,
+ get_modin,
+ get_pandas,
+ get_polars,
+ get_pyarrow,
+ is_dask_dataframe,
+ is_duckdb_relation,
+ is_ibis_table,
+ is_pyspark_connect_dataframe,
+ is_pyspark_dataframe,
+ is_sqlframe_dataframe,
+)
+
+if TYPE_CHECKING:
+ from types import ModuleType
+ from typing import ClassVar
+
+ import duckdb
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ import pyspark.sql as pyspark_sql
+ from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame
+ from typing_extensions import TypeAlias, TypeIs
+
+ from narwhals._arrow.namespace import ArrowNamespace
+ from narwhals._dask.namespace import DaskNamespace
+ from narwhals._duckdb.namespace import DuckDBNamespace
+ from narwhals._ibis.namespace import IbisNamespace
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._polars.namespace import PolarsNamespace
+ from narwhals._spark_like.dataframe import SQLFrameDataFrame
+ from narwhals._spark_like.namespace import SparkLikeNamespace
+ from narwhals.typing import DataFrameLike, NativeFrame, NativeLazyFrame, NativeSeries
+
+ T = TypeVar("T")
+
+ _Guard: TypeAlias = "Callable[[Any], TypeIs[T]]"
+
+ _Polars: TypeAlias = Literal["polars"]
+ _Arrow: TypeAlias = Literal["pyarrow"]
+ _Dask: TypeAlias = Literal["dask"]
+ _DuckDB: TypeAlias = Literal["duckdb"]
+ _PandasLike: TypeAlias = Literal["pandas", "cudf", "modin"]
+ _Ibis: TypeAlias = Literal["ibis"]
+ _SparkLike: TypeAlias = Literal["pyspark", "sqlframe", "pyspark[connect]"]
+ _EagerOnly: TypeAlias = "_PandasLike | _Arrow"
+ _EagerAllowed: TypeAlias = "_Polars | _EagerOnly"
+ _LazyOnly: TypeAlias = "_SparkLike | _Dask | _DuckDB | _Ibis"
+ _LazyAllowed: TypeAlias = "_Polars | _LazyOnly"
+
+ Polars: TypeAlias = Literal[_Polars, Implementation.POLARS]
+ Arrow: TypeAlias = Literal[_Arrow, Implementation.PYARROW]
+ Dask: TypeAlias = Literal[_Dask, Implementation.DASK]
+ DuckDB: TypeAlias = Literal[_DuckDB, Implementation.DUCKDB]
+ Ibis: TypeAlias = Literal[_Ibis, Implementation.IBIS]
+ PandasLike: TypeAlias = Literal[
+ _PandasLike, Implementation.PANDAS, Implementation.CUDF, Implementation.MODIN
+ ]
+ SparkLike: TypeAlias = Literal[
+ _SparkLike,
+ Implementation.PYSPARK,
+ Implementation.SQLFRAME,
+ Implementation.PYSPARK_CONNECT,
+ ]
+ EagerOnly: TypeAlias = "PandasLike | Arrow"
+ EagerAllowed: TypeAlias = "EagerOnly | Polars"
+ LazyOnly: TypeAlias = "SparkLike | Dask | DuckDB | Ibis"
+ LazyAllowed: TypeAlias = "LazyOnly | Polars"
+
+ BackendName: TypeAlias = "_EagerAllowed | _LazyAllowed"
+ IntoBackend: TypeAlias = "BackendName | Implementation | ModuleType"
+
+ EagerAllowedNamespace: TypeAlias = "Namespace[PandasLikeNamespace] | Namespace[ArrowNamespace] | Namespace[PolarsNamespace]"
+ EagerAllowedImplementation: TypeAlias = Literal[
+ Implementation.PANDAS,
+ Implementation.CUDF,
+ Implementation.MODIN,
+ Implementation.PYARROW,
+ Implementation.POLARS,
+ ]
+
+ class _NativeDask(Protocol):
+ _partition_type: type[pd.DataFrame]
+
+ class _NativeCuDF(Protocol):
+ def to_pylibcudf(self, *args: Any, **kwds: Any) -> Any: ...
+
+ class _NativeIbis(Protocol):
+ def sql(self, *args: Any, **kwds: Any) -> Any: ...
+ def __pyarrow_result__(self, *args: Any, **kwds: Any) -> Any: ...
+ def __pandas_result__(self, *args: Any, **kwds: Any) -> Any: ...
+ def __polars_result__(self, *args: Any, **kwds: Any) -> Any: ...
+
+ class _ModinDataFrame(Protocol):
+ _pandas_class: type[pd.DataFrame]
+
+ class _ModinSeries(Protocol):
+ _pandas_class: type[pd.Series[Any]]
+
+ _NativePolars: TypeAlias = "pl.DataFrame | pl.LazyFrame | pl.Series"
+ _NativeArrow: TypeAlias = "pa.Table | pa.ChunkedArray[Any]"
+ _NativeDuckDB: TypeAlias = "duckdb.DuckDBPyRelation"
+ _NativePandas: TypeAlias = "pd.DataFrame | pd.Series[Any]"
+ _NativeModin: TypeAlias = "_ModinDataFrame | _ModinSeries"
+ _NativePandasLike: TypeAlias = "_NativePandas | _NativeCuDF | _NativeModin"
+ _NativeSQLFrame: TypeAlias = "SQLFrameDataFrame"
+ _NativePySpark: TypeAlias = "pyspark_sql.DataFrame"
+ _NativePySparkConnect: TypeAlias = "PySparkConnectDataFrame"
+ _NativeSparkLike: TypeAlias = (
+ "_NativeSQLFrame | _NativePySpark | _NativePySparkConnect"
+ )
+
+ NativeKnown: TypeAlias = "_NativePolars | _NativeArrow | _NativePandasLike | _NativeSparkLike | _NativeDuckDB | _NativeDask | _NativeIbis"
+ NativeUnknown: TypeAlias = (
+ "NativeFrame | NativeSeries | NativeLazyFrame | DataFrameLike"
+ )
+ NativeAny: TypeAlias = "NativeKnown | NativeUnknown"
+
+__all__ = ["Namespace"]
+
+
+class Namespace(Generic[CompliantNamespaceT_co]):
+ _compliant_namespace: CompliantNamespaceT_co
+ _version: ClassVar[Version] = Version.MAIN
+
+ def __init__(self, namespace: CompliantNamespaceT_co, /) -> None:
+ self._compliant_namespace = namespace
+
+ def __init_subclass__(cls, *args: Any, version: Version, **kwds: Any) -> None:
+ super().__init_subclass__(*args, **kwds)
+
+ if isinstance(version, Version):
+ cls._version = version
+ else:
+ msg = f"Expected {Version} but got {type(version).__name__!r}"
+ raise TypeError(msg)
+
+ def __repr__(self) -> str:
+ return f"Namespace[{type(self.compliant).__name__}]"
+
+ @property
+ def compliant(self) -> CompliantNamespaceT_co:
+ return self._compliant_namespace
+
+ @property
+ def implementation(self) -> Implementation:
+ return self.compliant._implementation
+
+ @property
+ def version(self) -> Version:
+ return self._version
+
    @overload
    @classmethod
    def from_backend(cls, backend: PandasLike, /) -> Namespace[PandasLikeNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: Polars, /) -> Namespace[PolarsNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: Arrow, /) -> Namespace[ArrowNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: SparkLike, /) -> Namespace[SparkLikeNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: DuckDB, /) -> Namespace[DuckDBNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: Dask, /) -> Namespace[DaskNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: Ibis, /) -> Namespace[IbisNamespace]: ...

    @overload
    @classmethod
    def from_backend(cls, backend: EagerAllowed, /) -> EagerAllowedNamespace: ...

    @overload
    @classmethod
    def from_backend(
        cls, backend: IntoBackend, /
    ) -> Namespace[CompliantNamespaceAny]: ...

    @classmethod
    def from_backend(
        cls: type[Namespace[Any]], backend: IntoBackend, /
    ) -> Namespace[Any]:
        """Instantiate from native namespace module, string, or Implementation.

        Arguments:
            backend: native namespace module, string, or Implementation.

        Returns:
            Namespace.

        Examples:
            >>> from narwhals._namespace import Namespace
            >>> Namespace.from_backend("polars")
            Namespace[PolarsNamespace]
        """
        impl = Implementation.from_backend(backend)
        backend_version = impl._backend_version()
        version = cls._version
        ns: CompliantNamespaceAny
        # Each branch imports its backend module lazily, so optional
        # dependencies are only required when actually selected.
        if impl.is_pandas_like():
            from narwhals._pandas_like.namespace import PandasLikeNamespace

            ns = PandasLikeNamespace(
                implementation=impl, backend_version=backend_version, version=version
            )

        elif impl.is_polars():
            from narwhals._polars.namespace import PolarsNamespace

            ns = PolarsNamespace(backend_version=backend_version, version=version)
        elif impl.is_pyarrow():
            from narwhals._arrow.namespace import ArrowNamespace

            ns = ArrowNamespace(backend_version=backend_version, version=version)
        elif impl.is_spark_like():
            from narwhals._spark_like.namespace import SparkLikeNamespace

            ns = SparkLikeNamespace(
                implementation=impl, backend_version=backend_version, version=version
            )
        elif impl.is_duckdb():
            from narwhals._duckdb.namespace import DuckDBNamespace

            ns = DuckDBNamespace(backend_version=backend_version, version=version)
        elif impl.is_dask():
            from narwhals._dask.namespace import DaskNamespace

            ns = DaskNamespace(backend_version=backend_version, version=version)
        elif impl.is_ibis():
            from narwhals._ibis.namespace import IbisNamespace

            ns = IbisNamespace(backend_version=backend_version, version=version)
        else:
            # Unreachable as long as `Implementation.from_backend` succeeded.
            msg = "Not supported Implementation"  # pragma: no cover
            raise AssertionError(msg)
        return cls(ns)
+
+ @overload
+ @classmethod
+ def from_native_object(
+ cls, native: _NativePolars, /
+ ) -> Namespace[PolarsNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(
+ cls, native: _NativePandasLike, /
+ ) -> Namespace[PandasLikeNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(cls, native: _NativeArrow, /) -> Namespace[ArrowNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(
+ cls, native: _NativeSparkLike, /
+ ) -> Namespace[SparkLikeNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(
+ cls, native: _NativeDuckDB, /
+ ) -> Namespace[DuckDBNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(cls, native: _NativeDask, /) -> Namespace[DaskNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(cls, native: _NativeIbis, /) -> Namespace[IbisNamespace]: ...
+
+ @overload
+ @classmethod
+ def from_native_object(
+ cls, native: NativeUnknown, /
+ ) -> Namespace[CompliantNamespaceAny]: ...
+
+ @classmethod
+ def from_native_object( # noqa: PLR0911
+ cls: type[Namespace[Any]], native: NativeAny, /
+ ) -> Namespace[Any]:
+ if is_native_polars(native):
+ return cls.from_backend(Implementation.POLARS)
+ elif is_native_pandas(native):
+ return cls.from_backend(Implementation.PANDAS)
+ elif is_native_arrow(native):
+ return cls.from_backend(Implementation.PYARROW)
+ elif is_native_spark_like(native):
+ return cls.from_backend(
+ Implementation.SQLFRAME
+ if is_native_sqlframe(native)
+ else Implementation.PYSPARK_CONNECT
+ if is_native_pyspark_connect(native)
+ else Implementation.PYSPARK
+ )
+ elif is_native_dask(native):
+ return cls.from_backend(Implementation.DASK) # pragma: no cover
+ elif is_native_duckdb(native):
+ return cls.from_backend(Implementation.DUCKDB)
+ elif is_native_cudf(native): # pragma: no cover
+ return cls.from_backend(Implementation.CUDF)
+ elif is_native_modin(native): # pragma: no cover
+ return cls.from_backend(Implementation.MODIN)
+ elif is_native_ibis(native):
+ return cls.from_backend(Implementation.IBIS)
+ else:
+ msg = f"Unsupported type: {type(native).__qualname__!r}"
+ raise TypeError(msg)
+
+
def is_native_polars(obj: Any) -> TypeIs[_NativePolars]:
    """True if ``obj`` is a polars DataFrame, Series, or LazyFrame."""
    pl = get_polars()
    if pl is None:
        return False
    return isinstance(obj, (pl.DataFrame, pl.Series, pl.LazyFrame))
+
+
def is_native_arrow(obj: Any) -> TypeIs[_NativeArrow]:
    """True if ``obj`` is a pyarrow Table or ChunkedArray."""
    pa = get_pyarrow()
    if pa is None:
        return False
    return isinstance(obj, (pa.Table, pa.ChunkedArray))
+
+
def is_native_dask(obj: Any) -> TypeIs[_NativeDask]:
    """True if ``obj`` is a dask DataFrame."""
    result = is_dask_dataframe(obj)
    return result
+
+
# These dependency checks already have the right guard shape, so they are
# bound directly as typed guard aliases rather than wrapped in functions.
is_native_duckdb: _Guard[_NativeDuckDB] = is_duckdb_relation
is_native_sqlframe: _Guard[_NativeSQLFrame] = is_sqlframe_dataframe
is_native_pyspark: _Guard[_NativePySpark] = is_pyspark_dataframe
is_native_pyspark_connect: _Guard[_NativePySparkConnect] = is_pyspark_connect_dataframe
+
+
def is_native_pandas(obj: Any) -> TypeIs[_NativePandas]:
    """True if ``obj`` is a pandas DataFrame or Series."""
    pd = get_pandas()
    if pd is None:
        return False
    return isinstance(obj, (pd.DataFrame, pd.Series))
+
+
def is_native_modin(obj: Any) -> TypeIs[_NativeModin]:
    """True if ``obj`` is a modin DataFrame or Series."""
    mpd = get_modin()
    return mpd is not None and isinstance(obj, (mpd.DataFrame, mpd.Series))  # pragma: no cover
+
+
def is_native_cudf(obj: Any) -> TypeIs[_NativeCuDF]:
    """True if ``obj`` is a cuDF DataFrame or Series."""
    cudf = get_cudf()
    return cudf is not None and isinstance(obj, (cudf.DataFrame, cudf.Series))  # pragma: no cover
+
+
def is_native_pandas_like(obj: Any) -> TypeIs[_NativePandasLike]:
    """True if ``obj`` is a pandas, cuDF, or Modin DataFrame/Series."""
    guards = (is_native_pandas, is_native_cudf, is_native_modin)
    return any(guard(obj) for guard in guards)  # pragma: no cover
+
+
def is_native_spark_like(obj: Any) -> TypeIs[_NativeSparkLike]:
    """True if ``obj`` is a SQLFrame, PySpark, or PySpark-Connect DataFrame."""
    guards = (is_native_sqlframe, is_native_pyspark, is_native_pyspark_connect)
    return any(guard(obj) for guard in guards)
+
+
def is_native_ibis(obj: Any) -> TypeIs[_NativeIbis]:
    """True if ``obj`` is an ibis table."""
    result = is_ibis_table(obj)
    return result
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py
new file mode 100644
index 0000000..bf5287f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/dataframe.py
@@ -0,0 +1,1148 @@
+from __future__ import annotations
+
+from itertools import chain, product
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Iterable,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+
+import numpy as np
+
+from narwhals._compliant import EagerDataFrame
+from narwhals._pandas_like.series import PANDAS_TO_NUMPY_DTYPE_MISSING, PandasLikeSeries
+from narwhals._pandas_like.utils import (
+ align_and_extract_native,
+ align_series_full_broadcast,
+ check_column_names_are_unique,
+ get_dtype_backend,
+ native_to_narwhals_dtype,
+ object_native_to_narwhals_dtype,
+ rename,
+ select_columns_by_name,
+ set_index,
+)
+from narwhals._utils import (
+ Implementation,
+ _into_arrow_table,
+ _remap_full_join_keys,
+ exclude_column_names,
+ generate_temporary_column_name,
+ parse_columns_to_drop,
+ parse_version,
+ scale_bytes,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_pandas_like_dataframe
+from narwhals.exceptions import InvalidOperationError, ShapeError
+
+if TYPE_CHECKING:
+ from io import BytesIO
+ from pathlib import Path
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny, CompliantLazyFrameAny
+ from narwhals._pandas_like.expr import PandasLikeExpr
+ from narwhals._pandas_like.group_by import PandasLikeGroupBy
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._translate import IntoArrowTable
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.typing import (
+ AsofJoinStrategy,
+ DTypeBackend,
+ JoinStrategy,
+ PivotAgg,
+ SizedMultiIndexSelector,
+ SizedMultiNameSelector,
+ SizeUnit,
+ UniqueKeepStrategy,
+ _2DArray,
+ _SliceIndex,
+ _SliceName,
+ )
+
+ Constructor: TypeAlias = Callable[..., pd.DataFrame]
+
+
# NumPy dtypes that need no special handling when converting to an array.
# Frames whose columns are all in this set take the plain `to_numpy` fast
# path in `PandasLikeDataFrame.to_numpy` with no per-column conversion.
CLASSICAL_NUMPY_DTYPES: frozenset[np.dtype[Any]] = frozenset(
    [
        np.dtype("float64"),
        np.dtype("float32"),
        np.dtype("int64"),
        np.dtype("int32"),
        np.dtype("int16"),
        np.dtype("int8"),
        np.dtype("uint64"),
        np.dtype("uint32"),
        np.dtype("uint16"),
        np.dtype("uint8"),
        np.dtype("bool"),
        np.dtype("datetime64[s]"),
        np.dtype("datetime64[ms]"),
        np.dtype("datetime64[us]"),
        np.dtype("datetime64[ns]"),
        np.dtype("timedelta64[s]"),
        np.dtype("timedelta64[ms]"),
        np.dtype("timedelta64[us]"),
        np.dtype("timedelta64[ns]"),
        np.dtype("object"),
    ]
)
+
+
+class PandasLikeDataFrame(EagerDataFrame["PandasLikeSeries", "PandasLikeExpr", "Any"]):
    def __init__(
        self,
        native_dataframe: Any,
        *,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        version: Version,
        validate_column_names: bool,
    ) -> None:
        """Wrap a native pandas-like DataFrame.

        Arguments:
            native_dataframe: the pandas/modin/cuDF frame to wrap.
            implementation: which pandas-like backend produced the frame.
            backend_version: that backend's version, as an int tuple.
            version: narwhals API version in use.
            validate_column_names: if True, raise on duplicate column names.
        """
        self._native_frame = native_dataframe
        self._implementation = implementation
        self._backend_version = backend_version
        self._version = version
        # Fail fast on backends older than the minimum narwhals supports.
        validate_backend_version(self._implementation, self._backend_version)
        if validate_column_names:
            check_column_names_are_unique(native_dataframe.columns)
+
    @classmethod
    def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self:
        """Build a frame from an Arrow table (or Arrow-convertible object).

        The conversion route depends on the backend recorded in ``context``.
        """
        implementation = context._implementation
        tbl = _into_arrow_table(data, context)
        if implementation.is_pandas():
            native = tbl.to_pandas()
        elif implementation.is_modin():  # pragma: no cover
            from modin.pandas.utils import (
                from_arrow as mpd_from_arrow,  # pyright: ignore[reportAttributeAccessIssue]
            )

            native = mpd_from_arrow(tbl)
        elif implementation.is_cudf():  # pragma: no cover
            native = implementation.to_native_namespace().DataFrame.from_arrow(tbl)
        else:  # pragma: no cover
            msg = "congratulations, you entered unreachable code - please report a bug"
            raise AssertionError(msg)
        return cls.from_native(native, context=context)
+
    @classmethod
    def from_dict(
        cls,
        data: Mapping[str, Any],
        /,
        *,
        context: _FullContext,
        schema: Mapping[str, DType] | Schema | None,
    ) -> Self:
        """Build a frame from a mapping of column name to values.

        Native series values are index-aligned against the first native
        series encountered; plain sequences are passed through unchanged.
        When ``schema`` is given, the result is cast to the matching
        pandas dtypes (preserving each column's dtype backend).
        """
        from narwhals.schema import Schema

        implementation = context._implementation
        ns = implementation.to_native_namespace()
        Series = cast("type[pd.Series[Any]]", ns.Series)  # noqa: N806
        DataFrame = cast("type[pd.DataFrame]", ns.DataFrame)  # noqa: N806
        aligned_data: dict[str, pd.Series[Any] | Any] = {}
        left_most: PandasLikeSeries | None = None
        for name, series in data.items():
            if isinstance(series, Series):
                compliant = PandasLikeSeries.from_native(series, context=context)
                if left_most is None:
                    # First native series seen becomes the alignment reference.
                    left_most = compliant
                    aligned_data[name] = series
                else:
                    aligned_data[name] = align_and_extract_native(left_most, compliant)[1]
            else:
                aligned_data[name] = series

        native = DataFrame.from_dict(aligned_data)
        if schema:
            it: Iterable[DTypeBackend] = (
                get_dtype_backend(dtype, implementation) for dtype in native.dtypes
            )
            native = native.astype(Schema(schema).to_pandas(it))
        return cls.from_native(native, context=context)
+
    @staticmethod
    def _is_native(obj: Any) -> TypeIs[Any]:
        """Whether ``obj`` is a native pandas-like DataFrame."""
        return is_pandas_like_dataframe(obj)  # pragma: no cover
+
+ @classmethod
+ def from_native(cls, data: Any, /, *, context: _FullContext) -> Self:
+ return cls(
+ data,
+ implementation=context._implementation,
+ backend_version=context._backend_version,
+ version=context._version,
+ validate_column_names=True,
+ )
+
    @classmethod
    def from_numpy(
        cls,
        data: _2DArray,
        /,
        *,
        context: _FullContext,
        schema: Mapping[str, DType] | Schema | Sequence[str] | None,
    ) -> Self:
        """Build a frame from a 2D NumPy array.

        ``schema`` may be a full name->dtype mapping (names applied and dtypes
        cast), a plain sequence of column names, or ``None`` (auto-generated
        column names).
        """
        from narwhals.schema import Schema

        implementation = context._implementation
        DataFrame: Constructor = implementation.to_native_namespace().DataFrame  # noqa: N806
        if isinstance(schema, (Mapping, Schema)):
            it: Iterable[DTypeBackend] = (
                get_dtype_backend(native_type, implementation)
                for native_type in schema.values()
            )
            native = DataFrame(data, columns=schema.keys()).astype(
                Schema(schema).to_pandas(it)
            )
        else:
            native = DataFrame(data, columns=cls._numpy_column_names(data, schema))
        return cls.from_native(native, context=context)
+
    def __narwhals_dataframe__(self) -> Self:
        # Protocol hook: this object already is a compliant dataframe.
        return self
+
    def __narwhals_lazyframe__(self) -> Self:
        # Protocol hook: the eager frame serves as its own lazy counterpart.
        return self
+
    def __narwhals_namespace__(self) -> PandasLikeNamespace:
        """Return the expression namespace for this backend/version."""
        # Local import — presumably avoids a circular import with the
        # namespace module; confirm before moving to module level.
        from narwhals._pandas_like.namespace import PandasLikeNamespace

        return PandasLikeNamespace(
            self._implementation, self._backend_version, version=self._version
        )
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation in {
+ Implementation.PANDAS,
+ Implementation.MODIN,
+ Implementation.CUDF,
+ }:
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def __len__(self) -> int:
+ return len(self.native)
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=version,
+ validate_column_names=False,
+ )
+
+ def _with_native(self, df: Any, *, validate_column_names: bool = True) -> Self:
+ return self.__class__(
+ df,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=validate_column_names,
+ )
+
    def _extract_comparand(self, other: PandasLikeSeries) -> pd.Series[Any]:
        """Return ``other``'s native series aligned to this frame's index.

        Broadcast (length-1) series are expanded over the frame's index;
        otherwise lengths must match and the index is replaced outright so
        pandas does not re-align values in later operations.
        """
        index = self.native.index
        if other._broadcast:
            # Repeat the single value across the whole index.
            s = other.native
            return type(s)(s.iloc[0], index=index, dtype=s.dtype, name=s.name)
        if (len_other := len(other)) != (len_idx := len(index)):
            msg = f"Expected object of length {len_idx}, got: {len_other}."
            raise ShapeError(msg)
        # Identity check on purpose: only re-index when it's a different object.
        if other.native.index is not index:
            return set_index(
                other.native,
                index,
                implementation=other._implementation,
                backend_version=other._backend_version,
            )
        return other.native
+
+ def get_column(self, name: str) -> PandasLikeSeries:
+ return PandasLikeSeries.from_native(self.native[name], context=self)
+
    def __array__(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
        """NumPy array protocol hook; delegates to `to_numpy`."""
        return self.to_numpy(dtype=dtype, copy=copy)
+
+ def _gather(self, rows: SizedMultiIndexSelector[pd.Series[Any]]) -> Self:
+ items = list(rows) if isinstance(rows, tuple) else rows
+ return self._with_native(self.native.iloc[items, :])
+
+ def _gather_slice(self, rows: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.iloc[slice(rows.start, rows.stop, rows.step), :],
+ validate_column_names=False,
+ )
+
+ def _select_slice_name(self, columns: _SliceName) -> Self:
+ start = (
+ self.native.columns.get_loc(columns.start)
+ if columns.start is not None
+ else None
+ )
+ stop = (
+ self.native.columns.get_loc(columns.stop) + 1
+ if columns.stop is not None
+ else None
+ )
+ selector = slice(start, stop, columns.step)
+ return self._with_native(
+ self.native.iloc[:, selector], validate_column_names=False
+ )
+
+ def _select_slice_index(self, columns: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.iloc[:, columns], validate_column_names=False
+ )
+
+ def _select_multi_index(
+ self, columns: SizedMultiIndexSelector[pd.Series[Any]]
+ ) -> Self:
+ columns = list(columns) if isinstance(columns, tuple) else columns
+ return self._with_native(
+ self.native.iloc[:, columns], validate_column_names=False
+ )
+
+ def _select_multi_name(
+ self, columns: SizedMultiNameSelector[pd.Series[Any]]
+ ) -> PandasLikeDataFrame:
+ return self._with_native(self.native.loc[:, columns])
+
+ # --- properties ---
+ @property
+ def columns(self) -> list[str]:
+ return self.native.columns.tolist()
+
+ @overload
+ def rows(self, *, named: Literal[True]) -> list[dict[str, Any]]: ...
+
+ @overload
+ def rows(self, *, named: Literal[False]) -> list[tuple[Any, ...]]: ...
+
+ @overload
+ def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ...
+
+ def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]:
+ if not named:
+ # cuDF does not support itertuples. But it does support to_dict!
+ if self._implementation is Implementation.CUDF:
+ # Extract the row values from the named rows
+ return [tuple(row.values()) for row in self.rows(named=True)]
+
+ return list(self.native.itertuples(index=False, name=None))
+
+ return self.native.to_dict(orient="records")
+
+ def iter_columns(self) -> Iterator[PandasLikeSeries]:
+ for _name, series in self.native.items(): # noqa: PERF102
+ yield PandasLikeSeries.from_native(series, context=self)
+
+ _iter_columns = iter_columns
+
+ def iter_rows(
+ self, *, named: bool, buffer_size: int
+ ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]:
+ # The param ``buffer_size`` is only here for compatibility with the Polars API
+ # and has no effect on the output.
+ if not named:
+ yield from self.native.itertuples(index=False, name=None)
+ else:
+ col_names = self.native.columns
+ for row in self.native.itertuples(index=False):
+ yield dict(zip(col_names, row))
+
    @property
    def schema(self) -> dict[str, DType]:
        """Mapping of column name to narwhals dtype.

        Object-dtype columns go through the dedicated object-dtype helper,
        which inspects the column itself rather than just its dtype.
        """
        native_dtypes = self.native.dtypes
        return {
            col: native_to_narwhals_dtype(
                native_dtypes[col], self._version, self._implementation
            )
            if native_dtypes[col] != "object"
            else object_native_to_narwhals_dtype(
                self.native[col], self._version, self._implementation
            )
            for col in self.native.columns
        }
+
    def collect_schema(self) -> dict[str, DType]:
        # Eager frames already know their schema; identical to `.schema`.
        return self.schema
+
+ # --- reshape ---
+ def simple_select(self, *column_names: str) -> Self:
+ return self._with_native(
+ select_columns_by_name(
+ self.native,
+ list(column_names),
+ self._backend_version,
+ self._implementation,
+ ),
+ validate_column_names=False,
+ )
+
    def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataFrame:
        """Evaluate ``exprs`` and return a frame of just their results."""
        new_series = self._evaluate_into_exprs(*exprs)
        if not new_series:
            # return empty dataframe, like Polars does
            return self._with_native(self.native.__class__(), validate_column_names=False)
        # Align/broadcast the evaluated series to a common length and index.
        new_series = align_series_full_broadcast(*new_series)
        namespace = self.__narwhals_namespace__()
        df = namespace._concat_horizontal([s.native for s in new_series])
        # `concat` creates a new object, so fine to modify `.columns.name` inplace.
        df.columns.name = self.native.columns.name
        return self._with_native(df, validate_column_names=True)
+
+ def drop_nulls(
+ self: PandasLikeDataFrame, subset: Sequence[str] | None
+ ) -> PandasLikeDataFrame:
+ if subset is None:
+ return self._with_native(
+ self.native.dropna(axis=0), validate_column_names=False
+ )
+ plx = self.__narwhals_namespace__()
+ return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))
+
+ def estimated_size(self, unit: SizeUnit) -> int | float:
+ sz = self.native.memory_usage(deep=True).sum()
+ return scale_bytes(sz, unit=unit)
+
+ def with_row_index(self, name: str) -> Self:
+ frame = self.native
+ namespace = self.__narwhals_namespace__()
+ row_index = namespace._series.from_iterable(
+ range(len(frame)), context=self, index=frame.index
+ ).alias(name)
+ return self._with_native(namespace._concat_horizontal([row_index.native, frame]))
+
+ def row(self, index: int) -> tuple[Any, ...]:
+ return tuple(x for x in self.native.iloc[index])
+
    def filter(
        self: PandasLikeDataFrame, predicate: PandasLikeExpr | list[bool]
    ) -> PandasLikeDataFrame:
        """Keep only the rows where ``predicate`` evaluates to True."""
        if isinstance(predicate, list):
            # A plain boolean mask can be handed to `.loc` directly.
            mask_native: pd.Series[Any] | list[bool] = predicate
        else:
            # `[0]` is safe as the predicate's expression only returns a single column
            mask = self._evaluate_into_exprs(predicate)[0]
            mask_native = self._extract_comparand(mask)
        return self._with_native(
            self.native.loc[mask_native], validate_column_names=False
        )
+
    def with_columns(
        self: PandasLikeDataFrame, *exprs: PandasLikeExpr
    ) -> PandasLikeDataFrame:
        """Evaluate ``exprs`` and add (or replace) the resulting columns."""
        columns = self._evaluate_into_exprs(*exprs)
        if not columns and len(self) == 0:
            return self
        name_columns: dict[str, PandasLikeSeries] = {s.name: s for s in columns}
        to_concat = []
        # Make sure to preserve column order
        for name in self.native.columns:
            if name in name_columns:
                # Replacement for an existing column, aligned to our index.
                series = self._extract_comparand(name_columns.pop(name))
            else:
                series = self.native[name]
            to_concat.append(series)
        # Whatever remains in `name_columns` is a brand-new column: append it.
        to_concat.extend(self._extract_comparand(s) for s in name_columns.values())
        namespace = self.__narwhals_namespace__()
        df = namespace._concat_horizontal(to_concat)
        # `concat` creates a new object, so fine to modify `.columns.name` inplace.
        df.columns.name = self.native.columns.name
        return self._with_native(df, validate_column_names=False)
+
+ def rename(self, mapping: Mapping[str, str]) -> Self:
+ return self._with_native(
+ rename(
+ self.native,
+ columns=mapping,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ )
+
+ def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
+ to_drop = parse_columns_to_drop(self, columns, strict=strict)
+ return self._with_native(
+ self.native.drop(columns=to_drop), validate_column_names=False
+ )
+
+ # --- transform ---
+ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
+ df = self.native
+ if isinstance(descending, bool):
+ ascending: bool | list[bool] = not descending
+ else:
+ ascending = [not d for d in descending]
+ na_position = "last" if nulls_last else "first"
+ return self._with_native(
+ df.sort_values(list(by), ascending=ascending, na_position=na_position),
+ validate_column_names=False,
+ )
+
    # --- convert ---
    def collect(
        self, backend: Implementation | None, **kwargs: Any
    ) -> CompliantDataFrameAny:
        """Materialize as an eager frame, optionally in another backend.

        Arguments:
            backend: ``None`` keeps the current backend; otherwise one of
                pandas, PyArrow, or Polars.
            **kwargs: accepted for API compatibility; unused here.

        Returns:
            A compliant eager dataframe for the requested backend.
        """
        if backend is None:
            return PandasLikeDataFrame(
                self.native,
                implementation=self._implementation,
                backend_version=self._backend_version,
                version=self._version,
                validate_column_names=False,
            )

        if backend is Implementation.PANDAS:
            import pandas as pd  # ignore-banned-import

            return PandasLikeDataFrame(
                self.to_pandas(),
                implementation=Implementation.PANDAS,
                backend_version=parse_version(pd),
                version=self._version,
                validate_column_names=False,
            )

        if backend is Implementation.PYARROW:
            import pyarrow as pa  # ignore-banned-import

            from narwhals._arrow.dataframe import ArrowDataFrame

            return ArrowDataFrame(
                native_dataframe=self.to_arrow(),
                backend_version=parse_version(pa),
                version=self._version,
                validate_column_names=False,
            )

        if backend is Implementation.POLARS:
            import polars as pl  # ignore-banned-import

            from narwhals._polars.dataframe import PolarsDataFrame

            return PolarsDataFrame(
                df=self.to_polars(),
                backend_version=parse_version(pl),
                version=self._version,
            )

        msg = f"Unsupported `backend` value: {backend}"  # pragma: no cover
        raise ValueError(msg)  # pragma: no cover
+
    # --- actions ---
    def group_by(
        self, keys: Sequence[str] | Sequence[PandasLikeExpr], *, drop_null_keys: bool
    ) -> PandasLikeGroupBy:
        """Start a group-by over ``keys``."""
        from narwhals._pandas_like.group_by import PandasLikeGroupBy

        return PandasLikeGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
    def join(  # noqa: C901, PLR0911, PLR0912
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        """Join this frame with ``other``.

        Arguments:
            other: the right-hand frame.
            how: join strategy; "cross", "anti", "semi", "left" and "full"
                each have dedicated handling, everything else maps straight
                onto pandas `merge`.
            left_on: join keys on this frame (``None`` for cross joins).
            right_on: join keys on ``other`` (``None`` for cross joins).
            suffix: appended to colliding right-hand column names.

        Returns:
            The joined frame.
        """
        if how == "cross":
            if (
                self._implementation is Implementation.MODIN
                or self._implementation is Implementation.CUDF
            ) or (
                self._implementation is Implementation.PANDAS
                and self._backend_version < (1, 4)
            ):
                # Backends without `how="cross"` support: emulate via an
                # inner join on a constant temporary key column.
                key_token = generate_temporary_column_name(
                    n_bytes=8, columns=[*self.columns, *other.columns]
                )

                return self._with_native(
                    self.native.assign(**{key_token: 0})
                    .merge(
                        other.native.assign(**{key_token: 0}),
                        how="inner",
                        left_on=key_token,
                        right_on=key_token,
                        suffixes=("", suffix),
                    )
                    .drop(columns=key_token)
                )
            else:
                return self._with_native(
                    self.native.merge(other.native, how="cross", suffixes=("", suffix))
                )

        if how == "anti":
            if self._implementation is Implementation.CUDF:
                # cuDF has native anti-join support.
                return self._with_native(
                    self.native.merge(
                        other.native, how="leftanti", left_on=left_on, right_on=right_on
                    )
                )
            else:
                # Emulate anti join: outer-merge with an indicator column,
                # then keep only the rows that came exclusively from the left.
                indicator_token = generate_temporary_column_name(
                    n_bytes=8, columns=[*self.columns, *other.columns]
                )
                if right_on is None:  # pragma: no cover
                    msg = "`right_on` cannot be `None` in anti-join"
                    raise TypeError(msg)

                # rename to avoid creating extra columns in join
                other_native = rename(
                    select_columns_by_name(
                        other.native,
                        list(right_on),
                        self._backend_version,
                        self._implementation,
                    ),
                    columns=dict(zip(right_on, left_on)),  # type: ignore[arg-type]
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                ).drop_duplicates()
                return self._with_native(
                    self.native.merge(
                        other_native,
                        how="outer",
                        indicator=indicator_token,
                        left_on=left_on,
                        right_on=left_on,
                    )
                    .loc[lambda t: t[indicator_token] == "left_only"]
                    .drop(columns=indicator_token)
                )

        if how == "semi":
            if right_on is None:  # pragma: no cover
                msg = "`right_on` cannot be `None` in semi-join"
                raise TypeError(msg)
            # rename to avoid creating extra columns in join
            other_native = (
                rename(
                    select_columns_by_name(
                        other.native,
                        list(right_on),
                        self._backend_version,
                        self._implementation,
                    ),
                    columns=dict(zip(right_on, left_on)),  # type: ignore[arg-type]
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                ).drop_duplicates()  # avoids potential rows duplication from inner join
            )
            return self._with_native(
                self.native.merge(
                    other_native, how="inner", left_on=left_on, right_on=left_on
                )
            )

        if how == "left":
            result_native = self.native.merge(
                other.native,
                how="left",
                left_on=left_on,
                right_on=right_on,
                suffixes=("", suffix),
            )
            # Drop the right-hand key columns that pandas keeps around.
            extra = []
            for left_key, right_key in zip(left_on, right_on):  # type: ignore[arg-type]
                if right_key != left_key and right_key not in self.columns:
                    extra.append(right_key)
                elif right_key != left_key:
                    extra.append(f"{right_key}{suffix}")
            return self._with_native(result_native.drop(columns=extra))

        if how == "full":
            # Pandas coalesces keys in full joins unless there's no collision

            # help mypy
            assert left_on is not None  # noqa: S101
            assert right_on is not None  # noqa: S101

            right_on_mapper = _remap_full_join_keys(left_on, right_on, suffix)
            other_native = other.native.rename(columns=right_on_mapper)
            check_column_names_are_unique(other_native.columns)
            right_on = list(right_on_mapper.values())  # we now have the suffixed keys
            return self._with_native(
                self.native.merge(
                    other_native,
                    left_on=left_on,
                    right_on=right_on,
                    how="outer",
                    suffixes=("", suffix),
                )
            )

        # Remaining strategies (e.g. "inner") map directly onto pandas merge.
        return self._with_native(
            self.native.merge(
                other.native,
                left_on=left_on,
                right_on=right_on,
                how=how,
                suffixes=("", suffix),
            )
        )
+
+ def join_asof(
+ self,
+ other: Self,
+ *,
+ left_on: str,
+ right_on: str,
+ by_left: Sequence[str] | None,
+ by_right: Sequence[str] | None,
+ strategy: AsofJoinStrategy,
+ suffix: str,
+ ) -> Self:
+ plx = self.__native_namespace__()
+ return self._with_native(
+ plx.merge_asof(
+ self.native,
+ other.native,
+ left_on=left_on,
+ right_on=right_on,
+ left_by=by_left,
+ right_by=by_right,
+ direction=strategy,
+ suffixes=("", suffix),
+ )
+ )
+
+ # --- partial reduction ---
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.head(n), validate_column_names=False)
+
+ def tail(self, n: int) -> Self:
+ return self._with_native(self.native.tail(n), validate_column_names=False)
+
    def unique(
        self,
        subset: Sequence[str] | None,
        *,
        keep: UniqueKeepStrategy,
        maintain_order: bool | None = None,
    ) -> Self:
        """Drop duplicate rows, judged on ``subset`` columns when given.

        The param `maintain_order` is only here for compatibility with the Polars API
        and has no effect on the output.
        """
        # Map Polars keep strategies onto pandas `drop_duplicates(keep=...)`:
        # "none" -> False (drop every duplicate), "any" -> "first".
        mapped_keep = {"none": False, "any": "first"}.get(keep, keep)
        if subset and (error := self._check_columns_exist(subset)):
            raise error
        return self._with_native(
            self.native.drop_duplicates(subset=subset, keep=mapped_keep),
            validate_column_names=False,
        )
+
+ # --- lazy-only ---
+ def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAny:
+ from narwhals.utils import parse_version
+
+ pandas_df = self.to_pandas()
+ if backend is None:
+ return self
+ elif backend is Implementation.DUCKDB:
+ import duckdb # ignore-banned-import
+
+ from narwhals._duckdb.dataframe import DuckDBLazyFrame
+
+ return DuckDBLazyFrame(
+ df=duckdb.table("pandas_df"),
+ backend_version=parse_version(duckdb),
+ version=self._version,
+ )
+ elif backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsLazyFrame
+
+ return PolarsLazyFrame(
+ df=pl.from_pandas(pandas_df).lazy(),
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+ elif backend is Implementation.DASK:
+ import dask # ignore-banned-import
+ import dask.dataframe as dd # ignore-banned-import
+
+ from narwhals._dask.dataframe import DaskLazyFrame
+
+ return DaskLazyFrame(
+ native_dataframe=dd.from_pandas(pandas_df),
+ backend_version=parse_version(dask),
+ version=self._version,
+ )
+ raise AssertionError # pragma: no cover
+
+ @property
+ def shape(self) -> tuple[int, int]:
+ return self.native.shape
+
+ def to_dict(self, *, as_series: bool) -> dict[str, Any]:
+ if as_series:
+ return {
+ col: PandasLikeSeries.from_native(self.native[col], context=self)
+ for col in self.columns
+ }
+ return self.native.to_dict(orient="list")
+
+ def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
+ native_dtypes = self.native.dtypes
+
+ if copy is None:
+ # pandas default differs from Polars, but cuDF default is True
+ copy = self._implementation is Implementation.CUDF
+
+ if native_dtypes.isin(CLASSICAL_NUMPY_DTYPES).all():
+ # Fast path, no conversions necessary.
+ if dtype is not None:
+ return self.native.to_numpy(dtype=dtype, copy=copy)
+ return self.native.to_numpy(copy=copy)
+
+ dtype_datetime = self._version.dtypes.Datetime
+ to_convert = [
+ key
+ for key, val in self.schema.items()
+ if isinstance(val, dtype_datetime) and val.time_zone is not None
+ ]
+ if to_convert:
+ df = self.with_columns(
+ self.__narwhals_namespace__()
+ .col(*to_convert)
+ .dt.convert_time_zone("UTC")
+ .dt.replace_time_zone(None)
+ ).native
+ else:
+ df = self.native
+
+ if dtype is not None:
+ return df.to_numpy(dtype=dtype, copy=copy)
+
+ # pandas return `object` dtype for nullable dtypes if dtype=None,
+ # so we cast each Series to numpy and let numpy find a common dtype.
+ # If there aren't any dtypes where `to_numpy()` is "broken" (i.e. it
+ # returns Object) then we just call `to_numpy()` on the DataFrame.
+ for col_dtype in native_dtypes:
+ if str(col_dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
+ import numpy as np
+
+ arr: Any = np.hstack(
+ [
+ self.get_column(col).to_numpy(copy=copy, dtype=None)[:, None]
+ for col in self.columns
+ ]
+ )
+ return arr
+ return df.to_numpy(copy=copy)
+
+ def to_pandas(self) -> pd.DataFrame:
+ if self._implementation is Implementation.PANDAS:
+ return self.native
+ elif self._implementation is Implementation.CUDF:
+ return self.native.to_pandas()
+ elif self._implementation is Implementation.MODIN:
+ return self.native._to_pandas()
+ msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
+ raise AssertionError(msg)
+
    def to_polars(self) -> pl.DataFrame:
        """Convert to a polars DataFrame, going through pandas."""
        import polars as pl  # ignore-banned-import

        return pl.from_pandas(self.to_pandas())
+
    def write_parquet(self, file: str | Path | BytesIO) -> None:
        """Write the frame to ``file`` in Parquet format."""
        self.native.to_parquet(file)
+
    @overload
    def write_csv(self, file: None) -> str: ...

    @overload
    def write_csv(self, file: str | Path | BytesIO) -> None: ...

    def write_csv(self, file: str | Path | BytesIO | None) -> str | None:
        """Write CSV to ``file``; with ``file=None`` return the CSV as a string."""
        return self.native.to_csv(file, index=False)
+
+ # --- descriptive ---
+ def is_unique(self) -> PandasLikeSeries:
+ return PandasLikeSeries.from_native(
+ ~self.native.duplicated(keep=False), context=self
+ )
+
def item(self, row: int | None, column: int | str | None) -> Any:
    """Return a single scalar from the frame.

    With both ``row`` and ``column`` None the frame must be 1x1;
    otherwise both must be provided (a string column is resolved to its
    positional index first).
    """
    if row is None and column is None:
        if self.shape != (1, 1):
            msg = (
                f"can only call `.item()` if the dataframe is of shape (1, 1),"
                f" or if explicit row/col values are provided;"
                f" frame has shape {self.shape!r}"
            )
            raise ValueError(msg)
        return self.native.iloc[0, 0]

    if row is None or column is None:
        # Exactly one of the two was given — ambiguous, so reject.
        msg = "cannot call `.item()` with only one of `row` or `column`"
        raise ValueError(msg)

    col_idx = self.columns.index(column) if isinstance(column, str) else column
    return self.native.iloc[row, col_idx]
+
def clone(self) -> Self:
    """Return a deep copy of this frame (column names already validated)."""
    return self._with_native(self.native.copy(), validate_column_names=False)
+
def gather_every(self, n: int, offset: int) -> Self:
    """Take every ``n``-th row, starting at position ``offset``."""
    strided = self.native.iloc[offset::n]
    return self._with_native(strided, validate_column_names=False)
+
def _pivot_into_index_values(
    self,
    on: Sequence[str],
    index: Sequence[str] | None,
    values: Sequence[str] | None,
    /,
) -> tuple[Sequence[str], Sequence[str]]:
    """Fill in defaults for pivot's ``index``/``values`` arguments.

    Missing ``index`` defaults to every column not in ``on`` (nor in
    ``values``, when given); missing ``values`` defaults to every column
    not in ``on`` or the resolved ``index``.
    """
    if not index:
        index = (
            exclude_column_names(self, {*on, *values})
            if values
            else exclude_column_names(self, on)
        )
    if not values:
        values = exclude_column_names(self, {*on, *index})
    return index, values
+
+ @staticmethod
+ def _pivot_multi_on_name(unique_values: tuple[str, ...], /) -> str:
+ LB, RB, Q = "{", "}", '"' # noqa: N806
+ body = '","'.join(unique_values)
+ return f"{LB}{Q}{body}{Q}{RB}"
+
+ @staticmethod
+ def _pivot_single_on_names(
+ column_names: Iterable[str], n_values: int, separator: str, /
+ ) -> list[str]:
+ if n_values > 1:
+ return [separator.join(col).strip() for col in column_names]
+ return [col[-1] for col in column_names]
+
def _pivot_multi_on_names(
    self,
    column_names: Iterable[tuple[str, ...]],
    n_on: int,
    n_values: int,
    separator: str,
    /,
) -> Iterator[str]:
    """Yield polars-style names for a pivot over multiple `on` columns.

    The last ``n_on`` levels of each name are collapsed into a
    ``{"..."}`` group; with several value columns the value name is
    prefixed before the group, joined by ``separator``.
    """
    fmt_group = self._pivot_multi_on_name
    if n_values > 1:
        for col in column_names:
            group = fmt_group(col[-n_on:])
            yield separator.join((col[0], group))
    else:
        for col in column_names:
            yield fmt_group(col[-n_on:])
+
def _pivot_remap_column_names(
    self, column_names: Iterable[Any], *, n_on: int, n_values: int, separator: str
) -> list[str]:
    """Reformat output column names from a native pivot operation, to match `polars`.

    Note:
        `column_names` is a `pd.MultiIndex`, but not in the stubs.
    """
    if n_on == 1:
        return self._pivot_single_on_names(column_names, n_values, separator)
    remapped = self._pivot_multi_on_names(column_names, n_on, n_values, separator)
    return list(remapped)
+
def _pivot_table(
    self,
    on: Sequence[str],
    index: Sequence[str],
    values: Sequence[str],
    aggregate_function: Literal[
        "min", "max", "first", "last", "sum", "mean", "median"
    ],
    /,
) -> Any:
    """Delegate an aggregating pivot to the native ``pivot_table``.

    cuDF's ``pivot_table`` doesn't accept ``observed`` and is known to
    misbehave with Categorical key columns, so that combination is
    rejected up front.
    """
    kwds: dict[Any, Any] = {"observed": True}
    if self._implementation is Implementation.CUDF:
        del kwds["observed"]
        involved = set(chain(values, index, on))
        categorical = self._version.dtypes.Categorical
        if any(
            tp
            for name, tp in self.schema.items()
            if name in involved and isinstance(tp, categorical)
        ):
            msg = "`pivot` with Categoricals is not implemented for cuDF backend"
            raise NotImplementedError(msg)
    return self.native.pivot_table(
        values=values,
        index=index,
        columns=on,
        aggfunc=aggregate_function,
        margins=False,
        **kwds,
    )
+
def _pivot(
    self,
    on: Sequence[str],
    index: Sequence[str],
    values: Sequence[str],
    aggregate_function: PivotAgg | None,
    /,
) -> pd.DataFrame:
    """Run the native pivot, choosing the strategy by aggregation.

    * no aggregation  -> plain ``pivot`` (raises on duplicate keys);
    * ``"len"``       -> group-by ``size`` then pivot the counts
      (``pivot_table`` has no ``size`` aggfunc);
    * anything else   -> ``pivot_table`` via `_pivot_table`.
    """
    native = self.native
    if aggregate_function is None:
        return native.pivot(columns=on, index=index, values=values)
    if aggregate_function == "len":
        counts = native.groupby([*on, *index], as_index=False).agg(
            dict.fromkeys(values, "size")
        )
        return counts.pivot(columns=on, index=index, values=values)
    return self._pivot_table(on, index, values, aggregate_function)
+
def pivot(
    self,
    on: Sequence[str],
    *,
    index: Sequence[str] | None,
    values: Sequence[str] | None,
    aggregate_function: PivotAgg | None,
    sort_columns: bool,
    separator: str,
) -> Self:
    """Pivot the frame, emulating polars' `pivot` on a pandas-like backend.

    Resolves default `index`/`values`, performs the native pivot, then
    reorders and renames the resulting (MultiIndex) columns so the output
    matches polars' column order and naming scheme.
    """
    implementation = self._implementation
    backend_version = self._backend_version
    if implementation.is_pandas() and backend_version < (1, 1):  # pragma: no cover
        msg = "pivot is only supported for 'pandas>=1.1'"
        raise NotImplementedError(msg)
    if implementation.is_modin():
        msg = "pivot is not supported for Modin backend due to https://github.com/modin-project/modin/issues/7409."
        raise NotImplementedError(msg)

    index, values = self._pivot_into_index_values(on, index, values)
    result = self._pivot(on, index, values, aggregate_function)

    # Select the columns in the right order
    # Polars orders pivot columns by the unique values of each `on`
    # column (sorted only when `sort_columns`); pandas orders them by
    # first appearance, so rebuild the polars ordering explicitly.
    uniques = (
        (
            self.get_column(col)
            .unique()
            .sort(descending=False, nulls_last=False)
            .to_list()
            for col in on
        )
        if sort_columns
        else (self.get_column(col).unique().to_list() for col in on)
    )
    # Cartesian product (values x on-values...) matches the MultiIndex
    # layout produced by the native pivot.
    ordered_cols = list(product(values, *chain(uniques)))
    result = result.loc[:, ordered_cols]
    columns = result.columns
    remapped = self._pivot_remap_column_names(
        columns, n_on=len(on), n_values=len(values), separator=separator
    )
    result.columns = remapped  # type: ignore[assignment]
    # Clear the residual MultiIndex level name left by pandas.
    result.columns.names = [""]
    return self._with_native(result.reset_index())
+
def to_arrow(self) -> Any:
    """Export the frame as a pyarrow Table.

    cuDF ships its own arrow exporter; other backends go through
    ``pa.Table.from_pandas``.
    """
    if self._implementation is Implementation.CUDF:
        return self.native.to_arrow(preserve_index=False)

    import pyarrow as pa  # ignore-banned-import()

    return pa.Table.from_pandas(self.native)
+
def sample(
    self,
    n: int | None,
    *,
    fraction: float | None,
    with_replacement: bool,
    seed: int | None,
) -> Self:
    """Draw a random sample of rows via the native ``sample`` method."""
    sampled = self.native.sample(
        n=n, frac=fraction, replace=with_replacement, random_state=seed
    )
    return self._with_native(sampled, validate_column_names=False)
+
def unpivot(
    self,
    on: Sequence[str] | None,
    index: Sequence[str] | None,
    variable_name: str,
    value_name: str,
) -> Self:
    """Unpivot (melt) the frame from wide to long format."""
    melted = self.native.melt(
        id_vars=index,
        value_vars=on,
        var_name=variable_name,
        value_name=value_name,
    )
    return self._with_native(melted)
+
def explode(self, columns: Sequence[str]) -> Self:
    """Explode List-typed columns into one row per list element.

    All exploded columns must be of List dtype and hold lists of equal
    length row-by-row; otherwise `InvalidOperationError` / `ShapeError`
    is raised.
    """
    dtypes = self._version.dtypes

    schema = self.collect_schema()
    for col_to_explode in columns:
        dtype = schema[col_to_explode]

        if dtype != dtypes.List:
            msg = (
                f"`explode` operation not supported for dtype `{dtype}`, "
                "expected List type"
            )
            raise InvalidOperationError(msg)

    if len(columns) == 1:
        # Single column: pandas' explode handles this directly.
        return self._with_native(
            self.native.explode(columns[0]), validate_column_names=False
        )
    else:
        native_frame = self.native
        # Per-row element counts of the first column serve as the
        # reference that every other exploded column must match.
        anchor_series = native_frame[columns[0]].list.len()

        if not all(
            (native_frame[col_name].list.len() == anchor_series).all()
            for col_name in columns[1:]
        ):
            msg = "exploded columns must have matching element counts"
            raise ShapeError(msg)

        original_columns = self.columns
        other_columns = [c for c in original_columns if c not in columns]

        # Explode the first column together with the untouched columns,
        # explode the remaining ones separately, then align by position.
        exploded_frame = native_frame[[*other_columns, columns[0]]].explode(
            columns[0]
        )
        exploded_series = [
            native_frame[col_name].explode().to_frame() for col_name in columns[1:]
        ]

        plx = self.__native_namespace__()
        return self._with_native(
            plx.concat([exploded_frame, *exploded_series], axis=1)[original_columns],
            validate_column_names=False,
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py
new file mode 100644
index 0000000..0cd9958
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/expr.py
@@ -0,0 +1,402 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._compliant import EagerExpr
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._pandas_like.group_by import PandasLikeGroupBy
+from narwhals._pandas_like.series import PandasLikeSeries
+from narwhals._utils import generate_temporary_column_name
+
+if TYPE_CHECKING:
+ from typing_extensions import Self
+
+ from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries, ScalarKwargs
+ from narwhals._expression_parsing import ExprMetadata
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._utils import Implementation, Version, _FullContext
+ from narwhals.typing import (
+ FillNullStrategy,
+ NonNestedLiteral,
+ PythonLiteral,
+ RankMethod,
+ )
+
# Maps narwhals window-function names to the pandas groupby/rolling method
# that implements them inside an `over(...)` context.
WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT = {
    "cum_sum": "cumsum",
    "cum_min": "cummin",
    "cum_max": "cummax",
    "cum_prod": "cumprod",
    # Pandas cumcount starts counting from 0 while Polars starts from 1
    # Pandas cumcount counts nulls while Polars does not
    # So, instead of using "cumcount" we use "cumsum" on notna() to get the same result
    "cum_count": "cumsum",
    # Rolling ops map to methods on the `.rolling(...)` object.
    "rolling_sum": "sum",
    "rolling_mean": "mean",
    "rolling_std": "std",
    "rolling_var": "var",
    "shift": "shift",
    "rank": "rank",
    "diff": "diff",
    "fill_null": "fillna",
}
+
+
def window_kwargs_to_pandas_equivalent(
    function_name: str, kwargs: ScalarKwargs
) -> dict[str, PythonLiteral]:
    """Translate narwhals window-function kwargs into pandas keyword arguments.

    Each recognized ``function_name`` family maps its narwhals kwargs onto
    the names pandas expects (e.g. ``n`` -> ``periods`` for ``shift``).
    Unrecognized simple aggregations (sum, len, ...) need no kwargs.
    The ``assert``s are internal sanity checks on the kwargs contract.
    """
    pandas_kwargs: dict[str, PythonLiteral] = {}
    if function_name == "shift":
        assert "n" in kwargs  # noqa: S101
        pandas_kwargs = {"periods": kwargs["n"]}
    elif function_name == "rank":
        assert "method" in kwargs  # noqa: S101
        assert "descending" in kwargs  # noqa: S101
        rank_method = kwargs["method"]
        pandas_kwargs = {
            # Polars' "ordinal" is pandas' "first"; other names coincide.
            "method": "first" if rank_method == "ordinal" else rank_method,
            "ascending": not kwargs["descending"],
            "na_option": "keep",
            "pct": False,
        }
    elif function_name.startswith("cum_"):  # Cumulative operation
        pandas_kwargs = {"skipna": True}
    elif function_name.startswith("rolling_"):  # Rolling operation
        assert "min_samples" in kwargs  # noqa: S101
        assert "window_size" in kwargs  # noqa: S101
        assert "center" in kwargs  # noqa: S101
        pandas_kwargs = {
            "min_periods": kwargs["min_samples"],
            "window": kwargs["window_size"],
            "center": kwargs["center"],
        }
    elif function_name in {"std", "var"}:
        assert "ddof" in kwargs  # noqa: S101
        pandas_kwargs = {"ddof": kwargs["ddof"]}
    elif function_name == "fill_null":
        assert "strategy" in kwargs  # noqa: S101
        assert "limit" in kwargs  # noqa: S101
        pandas_kwargs = {"strategy": kwargs["strategy"], "limit": kwargs["limit"]}
    return pandas_kwargs
+
+
class PandasLikeExpr(EagerExpr["PandasLikeDataFrame", PandasLikeSeries]):
    """Eager expression for pandas-like backends (pandas, Modin, cuDF).

    Wraps a callable ``df -> list[PandasLikeSeries]`` together with the
    metadata (depth, function name, output-name evaluators, backend info)
    needed to evaluate and alias outputs lazily.
    """

    def __init__(
        self,
        call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries],
        *,
        depth: int,
        function_name: str,
        evaluate_output_names: EvalNames[PandasLikeDataFrame],
        alias_output_names: AliasNames | None,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        version: Version,
        scalar_kwargs: ScalarKwargs | None = None,
    ) -> None:
        self._call = call
        self._depth = depth
        self._function_name = function_name
        self._evaluate_output_names = evaluate_output_names
        self._alias_output_names = alias_output_names
        self._implementation = implementation
        self._backend_version = backend_version
        self._version = version
        # Kwargs of the most recent scalar-producing op; consumed by `over`
        # to rebuild the pandas window-function call.
        self._scalar_kwargs = scalar_kwargs or {}
        self._metadata: ExprMetadata | None = None

    def __narwhals_namespace__(self) -> PandasLikeNamespace:
        # Local import avoids a circular dependency at module import time.
        from narwhals._pandas_like.namespace import PandasLikeNamespace

        return PandasLikeNamespace(
            self._implementation, self._backend_version, version=self._version
        )

    def __narwhals_expr__(self) -> None: ...

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[PandasLikeDataFrame],
        /,
        *,
        context: _FullContext,
        function_name: str = "",
    ) -> Self:
        """Build an expression that selects columns by (lazily evaluated) name."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            try:
                return [
                    PandasLikeSeries(
                        df._native_frame[column_name],
                        implementation=df._implementation,
                        backend_version=df._backend_version,
                        version=df._version,
                    )
                    for column_name in evaluate_column_names(df)
                ]
            except KeyError as e:
                # Prefer narwhals' descriptive missing-column error when
                # it can identify the offending names.
                if error := df._check_columns_exist(evaluate_column_names(df)):
                    raise error from e
                raise

        return cls(
            func,
            depth=0,
            function_name=function_name,
            evaluate_output_names=evaluate_column_names,
            alias_output_names=None,
            implementation=context._implementation,
            backend_version=context._backend_version,
            version=context._version,
        )

    @classmethod
    def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
        """Build an expression that selects columns positionally (`nth`)."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            native = df.native
            return [
                PandasLikeSeries.from_native(native.iloc[:, i], context=df)
                for i in column_indices
            ]

        return cls(
            func,
            depth=0,
            function_name="nth",
            evaluate_output_names=cls._eval_names_indices(column_indices),
            alias_output_names=None,
            implementation=context._implementation,
            backend_version=context._backend_version,
            version=context._version,
        )

    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> Self:
        return self._reuse_series(
            "ewm_mean",
            com=com,
            span=span,
            half_life=half_life,
            alpha=alpha,
            adjust=adjust,
            min_samples=min_samples,
            ignore_nulls=ignore_nulls,
        )

    def cum_sum(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_sum", scalar_kwargs={"reverse": reverse})

    def shift(self, n: int) -> Self:
        return self._reuse_series("shift", scalar_kwargs={"n": n})

    def over(  # noqa: C901, PLR0915
        self, partition_by: Sequence[str], order_by: Sequence[str]
    ) -> Self:
        """Apply this (elementary) expression within partitions/ordering.

        Three cases:
        1. no ``partition_by``: sort, evaluate, then scatter results back
           into the original row order;
        2. non-elementary expression: unsupported, raise;
        3. otherwise: translate the leaf function to its pandas groupby
           equivalent and run it via ``groupby(...).transform``/rolling/etc.
        """
        if not partition_by:
            # e.g. `nw.col('a').cum_sum().order_by(key)`
            # We can always easily support this as it doesn't require grouping.
            assert order_by  # noqa: S101

            def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
                # Row-index token lets us undo the sort afterwards.
                token = generate_temporary_column_name(8, df.columns)
                df = df.with_row_index(token).sort(
                    *order_by, descending=False, nulls_last=False
                )
                results = self(df.drop([token], strict=True))
                sorting_indices = df.get_column(token)
                for s in results:
                    s._scatter_in_place(sorting_indices, s)
                return results
        elif not self._is_elementary():
            msg = (
                "Only elementary expressions are supported for `.over` in pandas-like backends.\n\n"
                "Please see: "
                "https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/"
            )
            raise NotImplementedError(msg)
        else:
            function_name = PandasLikeGroupBy._leaf_name(self)
            pandas_function_name = WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT.get(
                function_name, PandasLikeGroupBy._REMAP_AGGS.get(function_name)
            )
            if pandas_function_name is None:
                msg = (
                    f"Unsupported function: {function_name} in `over` context.\n\n"
                    f"Supported functions are {', '.join(WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT)}\n"
                    f"and {', '.join(PandasLikeGroupBy._REMAP_AGGS)}."
                )
                raise NotImplementedError(msg)
            pandas_kwargs = window_kwargs_to_pandas_equivalent(
                function_name, self._scalar_kwargs
            )

            def func(df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:  # noqa: C901, PLR0912
                output_names, aliases = evaluate_output_names_and_aliases(self, df, [])
                if function_name == "cum_count":
                    # cum_count is emulated as cumsum over notna() — see
                    # WINDOW_FUNCTIONS_TO_PANDAS_EQUIVALENT.
                    plx = self.__narwhals_namespace__()
                    df = df.with_columns(~plx.col(*output_names).is_null())

                if function_name.startswith("cum_"):
                    assert "reverse" in self._scalar_kwargs  # noqa: S101
                    reverse = self._scalar_kwargs["reverse"]
                else:
                    assert "reverse" not in self._scalar_kwargs  # noqa: S101
                    reverse = False

                if order_by:
                    # Sort (direction flipped for reversed cumulative ops),
                    # remembering original positions via a token column.
                    columns = list(set(partition_by).union(output_names).union(order_by))
                    token = generate_temporary_column_name(8, columns)
                    df = (
                        df.simple_select(*columns)
                        .with_row_index(token)
                        .sort(*order_by, descending=reverse, nulls_last=reverse)
                    )
                    sorting_indices = df.get_column(token)
                elif reverse:
                    # No ordering key: emulate reverse by flipping the frame.
                    columns = list(set(partition_by).union(output_names))
                    df = df.simple_select(*columns)._gather_slice(slice(None, None, -1))
                grouped = df._native_frame.groupby(partition_by)
                if function_name.startswith("rolling"):
                    rolling = grouped[list(output_names)].rolling(**pandas_kwargs)
                    assert pandas_function_name is not None  # help mypy # noqa: S101
                    if pandas_function_name in {"std", "var"}:
                        assert "ddof" in self._scalar_kwargs  # noqa: S101
                        res_native = getattr(rolling, pandas_function_name)(
                            ddof=self._scalar_kwargs["ddof"]
                        )
                    else:
                        res_native = getattr(rolling, pandas_function_name)()
                elif function_name == "fill_null":
                    assert "strategy" in self._scalar_kwargs  # noqa: S101
                    assert "limit" in self._scalar_kwargs  # noqa: S101
                    df_grouped = grouped[list(output_names)]
                    if self._scalar_kwargs["strategy"] == "forward":
                        res_native = df_grouped.ffill(limit=self._scalar_kwargs["limit"])
                    elif self._scalar_kwargs["strategy"] == "backward":
                        res_native = df_grouped.bfill(limit=self._scalar_kwargs["limit"])
                    else:  # pragma: no cover
                        # This is deprecated in pandas. Indeed, `nw.col('a').fill_null(3).over('b')`
                        # does not seem very useful, and DuckDB doesn't support it either.
                        msg = "`fill_null` with `over` without `strategy` specified is not supported."
                        raise NotImplementedError(msg)
                elif function_name == "len":
                    if len(output_names) != 1:  # pragma: no cover
                        msg = "Safety check failed, please report a bug."
                        raise AssertionError(msg)
                    res_native = grouped.transform("size").to_frame(aliases[0])
                else:
                    res_native = grouped[list(output_names)].transform(
                        pandas_function_name, **pandas_kwargs
                    )
                result_frame = df._with_native(res_native).rename(
                    dict(zip(output_names, aliases))
                )
                results = [result_frame.get_column(name) for name in aliases]
                if order_by:
                    # Undo the sort: scatter values back to original rows.
                    for s in results:
                        s._scatter_in_place(sorting_indices, s)
                    return results
                if reverse:
                    # Flip back the frame we reversed above.
                    return [s._gather_slice(slice(None, None, -1)) for s in results]
                return results

        return self.__class__(
            func,
            depth=self._depth + 1,
            function_name=self._function_name + "->over",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=self._version,
        )

    def cum_count(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_count", scalar_kwargs={"reverse": reverse})

    def cum_min(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_min", scalar_kwargs={"reverse": reverse})

    def cum_max(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_max", scalar_kwargs={"reverse": reverse})

    def cum_prod(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_prod", scalar_kwargs={"reverse": reverse})

    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self:
        return self._reuse_series(
            "fill_null", scalar_kwargs={"strategy": strategy, "limit": limit}, value=value
        )

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._reuse_series(
            "rolling_sum",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
            },
        )

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._reuse_series(
            "rolling_mean",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
            },
        )

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._reuse_series(
            "rolling_std",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
                "ddof": ddof,
            },
        )

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._reuse_series(
            "rolling_var",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
                "ddof": ddof,
            },
        )

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        return self._reuse_series(
            "rank", scalar_kwargs={"method": method, "descending": descending}
        )

    def log(self, base: float) -> Self:
        return self._reuse_series("log", base=base)

    def exp(self) -> Self:
        return self._reuse_series("exp")
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py
new file mode 100644
index 0000000..ede3f05
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/group_by.py
@@ -0,0 +1,293 @@
+from __future__ import annotations
+
+import collections
+import warnings
+from typing import TYPE_CHECKING, Any, ClassVar, Iterator, Mapping, Sequence
+
+from narwhals._compliant import EagerGroupBy
+from narwhals._expression_parsing import evaluate_output_names_and_aliases
+from narwhals._pandas_like.utils import select_columns_by_name
+from narwhals._utils import find_stacklevel
+
+if TYPE_CHECKING:
+ from narwhals._compliant.group_by import NarwhalsAggregation
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.expr import PandasLikeExpr
+
+
class PandasLikeGroupBy(EagerGroupBy["PandasLikeDataFrame", "PandasLikeExpr", str]):
    """Group-by implementation for pandas-like backends.

    Simple aggregations are translated to native pandas groupby methods;
    anything else falls back to a (slow) `groupby.apply` with a warning.
    """

    # narwhals aggregation name -> pandas groupby method name.
    _REMAP_AGGS: ClassVar[Mapping[NarwhalsAggregation, Any]] = {
        "sum": "sum",
        "mean": "mean",
        "median": "median",
        "max": "max",
        "min": "min",
        "std": "std",
        "var": "var",
        "len": "size",
        "n_unique": "nunique",
        "count": "count",
    }

    def __init__(
        self,
        df: PandasLikeDataFrame,
        keys: Sequence[PandasLikeExpr] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        self._df = df
        self._drop_null_keys = drop_null_keys
        self._compliant_frame, self._keys, self._output_key_names = self._parse_keys(
            df, keys=keys
        )
        # Drop index to avoid potential collisions:
        # https://github.com/narwhals-dev/narwhals/issues/1907.
        if set(self.compliant.native.index.names).intersection(self.compliant.columns):
            native_frame = self.compliant.native.reset_index(drop=True)
        else:
            native_frame = self.compliant.native
        if (
            self.compliant._implementation.is_pandas()
            and self.compliant._backend_version < (1, 1)
        ):  # pragma: no cover
            # `dropna` was added to `groupby` in pandas 1.1.
            if (
                not drop_null_keys
                and self.compliant.simple_select(*self._keys).native.isna().any().any()
            ):
                msg = "Grouping by null values is not supported in pandas < 1.1.0"
                raise NotImplementedError(msg)
            self._grouped = native_frame.groupby(
                list(self._keys), sort=False, as_index=True, observed=True
            )
        else:
            self._grouped = native_frame.groupby(
                list(self._keys),
                sort=False,
                as_index=True,
                dropna=drop_null_keys,
                observed=True,
            )

    def agg(self, *exprs: PandasLikeExpr) -> PandasLikeDataFrame:  # noqa: C901, PLR0912, PLR0914, PLR0915
        """Aggregate the grouped frame with the given expressions.

        Fast path: when every expression is a "simple" aggregation, build
        native pandas aggregations (handling nunique and non-default-ddof
        std/var separately), rename carefully, and concatenate.
        Slow path: `groupby.apply` per group, with a performance warning.
        """
        implementation = self.compliant._implementation
        backend_version = self.compliant._backend_version
        new_names: list[str] = self._keys.copy()

        all_aggs_are_simple = True
        exclude = (*self._keys, *self._output_key_names)
        for expr in exprs:
            _, aliases = evaluate_output_names_and_aliases(expr, self.compliant, exclude)
            new_names.extend(aliases)
            if not self._is_simple(expr):
                all_aggs_are_simple = False

        # dict of {output_name: root_name} that we count n_unique on
        # We need to do this separately from the rest so that we
        # can pass the `dropna` kwargs.
        nunique_aggs: dict[str, str] = {}
        simple_aggs: dict[str, list[str]] = collections.defaultdict(list)
        simple_aggs_functions: set[str] = set()

        # ddof to (output_names, aliases) mapping
        std_aggs: dict[int, tuple[list[str], list[str]]] = collections.defaultdict(
            lambda: ([], [])
        )
        var_aggs: dict[int, tuple[list[str], list[str]]] = collections.defaultdict(
            lambda: ([], [])
        )

        expected_old_names: list[str] = []
        simple_agg_new_names: list[str] = []

        if all_aggs_are_simple:  # noqa: PLR1702
            for expr in exprs:
                output_names, aliases = evaluate_output_names_and_aliases(
                    expr, self.compliant, exclude
                )
                if expr._depth == 0:
                    # e.g. `agg(nw.len())`
                    function_name = self._remap_expr_name(expr._function_name)
                    simple_aggs_functions.add(function_name)

                    for alias in aliases:
                        # Depth-0 aggs have no input column; anchor them on
                        # the first key column so pandas has one to act on.
                        expected_old_names.append(f"{self._keys[0]}_{function_name}")
                        simple_aggs[self._keys[0]].append(function_name)
                        simple_agg_new_names.append(alias)
                    continue

                # e.g. `agg(nw.mean('a'))`
                function_name = self._remap_expr_name(self._leaf_name(expr))
                is_n_unique = function_name == "nunique"
                is_std = function_name == "std"
                is_var = function_name == "var"
                for output_name, alias in zip(output_names, aliases):
                    if is_n_unique:
                        nunique_aggs[alias] = output_name
                    elif is_std and (ddof := expr._scalar_kwargs["ddof"]) != 1:  # pyright: ignore[reportTypedDictNotRequiredAccess]
                        std_aggs[ddof][0].append(output_name)
                        std_aggs[ddof][1].append(alias)
                    elif is_var and (ddof := expr._scalar_kwargs["ddof"]) != 1:  # pyright: ignore[reportTypedDictNotRequiredAccess]
                        var_aggs[ddof][0].append(output_name)
                        var_aggs[ddof][1].append(alias)
                    else:
                        expected_old_names.append(f"{output_name}_{function_name}")
                        simple_aggs[output_name].append(function_name)
                        simple_agg_new_names.append(alias)
                        simple_aggs_functions.add(function_name)

            result_aggs = []

            if simple_aggs:
                # Fast path for single aggregation such as `df.groupby(...).mean()`
                if (
                    len(simple_aggs_functions) == 1
                    and (agg_method := simple_aggs_functions.pop()) != "size"
                    and len(simple_aggs) > 1
                ):
                    result_simple_aggs = getattr(
                        self._grouped[list(simple_aggs.keys())], agg_method
                    )()
                    result_simple_aggs.columns = [
                        f"{a}_{agg_method}" for a in result_simple_aggs.columns
                    ]
                else:
                    result_simple_aggs = self._grouped.agg(simple_aggs)
                    result_simple_aggs.columns = [
                        f"{a}_{b}" for a, b in result_simple_aggs.columns
                    ]
                if not (
                    set(result_simple_aggs.columns) == set(expected_old_names)
                    and len(result_simple_aggs.columns) == len(expected_old_names)
                ):  # pragma: no cover
                    msg = (
                        f"Safety assertion failed, expected {expected_old_names} "
                        f"got {result_simple_aggs.columns}, "
                        "please report a bug at https://github.com/narwhals-dev/narwhals/issues"
                    )
                    raise AssertionError(msg)

                # Rename columns, being very careful
                # (duplicate old names are consumed in order, so the same
                # source column aggregated twice maps to distinct aliases).
                expected_old_names_indices: dict[str, list[int]] = (
                    collections.defaultdict(list)
                )
                for idx, item in enumerate(expected_old_names):
                    expected_old_names_indices[item].append(idx)
                index_map: list[int] = [
                    expected_old_names_indices[item].pop(0)
                    for item in result_simple_aggs.columns
                ]
                result_simple_aggs.columns = [simple_agg_new_names[i] for i in index_map]
                result_aggs.append(result_simple_aggs)

            if nunique_aggs:
                result_nunique_aggs = self._grouped[list(nunique_aggs.values())].nunique(
                    dropna=False
                )
                result_nunique_aggs.columns = list(nunique_aggs.keys())

                result_aggs.append(result_nunique_aggs)

            if std_aggs:
                for ddof, (std_output_names, std_aliases) in std_aggs.items():
                    _aggregation = self._grouped[std_output_names].std(ddof=ddof)
                    # `_aggregation` is a new object so it's OK to operate inplace.
                    _aggregation.columns = std_aliases
                    result_aggs.append(_aggregation)
            if var_aggs:
                for ddof, (var_output_names, var_aliases) in var_aggs.items():
                    _aggregation = self._grouped[var_output_names].var(ddof=ddof)
                    # `_aggregation` is a new object so it's OK to operate inplace.
                    _aggregation.columns = var_aliases
                    result_aggs.append(_aggregation)

            if result_aggs:
                output_names_counter = collections.Counter(
                    c for frame in result_aggs for c in frame
                )
                if any(v > 1 for v in output_names_counter.values()):
                    msg = ""
                    for key, value in output_names_counter.items():
                        if value > 1:
                            msg += f"\n- '{key}' {value} times"
                        else:  # pragma: no cover
                            pass
                    msg = f"Expected unique output names, got:{msg}"
                    raise ValueError(msg)
                namespace = self.compliant.__narwhals_namespace__()
                result = namespace._concat_horizontal(result_aggs)
            else:
                # No aggregation provided
                result = self.compliant.__native_namespace__().DataFrame(
                    list(self._grouped.groups.keys()), columns=self._keys
                )
            # Keep inplace=True to avoid making a redundant copy.
            # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files
            result.reset_index(inplace=True)  # noqa: PD002
            return self.compliant._with_native(
                select_columns_by_name(result, new_names, backend_version, implementation)
            ).rename(dict(zip(self._keys, self._output_key_names)))

        if self.compliant.native.empty:
            # Don't even attempt this, it's way too inconsistent across pandas versions.
            msg = (
                "No results for group-by aggregation.\n\n"
                "Hint: you were probably trying to apply a non-elementary aggregation with a "
                "pandas-like API.\n"
                "Please rewrite your query such that group-by aggregations "
                "are elementary. For example, instead of:\n\n"
                "    df.group_by('a').agg(nw.col('b').round(2).mean())\n\n"
                "use:\n\n"
                "    df.with_columns(nw.col('b').round(2)).group_by('a').agg(nw.col('b').mean())\n\n"
            )
            raise ValueError(msg)

        warnings.warn(
            "Found complex group-by expression, which can't be expressed efficiently with the "
            "pandas API. If you can, please rewrite your query such that group-by aggregations "
            "are simple (e.g. mean, std, min, max, ...). \n\n"
            "Please see: "
            "https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/",
            UserWarning,
            stacklevel=find_stacklevel(),
        )

        def func(df: Any) -> Any:
            # Evaluate every expression on a single group; each result is
            # expected to be length-1 (an aggregation), so take .iloc[0].
            out_group = []
            out_names = []
            for expr in exprs:
                results_keys = expr(self.compliant._with_native(df))
                for result_keys in results_keys:
                    out_group.append(result_keys.native.iloc[0])
                    out_names.append(result_keys.name)
            ns = self.compliant.__narwhals_namespace__()
            return ns._series.from_iterable(out_group, index=out_names, context=ns).native

        if implementation.is_pandas() and backend_version >= (2, 2):
            # `include_groups` was added (and defaulting to True deprecated)
            # in pandas 2.2.
            result_complex = self._grouped.apply(func, include_groups=False)
        else:  # pragma: no cover
            result_complex = self._grouped.apply(func)

        # Keep inplace=True to avoid making a redundant copy.
        # This may need updating, depending on https://github.com/pandas-dev/pandas/pull/51466/files
        result_complex.reset_index(inplace=True)  # noqa: PD002
        return self.compliant._with_native(
            select_columns_by_name(
                result_complex, new_names, backend_version, implementation
            )
        ).rename(dict(zip(self._keys, self._output_key_names)))

    def __iter__(self) -> Iterator[tuple[Any, PandasLikeDataFrame]]:
        """Yield ``(key, sub-frame)`` pairs, one per group."""
        with warnings.catch_warnings():
            # Silence pandas' future warning about single-key group labels
            # becoming length-1 tuples.
            warnings.filterwarnings(
                "ignore",
                message=".*a length 1 tuple will be returned",
                category=FutureWarning,
            )

            for key, group in self._grouped:
                yield (
                    key,
                    self.compliant._with_native(group).simple_select(*self._df.columns),
                )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py
new file mode 100644
index 0000000..5612c85
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/namespace.py
@@ -0,0 +1,332 @@
+from __future__ import annotations
+
+import operator
+import warnings
+from functools import reduce
+from typing import TYPE_CHECKING, Literal, Sequence
+
+import pandas as pd
+
+from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen
+from narwhals._expression_parsing import (
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+from narwhals._pandas_like.expr import PandasLikeExpr
+from narwhals._pandas_like.selectors import PandasSelectorNamespace
+from narwhals._pandas_like.series import PandasLikeSeries
+from narwhals._pandas_like.utils import align_series_full_broadcast
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.typing import NDFrameT
+ from narwhals._utils import Implementation, Version
+ from narwhals.typing import IntoDType, NonNestedLiteral
+
# pandas `axis` values: 0 concatenates along the index (rows stacked),
# 1 concatenates along the columns.
VERTICAL: Literal[0] = 0
HORIZONTAL: Literal[1] = 1
+
+
class PandasLikeNamespace(
    EagerNamespace[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr, pd.DataFrame]
):
    """Namespace-level operations shared by the pandas-like backends.

    One instance serves pandas, modin and cuDF alike; the concrete backend is
    selected via ``implementation`` and version-gated with ``backend_version``.
    """

    @property
    def _dataframe(self) -> type[PandasLikeDataFrame]:
        # Concrete classes consumed by the shared Eager* base machinery.
        return PandasLikeDataFrame

    @property
    def _expr(self) -> type[PandasLikeExpr]:
        return PandasLikeExpr

    @property
    def _series(self) -> type[PandasLikeSeries]:
        return PandasLikeSeries

    @property
    def selectors(self) -> PandasSelectorNamespace:
        return PandasSelectorNamespace.from_namespace(self)

    # --- not in spec ---
    def __init__(
        self,
        implementation: Implementation,
        backend_version: tuple[int, ...],
        version: Version,
    ) -> None:
        self._implementation = implementation
        self._backend_version = backend_version
        self._version = version

    def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> PandasLikeExpr:
        """Return an expression producing a length-1 column named ``"literal"``."""

        def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
            # Reuse the frame's first index label so the literal aligns with it.
            pandas_series = self._series.from_iterable(
                data=[value],
                name="literal",
                index=df._native_frame.index[0:1],
                context=self,
            )
            if dtype:
                return pandas_series.cast(dtype)
            return pandas_series

        return PandasLikeExpr(
            lambda df: [_lit_pandas_series(df)],
            depth=0,
            function_name="lit",
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=self._version,
        )

    def len(self) -> PandasLikeExpr:
        """Return an expression producing the frame's row count as a length-1 column."""
        return PandasLikeExpr(
            lambda df: [
                self._series.from_iterable(
                    [len(df._native_frame)], name="len", index=[0], context=self
                )
            ],
            depth=0,
            function_name="len",
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            implementation=self._implementation,
            backend_version=self._backend_version,
            version=self._version,
        )

    # --- horizontal ---
    def sum_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        """Row-wise sum across `exprs`; nulls are treated as 0."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = [s for _expr in exprs for s in _expr(df)]
            series = align_series_full_broadcast(*series)
            # fill_null(0, ...) so a null in one column doesn't null the row sum.
            native_series = (s.fill_null(0, None, None) for s in series)
            return [reduce(operator.add, native_series)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="sum_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def all_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        """Row-wise boolean AND across `exprs`."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = align_series_full_broadcast(
                *(s for _expr in exprs for s in _expr(df))
            )
            return [reduce(operator.and_, series)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="all_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def any_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        """Row-wise boolean OR across `exprs`."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = align_series_full_broadcast(
                *(s for _expr in exprs for s in _expr(df))
            )
            return [reduce(operator.or_, series)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="any_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def mean_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        """Row-wise mean across `exprs`, ignoring nulls in both sum and count."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            expr_results = [s for _expr in exprs for s in _expr(df)]
            # Numerator: null-as-zero sum; denominator: per-row non-null count.
            series = align_series_full_broadcast(
                *(s.fill_null(0, strategy=None, limit=None) for s in expr_results)
            )
            non_na = align_series_full_broadcast(*(1 - s.is_null() for s in expr_results))
            return [reduce(operator.add, series) / reduce(operator.add, non_na)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="mean_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def min_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        """Row-wise minimum across `exprs`; result keeps the first series' name."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = [s for _expr in exprs for s in _expr(df)]
            series = align_series_full_broadcast(*series)

            # Concatenate as columns, then take the native row-wise min.
            return [
                PandasLikeSeries(
                    self.concat(
                        (s.to_frame() for s in series), how="horizontal"
                    )._native_frame.min(axis=1),
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    version=self._version,
                ).alias(series[0].name)
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="min_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def max_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        """Row-wise maximum across `exprs`; result keeps the first series' name."""

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            series = [s for _expr in exprs for s in _expr(df)]
            series = align_series_full_broadcast(*series)

            return [
                PandasLikeSeries(
                    self.concat(
                        (s.to_frame() for s in series), how="horizontal"
                    )._native_frame.max(axis=1),
                    implementation=self._implementation,
                    backend_version=self._backend_version,
                    version=self._version,
                ).alias(series[0].name)
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="max_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    @property
    def _concat(self):  # type: ignore[no-untyped-def] # noqa: ANN202
        """Return the **native** equivalent of `pd.concat`."""
        # NOTE: Leave un-annotated to allow `@overload` matching via inference.
        if TYPE_CHECKING:
            import pandas as pd

            return pd.concat
        return self._implementation.to_native_namespace().concat

    def _concat_diagonal(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame:
        # Diagonal concat = stack rows, unioning columns (missing cells -> NA).
        # `copy`/`sort` kwargs are only passed where the pandas version takes
        # them (presumably `copy` is gone in pandas>=3 — version gate below).
        if self._implementation.is_pandas() and self._backend_version < (3,):
            if self._backend_version < (1,):
                return self._concat(dfs, axis=VERTICAL, copy=False, sort=False)
            return self._concat(dfs, axis=VERTICAL, copy=False)
        return self._concat(dfs, axis=VERTICAL)

    def _concat_horizontal(self, dfs: Sequence[NDFrameT], /) -> pd.DataFrame:
        if self._implementation.is_cudf():
            with warnings.catch_warnings():
                # cuDF warns when concatenating with empty entries; that
                # deprecation is not actionable here, so suppress it.
                warnings.filterwarnings(
                    "ignore",
                    message="The behavior of array concatenation with empty entries is deprecated",
                    category=FutureWarning,
                )
                return self._concat(dfs, axis=HORIZONTAL)
        elif self._implementation.is_pandas() and self._backend_version < (3,):
            return self._concat(dfs, axis=HORIZONTAL, copy=False)
        return self._concat(dfs, axis=HORIZONTAL)

    def _concat_vertical(self, dfs: Sequence[pd.DataFrame], /) -> pd.DataFrame:
        # Vertical concat requires identical column names in identical order;
        # raise eagerly with a descriptive message instead of silently
        # reindexing the way bare `pd.concat` would.
        cols_0 = dfs[0].columns
        for i, df in enumerate(dfs[1:], start=1):
            cols_current = df.columns
            if not (
                (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()
            ):
                msg = (
                    "unable to vstack, column names don't match:\n"
                    f"   - dataframe 0: {cols_0.to_list()}\n"
                    f"   - dataframe {i}: {cols_current.to_list()}\n"
                )
                raise TypeError(msg)
        if self._implementation.is_pandas() and self._backend_version < (3,):
            return self._concat(dfs, axis=VERTICAL, copy=False)
        return self._concat(dfs, axis=VERTICAL)

    def when(self, predicate: PandasLikeExpr) -> PandasWhen:
        """Start a `when(predicate).then(...).otherwise(...)` chain."""
        return PandasWhen.from_expr(predicate, context=self)

    def concat_str(
        self, *exprs: PandasLikeExpr, separator: str, ignore_nulls: bool
    ) -> PandasLikeExpr:
        """Row-wise string concatenation of `exprs`, joined by `separator`.

        With ``ignore_nulls=False``, any null in a row nulls that row's result.
        With ``ignore_nulls=True``, null values (and their adjacent separator)
        are skipped.
        """
        string = self._version.dtypes.String()

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            expr_results = [s for _expr in exprs for s in _expr(df)]
            # Cast every input to string and broadcast to a common length.
            series = align_series_full_broadcast(*(s.cast(string) for s in expr_results))
            null_mask = align_series_full_broadcast(*(s.is_null() for s in expr_results))

            if not ignore_nulls:
                null_mask_result = reduce(operator.or_, null_mask)
                result = reduce(lambda x, y: x + separator + y, series).zip_with(
                    ~null_mask_result, None
                )
            else:
                # Null values become "", and the separator preceding a null
                # value is dropped by masking the separator with the same mask.
                init_value, *values = [
                    s.zip_with(~nm, "") for s, nm in zip(series, null_mask)
                ]

                sep_array = init_value.from_iterable(
                    data=[separator] * len(init_value),
                    name="sep",
                    index=init_value.native.index,
                    context=self,
                )
                separators = (sep_array.zip_with(~nm, "") for nm in null_mask[:-1])
                result = reduce(
                    operator.add, (s + v for s, v in zip(separators, values)), init_value
                )

            return [result]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="concat_str",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
+
+
class PandasWhen(EagerWhen[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr]):
    """`when/then/otherwise` implementation for the pandas-like backends."""

    @property
    def _then(self) -> type[PandasThen]:
        # Concrete `then` class used by the shared EagerWhen machinery.
        return PandasThen

    def _if_then_else(
        self,
        when: PandasLikeSeries,
        then: PandasLikeSeries,
        otherwise: PandasLikeSeries | None,
        /,
    ) -> PandasLikeSeries:
        """Pick `then` where `when` is true, else `otherwise` (or null)."""
        # `Series.where` keeps values where the condition holds and fills the
        # rest with the second argument (NaN/null when omitted).
        if otherwise is not None:
            when, then, otherwise = align_series_full_broadcast(when, then, otherwise)
            selected = then.native.where(when.native, otherwise.native)
        else:
            when, then = align_series_full_broadcast(when, then)
            selected = then.native.where(when.native)
        return then._with_native(selected)
+
+
class PandasThen(
    CompliantThen[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr], PandasLikeExpr
): ...  # Result of `.then(...)`: behaves as a regular PandasLikeExpr.
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py
new file mode 100644
index 0000000..f6b2a73
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/selectors.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, EagerSelectorNamespace
+from narwhals._pandas_like.expr import PandasLikeExpr
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame # noqa: F401
+ from narwhals._pandas_like.series import PandasLikeSeries # noqa: F401
+
+
class PandasSelectorNamespace(
    EagerSelectorNamespace["PandasLikeDataFrame", "PandasLikeSeries"]
):
    """`selectors` namespace for the pandas-like backends."""

    @property
    def _selector(self) -> type[PandasSelector]:
        # Concrete selector class used by the shared EagerSelector machinery.
        return PandasSelector
+
+
class PandasSelector(  # type: ignore[misc]
    CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr
):
    """Column selector that can degrade into an ordinary expression."""

    def _to_expr(self) -> PandasLikeExpr:
        """Re-wrap this selector's call and metadata as a plain PandasLikeExpr."""
        expr_kwargs = {
            "depth": self._depth,
            "function_name": self._function_name,
            "evaluate_output_names": self._evaluate_output_names,
            "alias_output_names": self._alias_output_names,
            "implementation": self._implementation,
            "backend_version": self._backend_version,
            "version": self._version,
        }
        return PandasLikeExpr(self._call, **expr_kwargs)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py
new file mode 100644
index 0000000..0ea4e83
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series.py
@@ -0,0 +1,1109 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, Sequence, cast
+
+import numpy as np
+
+from narwhals._compliant import EagerSeries
+from narwhals._pandas_like.series_cat import PandasLikeSeriesCatNamespace
+from narwhals._pandas_like.series_dt import PandasLikeSeriesDateTimeNamespace
+from narwhals._pandas_like.series_list import PandasLikeSeriesListNamespace
+from narwhals._pandas_like.series_str import PandasLikeSeriesStringNamespace
+from narwhals._pandas_like.series_struct import PandasLikeSeriesStructNamespace
+from narwhals._pandas_like.utils import (
+ align_and_extract_native,
+ get_dtype_backend,
+ narwhals_to_native_dtype,
+ native_to_narwhals_dtype,
+ object_native_to_narwhals_dtype,
+ rename,
+ select_columns_by_name,
+ set_index,
+)
+from narwhals._utils import (
+ Implementation,
+ is_list_of,
+ parse_version,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_numpy_array_1d, is_pandas_like_series
+from narwhals.exceptions import InvalidOperationError
+
+if TYPE_CHECKING:
+ from types import ModuleType
+ from typing import Hashable
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._arrow.typing import ChunkedArrayAny
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ Into1DArray,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ SizedMultiIndexSelector,
+ TemporalLiteral,
+ _1DArray,
+ _AnyDArray,
+ _SliceIndex,
+ )
+
+PANDAS_TO_NUMPY_DTYPE_NO_MISSING = {
+ "Int64": "int64",
+ "int64[pyarrow]": "int64",
+ "Int32": "int32",
+ "int32[pyarrow]": "int32",
+ "Int16": "int16",
+ "int16[pyarrow]": "int16",
+ "Int8": "int8",
+ "int8[pyarrow]": "int8",
+ "UInt64": "uint64",
+ "uint64[pyarrow]": "uint64",
+ "UInt32": "uint32",
+ "uint32[pyarrow]": "uint32",
+ "UInt16": "uint16",
+ "uint16[pyarrow]": "uint16",
+ "UInt8": "uint8",
+ "uint8[pyarrow]": "uint8",
+ "Float64": "float64",
+ "float64[pyarrow]": "float64",
+ "Float32": "float32",
+ "float32[pyarrow]": "float32",
+}
+PANDAS_TO_NUMPY_DTYPE_MISSING = {
+ "Int64": "float64",
+ "int64[pyarrow]": "float64",
+ "Int32": "float64",
+ "int32[pyarrow]": "float64",
+ "Int16": "float64",
+ "int16[pyarrow]": "float64",
+ "Int8": "float64",
+ "int8[pyarrow]": "float64",
+ "UInt64": "float64",
+ "uint64[pyarrow]": "float64",
+ "UInt32": "float64",
+ "uint32[pyarrow]": "float64",
+ "UInt16": "float64",
+ "uint16[pyarrow]": "float64",
+ "UInt8": "float64",
+ "uint8[pyarrow]": "float64",
+ "Float64": "float64",
+ "float64[pyarrow]": "float64",
+ "Float32": "float32",
+ "float32[pyarrow]": "float32",
+}
+
+
+class PandasLikeSeries(EagerSeries[Any]):
+ def __init__(
+ self,
+ native_series: Any,
+ *,
+ implementation: Implementation,
+ backend_version: tuple[int, ...],
+ version: Version,
+ ) -> None:
+ self._name = native_series.name
+ self._native_series = native_series
+ self._implementation = implementation
+ self._backend_version = backend_version
+ self._version = version
+ validate_backend_version(self._implementation, self._backend_version)
+ # Flag which indicates if, in the final step before applying an operation,
+ # the single value behind the PandasLikeSeries should be extract and treated
+ # as a Scalar. For example, in `nw.col('a') - nw.lit(3)`, the latter would
+ # become a Series of length 1. Rather that doing a full broadcast so it matches
+ # the length of the whole dataframe, we just extract the scalar.
+ self._broadcast = False
+
+ @property
+ def native(self) -> Any:
+ return self._native_series
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation.is_pandas_like():
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected pandas/modin/cudf, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def __narwhals_namespace__(self) -> PandasLikeNamespace:
+ from narwhals._pandas_like.namespace import PandasLikeNamespace
+
+ return PandasLikeNamespace(
+ self._implementation, self._backend_version, self._version
+ )
+
+ def _gather(self, rows: SizedMultiIndexSelector[pd.Series[Any]]) -> Self:
+ rows = list(rows) if isinstance(rows, tuple) else rows
+ return self._with_native(self.native.iloc[rows])
+
+ def _gather_slice(self, rows: _SliceIndex | range) -> Self:
+ return self._with_native(
+ self.native.iloc[slice(rows.start, rows.stop, rows.step)]
+ )
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=version,
+ )
+
+ def _with_native(self, series: Any, *, preserve_broadcast: bool = False) -> Self:
+ result = self.__class__(
+ series,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+ if preserve_broadcast:
+ result._broadcast = self._broadcast
+ return result
+
+ @classmethod
+ def from_iterable(
+ cls,
+ data: Iterable[Any],
+ *,
+ context: _FullContext,
+ name: str = "",
+ dtype: IntoDType | None = None,
+ index: Any = None,
+ ) -> Self:
+ implementation = context._implementation
+ backend_version = context._backend_version
+ version = context._version
+ ns = implementation.to_native_namespace()
+ kwds: dict[str, Any] = {}
+ if dtype:
+ kwds["dtype"] = narwhals_to_native_dtype(
+ dtype, None, implementation, backend_version, version
+ )
+ else:
+ if implementation.is_pandas():
+ kwds["copy"] = False
+ if index is not None and len(index):
+ kwds["index"] = index
+ return cls.from_native(ns.Series(data, name=name, **kwds), context=context)
+
+ @staticmethod
+ def _is_native(obj: Any) -> TypeIs[Any]:
+ return is_pandas_like_series(obj) # pragma: no cover
+
+ @classmethod
+ def from_native(cls, data: Any, /, *, context: _FullContext) -> Self:
+ return cls(
+ data,
+ implementation=context._implementation,
+ backend_version=context._backend_version,
+ version=context._version,
+ )
+
+ @classmethod
+ def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
+ implementation = context._implementation
+ arr = data if is_numpy_array_1d(data) else [data]
+ native = implementation.to_native_namespace().Series(arr, name="")
+ return cls.from_native(native, context=context)
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def dtype(self) -> DType:
+ native_dtype = self.native.dtype
+ return (
+ native_to_narwhals_dtype(native_dtype, self._version, self._implementation)
+ if native_dtype != "object"
+ else object_native_to_narwhals_dtype(
+ self.native, self._version, self._implementation
+ )
+ )
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None,
+ span: float | None,
+ half_life: float | None,
+ alpha: float | None,
+ adjust: bool,
+ min_samples: int,
+ ignore_nulls: bool,
+ ) -> PandasLikeSeries:
+ ser = self.native
+ mask_na = ser.isna()
+ if self._implementation is Implementation.CUDF:
+ if (min_samples == 0 and not ignore_nulls) or (not mask_na.any()):
+ result = ser.ewm(
+ com=com, span=span, halflife=half_life, alpha=alpha, adjust=adjust
+ ).mean()
+ else:
+ msg = (
+ "cuDF only supports `ewm_mean` when there are no missing values "
+ "or when both `min_period=0` and `ignore_nulls=False`"
+ )
+ raise NotImplementedError(msg)
+ else:
+ result = ser.ewm(
+ com, span, half_life, alpha, min_samples, adjust, ignore_na=ignore_nulls
+ ).mean()
+ result[mask_na] = None
+ return self._with_native(result)
+
+ def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
+ if isinstance(values, self.__class__):
+ values = set_index(
+ values.native,
+ self.native.index[indices],
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ s = self.native.copy(deep=True)
+ s.iloc[indices] = values
+ s.name = self.name
+ return self._with_native(s)
+
+ def _scatter_in_place(self, indices: Self, values: Self) -> None:
+ # Scatter, modifying original Series. Use with care!
+ values_native = set_index(
+ values.native,
+ self.native.index[indices.native],
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ )
+ if self._implementation is Implementation.PANDAS and parse_version(np) < (2,):
+ values_native = values_native.copy() # pragma: no cover
+ min_pd_version = (1, 2)
+ if (
+ self._implementation is Implementation.PANDAS
+ and self._backend_version < min_pd_version
+ ):
+ self.native.iloc[indices.native.values] = values_native # noqa: PD011
+ else:
+ self.native.iloc[indices.native] = values_native
+
+ def cast(self, dtype: IntoDType) -> Self:
+ pd_dtype = narwhals_to_native_dtype(
+ dtype,
+ dtype_backend=get_dtype_backend(self.native.dtype, self._implementation),
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ )
+ return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True)
+
+ def item(self, index: int | None) -> Any:
+ # cuDF doesn't have Series.item().
+ if index is None:
+ if len(self) != 1:
+ msg = (
+ "can only call '.item()' if the Series is of length 1,"
+ f" or an explicit index is provided (Series is of length {len(self)})"
+ )
+ raise ValueError(msg)
+ return self.native.iloc[0]
+ return self.native.iloc[index]
+
+ def to_frame(self) -> PandasLikeDataFrame:
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ return PandasLikeDataFrame(
+ self.native.to_frame(),
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ version=self._version,
+ validate_column_names=False,
+ )
+
+ def to_list(self) -> list[Any]:
+ is_cudf = self._implementation.is_cudf()
+ return self.native.to_arrow().to_pylist() if is_cudf else self.native.to_list()
+
+ def is_between(
+ self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
+ ) -> Self:
+ ser = self.native
+ _, lower_bound = align_and_extract_native(self, lower_bound)
+ _, upper_bound = align_and_extract_native(self, upper_bound)
+ if closed == "left":
+ res = ser.ge(lower_bound) & ser.lt(upper_bound)
+ elif closed == "right":
+ res = ser.gt(lower_bound) & ser.le(upper_bound)
+ elif closed == "none":
+ res = ser.gt(lower_bound) & ser.lt(upper_bound)
+ elif closed == "both":
+ res = ser.ge(lower_bound) & ser.le(upper_bound)
+ else: # pragma: no cover
+ raise AssertionError
+ return self._with_native(res).alias(ser.name)
+
+ def is_in(self, other: Any) -> PandasLikeSeries:
+ return self._with_native(self.native.isin(other))
+
+ def arg_true(self) -> PandasLikeSeries:
+ ser = self.native
+ result = ser.__class__(range(len(ser)), name=ser.name, index=ser.index).loc[ser]
+ return self._with_native(result)
+
+ def arg_min(self) -> int:
+ if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
+ return self.native.to_numpy().argmin()
+ return self.native.argmin()
+
+ def arg_max(self) -> int:
+ ser = self.native
+ if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
+ return ser.to_numpy().argmax()
+ return ser.argmax()
+
+ # Binary comparisons
+
+ def filter(self, predicate: Any) -> PandasLikeSeries:
+ if not is_list_of(predicate, bool):
+ _, other_native = align_and_extract_native(self, predicate)
+ else:
+ other_native = predicate
+ return self._with_native(self.native.loc[other_native]).alias(self.name)
+
+ def __eq__(self, other: object) -> PandasLikeSeries: # type: ignore[override]
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser == other).alias(self.name)
+
+ def __ne__(self, other: object) -> PandasLikeSeries: # type: ignore[override]
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser != other).alias(self.name)
+
+ def __ge__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser >= other).alias(self.name)
+
+ def __gt__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser > other).alias(self.name)
+
+ def __le__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser <= other).alias(self.name)
+
+ def __lt__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser < other).alias(self.name)
+
+ def __and__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser & other).alias(self.name)
+
+ def __rand__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ ser = cast("pd.Series[Any]", ser)
+ return self._with_native(ser.__and__(other)).alias(self.name)
+
+ def __or__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser | other).alias(self.name)
+
+ def __ror__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ ser = cast("pd.Series[Any]", ser)
+ return self._with_native(ser.__or__(other)).alias(self.name)
+
+ def __add__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser + other).alias(self.name)
+
+ def __radd__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__radd__(other_native)).alias(self.name)
+
+ def __sub__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser - other).alias(self.name)
+
+ def __rsub__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rsub__(other_native)).alias(self.name)
+
+ def __mul__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser * other).alias(self.name)
+
+ def __rmul__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rmul__(other_native)).alias(self.name)
+
+ def __truediv__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser / other).alias(self.name)
+
+ def __rtruediv__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rtruediv__(other_native)).alias(self.name)
+
+ def __floordiv__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser // other).alias(self.name)
+
+ def __rfloordiv__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rfloordiv__(other_native)).alias(self.name)
+
+ def __pow__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser**other).alias(self.name)
+
+ def __rpow__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rpow__(other_native)).alias(self.name)
+
+ def __mod__(self, other: Any) -> PandasLikeSeries:
+ ser, other = align_and_extract_native(self, other)
+ return self._with_native(ser % other).alias(self.name)
+
+ def __rmod__(self, other: Any) -> PandasLikeSeries:
+ _, other_native = align_and_extract_native(self, other)
+ return self._with_native(self.native.__rmod__(other_native)).alias(self.name)
+
+ # Unary
+
+ def __invert__(self: PandasLikeSeries) -> PandasLikeSeries:
+ return self._with_native(~self.native)
+
+ # Reductions
+
+ def any(self) -> bool:
+ return self.native.any()
+
+ def all(self) -> bool:
+ return self.native.all()
+
+ def min(self) -> Any:
+ return self.native.min()
+
+ def max(self) -> Any:
+ return self.native.max()
+
+ def sum(self) -> float:
+ return self.native.sum()
+
+ def count(self) -> int:
+ return self.native.count()
+
+ def mean(self) -> float:
+ return self.native.mean()
+
+ def median(self) -> float:
+ if not self.dtype.is_numeric():
+ msg = "`median` operation not supported for non-numeric input type."
+ raise InvalidOperationError(msg)
+ return self.native.median()
+
+ def std(self, *, ddof: int) -> float:
+ return self.native.std(ddof=ddof)
+
+ def var(self, *, ddof: int) -> float:
+ return self.native.var(ddof=ddof)
+
+ def skew(self) -> float | None:
+ ser_not_null = self.native.dropna()
+ if len(ser_not_null) == 0:
+ return None
+ elif len(ser_not_null) == 1:
+ return float("nan")
+ elif len(ser_not_null) == 2:
+ return 0.0
+ else:
+ m = ser_not_null - ser_not_null.mean()
+ m2 = (m**2).mean()
+ m3 = (m**3).mean()
+ return m3 / (m2**1.5) if m2 != 0 else float("nan")
+
+ def len(self) -> int:
+ return len(self.native)
+
+ # Transformations
+
+ def is_null(self) -> PandasLikeSeries:
+ return self._with_native(self.native.isna(), preserve_broadcast=True)
+
+ def is_nan(self) -> PandasLikeSeries:
+ ser = self.native
+ if self.dtype.is_numeric():
+ return self._with_native(ser != ser, preserve_broadcast=True) # noqa: PLR0124
+ msg = f"`.is_nan` only supported for numeric dtype and not {self.dtype}, did you mean `.is_null`?"
+ raise InvalidOperationError(msg)
+
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self:
+ ser = self.native
+ if value is not None:
+ _, native_value = align_and_extract_native(self, value)
+ res_ser = self._with_native(
+ ser.fillna(value=native_value), preserve_broadcast=True
+ )
+ else:
+ res_ser = self._with_native(
+ ser.ffill(limit=limit)
+ if strategy == "forward"
+ else ser.bfill(limit=limit),
+ preserve_broadcast=True,
+ )
+
+ return res_ser
+
+ def drop_nulls(self) -> PandasLikeSeries:
+ return self._with_native(self.native.dropna())
+
+ def n_unique(self) -> int:
+ return self.native.nunique(dropna=False)
+
+ def sample(
+ self,
+ n: int | None,
+ *,
+ fraction: float | None,
+ with_replacement: bool,
+ seed: int | None,
+ ) -> Self:
+ return self._with_native(
+ self.native.sample(
+ n=n, frac=fraction, replace=with_replacement, random_state=seed
+ )
+ )
+
+ def abs(self) -> PandasLikeSeries:
+ return self._with_native(self.native.abs())
+
+ def cum_sum(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cumsum(skipna=True)
+ if not reverse
+ else self.native[::-1].cumsum(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def unique(self, *, maintain_order: bool = True) -> PandasLikeSeries:
+ """Pandas always maintains order, as per its docstring.
+
+ > Uniques are returned in order of appearance.
+ """
+ return self._with_native(
+ self.native.__class__(self.native.unique(), name=self.name)
+ )
+
+ def diff(self) -> PandasLikeSeries:
+ return self._with_native(self.native.diff())
+
+ def shift(self, n: int) -> PandasLikeSeries:
+ return self._with_native(self.native.shift(n))
+
    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> PandasLikeSeries:
        """Map each value in `old` to the corresponding value in `new`.

        Implemented as a left join against a two-column lookup frame, which
        vectorizes the mapping. Raises ValueError if any non-null value in
        the series has no entry in `old` ("strict" semantics).
        """
        # Temporary column name for the mapped values; joined frame has the
        # original values under `self.name` and their replacements under it.
        tmp_name = f"{self.name}_tmp"
        dtype_backend = get_dtype_backend(self.native.dtype, self._implementation)
        # Resolve the requested narwhals dtype to a native one, if given.
        dtype = (
            narwhals_to_native_dtype(
                return_dtype,
                dtype_backend,
                self._implementation,
                self._backend_version,
                self._version,
            )
            if return_dtype
            else None
        )
        namespace = self.__native_namespace__()
        other = namespace.DataFrame(
            {self.name: old, tmp_name: namespace.Series(new, dtype=dtype)}
        )
        # Left join: unmatched values come back as nulls in `tmp_name`.
        result = self._with_native(
            self.native.to_frame().merge(other, on=self.name, how="left")[tmp_name]
        ).alias(self.name)
        # Any *extra* null in the result means a non-null input had no mapping.
        if result.is_null().sum() != self.is_null().sum():
            msg = (
                "replace_strict did not replace all non-null values.\n\n"
                f"The following did not get replaced: {self.filter(~self.is_null() & result.is_null()).unique(maintain_order=False).to_list()}"
            )
            raise ValueError(msg)
        return result
+
+ def sort(self, *, descending: bool, nulls_last: bool) -> PandasLikeSeries:
+ na_position = "last" if nulls_last else "first"
+ return self._with_native(
+ self.native.sort_values(ascending=not descending, na_position=na_position)
+ ).alias(self.name)
+
+ def alias(self, name: str | Hashable) -> Self:
+ if name != self.name:
+ return self._with_native(
+ rename(
+ self.native,
+ name,
+ implementation=self._implementation,
+ backend_version=self._backend_version,
+ ),
+ preserve_broadcast=True,
+ )
+ return self
+
    def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray:
        """NumPy array protocol hook; delegates to `to_numpy`."""
        # pandas used to always return object dtype for nullable dtypes.
        # So, we intercept __array__ and pass to `to_numpy` ourselves to make
        # sure an appropriate numpy dtype is returned.
        return self.to_numpy(dtype=dtype, copy=copy)
+
    def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
        """Convert to a 1-D numpy array, choosing sensible dtypes for nullables.

        Timezone-aware datetimes are normalized to UTC (then made naive) so
        numpy gets a plain datetime64 column; nullable dtypes are mapped to
        float (with NaN for missing) or their exact numpy equivalent.
        """
        # the default is meant to be None, but pandas doesn't allow it?
        # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.__array__.html
        dtypes = self._version.dtypes
        if isinstance(self.dtype, dtypes.Datetime) and self.dtype.time_zone is not None:
            s = self.dt.convert_time_zone("UTC").dt.replace_time_zone(None).native
        else:
            s = self.native

        has_missing = s.isna().any()
        # cuDF requires an explicit copy when materializing to host memory.
        kwargs: dict[Any, Any] = {"copy": copy or self._implementation.is_cudf()}
        if has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_MISSING:
            if self._implementation is Implementation.PANDAS and self._backend_version < (
                1,
            ):  # pragma: no cover
                # pandas < 1.0 doesn't accept `na_value` -- fall through with defaults.
                ...
            else:
                # Represent missing values as NaN in a float result.
                kwargs.update({"na_value": float("nan")})
                dtype = dtype or PANDAS_TO_NUMPY_DTYPE_MISSING[str(s.dtype)]
        if not has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_NO_MISSING:
            # No missing values: the exact (non-float) numpy dtype is safe.
            dtype = dtype or PANDAS_TO_NUMPY_DTYPE_NO_MISSING[str(s.dtype)]
        return s.to_numpy(dtype=dtype, **kwargs)
+
+ def to_pandas(self) -> pd.Series[Any]:
+ if self._implementation is Implementation.PANDAS:
+ return self.native
+ elif self._implementation is Implementation.CUDF: # pragma: no cover
+ return self.native.to_pandas()
+ elif self._implementation is Implementation.MODIN:
+ return self.native._to_pandas()
+ msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def to_polars(self) -> pl.Series:
+ import polars as pl # ignore-banned-import
+
+ return pl.from_pandas(self.to_pandas())
+
+ # --- descriptive ---
+ def is_unique(self) -> Self:
+ return self._with_native(~self.native.duplicated(keep=False)).alias(self.name)
+
+ def null_count(self) -> int:
+ return self.native.isna().sum()
+
+ def is_first_distinct(self) -> Self:
+ return self._with_native(~self.native.duplicated(keep="first")).alias(self.name)
+
+ def is_last_distinct(self) -> Self:
+ return self._with_native(~self.native.duplicated(keep="last")).alias(self.name)
+
+ def is_sorted(self, *, descending: bool) -> bool:
+ if not isinstance(descending, bool):
+ msg = f"argument 'descending' should be boolean, found {type(descending)}"
+ raise TypeError(msg)
+
+ if descending:
+ return self.native.is_monotonic_decreasing
+ else:
+ return self.native.is_monotonic_increasing
+
    def value_counts(
        self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
    ) -> PandasLikeDataFrame:
        """Count (or, with `normalize`, compute the proportion of) each value.

        Returns a two-column frame: the values under the series' name and
        their counts/proportions under `name` (or a Polars-style default).
        """
        """Parallel is unused, exists for compatibility."""
        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        # Column names: fall back to "index" for an unnamed series, and to
        # Polars' defaults ("proportion"/"count") when no name is given.
        index_name_ = "index" if self._name is None else self._name
        value_name_ = name or ("proportion" if normalize else "count")
        # Always count without sorting here; sorting is applied below only on demand.
        val_count = self.native.value_counts(
            dropna=False, sort=False, normalize=normalize
        ).reset_index()

        val_count.columns = [index_name_, value_name_]

        if sort:
            val_count = val_count.sort_values(value_name_, ascending=False)

        return PandasLikeDataFrame.from_native(val_count, context=self)
+
+ def quantile(
+ self, quantile: float, interpolation: RollingInterpolationMethod
+ ) -> float:
+ return self.native.quantile(q=quantile, interpolation=interpolation)
+
+ def zip_with(self, mask: Any, other: Any) -> PandasLikeSeries:
+ ser = self.native
+ _, mask = align_and_extract_native(self, mask)
+ _, other = align_and_extract_native(self, other)
+ res = ser.where(mask, other)
+ return self._with_native(res)
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.head(n))
+
+ def tail(self, n: int) -> Self:
+ return self._with_native(self.native.tail(n))
+
+ def round(self, decimals: int) -> Self:
+ return self._with_native(self.native.round(decimals=decimals))
+
    def to_dummies(self, *, separator: str, drop_first: bool) -> PandasLikeDataFrame:
        """One-hot encode the series into an int8 indicator frame.

        Column names follow Polars conventions: `{name}{separator}{value}`,
        with a `{name}{separator}null` column first when nulls are present.
        """
        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        plx = self.__native_namespace__()
        series = self.native
        name = str(self._name) if self._name else ""

        # Polars-style name for the null-indicator column.
        null_col_pl = f"{name}{separator}null"

        has_nulls = series.isna().any()
        result = plx.get_dummies(
            series,
            prefix=name,
            prefix_sep=separator,
            drop_first=drop_first,
            # Adds a null column at the end, depending on whether or not there are any.
            dummy_na=has_nulls,
            dtype="int8",
        )
        if has_nulls:
            # pandas puts the NaN indicator column last; Polars puts it first
            # and names it differently -- reorder and rename to match.
            *cols, null_col_pd = list(result.columns)
            output_order = [null_col_pd, *cols]
            result = rename(
                select_columns_by_name(
                    result, output_order, self._backend_version, self._implementation
                ),
                columns={null_col_pd: null_col_pl},
                implementation=self._implementation,
                backend_version=self._backend_version,
            )
        return PandasLikeDataFrame.from_native(result, context=self)
+
+ def gather_every(self, n: int, offset: int) -> Self:
+ return self._with_native(self.native.iloc[offset::n])
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None,
+ ) -> Self:
+ _, lower = (
+ align_and_extract_native(self, lower_bound) if lower_bound else (None, None)
+ )
+ _, upper = (
+ align_and_extract_native(self, upper_bound) if upper_bound else (None, None)
+ )
+ kwargs = {"axis": 0} if self._implementation is Implementation.MODIN else {}
+ return self._with_native(self.native.clip(lower, upper, **kwargs))
+
+ def to_arrow(self) -> pa.Array[Any]:
+ if self._implementation is Implementation.CUDF:
+ return self.native.to_arrow()
+
+ import pyarrow as pa # ignore-banned-import()
+
+ return pa.Array.from_pandas(self.native)
+
+ def mode(self) -> Self:
+ result = self.native.mode()
+ result.name = self.name
+ return self._with_native(result)
+
+ def cum_count(self, *, reverse: bool) -> Self:
+ not_na_series = ~self.native.isna()
+ result = (
+ not_na_series.cumsum()
+ if not reverse
+ else len(self) - not_na_series.cumsum() + not_na_series - 1
+ )
+ return self._with_native(result)
+
+ def cum_min(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cummin(skipna=True)
+ if not reverse
+ else self.native[::-1].cummin(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def cum_max(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cummax(skipna=True)
+ if not reverse
+ else self.native[::-1].cummax(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def cum_prod(self, *, reverse: bool) -> Self:
+ result = (
+ self.native.cumprod(skipna=True)
+ if not reverse
+ else self.native[::-1].cumprod(skipna=True)[::-1]
+ )
+ return self._with_native(result)
+
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).sum()
+ return self._with_native(result)
+
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).mean()
+ return self._with_native(result)
+
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).var(ddof=ddof)
+ return self._with_native(result)
+
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ result = self.native.rolling(
+ window=window_size, min_periods=min_samples, center=center
+ ).std(ddof=ddof)
+ return self._with_native(result)
+
+ def __iter__(self) -> Iterator[Any]:
+ yield from self.native.__iter__()
+
+ def __contains__(self, other: Any) -> bool:
+ return self.native.isna().any() if other is None else (self.native == other).any()
+
+ def is_finite(self) -> Self:
+ s = self.native
+ return self._with_native((s > float("-inf")) & (s < float("inf")))
+
    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        """Rank values with nulls kept as null (`na_option="keep"`).

        Polars' "ordinal" maps to pandas' "first". For nullable integer
        dtypes on pandas < 3.0, ranking is routed through a groupby on the
        null mask to work around a pandas bug.
        """
        pd_method = "first" if method == "ordinal" else method
        name = self.name
        if (
            self._implementation is Implementation.PANDAS
            and self._backend_version < (3,)
            and self.dtype.is_integer()
            and (null_mask := self.native.isna()).any()
        ):
            # crazy workaround for the case of `na_option="keep"` and nullable
            # integer dtypes. This should be supported in pandas > 3.0
            # https://github.com/pandas-dev/pandas/issues/56976
            ranked_series = (
                self.native.to_frame()
                .assign(**{f"{name}_is_null": null_mask})
                .groupby(f"{name}_is_null")
                .rank(
                    method=pd_method,
                    na_option="keep",
                    ascending=not descending,
                    pct=False,
                )[name]
            )
        else:
            # Plain path: pandas handles `na_option="keep"` correctly here.
            ranked_series = self.native.rank(
                method=pd_method, na_option="keep", ascending=not descending, pct=False
            )
        return self._with_native(ranked_series)
+
    def hist(  # noqa: C901, PLR0912
        self,
        bins: list[float | int] | None,
        *,
        bin_count: int | None,
        include_breakpoint: bool,
    ) -> PandasLikeDataFrame:
        """Bin the series into a histogram frame of (breakpoint, count).

        Exactly one of `bins` (explicit edges) or `bin_count` (number of
        equal-width bins, Polars-style) is expected. The breakpoint column
        is included only when `include_breakpoint` is set.
        """
        from numpy import linspace, zeros

        from narwhals._pandas_like.dataframe import PandasLikeDataFrame

        ns = self.__native_namespace__()
        data: dict[str, Sequence[int | float | str] | _AnyDArray]

        # Degenerate request: zero bins (or fewer than two edges) -> empty frame.
        if bin_count == 0 or (bins is not None and len(bins) <= 1):
            data = {}
            if include_breakpoint:
                data["breakpoint"] = []
            data["count"] = []
            return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)

        # All-null / empty series: return the requested bin structure with zero counts.
        if self.native.count() < 1:
            if bins is not None:
                data = {"breakpoint": bins[1:], "count": zeros(shape=len(bins) - 1)}
            else:
                count = cast("int", bin_count)
                if bin_count == 1:
                    data = {"breakpoint": [1.0], "count": [0]}
                else:
                    # Polars uses the unit interval for empty-input bin edges.
                    data = {
                        "breakpoint": linspace(0, 1, count + 1)[1:],
                        "count": zeros(shape=count),
                    }
            if not include_breakpoint:
                del data["breakpoint"]
            return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)

        if bin_count is not None:
            # use Polars binning behavior
            lower, upper = self.native.min(), self.native.max()
            if lower == upper:
                # Constant series: widen so a bin of nonzero width exists.
                lower -= 0.5
                upper += 0.5

            if bin_count == 1:
                data = {"breakpoint": [upper], "count": [self.native.count()]}
                if not include_breakpoint:
                    del data["breakpoint"]
                return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)

            # Convert bin_count into explicit equal-width edges and proceed as if
            # edges were supplied.
            bins = linspace(lower, upper, bin_count + 1)
            bin_count = None

        # pandas (2.2.*) .value_counts(bins=int) adjusts the lowest bin twice, result in improper counts.
        # pandas (2.2.*) .value_counts(bins=[...]) adjusts the lowest bin which should not happen since
        # the bins were explicitly passed in.
        categories = ns.cut(
            self.native,
            bins=bins if bin_count is None else bin_count,
            include_lowest=True,  # Polars 1.27.0 always includes the lowest bin
        )
        # modin (0.32.0) .value_counts(...) silently drops bins with empty observations, .reindex
        # is necessary to restore these bins.
        result = categories.value_counts(dropna=True, sort=False).reindex(
            categories.cat.categories, fill_value=0
        )
        data = {}
        if include_breakpoint:
            data["breakpoint"] = bins[1:] if bins is not None else result.index.right
        data["count"] = result.reset_index(drop=True)
        return PandasLikeDataFrame.from_native(ns.DataFrame(data), context=self)
+
    def log(self, base: float) -> Self:
        """Element-wise logarithm in the given base.

        Dispatches per backend: cupy for cuDF, pyarrow.compute for
        pyarrow-backed pandas (preserving the arrow dtype), numpy otherwise.
        """
        native = self.native
        implementation = self._implementation

        dtype_backend = get_dtype_backend(native.dtype, implementation=implementation)

        if implementation.is_cudf():
            import cupy as cp  # ignore-banned-import # cuDF dependency.

            native = self.native
            # Change of base: log_b(x) = ln(x) / ln(b).
            log_arr = cp.log(native) / cp.log(base)
            result_native = type(native)(log_arr, index=native.index, name=native.name)
            return self._with_native(result_native)

        if dtype_backend == "pyarrow":
            import pyarrow.compute as pc

            from narwhals._arrow.utils import native_to_narwhals_dtype

            # Operate on the underlying chunked array, then rebuild a series
            # with the equivalent pyarrow-backed pandas dtype.
            ca = native.array._pa_array
            result_arr = cast("ChunkedArrayAny", pc.logb(ca, base))
            nw_dtype = native_to_narwhals_dtype(result_arr.type, self._version)
            out_dtype = narwhals_to_native_dtype(
                nw_dtype,
                "pyarrow",
                self._implementation,
                self._backend_version,
                self._version,
            )
            result_native = native.__class__(
                result_arr, dtype=out_dtype, index=native.index, name=native.name
            )
        else:
            # Change of base via numpy: log_b(x) = ln(x) / ln(b).
            result_native = np.log(native) / np.log(base)
        return self._with_native(result_native)
+
    def exp(self) -> Self:
        """Element-wise exponential (e**x).

        Dispatches per backend: cupy for cuDF, pyarrow.compute for
        pyarrow-backed pandas (preserving the arrow dtype), numpy otherwise.
        """
        native = self.native
        implementation = self._implementation

        dtype_backend = get_dtype_backend(native.dtype, implementation=implementation)

        if implementation.is_cudf():
            import cupy as cp  # ignore-banned-import # cuDF dependency.

            native = self.native
            exp_arr = cp.exp(native)
            result_native = type(native)(exp_arr, index=native.index, name=native.name)
            return self._with_native(result_native)

        if dtype_backend == "pyarrow":
            import pyarrow.compute as pc

            from narwhals._arrow.utils import native_to_narwhals_dtype

            # Operate on the underlying chunked array, then rebuild a series
            # with the equivalent pyarrow-backed pandas dtype.
            ca = native.array._pa_array
            result_arr = cast("ChunkedArrayAny", pc.exp(ca))
            nw_dtype = native_to_narwhals_dtype(result_arr.type, self._version)
            out_dtype = narwhals_to_native_dtype(
                nw_dtype,
                "pyarrow",
                self._implementation,
                self._backend_version,
                self._version,
            )
            result_native = native.__class__(
                result_arr, dtype=out_dtype, index=native.index, name=native.name
            )
        else:
            result_native = np.exp(native)
        return self._with_native(result_native)
+
    @property
    def str(self) -> PandasLikeSeriesStringNamespace:
        """Namespace of string operations (`.str`)."""
        return PandasLikeSeriesStringNamespace(self)
+
    @property
    def dt(self) -> PandasLikeSeriesDateTimeNamespace:
        """Namespace of datetime operations (`.dt`)."""
        return PandasLikeSeriesDateTimeNamespace(self)
+
    @property
    def cat(self) -> PandasLikeSeriesCatNamespace:
        """Namespace of categorical operations (`.cat`)."""
        return PandasLikeSeriesCatNamespace(self)
+
    @property
    def list(self) -> PandasLikeSeriesListNamespace:
        """Namespace of list operations (`.list`); requires an arrow-list dtype."""
        # Only pyarrow-backed list dtypes expose a native `.list` accessor.
        if not hasattr(self.native, "list"):
            msg = "Series must be of PyArrow List type to support list namespace."
            raise TypeError(msg)
        return PandasLikeSeriesListNamespace(self)
+
    @property
    def struct(self) -> PandasLikeSeriesStructNamespace:
        """Namespace of struct operations (`.struct`); requires an arrow-struct dtype."""
        # Only pyarrow-backed struct dtypes expose a native `.struct` accessor.
        if not hasattr(self.native, "struct"):
            msg = "Series must be of PyArrow Struct type to support struct namespace."
            raise TypeError(msg)
        return PandasLikeSeriesStructNamespace(self)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py
new file mode 100644
index 0000000..912da70
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_cat.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant.any_namespace import CatNamespace
+from narwhals._pandas_like.utils import PandasLikeSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
class PandasLikeSeriesCatNamespace(
    PandasLikeSeriesNamespace, CatNamespace["PandasLikeSeries"]
):
    """Implements the `.cat` namespace for pandas-like series."""

    def get_categories(self) -> PandasLikeSeries:
        """Return the dtype's categories as a series with the same name."""
        native = self.native
        categories = native.cat.categories
        return self.with_native(type(native)(categories, name=native.name))
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py
new file mode 100644
index 0000000..c8083e9
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_dt.py
@@ -0,0 +1,237 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from narwhals._compliant.any_namespace import DateTimeNamespace
+from narwhals._duration import parse_interval_string
+from narwhals._pandas_like.utils import (
+ UNIT_DICT,
+ PandasLikeSeriesNamespace,
+ calculate_timestamp_date,
+ calculate_timestamp_datetime,
+ get_dtype_backend,
+ int_dtype_mapper,
+ is_pyarrow_dtype_backend,
+)
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+ from narwhals.typing import TimeUnit
+
+
class PandasLikeSeriesDateTimeNamespace(
    PandasLikeSeriesNamespace, DateTimeNamespace["PandasLikeSeries"]
):
    """Implements the `.dt` namespace for pandas-like series."""

    def date(self) -> PandasLikeSeries:
        """Extract the date part; rejects the object-dtype default backend."""
        result = self.with_native(self.native.dt.date)
        if str(result.dtype).lower() == "object":
            msg = (
                "Accessing `date` on the default pandas backend "
                "will return a Series of type `object`."
                "\nThis differs from polars API and will prevent `.dt` chaining. "
                "Please switch to the `pyarrow` backend:"
                '\ndf.convert_dtypes(dtype_backend="pyarrow")'
            )
            raise NotImplementedError(msg)
        return result

    def year(self) -> PandasLikeSeries:
        """Calendar year."""
        return self.with_native(self.native.dt.year)

    def month(self) -> PandasLikeSeries:
        """Calendar month (1-12)."""
        return self.with_native(self.native.dt.month)

    def day(self) -> PandasLikeSeries:
        """Day of month (1-31)."""
        return self.with_native(self.native.dt.day)

    def hour(self) -> PandasLikeSeries:
        """Hour of day (0-23)."""
        return self.with_native(self.native.dt.hour)

    def minute(self) -> PandasLikeSeries:
        """Minute of hour (0-59)."""
        return self.with_native(self.native.dt.minute)

    def second(self) -> PandasLikeSeries:
        """Second of minute (0-59)."""
        return self.with_native(self.native.dt.second)

    def millisecond(self) -> PandasLikeSeries:
        """Milliseconds within the second (derived from microseconds)."""
        return self.microsecond() // 1000

    def microsecond(self) -> PandasLikeSeries:
        """Microseconds within the second."""
        if self.backend_version < (3, 0, 0) and self._is_pyarrow():
            # crazy workaround for https://github.com/pandas-dev/pandas/issues/59154
            import pyarrow.compute as pc  # ignore-banned-import()

            from narwhals._arrow.utils import lit

            arr_ns = self.native.array
            arr = arr_ns.__arrow_array__()
            result_arr = pc.add(
                pc.multiply(pc.millisecond(arr), lit(1_000)), pc.microsecond(arr)
            )
            result = type(self.native)(type(arr_ns)(result_arr), name=self.native.name)
            return self.with_native(result)

        return self.with_native(self.native.dt.microsecond)

    def nanosecond(self) -> PandasLikeSeries:
        """Sub-second component expressed in nanoseconds."""
        return self.microsecond() * 1_000 + self.native.dt.nanosecond

    def ordinal_day(self) -> PandasLikeSeries:
        """Day of the year, computed via numpy datetime64 day arithmetic."""
        year_start = self.native.dt.year
        result = (
            self.native.to_numpy().astype("datetime64[D]")
            - (year_start.to_numpy() - 1970).astype("datetime64[Y]")
        ).astype("int32") + 1
        dtype = "Int64[pyarrow]" if self._is_pyarrow() else "int32"
        return self.with_native(
            type(self.native)(result, dtype=dtype, name=year_start.name)
        )

    def weekday(self) -> PandasLikeSeries:
        """Day of the week, 1 (Monday) through 7 (Sunday)."""
        # Pandas is 0-6 while Polars is 1-7
        return self.with_native(self.native.dt.weekday) + 1

    def _is_pyarrow(self) -> bool:
        """True when the underlying series uses the pyarrow dtype backend."""
        return is_pyarrow_dtype_backend(self.native.dtype, self.implementation)

    def _get_total_seconds(self) -> Any:
        """Total seconds of a timedelta series; manual sum for old backends."""
        if hasattr(self.native.dt, "total_seconds"):
            return self.native.dt.total_seconds()
        else:  # pragma: no cover
            return (
                self.native.dt.days * 86400
                + self.native.dt.seconds
                + (self.native.dt.microseconds / 1e6)
                + (self.native.dt.nanoseconds / 1e9)
            )

    def _total_with_sign(self, scale: float, floor_div: int) -> PandasLikeSeries:
        """Shared implementation for the `total_*` duration methods.

        Floor-divides the *magnitude* of `total_seconds * scale` by
        `floor_div` and re-applies each element's sign, so that the result
        truncates toward zero (matching Polars). The result is cast back to
        an integer dtype when there are no missing values.
        """
        s = self._get_total_seconds() * scale
        # this calculates the sign of each series element
        s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
        s_abs = s.abs() // floor_div
        if ~s.isna().any():
            s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
        return self.with_native(s_abs * s_sign)

    def total_minutes(self) -> PandasLikeSeries:
        """Whole minutes in each duration, truncated toward zero."""
        return self._total_with_sign(1, 60)

    def total_seconds(self) -> PandasLikeSeries:
        """Whole seconds in each duration, truncated toward zero."""
        return self._total_with_sign(1, 1)

    def total_milliseconds(self) -> PandasLikeSeries:
        """Whole milliseconds in each duration, truncated toward zero."""
        return self._total_with_sign(1e3, 1)

    def total_microseconds(self) -> PandasLikeSeries:
        """Whole microseconds in each duration, truncated toward zero."""
        return self._total_with_sign(1e6, 1)

    def total_nanoseconds(self) -> PandasLikeSeries:
        """Whole nanoseconds in each duration, truncated toward zero."""
        return self._total_with_sign(1e9, 1)

    def to_string(self, format: str) -> PandasLikeSeries:
        """Format datetimes as strings, adapting the format per backend."""
        # Polars' parser treats `'%.f'` as pandas does `'.%f'`
        # PyArrow interprets `'%S'` as "seconds, plus fractional seconds"
        # and doesn't support `%f`
        if not self._is_pyarrow():
            format = format.replace("%S%.f", "%S.%f")
        else:
            format = format.replace("%S.%f", "%S").replace("%S%.f", "%S")
        return self.with_native(self.native.dt.strftime(format))

    def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries:
        """Set (or drop, when None) the timezone without converting wall time."""
        de_zone = self.native.dt.tz_localize(None)
        result = de_zone.dt.tz_localize(time_zone) if time_zone is not None else de_zone
        return self.with_native(result)

    def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
        """Convert to `time_zone`; naive series are treated as UTC."""
        if self.compliant.dtype.time_zone is None:  # type: ignore[attr-defined]
            result = self.native.dt.tz_localize("UTC").dt.tz_convert(time_zone)
        else:
            result = self.native.dt.tz_convert(time_zone)
        return self.with_native(result)

    def timestamp(self, time_unit: TimeUnit) -> PandasLikeSeries:
        """Integer timestamp since the epoch in `time_unit`; nulls preserved."""
        s = self.native
        dtype = self.compliant.dtype
        mask_na = s.isna()
        dtypes = self.version.dtypes
        if dtype == dtypes.Date:
            # Date is only supported in pandas dtypes if pyarrow-backed
            s_cast = s.astype("Int32[pyarrow]")
            result = calculate_timestamp_date(s_cast, time_unit)
        elif isinstance(dtype, dtypes.Datetime):
            # pandas < 2.0 needs `.view` to reinterpret datetime64 as int64.
            fn = (
                s.view
                if (self.implementation.is_pandas() and self.backend_version < (2,))
                else s.astype
            )
            s_cast = fn("Int64[pyarrow]") if self._is_pyarrow() else fn("int64")
            result = calculate_timestamp_datetime(s_cast, dtype.time_unit, time_unit)
        else:
            msg = "Input should be either of Date or Datetime type"
            raise TypeError(msg)
        # Reinstate nulls clobbered by the integer cast above.
        result[mask_na] = None
        return self.with_native(result)

    def truncate(self, every: str) -> PandasLikeSeries:
        """Truncate datetimes down to the start of the given interval.

        Sub-monthly units map onto the native `.dt.floor`; month/quarter/year
        need special handling since pandas has no calendar-aware floor.
        """
        multiple, unit = parse_interval_string(every)
        native = self.native
        if self.implementation.is_cudf():
            if multiple != 1:
                msg = f"Only multiple `1` is supported for cuDF, got: {multiple}."
                raise NotImplementedError(msg)
            return self.with_native(self.native.dt.floor(UNIT_DICT.get(unit, unit)))
        dtype_backend = get_dtype_backend(native.dtype, self.compliant._implementation)
        if unit in {"mo", "q", "y"}:
            # (cuDF has already returned above, so only pandas/modin reach here.)
            if dtype_backend == "pyarrow":
                import pyarrow.compute as pc  # ignore-banned-import

                from narwhals._arrow.utils import UNITS_DICT

                ca = native.array._pa_array
                result_arr = pc.floor_temporal(ca, multiple, UNITS_DICT[unit])
            else:
                # numpy path: casting to a coarse datetime64 unit and back
                # floors each value to the start of the (multiple of the) period.
                if unit == "q":
                    multiple *= 3
                    np_unit = "M"
                elif unit == "mo":
                    np_unit = "M"
                else:
                    np_unit = "Y"
                arr = native.values
                arr_dtype = arr.dtype
                result_arr = arr.astype(f"datetime64[{multiple}{np_unit}]").astype(
                    arr_dtype
                )
            result_native = native.__class__(
                result_arr, dtype=native.dtype, index=native.index, name=native.name
            )
            return self.with_native(result_native)
        return self.with_native(
            self.native.dt.floor(f"{multiple}{UNIT_DICT.get(unit, unit)}")
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py
new file mode 100644
index 0000000..7816c1b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_list.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant.any_namespace import ListNamespace
+from narwhals._pandas_like.utils import (
+ PandasLikeSeriesNamespace,
+ get_dtype_backend,
+ narwhals_to_native_dtype,
+)
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
class PandasLikeSeriesListNamespace(
    PandasLikeSeriesNamespace, ListNamespace["PandasLikeSeries"]
):
    """Implements the `.list` namespace for pandas-like series."""

    def len(self) -> PandasLikeSeries:
        """Length of each list element, returned as UInt32 (Polars parity)."""
        result = self.native.list.len()
        implementation = self.implementation
        backend_version = self.backend_version
        if implementation.is_pandas() and backend_version < (3, 0):  # pragma: no cover
            # `result` is a new object so it's safe to do this inplace.
            result.index = self.native.index
        # Cast to the native equivalent of UInt32 so the dtype matches Polars.
        dtype = narwhals_to_native_dtype(
            self.version.dtypes.UInt32(),
            get_dtype_backend(result.dtype, implementation),
            implementation,
            backend_version,
            self.version,
        )
        return self.with_native(result.astype(dtype)).alias(self.native.name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py
new file mode 100644
index 0000000..c4bef09
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_str.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from narwhals._compliant.any_namespace import StringNamespace
+from narwhals._pandas_like.utils import (
+ PandasLikeSeriesNamespace,
+ is_pyarrow_dtype_backend,
+)
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
class PandasLikeSeriesStringNamespace(
    PandasLikeSeriesNamespace, StringNamespace["PandasLikeSeries"]
):
    """Implements the `.str` namespace for pandas-like series."""

    def len_chars(self) -> PandasLikeSeries:
        """Number of characters in each string."""
        return self.with_native(self.native.str.len())

    def replace(
        self, pattern: str, value: str, *, literal: bool, n: int
    ) -> PandasLikeSeries:
        """Replace up to `n` occurrences of `pattern`; `literal` disables regex."""
        return self.with_native(
            self.native.str.replace(pat=pattern, repl=value, n=n, regex=not literal)
        )

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> PandasLikeSeries:
        """Replace every occurrence of `pattern` (delegates with n=-1)."""
        return self.replace(pattern, value, literal=literal, n=-1)

    def strip_chars(self, characters: str | None) -> PandasLikeSeries:
        """Strip `characters` (whitespace when None) from both ends."""
        return self.with_native(self.native.str.strip(characters))

    def starts_with(self, prefix: str) -> PandasLikeSeries:
        """True where the string starts with `prefix`."""
        return self.with_native(self.native.str.startswith(prefix))

    def ends_with(self, suffix: str) -> PandasLikeSeries:
        """True where the string ends with `suffix`."""
        return self.with_native(self.native.str.endswith(suffix))

    def contains(self, pattern: str, *, literal: bool) -> PandasLikeSeries:
        """True where `pattern` occurs; `literal` disables regex matching."""
        return self.with_native(self.native.str.contains(pat=pattern, regex=not literal))

    def slice(self, offset: int, length: int | None) -> PandasLikeSeries:
        """Substring of `length` characters starting at `offset`.

        `length=None` means "to the end of the string". The None check must
        be explicit: `length=0` is a valid request for an empty slice and
        must not be conflated with None (the previous truthiness test made
        `length=0` return the whole remainder instead of empty strings).
        """
        stop = offset + length if length is not None else None
        return self.with_native(self.native.str.slice(start=offset, stop=stop))

    def split(self, by: str) -> PandasLikeSeries:
        """Split each string on `by`; requires the pyarrow backend (or cuDF)."""
        implementation = self.implementation
        if not implementation.is_cudf() and not is_pyarrow_dtype_backend(
            self.native.dtype, implementation
        ):
            msg = (
                "This operation requires a pyarrow-backed series. "
                "Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes "
                "and ensure you are using dtype_backend='pyarrow'. "
                "Additionally, make sure you have pandas version 1.5+ and pyarrow installed. "
            )
            raise TypeError(msg)
        return self.with_native(self.native.str.split(pat=by))

    def to_datetime(self, format: str | None) -> PandasLikeSeries:
        """Parse strings to datetimes, normalizing tz-aware results to UTC."""
        # If we know inputs are timezone-aware, we can pass `utc=True` for better performance.
        if format and any(x in format for x in ("%z", "Z")):
            return self.with_native(self._to_datetime(format, utc=True))
        result = self.with_native(self._to_datetime(format, utc=False))
        if (tz := getattr(result.dtype, "time_zone", None)) and tz != "UTC":
            return result.dt.convert_time_zone("UTC")
        return result

    def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
        """Backend-native `to_datetime` call."""
        return self.implementation.to_native_namespace().to_datetime(
            self.native, format=format, utc=utc
        )

    def to_uppercase(self) -> PandasLikeSeries:
        """Uppercase each string."""
        return self.with_native(self.native.str.upper())

    def to_lowercase(self) -> PandasLikeSeries:
        """Lowercase each string."""
        return self.with_native(self.native.str.lower())
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py
new file mode 100644
index 0000000..dc80997
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/series_struct.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant.any_namespace import StructNamespace
+from narwhals._pandas_like.utils import PandasLikeSeriesNamespace
+
+if TYPE_CHECKING:
+ from narwhals._pandas_like.series import PandasLikeSeries
+
+
class PandasLikeSeriesStructNamespace(
    PandasLikeSeriesNamespace, StructNamespace["PandasLikeSeries"]
):
    """Implements the `.struct` namespace for pandas-like series."""

    def field(self, name: str) -> PandasLikeSeries:
        """Extract struct field `name` as its own series, named `name`."""
        extracted = self.native.struct.field(name)
        return self.with_native(extracted).alias(name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py
new file mode 100644
index 0000000..6f7bcb2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/typing.py
@@ -0,0 +1,15 @@
"""Typing-only helpers for the pandas-like backend (no runtime imports)."""

from __future__ import annotations  # pragma: no cover

from typing import TYPE_CHECKING  # pragma: no cover

if TYPE_CHECKING:
    from typing import Any, TypeVar

    import pandas as pd
    from typing_extensions import TypeAlias

    from narwhals._pandas_like.expr import PandasLikeExpr
    from narwhals._pandas_like.series import PandasLikeSeries

    # Anything accepted where a pandas-like expression is expected.
    IntoPandasLikeExpr: TypeAlias = "PandasLikeExpr | PandasLikeSeries"
    # Constrained TypeVar over the two pandas NDFrame flavours.
    NDFrameT = TypeVar("NDFrameT", "pd.DataFrame", "pd.Series[Any]")
diff --git a/venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py
new file mode 100644
index 0000000..bc75c14
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_pandas_like/utils.py
@@ -0,0 +1,673 @@
+from __future__ import annotations
+
+import functools
+import re
+from contextlib import suppress
+from typing import TYPE_CHECKING, Any, Callable, Literal, Sized, TypeVar
+
+import pandas as pd
+
+from narwhals._compliant.series import EagerSeriesNamespace
+from narwhals._utils import (
+ Implementation,
+ Version,
+ _DeferredIterable,
+ check_columns_exist,
+ isinstance_or_issubclass,
+)
+from narwhals.exceptions import DuplicateError, ShapeError
+
+T = TypeVar("T", bound=Sized)
+
+if TYPE_CHECKING:
+ from pandas._typing import Dtype as PandasDtype
+
+ from narwhals._pandas_like.expr import PandasLikeExpr
+ from narwhals._pandas_like.series import PandasLikeSeries
+ from narwhals.dtypes import DType
+ from narwhals.typing import DTypeBackend, IntoDType, TimeUnit, _1DArray
+
+ ExprT = TypeVar("ExprT", bound=PandasLikeExpr)
+
+
# The three pandas-like backends served by this module; used for membership tests.
PANDAS_LIKE_IMPLEMENTATION = {
    Implementation.PANDAS,
    Implementation.CUDF,
    Implementation.MODIN,
}
+PD_DATETIME_RGX = r"""^
+ datetime64\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ (?:, # Begin non-capturing group for optional timezone
+ \s* # Optional whitespace after comma
+ (?P<time_zone> # Start named group for timezone
+ [a-zA-Z\/]+ # Match timezone name, e.g., UTC, America/New_York
+ (?:[+-]\d{2}:\d{2})? # Optional offset in format +HH:MM or -HH:MM
+ | # OR
+ pytz\.FixedOffset\(\d+\) # Match pytz.FixedOffset with integer offset in parentheses
+ ) # End time_zone group
+ )? # End optional timezone group
+ \] # Closing bracket for datetime64
+$"""
+PATTERN_PD_DATETIME = re.compile(PD_DATETIME_RGX, re.VERBOSE)
+PA_DATETIME_RGX = r"""^
+ timestamp\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ (?:, # Begin non-capturing group for optional timezone
+ \s?tz= # Match "tz=" prefix
+ (?P<time_zone> # Start named group for timezone
+ [a-zA-Z\/]* # Match timezone name (e.g., UTC, America/New_York)
+ (?: # Begin optional non-capturing group for offset
+ [+-]\d{2}:\d{2} # Match offset in format +HH:MM or -HH:MM
+ )? # End optional offset group
+ ) # End time_zone group
+ )? # End optional timezone group
+ \] # Closing bracket for timestamp
+ \[pyarrow\] # Literal string "[pyarrow]"
+$"""
+PATTERN_PA_DATETIME = re.compile(PA_DATETIME_RGX, re.VERBOSE)
+PD_DURATION_RGX = r"""^
+ timedelta64\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ \] # Closing bracket for timedelta64
+$"""
+
+PATTERN_PD_DURATION = re.compile(PD_DURATION_RGX, re.VERBOSE)
+PA_DURATION_RGX = r"""^
+ duration\[
+ (?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
+ \] # Closing bracket for duration
+ \[pyarrow\] # Literal string "[pyarrow]"
+$"""
+PATTERN_PA_DURATION = re.compile(PA_DURATION_RGX, re.VERBOSE)
+
# Normalizes single-letter unit aliases — presumably "d" -> pandas "D" (day),
# "m" -> "min" (minute); confirm against the call sites that consume it.
UNIT_DICT = {"d": "D", "m": "min"}
+
+
def align_and_extract_native(
    lhs: PandasLikeSeries, rhs: PandasLikeSeries | object
) -> tuple[pd.Series[Any] | object, pd.Series[Any] | object]:
    """Validate RHS of binary operation.

    Returns the native (pandas-level) operands for both sides, collapsing a
    length-1 "broadcast" series to its scalar and re-indexing `rhs` onto
    `lhs`'s index when their index objects differ.

    If the comparison isn't supported, return `NotImplemented` so that the
    "right-hand-side" operation (e.g. `__radd__`) can be tried.
    """
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
    from narwhals._pandas_like.series import PandasLikeSeries

    lhs_index = lhs.native.index

    if isinstance(rhs, PandasLikeDataFrame):
        # Series <op> DataFrame isn't handled here; defer to the frame's reflected op.
        return NotImplemented

    if lhs._broadcast and isinstance(rhs, PandasLikeSeries) and not rhs._broadcast:
        # Only the left side is a broadcastable length-1 series: use it as a scalar.
        return lhs.native.iloc[0], rhs.native

    if isinstance(rhs, PandasLikeSeries):
        if rhs._broadcast:
            # Broadcast right-hand side collapses to its single value.
            return (lhs.native, rhs.native.iloc[0])
        if rhs.native.index is not lhs_index:
            # Identity (`is`) check is deliberate: re-indexing is skipped only when
            # both series share the very same index object.
            return (
                lhs.native,
                set_index(
                    rhs.native,
                    lhs_index,
                    implementation=rhs._implementation,
                    backend_version=rhs._backend_version,
                ),
            )
        return (lhs.native, rhs.native)

    if isinstance(rhs, list):
        msg = "Expected Series or scalar, got list."
        raise TypeError(msg)
    # `rhs` must be scalar, so just leave it as-is
    return lhs.native, rhs
+
+
def set_index(
    obj: T,
    index: Any,
    *,
    implementation: Implementation,
    backend_version: tuple[int, ...],
) -> T:
    """Wrapper around pandas' set_axis to set object index.

    We can set `copy` / `inplace` based on implementation/version.
    """
    native_index_cls = implementation.to_native_namespace().Index
    if isinstance(index, native_index_cls):
        expected_len, actual_len = len(index), len(obj)
        if expected_len != actual_len:
            msg = f"Expected object of length {expected_len}, got length: {actual_len}"
            raise ShapeError(msg)
    if implementation is Implementation.CUDF:  # pragma: no cover
        # cuDF: shallow-copy, then assign the index attribute directly.
        copied = obj.copy(deep=False)  # type: ignore[attr-defined]
        copied.index = index  # type: ignore[attr-defined]
        return copied
    kwargs: dict[str, Any] = {}
    if implementation is Implementation.PANDAS:  # pragma: no cover
        if backend_version < (1,):
            kwargs["inplace"] = False
        elif (1, 5) <= backend_version < (3,):
            # Skip the defensive copy where the keyword is still accepted.
            kwargs["copy"] = False
    return obj.set_axis(index, axis=0, **kwargs)  # type: ignore[attr-defined]
+
+
def rename(
    obj: T,
    *args: Any,
    implementation: Implementation,
    backend_version: tuple[int, ...],
    **kwargs: Any,
) -> T:
    """Wrapper around pandas' rename so that we can set `copy` based on implementation/version."""
    is_pandas_3plus = (
        implementation is Implementation.PANDAS and backend_version >= (3,)
    )
    if is_pandas_3plus:  # pragma: no cover
        # pandas >= 3 path: call without the `copy` keyword.
        return obj.rename(*args, **kwargs)  # type: ignore[attr-defined]
    return obj.rename(*args, **kwargs, copy=False)  # type: ignore[attr-defined]
+
+
@functools.lru_cache(maxsize=16)
def non_object_native_to_narwhals_dtype(native_dtype: Any, version: Version) -> DType:
    """Map a non-``object`` native dtype to the matching narwhals dtype.

    Results are cached (few distinct dtype objects occur per run). Exact
    string aliases are resolved first, then prefix/regex-based families
    (categorical, datetime, duration, date, decimal/time/binary).
    """
    dtype_str = str(native_dtype)
    dtypes = version.dtypes

    # Fixed-width primitives share spellings across numpy, nullable-pandas and pyarrow.
    for attr, aliases in (
        ("Int64", ("int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]")),
        ("Int32", ("int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]")),
        ("Int16", ("int16", "Int16", "Int16[pyarrow]", "int16[pyarrow]")),
        ("Int8", ("int8", "Int8", "Int8[pyarrow]", "int8[pyarrow]")),
        ("UInt64", ("uint64", "UInt64", "UInt64[pyarrow]", "uint64[pyarrow]")),
        ("UInt32", ("uint32", "UInt32", "UInt32[pyarrow]", "uint32[pyarrow]")),
        ("UInt16", ("uint16", "UInt16", "UInt16[pyarrow]", "uint16[pyarrow]")),
        ("UInt8", ("uint8", "UInt8", "UInt8[pyarrow]", "uint8[pyarrow]")),
        (
            "Float64",
            (
                "float64",
                "Float64",
                "Float64[pyarrow]",
                "float64[pyarrow]",
                "double[pyarrow]",
            ),
        ),
        (
            "Float32",
            (
                "float32",
                "Float32",
                "Float32[pyarrow]",
                "float32[pyarrow]",
                "float[pyarrow]",
            ),
        ),
        ("String", ("string", "string[python]", "string[pyarrow]", "large_string[pyarrow]")),
        ("Boolean", ("bool", "boolean", "boolean[pyarrow]", "bool[pyarrow]")),
    ):
        if dtype_str in aliases:
            return getattr(dtypes, attr)()
    if dtype_str.startswith("dictionary<"):
        return dtypes.Categorical()
    if dtype_str == "category":
        return native_categorical_to_narwhals_dtype(native_dtype, version)
    datetime_match = PATTERN_PD_DATETIME.match(dtype_str) or PATTERN_PA_DATETIME.match(
        dtype_str
    )
    if datetime_match:
        return dtypes.Datetime(
            datetime_match.group("time_unit"),  # type: ignore[arg-type]
            datetime_match.group("time_zone"),
        )
    duration_match = PATTERN_PD_DURATION.match(dtype_str) or PATTERN_PA_DURATION.match(
        dtype_str
    )
    if duration_match:
        return dtypes.Duration(duration_match.group("time_unit"))  # type: ignore[arg-type]
    if dtype_str == "date32[day][pyarrow]":
        return dtypes.Date()
    if dtype_str.endswith("[pyarrow]"):
        if dtype_str.startswith("decimal"):
            return dtypes.Decimal()
        if dtype_str.startswith("time"):
            return dtypes.Time()
        if dtype_str.startswith("binary"):
            return dtypes.Binary()
    return dtypes.Unknown()  # pragma: no cover
+
+
def object_native_to_narwhals_dtype(
    series: PandasLikeSeries, version: Version, implementation: Implementation
) -> DType:
    """Sniff the narwhals dtype of an ``object``-dtype pandas-like series."""
    dtypes = version.dtypes
    if implementation is Implementation.CUDF:  # pragma: no cover
        # Per conversations with their maintainers, they don't support arbitrary
        # objects, so we can just return String.
        return dtypes.String()

    # Arbitrary limit of 100 elements to use to sniff dtype.
    sniffed = pd.api.types.infer_dtype(series.head(100), skipna=True)
    if sniffed == "string":
        return dtypes.String()
    if sniffed == "empty":
        # Empty object series defaults to String, except V1 which preserves Object.
        return dtypes.Object() if version is Version.V1 else dtypes.String()
    return dtypes.Object()
+
+
def native_categorical_to_narwhals_dtype(
    native_dtype: pd.CategoricalDtype,
    version: Version,
    implementation: Literal[Implementation.CUDF] | None = None,
) -> DType:
    """Translate a native categorical dtype; ordered categoricals become Enum (post-V1)."""
    dtypes = version.dtypes
    # V1 never exposes Enum; unordered categoricals are always Categorical.
    if version is Version.V1 or not native_dtype.ordered:
        return dtypes.Categorical()
    if implementation is Implementation.CUDF:
        into_iter = _cudf_categorical_to_list(native_dtype)
    else:
        into_iter = native_dtype.categories.to_list
    return dtypes.Enum(_DeferredIterable(into_iter))
+
+
+def _cudf_categorical_to_list(
+ native_dtype: Any,
+) -> Callable[[], list[Any]]: # pragma: no cover
+ # NOTE: https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/api/cudf.core.dtypes.categoricaldtype/#cudf.core.dtypes.CategoricalDtype
+ def fn() -> list[Any]:
+ return native_dtype.categories.to_arrow().to_pylist()
+
+ return fn
+
+
def native_to_narwhals_dtype(
    native_dtype: Any, version: Version, implementation: Implementation
) -> DType:
    """Translate any native pandas-like dtype into a narwhals `DType`.

    Dispatch order is load-bearing: nested/arrow-backed dtypes first, then
    the cuDF categorical workaround, then the general non-object path.
    Plain ``object`` dtype must be handled by the caller (except for Dask).
    """
    str_dtype = str(native_dtype)

    if str_dtype.startswith(("large_list", "list", "struct", "fixed_size_list")):
        # Nested dtypes are pyarrow-backed: delegate to the arrow translator.
        from narwhals._arrow.utils import (
            native_to_narwhals_dtype as arrow_native_to_narwhals_dtype,
        )

        if hasattr(native_dtype, "to_arrow"):  # pragma: no cover
            # cudf, cudf.pandas
            return arrow_native_to_narwhals_dtype(native_dtype.to_arrow(), version)
        return arrow_native_to_narwhals_dtype(native_dtype.pyarrow_dtype, version)
    if str_dtype == "category" and implementation.is_cudf():
        # https://github.com/rapidsai/cudf/issues/18536
        # https://github.com/rapidsai/cudf/issues/14027
        return native_categorical_to_narwhals_dtype(
            native_dtype, version, Implementation.CUDF
        )
    if str_dtype != "object":
        return non_object_native_to_narwhals_dtype(native_dtype, version)
    elif implementation is Implementation.DASK:
        # Per conversations with their maintainers, they don't support arbitrary
        # objects, so we can just return String.
        return version.dtypes.String()
    msg = (
        "Unreachable code, object dtype should be handled separately"  # pragma: no cover
    )
    raise AssertionError(msg)
+
+
def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBackend:
    """Get dtype backend for pandas type.

    Matches pandas' `dtype_backend` argument in `convert_dtypes`.
    """
    if implementation is Implementation.CUDF:
        return None
    arrow_dtype_cls = getattr(pd, "ArrowDtype", None)
    if arrow_dtype_cls is not None and isinstance(dtype, arrow_dtype_cls):
        return "pyarrow"
    with suppress(AttributeError):
        sentinel = object()
        is_nullable_extension = (
            isinstance(dtype, pd.api.extensions.ExtensionDtype)
            and getattr(dtype, "base", sentinel) is None
        )
        if is_nullable_extension:
            return "numpy_nullable"
    return None
+
+
@functools.lru_cache(maxsize=16)
def is_pyarrow_dtype_backend(dtype: Any, implementation: Implementation) -> bool:
    """True when `dtype`'s backend (per `get_dtype_backend`) is pyarrow."""
    backend = get_dtype_backend(dtype, implementation)
    return backend == "pyarrow"
+
+
def narwhals_to_native_dtype(  # noqa: C901, PLR0912
    dtype: IntoDType,
    dtype_backend: DTypeBackend,
    implementation: Implementation,
    backend_version: tuple[int, ...],
    version: Version,
) -> str | PandasDtype:
    """Convert a narwhals dtype to a native pandas-like dtype.

    Arguments:
        dtype: narwhals dtype (instance or class) to convert.
        dtype_backend: `None`, `"pyarrow"`, or `"numpy_nullable"`, matching
            pandas' `convert_dtypes` argument.
        implementation: pandas-like backend in use.
        backend_version: parsed version of that backend.
        version: narwhals API version (selects the dtype namespace).

    Returns:
        A dtype string (e.g. `"Int64[pyarrow]"`), the `str` builtin (plain
        String), or a pandas dtype object (Enum / nested types).

    Raises:
        ValueError: unknown `dtype_backend`, or Enum without categories.
        NotImplementedError: Decimal, V1 Enum, or nested types on old pandas.
        ModuleNotFoundError: `Date` requested but pyarrow is unavailable.
    """
    if dtype_backend is not None and dtype_backend not in {"pyarrow", "numpy_nullable"}:
        msg = f"Expected one of {{None, 'pyarrow', 'numpy_nullable'}}, got: '{dtype_backend}'"
        raise ValueError(msg)
    dtypes = version.dtypes
    if isinstance_or_issubclass(dtype, dtypes.Decimal):
        msg = "Casting to Decimal is not supported yet."
        raise NotImplementedError(msg)
    # All fixed-width numeric dtypes follow one naming pattern per backend:
    # pyarrow -> "Int64[pyarrow]", numpy_nullable -> "Int64", classic -> "int64".
    for name in (
        "Float64",
        "Float32",
        "Int64",
        "Int32",
        "Int16",
        "Int8",
        "UInt64",
        "UInt32",
        "UInt16",
        "UInt8",
    ):
        if isinstance_or_issubclass(dtype, getattr(dtypes, name)):
            if dtype_backend == "pyarrow":
                return f"{name}[pyarrow]"
            return name if dtype_backend == "numpy_nullable" else name.lower()
    if isinstance_or_issubclass(dtype, dtypes.String):
        if dtype_backend == "pyarrow":
            return "string[pyarrow]"
        # NOTE: the plain backend deliberately returns the `str` builtin
        # (pandas interprets it as object dtype).
        return "string" if dtype_backend == "numpy_nullable" else str
    if isinstance_or_issubclass(dtype, dtypes.Boolean):
        if dtype_backend == "pyarrow":
            return "boolean[pyarrow]"
        return "boolean" if dtype_backend == "numpy_nullable" else "bool"
    if isinstance_or_issubclass(dtype, dtypes.Categorical):
        # TODO(Unassigned): is there no pyarrow-backed categorical?
        # or at least, convert_dtypes(dtype_backend='pyarrow') doesn't
        # convert to it?
        return "category"
    if isinstance_or_issubclass(dtype, dtypes.Datetime):
        # Pandas does not support "ms" or "us" time units before version 2.0
        if implementation is Implementation.PANDAS and backend_version < (
            2,
        ):  # pragma: no cover
            dt_time_unit = "ns"
        else:
            dt_time_unit = dtype.time_unit
        if dtype_backend == "pyarrow":
            tz_part = f", tz={tz}" if (tz := dtype.time_zone) else ""
            return f"timestamp[{dt_time_unit}{tz_part}][pyarrow]"
        tz_part = f", {tz}" if (tz := dtype.time_zone) else ""
        return f"datetime64[{dt_time_unit}{tz_part}]"
    if isinstance_or_issubclass(dtype, dtypes.Duration):
        if implementation is Implementation.PANDAS and backend_version < (
            2,
        ):  # pragma: no cover
            du_time_unit = "ns"
        else:
            du_time_unit = dtype.time_unit
        return (
            f"duration[{du_time_unit}][pyarrow]"
            if dtype_backend == "pyarrow"
            else f"timedelta64[{du_time_unit}]"
        )
    if isinstance_or_issubclass(dtype, dtypes.Date):
        try:
            import pyarrow as pa  # ignore-banned-import  # noqa: F401
        except ModuleNotFoundError:  # pragma: no cover
            # BUGFIX: previously `msg` was assigned but never raised, so a
            # pyarrow-backed dtype string was returned without pyarrow installed.
            msg = "'pyarrow>=11.0.0' is required for `Date` dtype."
            raise ModuleNotFoundError(msg) from None
        return "date32[pyarrow]"
    if isinstance_or_issubclass(dtype, dtypes.Enum):
        if version is Version.V1:
            msg = "Converting to Enum is not supported in narwhals.stable.v1"
            raise NotImplementedError(msg)
        if isinstance(dtype, dtypes.Enum):
            ns = implementation.to_native_namespace()
            return ns.CategoricalDtype(dtype.categories, ordered=True)
        msg = "Can not cast / initialize Enum without categories present"
        raise ValueError(msg)

    if isinstance_or_issubclass(
        dtype, (dtypes.Struct, dtypes.Array, dtypes.List, dtypes.Time, dtypes.Binary)
    ):
        # Nested/temporal/binary types need pyarrow-backed pandas (>= 2.2).
        if implementation is Implementation.PANDAS and backend_version >= (2, 2):
            try:
                import pyarrow as pa  # ignore-banned-import # noqa: F401
            except ImportError as exc:  # pragma: no cover
                # BUGFIX: message previously read "to to the following exception".
                msg = f"Unable to convert to {dtype} due to the following exception: {exc.msg}"
                raise ImportError(msg) from exc
            from narwhals._arrow.utils import (
                narwhals_to_native_dtype as arrow_narwhals_to_native_dtype,
            )

            return pd.ArrowDtype(arrow_narwhals_to_native_dtype(dtype, version=version))
        else:  # pragma: no cover
            msg = (
                f"Converting to {dtype} dtype is not supported for implementation "
                f"{implementation} and version {version}."
            )
            raise NotImplementedError(msg)
    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
    raise AssertionError(msg)
+
+
def align_series_full_broadcast(*series: PandasLikeSeries) -> list[PandasLikeSeries]:
    """Materialise all inputs to the same length and index.

    Scalars (broadcast series) are expanded to the full length of the longest
    series. Useful when a full Series must be constructed anyway (e.g.
    `DataFrame.select`); avoid in binary ops, where extracting the scalar is
    cheaper.
    """
    native_namespace = series[0].__native_namespace__()

    # Anchor on the first series of maximal length: its index becomes the target.
    longest = max(series, key=len)
    target_len = len(longest)
    target_idx = longest.native.index

    aligned: list[PandasLikeSeries] = []
    for s in series:
        if s._broadcast:
            # Repeat the single value out to the target length.
            expanded = native_namespace.Series(
                [s.native.iloc[0]] * target_len,
                index=target_idx,
                name=s.name,
                dtype=s.native.dtype,
            )
            aligned.append(s._with_native(expanded))
        elif s.native.index is not target_idx:
            # Same-length series on a different index object: re-index in place.
            aligned.append(
                s._with_native(
                    set_index(
                        s.native,
                        target_idx,
                        implementation=s._implementation,
                        backend_version=s._backend_version,
                    )
                )
            )
        else:
            aligned.append(s)
    return aligned
+
+
def int_dtype_mapper(dtype: Any) -> str:
    """Map any integer dtype to the Int64 flavour matching its backend."""
    as_str = str(dtype)
    if "pyarrow" in as_str:
        return "Int64[pyarrow]"
    if as_str != as_str.lower():  # pragma: no cover
        # Any uppercase letter marks a nullable-pandas dtype (e.g. "Int32").
        return "Int64"
    return "int64"
+
+
def calculate_timestamp_datetime(
    s: pd.Series[int], original_time_unit: str, time_unit: str
) -> pd.Series[int]:
    """Rescale integer timestamps from `original_time_unit` to `time_unit`.

    Upscaling multiplies, downscaling floor-divides. Each source unit has a
    fallback conversion (keyed by `None`) for any target unit other than
    "ns"/"us", mirroring the original branch structure exactly.
    """
    conversions: dict[str, dict[str | None, Callable[[Any], Any]]] = {
        "ns": {
            "ns": lambda v: v,
            "us": lambda v: v // 1_000,
            None: lambda v: v // 1_000_000,
        },
        "us": {
            "ns": lambda v: v * 1_000,
            "us": lambda v: v,
            None: lambda v: v // 1_000,
        },
        "ms": {
            "ns": lambda v: v * 1_000_000,
            "us": lambda v: v * 1_000,
            None: lambda v: v,
        },
        "s": {
            "ns": lambda v: v * 1_000_000_000,
            "us": lambda v: v * 1_000_000,
            None: lambda v: v * 1_000,
        },
    }
    if original_time_unit not in conversions:  # pragma: no cover
        msg = f"unexpected time unit {original_time_unit}, please report a bug at https://github.com/narwhals-dev/narwhals"
        raise AssertionError(msg)
    row = conversions[original_time_unit]
    convert = row.get(time_unit, row[None])
    return convert(s)
+
+
def calculate_timestamp_date(s: pd.Series[int], time_unit: str) -> pd.Series[int]:
    """Convert days-since-epoch to an integer timestamp in `time_unit`.

    Any unit other than "ns"/"us" falls back to milliseconds, matching the
    original `else` branch.
    """
    in_seconds = s * 86_400  # number of seconds in a day
    factor = {"ns": 1_000_000_000, "us": 1_000_000}.get(time_unit, 1_000)
    return in_seconds * factor
+
+
def select_columns_by_name(
    df: T,
    column_names: list[str] | _1DArray,  # NOTE: Cannot be a tuple!
    backend_version: tuple[int, ...],
    implementation: Implementation,
) -> T:
    """Select columns by name.

    Prefer this over `df.loc[:, column_names]` as it's
    generally more performant.

    Missing columns surface as the narwhals error produced by
    `check_columns_exist` rather than pandas' raw `KeyError`.
    """
    # Fast path: the selection is a no-op, return the frame unchanged.
    if len(column_names) == df.shape[1] and all(column_names == df.columns):  # type: ignore[attr-defined]
        return df
    if (df.columns.dtype.kind == "b") or (  # type: ignore[attr-defined]
        implementation is Implementation.PANDAS and backend_version < (1, 5)
    ):
        # See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122
        # for why we need this
        if error := check_columns_exist(
            column_names,  # type: ignore[arg-type]
            available=df.columns.tolist(),  # type: ignore[attr-defined]
        ):
            raise error
        return df.loc[:, column_names]  # type: ignore[attr-defined]
    try:
        return df[column_names]  # type: ignore[index]
    except KeyError as e:
        # Re-raise with the friendlier narwhals error when one applies.
        if error := check_columns_exist(
            column_names,  # type: ignore[arg-type]
            available=df.columns.tolist(),  # type: ignore[attr-defined]
        ):
            raise error from e
        raise
+
+
def check_column_names_are_unique(columns: pd.Index[str]) -> None:
    """Raise `DuplicateError` when `columns` contains repeated names.

    Unhashable column labels surface as `ValueError` instead.
    """
    try:
        n_unique = len(columns.drop_duplicates())
    except Exception:  # noqa: BLE001 # pragma: no cover
        msg = f"Expected hashable (e.g. str or int) column names, got: {columns}"
        raise ValueError(msg) from None

    if len(columns) == n_unique:
        return

    from collections import Counter

    details = "".join(
        f"\n- '{name}' {count} times"
        for name, count in Counter(columns).items()
        if count > 1
    )
    msg = f"Expected unique column names, got:{details}"
    raise DuplicateError(msg)
+
+
class PandasLikeSeriesNamespace(EagerSeriesNamespace["PandasLikeSeries", Any]):
    """Shared base for pandas-like series accessor namespaces (str/dt/cat/...).

    Exposes backend metadata of the wrapped compliant series as read-only
    properties.
    """

    @property
    def implementation(self) -> Implementation:
        """Which pandas-like backend (pandas / modin / cudf) backs the series."""
        return self.compliant._implementation

    @property
    def backend_version(self) -> tuple[int, ...]:
        """Parsed version tuple of the native backend."""
        return self.compliant._backend_version

    @property
    def version(self) -> Version:
        """Narwhals API version in use for this series."""
        return self.compliant._version
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_polars/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_polars/dataframe.py
new file mode 100644
index 0000000..9a270ff
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/dataframe.py
@@ -0,0 +1,770 @@
+from __future__ import annotations
+
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ Sized,
+ cast,
+ overload,
+)
+
+import polars as pl
+
+from narwhals._polars.namespace import PolarsNamespace
+from narwhals._polars.series import PolarsSeries
+from narwhals._polars.utils import (
+ catch_polars_exception,
+ extract_args_kwargs,
+ native_to_narwhals_dtype,
+)
+from narwhals._utils import (
+ Implementation,
+ _into_arrow_table,
+ check_columns_exist,
+ convert_str_slice_to_int_slice,
+ is_compliant_series,
+ is_index_selector,
+ is_range,
+ is_sequence_like,
+ is_slice_index,
+ is_slice_none,
+ parse_columns_to_drop,
+ parse_version,
+ requires,
+ validate_backend_version,
+)
+from narwhals.dependencies import is_numpy_array_1d
+from narwhals.exceptions import ColumnNotFoundError
+
+if TYPE_CHECKING:
+ from types import ModuleType
+ from typing import Callable, TypeVar
+
+ import pandas as pd
+ import pyarrow as pa
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny, CompliantLazyFrameAny
+ from narwhals._polars.expr import PolarsExpr
+ from narwhals._polars.group_by import PolarsGroupBy, PolarsLazyGroupBy
+ from narwhals._translate import IntoArrowTable
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.typing import (
+ JoinStrategy,
+ MultiColSelector,
+ MultiIndexSelector,
+ PivotAgg,
+ SingleIndexSelector,
+ _2DArray,
+ )
+
+ T = TypeVar("T")
+ R = TypeVar("R")
+
Method: TypeAlias = "Callable[..., R]"
"""Generic alias representing all methods implemented via `__getattr__`.

Where `R` is the return type.
"""

# DataFrame methods where PolarsDataFrame just defers to Polars.DataFrame directly.
# `__getattr__` raises AttributeError for anything NOT in this set, so each new
# pass-through method must be registered here explicitly.
INHERITED_METHODS = frozenset(
    [
        "clone",
        "drop_nulls",
        "estimated_size",
        "explode",
        "filter",
        "gather_every",
        "head",
        "is_unique",
        "item",
        "iter_rows",
        "join_asof",
        "rename",
        "row",
        "rows",
        "sample",
        "select",
        "sort",
        "tail",
        "to_arrow",
        "to_pandas",
        "unique",
        "with_columns",
        "write_csv",
        "write_parquet",
    ]
)
+
+
+class PolarsDataFrame:
+ clone: Method[Self]
+ collect: Method[CompliantDataFrameAny]
+ drop_nulls: Method[Self]
+ estimated_size: Method[int | float]
+ explode: Method[Self]
+ filter: Method[Self]
+ gather_every: Method[Self]
+ item: Method[Any]
+ iter_rows: Method[Iterator[tuple[Any, ...]] | Iterator[Mapping[str, Any]]]
+ is_unique: Method[PolarsSeries]
+ join_asof: Method[Self]
+ rename: Method[Self]
+ row: Method[tuple[Any, ...]]
+ rows: Method[Sequence[tuple[Any, ...]] | Sequence[Mapping[str, Any]]]
+ sample: Method[Self]
+ select: Method[Self]
+ sort: Method[Self]
+ to_arrow: Method[pa.Table]
+ to_pandas: Method[pd.DataFrame]
+ unique: Method[Self]
+ with_columns: Method[Self]
+ # NOTE: `write_csv` requires an `@overload` for `str | None`
+ # Can't do that here 😟
+ write_csv: Method[Any]
+ write_parquet: Method[None]
+
+ # CompliantDataFrame
+ _evaluate_aliases: Any
+
+ def __init__(
+ self, df: pl.DataFrame, *, backend_version: tuple[int, ...], version: Version
+ ) -> None:
+ self._native_frame = df
+ self._backend_version = backend_version
+ self._implementation = Implementation.POLARS
+ self._version = version
+ validate_backend_version(self._implementation, self._backend_version)
+
+ @classmethod
+ def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self:
+ if context._backend_version >= (1, 3):
+ native = pl.DataFrame(data)
+ else:
+ native = cast("pl.DataFrame", pl.from_arrow(_into_arrow_table(data, context)))
+ return cls.from_native(native, context=context)
+
+ @classmethod
+ def from_dict(
+ cls,
+ data: Mapping[str, Any],
+ /,
+ *,
+ context: _FullContext,
+ schema: Mapping[str, DType] | Schema | None,
+ ) -> Self:
+ from narwhals.schema import Schema
+
+ pl_schema = Schema(schema).to_polars() if schema is not None else schema
+ return cls.from_native(pl.from_dict(data, pl_schema), context=context)
+
+ @staticmethod
+ def _is_native(obj: pl.DataFrame | Any) -> TypeIs[pl.DataFrame]:
+ return isinstance(obj, pl.DataFrame)
+
+ @classmethod
+ def from_native(cls, data: pl.DataFrame, /, *, context: _FullContext) -> Self:
+ return cls(
+ data, backend_version=context._backend_version, version=context._version
+ )
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: _2DArray,
+ /,
+ *,
+ context: _FullContext, # NOTE: Maybe only `Implementation`?
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+ ) -> Self:
+ from narwhals.schema import Schema
+
+ pl_schema = (
+ Schema(schema).to_polars()
+ if isinstance(schema, (Mapping, Schema))
+ else schema
+ )
+ return cls.from_native(pl.from_numpy(data, pl_schema), context=context)
+
+ def to_narwhals(self) -> DataFrame[pl.DataFrame]:
+ return self._version.dataframe(self, level="full")
+
+ @property
+ def native(self) -> pl.DataFrame:
+ return self._native_frame
+
+ def __repr__(self) -> str: # pragma: no cover
+ return "PolarsDataFrame"
+
+ def __narwhals_dataframe__(self) -> Self:
+ return self
+
+ def __narwhals_namespace__(self) -> PolarsNamespace:
+ return PolarsNamespace(
+ backend_version=self._backend_version, version=self._version
+ )
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation is Implementation.POLARS:
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected polars, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native, backend_version=self._backend_version, version=version
+ )
+
+ def _with_native(self, df: pl.DataFrame) -> Self:
+ return self.__class__(
+ df, backend_version=self._backend_version, version=self._version
+ )
+
+ @overload
+ def _from_native_object(self, obj: pl.Series) -> PolarsSeries: ...
+
+ @overload
+ def _from_native_object(self, obj: pl.DataFrame) -> Self: ...
+
+ @overload
+ def _from_native_object(self, obj: T) -> T: ...
+
+ def _from_native_object(
+ self, obj: pl.Series | pl.DataFrame | T
+ ) -> Self | PolarsSeries | T:
+ if isinstance(obj, pl.Series):
+ return PolarsSeries.from_native(obj, context=self)
+ if self._is_native(obj):
+ return self._with_native(obj)
+ # scalar
+ return obj
+
+ def __len__(self) -> int:
+ return len(self.native)
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.head(n))
+
+ def tail(self, n: int) -> Self:
+ return self._with_native(self.native.tail(n))
+
+ def __getattr__(self, attr: str) -> Any:
+ if attr not in INHERITED_METHODS: # pragma: no cover
+ msg = f"{self.__class__.__name__} has not attribute '{attr}'."
+ raise AttributeError(msg)
+
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ try:
+ return self._from_native_object(getattr(self.native, attr)(*pos, **kwds))
+ except pl.exceptions.ColumnNotFoundError as e: # pragma: no cover
+ msg = f"{e!s}\n\nHint: Did you mean one of these columns: {self.columns}?"
+ raise ColumnNotFoundError(msg) from e
+ except Exception as e: # noqa: BLE001
+ raise catch_polars_exception(e, self._backend_version) from None
+
+ return func
+
+ def __array__(
+ self, dtype: Any | None = None, *, copy: bool | None = None
+ ) -> _2DArray:
+ if self._backend_version < (0, 20, 28) and copy is not None:
+ msg = "`copy` in `__array__` is only supported for 'polars>=0.20.28'"
+ raise NotImplementedError(msg)
+ if self._backend_version < (0, 20, 28):
+ return self.native.__array__(dtype)
+ return self.native.__array__(dtype)
+
+ def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _2DArray:
+ return self.native.to_numpy()
+
+ def collect_schema(self) -> dict[str, DType]:
+ if self._backend_version < (1,):
+ return {
+ name: native_to_narwhals_dtype(
+ dtype, self._version, self._backend_version
+ )
+ for name, dtype in self.native.schema.items()
+ }
+ else:
+ collected_schema = self.native.collect_schema()
+ return {
+ name: native_to_narwhals_dtype(
+ dtype, self._version, self._backend_version
+ )
+ for name, dtype in collected_schema.items()
+ }
+
+ @property
+ def shape(self) -> tuple[int, int]:
+ return self.native.shape
+
    def __getitem__( # noqa: C901, PLR0912
        self,
        item: tuple[
            SingleIndexSelector | MultiIndexSelector[PolarsSeries],
            MultiColSelector[PolarsSeries],
        ],
    ) -> Any:
        """2D selection: `item` is a `(rows, columns)` pair.

        On Polars > 0.20.30 the native `__getitem__` accepts the pair
        directly; older versions are emulated column-selection-first.
        """
        rows, columns = item
        if self._backend_version > (0, 20, 30):
            # Unwrap compliant series to their native counterparts, then
            # delegate the whole 2D selection to Polars.
            rows_native = rows.native if is_compliant_series(rows) else rows
            columns_native = columns.native if is_compliant_series(columns) else columns
            selector = rows_native, columns_native
            selected = self.native.__getitem__(selector) # type: ignore[index]
            return self._from_native_object(selected)
        else: # pragma: no cover
            # TODO(marco): we can delete this branch after Polars==0.20.30 becomes the minimum
            # Polars version we support
            # This mostly mirrors the logic in `EagerDataFrame.__getitem__`.
            rows = list(rows) if isinstance(rows, tuple) else rows
            columns = list(columns) if isinstance(columns, tuple) else columns
            if is_numpy_array_1d(columns):
                columns = columns.tolist()

            native = self.native
            # Apply column selection first; row selection follows below.
            if not is_slice_none(columns):
                if isinstance(columns, Sized) and len(columns) == 0:
                    # Empty column selection -> empty frame.
                    return self.select()
                if is_index_selector(columns):
                    if is_slice_index(columns) or is_range(columns):
                        native = native.select(
                            self.columns[slice(columns.start, columns.stop, columns.step)]
                        )
                    elif is_compliant_series(columns):
                        native = native[:, columns.native.to_list()]
                    else:
                        native = native[:, columns]
                elif isinstance(columns, slice):
                    # Slice of column *names* -> convert to a positional slice.
                    native = native.select(
                        self.columns[
                            slice(*convert_str_slice_to_int_slice(columns, self.columns))
                        ]
                    )
                elif is_compliant_series(columns):
                    native = native.select(columns.native.to_list())
                elif is_sequence_like(columns):
                    native = native.select(columns)
                else:
                    msg = f"Unreachable code, got unexpected type: {type(columns)}"
                    raise AssertionError(msg)

            if not is_slice_none(rows):
                if isinstance(rows, int):
                    # Single row index is list-wrapped so a DataFrame (not a
                    # row tuple) comes back.
                    native = native[[rows], :]
                elif isinstance(rows, (slice, range)):
                    native = native[rows, :]
                elif is_compliant_series(rows):
                    native = native[rows.native, :]
                elif is_sequence_like(rows):
                    native = native[rows, :]
                else:
                    msg = f"Unreachable code, got unexpected type: {type(rows)}"
                    raise AssertionError(msg)

            return self._with_native(native)
+
    def simple_select(self, *column_names: str) -> Self:
        """Select columns by name only (fast path, no expression parsing)."""
        return self._with_native(self.native.select(*column_names))
+
    def aggregate(self, *exprs: Any) -> Self:
        """Aggregations are plain selections for the Polars backend."""
        return self.select(*exprs)
+
    def get_column(self, name: str) -> PolarsSeries:
        """Return column `name` wrapped as a compliant PolarsSeries."""
        return PolarsSeries.from_native(self.native.get_column(name), context=self)
+
+ def iter_columns(self) -> Iterator[PolarsSeries]:
+ for series in self.native.iter_columns():
+ yield PolarsSeries.from_native(series, context=self)
+
    @property
    def columns(self) -> list[str]:
        """Column names of the native frame."""
        return self.native.columns
+
    @property
    def schema(self) -> dict[str, DType]:
        """Schema with native dtypes mapped to narwhals dtypes."""
        return {
            name: native_to_narwhals_dtype(dtype, self._version, self._backend_version)
            for name, dtype in self.native.schema.items()
        }
+
    def lazy(self, *, backend: Implementation | None = None) -> CompliantLazyFrameAny:
        """Convert to a lazy frame, optionally on a different backend.

        `backend=None` (or POLARS) stays in Polars; DUCKDB and DASK
        round-trip the data through their respective ingestion paths.
        """
        if backend is None or backend is Implementation.POLARS:
            return PolarsLazyFrame.from_native(self.native.lazy(), context=self)
        elif backend is Implementation.DUCKDB:
            import duckdb # ignore-banned-import

            from narwhals._duckdb.dataframe import DuckDBLazyFrame

            # NOTE: (F841) is a false positive
            # `duckdb.table("df")` resolves the name `df` from this local
            # scope (DuckDB replacement scan), so the binding is required.
            df = self.native # noqa: F841
            return DuckDBLazyFrame(
                duckdb.table("df"),
                backend_version=parse_version(duckdb),
                version=self._version,
            )
        elif backend is Implementation.DASK:
            import dask # ignore-banned-import
            import dask.dataframe as dd # ignore-banned-import

            from narwhals._dask.dataframe import DaskLazyFrame

            return DaskLazyFrame(
                dd.from_pandas(self.native.to_pandas()),
                backend_version=parse_version(dask),
                version=self._version,
            )
        raise AssertionError # pragma: no cover
+
+ @overload
+ def to_dict(self, *, as_series: Literal[True]) -> dict[str, PolarsSeries]: ...
+
+ @overload
+ def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
+
+ def to_dict(
+ self, *, as_series: bool
+ ) -> dict[str, PolarsSeries] | dict[str, list[Any]]:
+ if as_series:
+ return {
+ name: PolarsSeries.from_native(col, context=self)
+ for name, col in self.native.to_dict().items()
+ }
+ else:
+ return self.native.to_dict(as_series=False)
+
    def group_by(
        self, keys: Sequence[str] | Sequence[PolarsExpr], *, drop_null_keys: bool
    ) -> PolarsGroupBy:
        """Start an eager group-by over `keys`."""
        # Imported lazily to avoid a circular import at module load time.
        from narwhals._polars.group_by import PolarsGroupBy

        return PolarsGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
+ def with_row_index(self, name: str) -> Self:
+ if self._backend_version < (0, 20, 4):
+ return self._with_native(self.native.with_row_count(name))
+ return self._with_native(self.native.with_row_index(name))
+
    def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
        """Drop `columns`; `strict` controls missing-column validation."""
        to_drop = parse_columns_to_drop(self, columns, strict=strict)
        return self._with_native(self.native.drop(to_drop))
+
+ def unpivot(
+ self,
+ on: Sequence[str] | None,
+ index: Sequence[str] | None,
+ variable_name: str,
+ value_name: str,
+ ) -> Self:
+ if self._backend_version < (1, 0, 0):
+ return self._with_native(
+ self.native.melt(
+ id_vars=index,
+ value_vars=on,
+ variable_name=variable_name,
+ value_name=value_name,
+ )
+ )
+ return self._with_native(
+ self.native.unpivot(
+ on=on, index=index, variable_name=variable_name, value_name=value_name
+ )
+ )
+
    @requires.backend_version((1,))
    def pivot(
        self,
        on: Sequence[str],
        *,
        index: Sequence[str] | None,
        values: Sequence[str] | None,
        aggregate_function: PivotAgg | None,
        sort_columns: bool,
        separator: str,
    ) -> Self:
        """Pivot to wide format (requires Polars >= 1.0)."""
        try:
            result = self.native.pivot(
                on,
                index=index,
                values=values,
                aggregate_function=aggregate_function,
                sort_columns=sort_columns,
                separator=separator,
            )
        except Exception as e: # noqa: BLE001
            # Translate native Polars errors into narwhals-level errors.
            raise catch_polars_exception(e, self._backend_version) from None
        return self._from_native_object(result)
+
    def to_polars(self) -> pl.DataFrame:
        """Return the underlying native Polars DataFrame (no copy)."""
        return self.native
+
    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        """Join with `other`, translating the strategy for old Polars."""
        # Polars < 0.20.29 spells a full join "outer".
        how_native = (
            "outer" if (self._backend_version < (0, 20, 29) and how == "full") else how
        )
        try:
            return self._with_native(
                self.native.join(
                    other=other.native,
                    how=how_native, # type: ignore[arg-type]
                    left_on=left_on,
                    right_on=right_on,
                    suffix=suffix,
                )
            )
        except Exception as e: # noqa: BLE001
            # Translate native Polars errors into narwhals-level errors.
            raise catch_polars_exception(e, self._backend_version) from None
+
    def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None:
        """Return an error (without raising) if any of `subset` is missing."""
        return check_columns_exist(subset, available=self.columns)
+
+
class PolarsLazyFrame:
    """Narwhals-compliant wrapper around a native ``pl.LazyFrame``.

    Most operations are thin delegations to Polars: the ``Method[Self]``
    attributes below are resolved dynamically via ``__getattr__``, while
    methods needing version-specific handling are implemented explicitly.
    """

    drop_nulls: Method[Self]
    explode: Method[Self]
    filter: Method[Self]
    gather_every: Method[Self]
    head: Method[Self]
    join_asof: Method[Self]
    rename: Method[Self]
    select: Method[Self]
    sort: Method[Self]
    tail: Method[Self]
    unique: Method[Self]
    with_columns: Method[Self]

    # CompliantLazyFrame
    _evaluate_expr: Any
    _evaluate_window_expr: Any
    _evaluate_aliases: Any

    def __init__(
        self, df: pl.LazyFrame, *, backend_version: tuple[int, ...], version: Version
    ) -> None:
        self._native_frame = df
        self._backend_version = backend_version
        self._implementation = Implementation.POLARS
        self._version = version
        validate_backend_version(self._implementation, self._backend_version)

    @staticmethod
    def _is_native(obj: pl.LazyFrame | Any) -> TypeIs[pl.LazyFrame]:
        return isinstance(obj, pl.LazyFrame)

    @classmethod
    def from_native(cls, data: pl.LazyFrame, /, *, context: _FullContext) -> Self:
        """Alternate constructor copying version info from `context`."""
        return cls(
            data, backend_version=context._backend_version, version=context._version
        )

    def to_narwhals(self) -> LazyFrame[pl.LazyFrame]:
        return self._version.lazyframe(self, level="lazy")

    def __repr__(self) -> str:  # pragma: no cover
        return "PolarsLazyFrame"

    def __narwhals_lazyframe__(self) -> Self:
        return self

    def __narwhals_namespace__(self) -> PolarsNamespace:
        return PolarsNamespace(
            backend_version=self._backend_version, version=self._version
        )

    def __native_namespace__(self) -> ModuleType:
        if self._implementation is Implementation.POLARS:
            return self._implementation.to_native_namespace()

        msg = f"Expected polars, got: {type(self._implementation)}"  # pragma: no cover
        raise AssertionError(msg)

    def _with_native(self, df: pl.LazyFrame) -> Self:
        """Wrap a new native frame, keeping this frame's version info."""
        return self.__class__(
            df, backend_version=self._backend_version, version=self._version
        )

    def _with_version(self, version: Version) -> Self:
        return self.__class__(
            self.native, backend_version=self._backend_version, version=version
        )

    def __getattr__(self, attr: str) -> Any:
        # Dynamic delegation for the `Method[Self]` attributes declared above.
        if attr not in INHERITED_METHODS:  # pragma: no cover
            # BUG FIX: error message read "has not attribute".
            msg = f"{self.__class__.__name__} has no attribute '{attr}'."
            raise AttributeError(msg)

        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            try:
                return self._with_native(getattr(self.native, attr)(*pos, **kwds))
            except pl.exceptions.ColumnNotFoundError as e:  # pragma: no cover
                raise ColumnNotFoundError(str(e)) from e

        return func

    def _iter_columns(self) -> Iterator[PolarsSeries]:  # pragma: no cover
        yield from self.collect(self._implementation).iter_columns()

    @property
    def native(self) -> pl.LazyFrame:
        """The wrapped native ``pl.LazyFrame``."""
        return self._native_frame

    @property
    def columns(self) -> list[str]:
        return self.native.columns

    @property
    def schema(self) -> dict[str, DType]:
        """Schema with native dtypes mapped to narwhals dtypes."""
        schema = self.native.schema
        return {
            name: native_to_narwhals_dtype(dtype, self._version, self._backend_version)
            for name, dtype in schema.items()
        }

    def collect_schema(self) -> dict[str, DType]:
        """Resolve the schema, using `collect_schema` on Polars >= 1.0."""
        if self._backend_version < (1,):
            return {
                name: native_to_narwhals_dtype(
                    dtype, self._version, self._backend_version
                )
                for name, dtype in self.native.schema.items()
            }
        else:
            try:
                collected_schema = self.native.collect_schema()
            except Exception as e:  # noqa: BLE001
                raise catch_polars_exception(e, self._backend_version) from None
            return {
                name: native_to_narwhals_dtype(
                    dtype, self._version, self._backend_version
                )
                for name, dtype in collected_schema.items()
            }

    def collect(
        self, backend: Implementation | None, **kwargs: Any
    ) -> CompliantDataFrameAny:
        """Materialize the frame, optionally converting to another backend."""
        try:
            result = self.native.collect(**kwargs)
        except Exception as e:  # noqa: BLE001
            raise catch_polars_exception(e, self._backend_version) from None

        if backend is None or backend is Implementation.POLARS:
            return PolarsDataFrame.from_native(result, context=self)

        if backend is Implementation.PANDAS:
            import pandas as pd  # ignore-banned-import

            from narwhals._pandas_like.dataframe import PandasLikeDataFrame

            return PandasLikeDataFrame(
                result.to_pandas(),
                implementation=Implementation.PANDAS,
                backend_version=parse_version(pd),
                version=self._version,
                validate_column_names=False,
            )

        if backend is Implementation.PYARROW:
            import pyarrow as pa  # ignore-banned-import

            from narwhals._arrow.dataframe import ArrowDataFrame

            return ArrowDataFrame(
                result.to_arrow(),
                backend_version=parse_version(pa),
                version=self._version,
                validate_column_names=False,
            )

        msg = f"Unsupported `backend` value: {backend}"  # pragma: no cover
        raise ValueError(msg)  # pragma: no cover

    def group_by(
        self, keys: Sequence[str] | Sequence[PolarsExpr], *, drop_null_keys: bool
    ) -> PolarsLazyGroupBy:
        """Start a lazy group-by over `keys`."""
        # Imported lazily to avoid a circular import at module load time.
        from narwhals._polars.group_by import PolarsLazyGroupBy

        return PolarsLazyGroupBy(self, keys, drop_null_keys=drop_null_keys)

    def with_row_index(self, name: str) -> Self:
        # `with_row_count` was renamed to `with_row_index` in Polars 0.20.4.
        if self._backend_version < (0, 20, 4):
            return self._with_native(self.native.with_row_count(name))
        return self._with_native(self.native.with_row_index(name))

    def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
        # Polars < 1.0 has no `strict` parameter on `drop`.
        if self._backend_version < (1, 0, 0):
            return self._with_native(self.native.drop(columns))
        return self._with_native(self.native.drop(columns, strict=strict))

    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self:
        # `melt` was renamed to `unpivot` (with new argument names) in Polars 1.0.
        if self._backend_version < (1, 0, 0):
            return self._with_native(
                self.native.melt(
                    id_vars=index,
                    value_vars=on,
                    variable_name=variable_name,
                    value_name=value_name,
                )
            )
        return self._with_native(
            self.native.unpivot(
                on=on, index=index, variable_name=variable_name, value_name=value_name
            )
        )

    def simple_select(self, *column_names: str) -> Self:
        """Select columns by name only (fast path, no expression parsing)."""
        return self._with_native(self.native.select(*column_names))

    def aggregate(self, *exprs: Any) -> Self:
        """Aggregations are plain selections for the Polars backend."""
        return self.select(*exprs)

    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        """Join with `other`, translating the strategy for old Polars."""
        # Polars < 0.20.29 spells a full join "outer".
        how_native = (
            "outer" if (self._backend_version < (0, 20, 29) and how == "full") else how
        )
        return self._with_native(
            self.native.join(
                other=other.native,
                how=how_native,  # type: ignore[arg-type]
                left_on=left_on,
                right_on=right_on,
                suffix=suffix,
            )
        )

    def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None:
        """Return an error (without raising) if any of `subset` is missing."""
        return check_columns_exist(  # pragma: no cover
            subset, available=self.columns
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/expr.py b/venv/lib/python3.8/site-packages/narwhals/_polars/expr.py
new file mode 100644
index 0000000..eb5b5f2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/expr.py
@@ -0,0 +1,415 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Callable, Literal, Mapping, Sequence
+
+import polars as pl
+
+from narwhals._duration import parse_interval_string
+from narwhals._polars.utils import (
+ extract_args_kwargs,
+ extract_native,
+ narwhals_to_native_dtype,
+)
+from narwhals._utils import Implementation, requires
+
+if TYPE_CHECKING:
+ from typing_extensions import Self
+
+ from narwhals._expression_parsing import ExprKind, ExprMetadata
+ from narwhals._polars.dataframe import Method
+ from narwhals._polars.namespace import PolarsNamespace
+ from narwhals._utils import Version
+ from narwhals.typing import IntoDType
+
+
class PolarsExpr:
    """Narwhals-compliant wrapper around a native ``pl.Expr``.

    Unknown attribute access falls through ``__getattr__`` to the native
    expression; methods defined explicitly below need version-specific
    handling or dtype translation.
    """

    def __init__(
        self, expr: pl.Expr, version: Version, backend_version: tuple[int, ...]
    ) -> None:
        self._native_expr = expr
        self._implementation = Implementation.POLARS
        self._version = version
        self._backend_version = backend_version
        self._metadata: ExprMetadata | None = None

    @property
    def native(self) -> pl.Expr:
        # The wrapped native ``pl.Expr``.
        return self._native_expr

    def __repr__(self) -> str: # pragma: no cover
        return "PolarsExpr"

    def _with_native(self, expr: pl.Expr) -> Self:
        # Wrap a new native expression, keeping this one's version info.
        return self.__class__(expr, self._version, self._backend_version)

    @classmethod
    def _from_series(cls, series: Any) -> Self:
        return cls(series.native, series._version, series._backend_version)

    def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
        # Let Polars do its thing.
        return self

    def __getattr__(self, attr: str) -> Any:
        # Generic delegation: forward the call to the native expression and
        # re-wrap the result.
        def func(*args: Any, **kwargs: Any) -> Any:
            pos, kwds = extract_args_kwargs(args, kwargs)
            return self._with_native(getattr(self.native, attr)(*pos, **kwds))

        return func

    def _renamed_min_periods(self, min_samples: int, /) -> dict[str, Any]:
        # Polars renamed `min_periods` to `min_samples` in 1.21.0; build the
        # keyword dict appropriate for the installed version.
        name = "min_periods" if self._backend_version < (1, 21, 0) else "min_samples"
        return {name: min_samples}

    def cast(self, dtype: IntoDType) -> Self:
        dtype_pl = narwhals_to_native_dtype(dtype, self._version, self._backend_version)
        return self._with_native(self.native.cast(dtype_pl))

    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> Self:
        native = self.native.ewm_mean(
            com=com,
            span=span,
            half_life=half_life,
            alpha=alpha,
            adjust=adjust,
            ignore_nulls=ignore_nulls,
            **self._renamed_min_periods(min_samples),
        )
        if self._backend_version < (1,): # pragma: no cover
            # Old Polars fills nulls through the EWM; mask them back to null.
            native = pl.when(~self.native.is_null()).then(native).otherwise(None)
        return self._with_native(native)

    def is_nan(self) -> Self:
        if self._backend_version >= (1, 18):
            native = self.native.is_nan()
        else: # pragma: no cover
            # Pre-1.18: keep nulls null instead of treating them as NaN.
            native = pl.when(self.native.is_not_null()).then(self.native.is_nan())
        return self._with_native(native)

    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
        # `order_by` support for `over` arrived with Polars 1.10.
        if self._backend_version < (1, 9):
            if order_by:
                msg = "`order_by` in Polars requires version 1.10 or greater"
                raise NotImplementedError(msg)
            native = self.native.over(partition_by or pl.lit(1))
        else:
            native = self.native.over(
                partition_by or pl.lit(1), order_by=order_by or None
            )
        return self._with_native(native)

    @requires.backend_version((1,))
    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        kwds = self._renamed_min_periods(min_samples)
        native = self.native.rolling_var(
            window_size=window_size, center=center, ddof=ddof, **kwds
        )
        return self._with_native(native)

    @requires.backend_version((1,))
    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        kwds = self._renamed_min_periods(min_samples)
        native = self.native.rolling_std(
            window_size=window_size, center=center, ddof=ddof, **kwds
        )
        return self._with_native(native)

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        kwds = self._renamed_min_periods(min_samples)
        native = self.native.rolling_sum(window_size=window_size, center=center, **kwds)
        return self._with_native(native)

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        kwds = self._renamed_min_periods(min_samples)
        native = self.native.rolling_mean(window_size=window_size, center=center, **kwds)
        return self._with_native(native)

    def map_batches(
        self, function: Callable[[Any], Any], return_dtype: IntoDType | None
    ) -> Self:
        return_dtype_pl = (
            narwhals_to_native_dtype(return_dtype, self._version, self._backend_version)
            if return_dtype
            else None
        )
        native = self.native.map_batches(function, return_dtype_pl)
        return self._with_native(native)

    @requires.backend_version((1,))
    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self:
        return_dtype_pl = (
            narwhals_to_native_dtype(return_dtype, self._version, self._backend_version)
            if return_dtype
            else None
        )
        native = self.native.replace_strict(old, new, return_dtype=return_dtype_pl)
        return self._with_native(native)

    # Comparison/arithmetic dunders return expressions (not bool), hence the
    # deliberate `type: ignore[override]` on __eq__/__ne__.
    def __eq__(self, other: object) -> Self: # type: ignore[override]
        return self._with_native(self.native.__eq__(extract_native(other))) # type: ignore[operator]

    def __ne__(self, other: object) -> Self: # type: ignore[override]
        return self._with_native(self.native.__ne__(extract_native(other))) # type: ignore[operator]

    def __ge__(self, other: Any) -> Self:
        return self._with_native(self.native.__ge__(extract_native(other)))

    def __gt__(self, other: Any) -> Self:
        return self._with_native(self.native.__gt__(extract_native(other)))

    def __le__(self, other: Any) -> Self:
        return self._with_native(self.native.__le__(extract_native(other)))

    def __lt__(self, other: Any) -> Self:
        return self._with_native(self.native.__lt__(extract_native(other)))

    def __and__(self, other: PolarsExpr | bool | Any) -> Self:
        return self._with_native(self.native.__and__(extract_native(other))) # type: ignore[operator]

    def __or__(self, other: PolarsExpr | bool | Any) -> Self:
        return self._with_native(self.native.__or__(extract_native(other))) # type: ignore[operator]

    def __add__(self, other: Any) -> Self:
        return self._with_native(self.native.__add__(extract_native(other)))

    def __sub__(self, other: Any) -> Self:
        return self._with_native(self.native.__sub__(extract_native(other)))

    def __mul__(self, other: Any) -> Self:
        return self._with_native(self.native.__mul__(extract_native(other)))

    def __pow__(self, other: Any) -> Self:
        return self._with_native(self.native.__pow__(extract_native(other)))

    def __truediv__(self, other: Any) -> Self:
        return self._with_native(self.native.__truediv__(extract_native(other)))

    def __floordiv__(self, other: Any) -> Self:
        return self._with_native(self.native.__floordiv__(extract_native(other)))

    def __mod__(self, other: Any) -> Self:
        return self._with_native(self.native.__mod__(extract_native(other)))

    def __invert__(self) -> Self:
        return self._with_native(self.native.__invert__())

    def cum_count(self, *, reverse: bool) -> Self:
        # Pre-0.20.4 Polars lacks `cum_count`; emulate it by cumulative-summing
        # the non-null mask.
        if self._backend_version < (0, 20, 4):
            result = (~self.native.is_null()).cum_sum(reverse=reverse)
        else:
            result = self.native.cum_count(reverse=reverse)
        return self._with_native(result)

    def __narwhals_expr__(self) -> None: ...
    def __narwhals_namespace__(self) -> PolarsNamespace: # pragma: no cover
        from narwhals._polars.namespace import PolarsNamespace

        return PolarsNamespace(
            backend_version=self._backend_version, version=self._version
        )

    @property
    def dt(self) -> PolarsExprDateTimeNamespace:
        return PolarsExprDateTimeNamespace(self)

    @property
    def str(self) -> PolarsExprStringNamespace:
        return PolarsExprStringNamespace(self)

    @property
    def cat(self) -> PolarsExprCatNamespace:
        return PolarsExprCatNamespace(self)

    @property
    def name(self) -> PolarsExprNameNamespace:
        return PolarsExprNameNamespace(self)

    @property
    def list(self) -> PolarsExprListNamespace:
        return PolarsExprListNamespace(self)

    @property
    def struct(self) -> PolarsExprStructNamespace:
        return PolarsExprStructNamespace(self)

    # CompliantExpr
    _alias_output_names: Any
    _evaluate_aliases: Any
    _evaluate_output_names: Any
    _is_multi_output_unnamed: Any
    __call__: Any
    from_column_names: Any
    from_column_indices: Any
    _eval_names_indices: Any

    # Polars — resolved dynamically through `__getattr__` above.
    abs: Method[Self]
    all: Method[Self]
    any: Method[Self]
    alias: Method[Self]
    arg_max: Method[Self]
    arg_min: Method[Self]
    arg_true: Method[Self]
    clip: Method[Self]
    count: Method[Self]
    cum_max: Method[Self]
    cum_min: Method[Self]
    cum_prod: Method[Self]
    cum_sum: Method[Self]
    diff: Method[Self]
    drop_nulls: Method[Self]
    exp: Method[Self]
    fill_null: Method[Self]
    gather_every: Method[Self]
    head: Method[Self]
    is_finite: Method[Self]
    is_first_distinct: Method[Self]
    is_in: Method[Self]
    is_last_distinct: Method[Self]
    is_null: Method[Self]
    is_unique: Method[Self]
    len: Method[Self]
    log: Method[Self]
    max: Method[Self]
    mean: Method[Self]
    median: Method[Self]
    min: Method[Self]
    mode: Method[Self]
    n_unique: Method[Self]
    null_count: Method[Self]
    quantile: Method[Self]
    rank: Method[Self]
    round: Method[Self]
    sample: Method[Self]
    shift: Method[Self]
    skew: Method[Self]
    std: Method[Self]
    sum: Method[Self]
    sort: Method[Self]
    tail: Method[Self]
    unique: Method[Self]
    var: Method[Self]
+
+
class PolarsExprDateTimeNamespace:
    """`.dt` namespace: delegates to the native expression's `dt` accessor."""

    def __init__(self, expr: PolarsExpr) -> None:
        self._compliant_expr = expr

    def truncate(self, every: str) -> PolarsExpr:
        # Validate the interval eagerly so the error message is consistent
        # with other backends.
        parse_interval_string(every)
        expr = self._compliant_expr
        return expr._with_native(expr.native.dt.truncate(every))

    def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]:
        def delegate(*args: Any, **kwargs: Any) -> PolarsExpr:
            positional, named = extract_args_kwargs(args, kwargs)
            expr = self._compliant_expr
            return expr._with_native(getattr(expr.native.dt, attr)(*positional, **named))

        return delegate
+
+
class PolarsExprStringNamespace:
    """`.str` namespace: delegates to the native expression's `str` accessor."""

    def __init__(self, expr: PolarsExpr) -> None:
        self._compliant_expr = expr

    def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]:
        def delegate(*args: Any, **kwargs: Any) -> PolarsExpr:
            positional, named = extract_args_kwargs(args, kwargs)
            expr = self._compliant_expr
            return expr._with_native(getattr(expr.native.str, attr)(*positional, **named))

        return delegate
+
+
class PolarsExprCatNamespace:
    """`.cat` namespace: delegates to the native expression's `cat` accessor."""

    def __init__(self, expr: PolarsExpr) -> None:
        self._compliant_expr = expr

    def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]:
        def func(*args: Any, **kwargs: Any) -> PolarsExpr:
            pos, kwds = extract_args_kwargs(args, kwargs)
            return self._compliant_expr._with_native(
                getattr(self._compliant_expr.native.cat, attr)(*pos, **kwds)
            )

        return func
+
+
class PolarsExprNameNamespace:
    """`.name` namespace: delegates to the native expression's `name` accessor."""

    def __init__(self, expr: PolarsExpr) -> None:
        self._compliant_expr = expr

    def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]:
        def func(*args: Any, **kwargs: Any) -> PolarsExpr:
            pos, kwds = extract_args_kwargs(args, kwargs)
            return self._compliant_expr._with_native(
                getattr(self._compliant_expr.native.name, attr)(*pos, **kwds)
            )

        return func
+
+
class PolarsExprListNamespace:
    """`.list` namespace, with version-specific handling for `len`."""

    def __init__(self, expr: PolarsExpr) -> None:
        self._expr = expr

    def len(self) -> PolarsExpr:
        """Element-wise list length, normalized to UInt32 across versions."""
        native_expr = self._expr._native_expr
        native_result = native_expr.list.len()

        if self._expr._backend_version < (1, 16): # pragma: no cover
            # Pre-1.16: also keep nulls null (old Polars returned a length
            # for null entries) and force the UInt32 dtype.
            native_result = (
                pl.when(~native_expr.is_null()).then(native_result).cast(pl.UInt32())
            )
        elif self._expr._backend_version < (1, 17): # pragma: no cover
            # 1.16.x: dtype differs; cast to UInt32 for consistency.
            native_result = native_result.cast(pl.UInt32())

        return self._expr._with_native(native_result)

    # TODO(FBruzzesi): Remove `pragma: no cover` once other namespace methods are added
    def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]: # pragma: no cover
        def func(*args: Any, **kwargs: Any) -> PolarsExpr:
            pos, kwds = extract_args_kwargs(args, kwargs)
            return self._expr._with_native(
                getattr(self._expr.native.list, attr)(*pos, **kwds)
            )

        return func
+
+
class PolarsExprStructNamespace:
    """`.struct` namespace: delegates to the native expression's `struct` accessor."""

    def __init__(self, expr: PolarsExpr) -> None:
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[[Any], PolarsExpr]: # pragma: no cover
        def func(*args: Any, **kwargs: Any) -> PolarsExpr:
            pos, kwds = extract_args_kwargs(args, kwargs)
            return self._expr._with_native(
                getattr(self._expr.native.struct, attr)(*pos, **kwds)
            )

        return func
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_polars/group_by.py
new file mode 100644
index 0000000..e29c3e2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/group_by.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Iterator, Sequence, cast
+
+from narwhals._utils import is_sequence_of
+
+if TYPE_CHECKING:
+ from polars.dataframe.group_by import GroupBy as NativeGroupBy
+ from polars.lazyframe.group_by import LazyGroupBy as NativeLazyGroupBy
+
+ from narwhals._polars.dataframe import PolarsDataFrame, PolarsLazyFrame
+ from narwhals._polars.expr import PolarsExpr
+
+
class PolarsGroupBy:
    """Eager group-by wrapper over a native Polars `GroupBy`."""

    _compliant_frame: PolarsDataFrame
    _grouped: NativeGroupBy
    _drop_null_keys: bool
    _output_names: Sequence[str]

    @property
    def compliant(self) -> PolarsDataFrame:
        # The (possibly null-filtered) frame being grouped.
        return self._compliant_frame

    def __init__(
        self,
        df: PolarsDataFrame,
        keys: Sequence[PolarsExpr] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        self._keys = list(keys)
        # NOTE(review): when keys are expressions, `drop_nulls(keys)` receives
        # expressions rather than column names — confirm upstream callers.
        self._compliant_frame = df.drop_nulls(keys) if drop_null_keys else df
        # Keys may be plain column names or compliant expressions; unwrap the
        # latter to their native counterparts.
        self._grouped = (
            self.compliant.native.group_by(keys)
            if is_sequence_of(keys, str)
            else self.compliant.native.group_by(arg.native for arg in keys)
        )

    def agg(self, *aggs: PolarsExpr) -> PolarsDataFrame:
        """Apply aggregation expressions to each group."""
        agg_result = self._grouped.agg(arg.native for arg in aggs)
        return self.compliant._with_native(agg_result)

    def __iter__(self) -> Iterator[tuple[tuple[str, ...], PolarsDataFrame]]:
        # Yield (key tuple, sub-frame) pairs like native Polars iteration.
        for key, df in self._grouped:
            yield tuple(cast("str", key)), self.compliant._with_native(df)
+
+
class PolarsLazyGroupBy:
    """Lazy group-by wrapper over a native Polars `LazyGroupBy`."""

    _compliant_frame: PolarsLazyFrame
    _grouped: NativeLazyGroupBy
    _drop_null_keys: bool
    _output_names: Sequence[str]

    @property
    def compliant(self) -> PolarsLazyFrame:
        # The (possibly null-filtered) frame being grouped.
        return self._compliant_frame

    def __init__(
        self,
        df: PolarsLazyFrame,
        keys: Sequence[PolarsExpr] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        self._keys = list(keys)
        self._compliant_frame = df.drop_nulls(keys) if drop_null_keys else df
        # Keys may be plain column names or compliant expressions; unwrap the
        # latter to their native counterparts.
        self._grouped = (
            self.compliant.native.group_by(keys)
            if is_sequence_of(keys, str)
            else self.compliant.native.group_by(arg.native for arg in keys)
        )

    def agg(self, *aggs: PolarsExpr) -> PolarsLazyFrame:
        """Apply aggregation expressions to each group (still lazy)."""
        agg_result = self._grouped.agg(arg.native for arg in aggs)
        return self.compliant._with_native(agg_result)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_polars/namespace.py
new file mode 100644
index 0000000..4dec34c
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/namespace.py
@@ -0,0 +1,313 @@
+from __future__ import annotations
+
+import operator
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterable,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+
+import polars as pl
+
+from narwhals._polars.expr import PolarsExpr
+from narwhals._polars.series import PolarsSeries
+from narwhals._polars.utils import extract_args_kwargs, narwhals_to_native_dtype
+from narwhals._utils import Implementation, requires
+from narwhals.dependencies import is_numpy_array_2d
+from narwhals.dtypes import DType
+
+if TYPE_CHECKING:
+ from datetime import timezone
+
+ from narwhals._compliant import CompliantSelectorNamespace, CompliantWhen
+ from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame
+ from narwhals._polars.typing import FrameT
+ from narwhals._utils import Version, _FullContext
+ from narwhals.schema import Schema
+ from narwhals.typing import Into1DArray, IntoDType, TimeUnit, _2DArray
+
+
+class PolarsNamespace:
+ all: Method[PolarsExpr]
+ col: Method[PolarsExpr]
+ exclude: Method[PolarsExpr]
+ all_horizontal: Method[PolarsExpr]
+ any_horizontal: Method[PolarsExpr]
+ sum_horizontal: Method[PolarsExpr]
+ min_horizontal: Method[PolarsExpr]
+ max_horizontal: Method[PolarsExpr]
+
+ # NOTE: `pyright` accepts, `mypy` doesn't highlight the issue
+ # error: Type argument "PolarsExpr" of "CompliantWhen" must be a subtype of "CompliantExpr[Any, Any]"
+ when: Method[CompliantWhen[PolarsDataFrame, PolarsSeries, PolarsExpr]] # type: ignore[type-var]
+
+ def __init__(self, *, backend_version: tuple[int, ...], version: Version) -> None:
+ self._backend_version = backend_version
+ self._implementation = Implementation.POLARS
+ self._version = version
+
+ def __getattr__(self, attr: str) -> Any:
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._expr(
+ getattr(pl, attr)(*pos, **kwds),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ return func
+
+ @property
+ def _dataframe(self) -> type[PolarsDataFrame]:
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame
+
+ @property
+ def _lazyframe(self) -> type[PolarsLazyFrame]:
+ from narwhals._polars.dataframe import PolarsLazyFrame
+
+ return PolarsLazyFrame
+
+ @property
+ def _expr(self) -> type[PolarsExpr]:
+ return PolarsExpr
+
+ @property
+ def _series(self) -> type[PolarsSeries]:
+ return PolarsSeries
+
+ @overload
+ def from_native(self, data: pl.DataFrame, /) -> PolarsDataFrame: ...
+ @overload
+ def from_native(self, data: pl.LazyFrame, /) -> PolarsLazyFrame: ...
+ @overload
+ def from_native(self, data: pl.Series, /) -> PolarsSeries: ...
+ def from_native(
+ self, data: pl.DataFrame | pl.LazyFrame | pl.Series | Any, /
+ ) -> PolarsDataFrame | PolarsLazyFrame | PolarsSeries:
+ if self._dataframe._is_native(data):
+ return self._dataframe.from_native(data, context=self)
+ elif self._series._is_native(data):
+ return self._series.from_native(data, context=self)
+ elif self._lazyframe._is_native(data):
+ return self._lazyframe.from_native(data, context=self)
+ else: # pragma: no cover
+ msg = f"Unsupported type: {type(data).__name__!r}"
+ raise TypeError(msg)
+
+ @overload
+ def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> PolarsSeries: ...
+
+ @overload
+ def from_numpy(
+ self,
+ data: _2DArray,
+ /,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None,
+ ) -> PolarsDataFrame: ...
+
+ def from_numpy(
+ self,
+ data: Into1DArray | _2DArray,
+ /,
+ schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+ ) -> PolarsDataFrame | PolarsSeries:
+ if is_numpy_array_2d(data):
+ return self._dataframe.from_numpy(data, schema=schema, context=self)
+ return self._series.from_numpy(data, context=self) # pragma: no cover
+
+ @requires.backend_version(
+ (1, 0, 0), "Please use `col` for columns selection instead."
+ )
+ def nth(self, *indices: int) -> PolarsExpr:
+ return self._expr(
+ pl.nth(*indices), version=self._version, backend_version=self._backend_version
+ )
+
+ def len(self) -> PolarsExpr:
+ if self._backend_version < (0, 20, 5):
+ return self._expr(
+ pl.count().alias("len"),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+ return self._expr(
+ pl.len(), version=self._version, backend_version=self._backend_version
+ )
+
+ def concat(
+ self,
+ items: Iterable[FrameT],
+ *,
+ how: Literal["vertical", "horizontal", "diagonal"],
+ ) -> PolarsDataFrame | PolarsLazyFrame:
+ result = pl.concat((item.native for item in items), how=how)
+ if isinstance(result, pl.DataFrame):
+ return self._dataframe(
+ result, backend_version=self._backend_version, version=self._version
+ )
+ return self._lazyframe.from_native(result, context=self)
+
+ def lit(self, value: Any, dtype: IntoDType | None) -> PolarsExpr:
+ if dtype is not None:
+ return self._expr(
+ pl.lit(
+ value,
+ dtype=narwhals_to_native_dtype(
+ dtype, self._version, self._backend_version
+ ),
+ ),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+ return self._expr(
+ pl.lit(value), version=self._version, backend_version=self._backend_version
+ )
+
+ def mean_horizontal(self, *exprs: PolarsExpr) -> PolarsExpr:
+ if self._backend_version < (0, 20, 8):
+ return self._expr(
+ pl.sum_horizontal(e._native_expr for e in exprs)
+ / pl.sum_horizontal(1 - e.is_null()._native_expr for e in exprs),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ return self._expr(
+ pl.mean_horizontal(e._native_expr for e in exprs),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def concat_str(
+ self, *exprs: PolarsExpr, separator: str, ignore_nulls: bool
+ ) -> PolarsExpr:
+ pl_exprs: list[pl.Expr] = [expr._native_expr for expr in exprs]
+
+ if self._backend_version < (0, 20, 6):
+ null_mask = [expr.is_null() for expr in pl_exprs]
+ sep = pl.lit(separator)
+
+ if not ignore_nulls:
+ null_mask_result = pl.any_horizontal(*null_mask)
+ output_expr = pl.reduce(
+ lambda x, y: x.cast(pl.String()) + sep + y.cast(pl.String()), # type: ignore[arg-type,return-value]
+ pl_exprs,
+ )
+ result = pl.when(~null_mask_result).then(output_expr)
+ else:
+ init_value, *values = [
+ pl.when(nm).then(pl.lit("")).otherwise(expr.cast(pl.String()))
+ for expr, nm in zip(pl_exprs, null_mask)
+ ]
+ separators = [
+ pl.when(~nm).then(sep).otherwise(pl.lit("")) for nm in null_mask[:-1]
+ ]
+
+ result = pl.fold( # type: ignore[assignment]
+ acc=init_value,
+ function=operator.add,
+ exprs=[s + v for s, v in zip(separators, values)],
+ )
+
+ return self._expr(
+ result, version=self._version, backend_version=self._backend_version
+ )
+
+ return self._expr(
+ pl.concat_str(pl_exprs, separator=separator, ignore_nulls=ignore_nulls),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ # NOTE: Implementation is too different to annotate correctly (vs other `*SelectorNamespace`)
+ # 1. Others have lots of private stuff for code reuse
+ # i. None of that is useful here
+ # 2. We don't have a `PolarsSelector` abstraction, and just use `PolarsExpr`
+ @property
+ def selectors(self) -> CompliantSelectorNamespace[PolarsDataFrame, PolarsSeries]:
+ return cast(
+ "CompliantSelectorNamespace[PolarsDataFrame, PolarsSeries]",
+ PolarsSelectorNamespace(self),
+ )
+
+
+class PolarsSelectorNamespace:
+ def __init__(self, context: _FullContext, /) -> None:
+ self._implementation = context._implementation
+ self._backend_version = context._backend_version
+ self._version = context._version
+
+ def by_dtype(self, dtypes: Iterable[DType]) -> PolarsExpr:
+ native_dtypes = [
+ narwhals_to_native_dtype(
+ dtype, self._version, self._backend_version
+ ).__class__
+ if isinstance(dtype, type) and issubclass(dtype, DType)
+ else narwhals_to_native_dtype(dtype, self._version, self._backend_version)
+ for dtype in dtypes
+ ]
+ return PolarsExpr(
+ pl.selectors.by_dtype(native_dtypes),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def matches(self, pattern: str) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.matches(pattern=pattern),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def numeric(self) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.numeric(),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def boolean(self) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.boolean(),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def string(self) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.string(),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def categorical(self) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.categorical(),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def all(self) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.all(),
+ version=self._version,
+ backend_version=self._backend_version,
+ )
+
+ def datetime(
+ self,
+ time_unit: TimeUnit | Iterable[TimeUnit] | None,
+ time_zone: str | timezone | Iterable[str | timezone | None] | None,
+ ) -> PolarsExpr:
+ return PolarsExpr(
+ pl.selectors.datetime(time_unit=time_unit, time_zone=time_zone), # type: ignore[arg-type]
+ version=self._version,
+ backend_version=self._backend_version,
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/series.py b/venv/lib/python3.8/site-packages/narwhals/_polars/series.py
new file mode 100644
index 0000000..2a4325e
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/series.py
@@ -0,0 +1,757 @@
+from __future__ import annotations
+
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterable,
+ Iterator,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+
+import polars as pl
+
+from narwhals._polars.utils import (
+ catch_polars_exception,
+ extract_args_kwargs,
+ extract_native,
+ narwhals_to_native_dtype,
+ native_to_narwhals_dtype,
+)
+from narwhals._utils import Implementation, requires, validate_backend_version
+from narwhals.dependencies import is_numpy_array_1d
+
+if TYPE_CHECKING:
+ from types import ModuleType
+ from typing import TypeVar
+
+ import pandas as pd
+ import pyarrow as pa
+ from typing_extensions import Self, TypeIs
+
+ from narwhals._polars.dataframe import Method, PolarsDataFrame
+ from narwhals._polars.expr import PolarsExpr
+ from narwhals._polars.namespace import PolarsNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dtypes import DType
+ from narwhals.series import Series
+ from narwhals.typing import Into1DArray, IntoDType, MultiIndexSelector, _1DArray
+
+ T = TypeVar("T")
+
+
+# Series methods where PolarsSeries just defers to Polars.Series directly.
+INHERITED_METHODS = frozenset(
+ [
+ "__add__",
+ "__and__",
+ "__floordiv__",
+ "__invert__",
+ "__iter__",
+ "__mod__",
+ "__mul__",
+ "__or__",
+ "__pow__",
+ "__radd__",
+ "__rand__",
+ "__rfloordiv__",
+ "__rmod__",
+ "__rmul__",
+ "__ror__",
+ "__rsub__",
+ "__rtruediv__",
+ "__sub__",
+ "__truediv__",
+ "abs",
+ "all",
+ "any",
+ "arg_max",
+ "arg_min",
+ "arg_true",
+ "clip",
+ "count",
+ "cum_max",
+ "cum_min",
+ "cum_prod",
+ "cum_sum",
+ "diff",
+ "drop_nulls",
+ "exp",
+ "fill_null",
+ "filter",
+ "gather_every",
+ "head",
+ "is_between",
+ "is_finite",
+ "is_first_distinct",
+ "is_in",
+ "is_last_distinct",
+ "is_null",
+ "is_sorted",
+ "is_unique",
+ "item",
+ "len",
+ "log",
+ "max",
+ "mean",
+ "min",
+ "mode",
+ "n_unique",
+ "null_count",
+ "quantile",
+ "rank",
+ "round",
+ "sample",
+ "shift",
+ "skew",
+ "std",
+ "sum",
+ "tail",
+ "to_arrow",
+ "to_frame",
+ "to_list",
+ "to_pandas",
+ "unique",
+ "var",
+ "zip_with",
+ ]
+)
+
+
+class PolarsSeries:
+ def __init__(
+ self, series: pl.Series, *, backend_version: tuple[int, ...], version: Version
+ ) -> None:
+ self._native_series: pl.Series = series
+ self._backend_version = backend_version
+ self._implementation = Implementation.POLARS
+ self._version = version
+ validate_backend_version(self._implementation, self._backend_version)
+
+ def __repr__(self) -> str: # pragma: no cover
+ return "PolarsSeries"
+
+ def __narwhals_namespace__(self) -> PolarsNamespace:
+ from narwhals._polars.namespace import PolarsNamespace
+
+ return PolarsNamespace(
+ backend_version=self._backend_version, version=self._version
+ )
+
+ def __narwhals_series__(self) -> Self:
+ return self
+
+ def __native_namespace__(self) -> ModuleType:
+ if self._implementation is Implementation.POLARS:
+ return self._implementation.to_native_namespace()
+
+ msg = f"Expected polars, got: {type(self._implementation)}" # pragma: no cover
+ raise AssertionError(msg)
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native, backend_version=self._backend_version, version=version
+ )
+
+ @classmethod
+ def from_iterable(
+ cls,
+ data: Iterable[Any],
+ *,
+ context: _FullContext,
+ name: str = "",
+ dtype: IntoDType | None = None,
+ ) -> Self:
+ version = context._version
+ backend_version = context._backend_version
+ dtype_pl = (
+ narwhals_to_native_dtype(dtype, version, backend_version) if dtype else None
+ )
+ # NOTE: `Iterable` is fine, annotation is overly narrow
+ # https://github.com/pola-rs/polars/blob/82d57a4ee41f87c11ca1b1af15488459727efdd7/py-polars/polars/series/series.py#L332-L333
+ native = pl.Series(name=name, values=cast("Sequence[Any]", data), dtype=dtype_pl)
+ return cls.from_native(native, context=context)
+
+ @staticmethod
+ def _is_native(obj: pl.Series | Any) -> TypeIs[pl.Series]:
+ return isinstance(obj, pl.Series)
+
+ @classmethod
+ def from_native(cls, data: pl.Series, /, *, context: _FullContext) -> Self:
+ return cls(
+ data, backend_version=context._backend_version, version=context._version
+ )
+
+ @classmethod
+ def from_numpy(cls, data: Into1DArray, /, *, context: _FullContext) -> Self:
+ native = pl.Series(data if is_numpy_array_1d(data) else [data])
+ return cls.from_native(native, context=context)
+
+ def to_narwhals(self) -> Series[pl.Series]:
+ return self._version.series(self, level="full")
+
+ def _with_native(self, series: pl.Series) -> Self:
+ return self.__class__(
+ series, backend_version=self._backend_version, version=self._version
+ )
+
+ @overload
+ def _from_native_object(self, series: pl.Series) -> Self: ...
+
+ @overload
+ def _from_native_object(self, series: pl.DataFrame) -> PolarsDataFrame: ...
+
+ @overload
+ def _from_native_object(self, series: T) -> T: ...
+
+ def _from_native_object(
+ self, series: pl.Series | pl.DataFrame | T
+ ) -> Self | PolarsDataFrame | T:
+ if self._is_native(series):
+ return self._with_native(series)
+ if isinstance(series, pl.DataFrame):
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame.from_native(series, context=self)
+ # scalar
+ return series
+
+ def _to_expr(self) -> PolarsExpr:
+ return self.__narwhals_namespace__()._expr._from_series(self)
+
+ def __getattr__(self, attr: str) -> Any:
+ if attr not in INHERITED_METHODS:
+ msg = f"{self.__class__.__name__} has not attribute '{attr}'."
+ raise AttributeError(msg)
+
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._from_native_object(getattr(self.native, attr)(*pos, **kwds))
+
+ return func
+
+ def __len__(self) -> int:
+ return len(self.native)
+
+ @property
+ def name(self) -> str:
+ return self.native.name
+
+ @property
+ def dtype(self) -> DType:
+ return native_to_narwhals_dtype(
+ self.native.dtype, self._version, self._backend_version
+ )
+
+ @property
+ def native(self) -> pl.Series:
+ return self._native_series
+
+ def alias(self, name: str) -> Self:
+ return self._from_native_object(self.native.alias(name))
+
+ def __getitem__(self, item: MultiIndexSelector[Self]) -> Any | Self:
+ if isinstance(item, PolarsSeries):
+ return self._from_native_object(self.native.__getitem__(item.native))
+ return self._from_native_object(self.native.__getitem__(item))
+
+ def cast(self, dtype: IntoDType) -> Self:
+ dtype_pl = narwhals_to_native_dtype(dtype, self._version, self._backend_version)
+ return self._with_native(self.native.cast(dtype_pl))
+
+ @requires.backend_version((1,))
+ def replace_strict(
+ self,
+ old: Sequence[Any] | Mapping[Any, Any],
+ new: Sequence[Any],
+ *,
+ return_dtype: IntoDType | None,
+ ) -> Self:
+ ser = self.native
+ dtype = (
+ narwhals_to_native_dtype(return_dtype, self._version, self._backend_version)
+ if return_dtype
+ else None
+ )
+ return self._with_native(ser.replace_strict(old, new, return_dtype=dtype))
+
+ def to_numpy(self, dtype: Any = None, *, copy: bool | None = None) -> _1DArray:
+ return self.__array__(dtype, copy=copy)
+
+ def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray:
+ if self._backend_version < (0, 20, 29):
+ return self.native.__array__(dtype=dtype)
+ return self.native.__array__(dtype=dtype, copy=copy)
+
+ def __eq__(self, other: object) -> Self: # type: ignore[override]
+ return self._with_native(self.native.__eq__(extract_native(other)))
+
+ def __ne__(self, other: object) -> Self: # type: ignore[override]
+ return self._with_native(self.native.__ne__(extract_native(other)))
+
+ # NOTE: `pyright` is being reasonable here
+ def __ge__(self, other: Any) -> Self:
+ return self._with_native(self.native.__ge__(extract_native(other))) # pyright: ignore[reportArgumentType]
+
+ def __gt__(self, other: Any) -> Self:
+ return self._with_native(self.native.__gt__(extract_native(other))) # pyright: ignore[reportArgumentType]
+
+ def __le__(self, other: Any) -> Self:
+ return self._with_native(self.native.__le__(extract_native(other))) # pyright: ignore[reportArgumentType]
+
+ def __lt__(self, other: Any) -> Self:
+ return self._with_native(self.native.__lt__(extract_native(other))) # pyright: ignore[reportArgumentType]
+
+ def __rpow__(self, other: PolarsSeries | Any) -> Self:
+ result = self.native.__rpow__(extract_native(other))
+ if self._backend_version < (1, 16, 1):
+ # Explicitly set alias to work around https://github.com/pola-rs/polars/issues/20071
+ result = result.alias(self.name)
+ return self._with_native(result)
+
+ def is_nan(self) -> Self:
+ try:
+ native_is_nan = self.native.is_nan()
+ except Exception as e: # noqa: BLE001
+ raise catch_polars_exception(e, self._backend_version) from None
+ if self._backend_version < (1, 18): # pragma: no cover
+ select = pl.when(self.native.is_not_null()).then(native_is_nan)
+ return self._with_native(pl.select(select)[self.name])
+ return self._with_native(native_is_nan)
+
+ def median(self) -> Any:
+ from narwhals.exceptions import InvalidOperationError
+
+ if not self.dtype.is_numeric():
+ msg = "`median` operation not supported for non-numeric input type."
+ raise InvalidOperationError(msg)
+
+ return self.native.median()
+
+ def to_dummies(self, *, separator: str, drop_first: bool) -> PolarsDataFrame:
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ if self._backend_version < (0, 20, 15):
+ has_nulls = self.native.is_null().any()
+ result = self.native.to_dummies(separator=separator)
+ output_columns = result.columns
+ if drop_first:
+ _ = output_columns.pop(int(has_nulls))
+
+ result = result.select(output_columns)
+ else:
+ result = self.native.to_dummies(separator=separator, drop_first=drop_first)
+ result = result.with_columns(pl.all().cast(pl.Int8))
+ return PolarsDataFrame.from_native(result, context=self)
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None,
+ span: float | None,
+ half_life: float | None,
+ alpha: float | None,
+ adjust: bool,
+ min_samples: int,
+ ignore_nulls: bool,
+ ) -> Self:
+ extra_kwargs = (
+ {"min_periods": min_samples}
+ if self._backend_version < (1, 21, 0)
+ else {"min_samples": min_samples}
+ )
+
+ native_result = self.native.ewm_mean(
+ com=com,
+ span=span,
+ half_life=half_life,
+ alpha=alpha,
+ adjust=adjust,
+ ignore_nulls=ignore_nulls,
+ **extra_kwargs,
+ )
+ if self._backend_version < (1,): # pragma: no cover
+ return self._with_native(
+ pl.select(
+ pl.when(~self.native.is_null()).then(native_result).otherwise(None)
+ )[self.native.name]
+ )
+
+ return self._with_native(native_result)
+
+ @requires.backend_version((1,))
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ extra_kwargs: dict[str, Any] = (
+ {"min_periods": min_samples}
+ if self._backend_version < (1, 21, 0)
+ else {"min_samples": min_samples}
+ )
+ return self._with_native(
+ self.native.rolling_var(
+ window_size=window_size, center=center, ddof=ddof, **extra_kwargs
+ )
+ )
+
+ @requires.backend_version((1,))
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ extra_kwargs: dict[str, Any] = (
+ {"min_periods": min_samples}
+ if self._backend_version < (1, 21, 0)
+ else {"min_samples": min_samples}
+ )
+ return self._with_native(
+ self.native.rolling_std(
+ window_size=window_size, center=center, ddof=ddof, **extra_kwargs
+ )
+ )
+
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ extra_kwargs: dict[str, Any] = (
+ {"min_periods": min_samples}
+ if self._backend_version < (1, 21, 0)
+ else {"min_samples": min_samples}
+ )
+ return self._with_native(
+ self.native.rolling_sum(
+ window_size=window_size, center=center, **extra_kwargs
+ )
+ )
+
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ extra_kwargs: dict[str, Any] = (
+ {"min_periods": min_samples}
+ if self._backend_version < (1, 21, 0)
+ else {"min_samples": min_samples}
+ )
+ return self._with_native(
+ self.native.rolling_mean(
+ window_size=window_size, center=center, **extra_kwargs
+ )
+ )
+
+ def sort(self, *, descending: bool, nulls_last: bool) -> Self:
+ if self._backend_version < (0, 20, 6):
+ result = self.native.sort(descending=descending)
+
+ if nulls_last:
+ is_null = result.is_null()
+ result = pl.concat([result.filter(~is_null), result.filter(is_null)])
+ else:
+ result = self.native.sort(descending=descending, nulls_last=nulls_last)
+
+ return self._with_native(result)
+
+ def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
+ s = self.native.clone().scatter(indices, extract_native(values))
+ return self._with_native(s)
+
+ def value_counts(
+ self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
+ ) -> PolarsDataFrame:
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ if self._backend_version < (1, 0, 0):
+ value_name_ = name or ("proportion" if normalize else "count")
+
+ result = self.native.value_counts(sort=sort, parallel=parallel).select(
+ **{
+ (self.native.name): pl.col(self.native.name),
+ value_name_: pl.col("count") / pl.sum("count")
+ if normalize
+ else pl.col("count"),
+ }
+ )
+ else:
+ result = self.native.value_counts(
+ sort=sort, parallel=parallel, name=name, normalize=normalize
+ )
+ return PolarsDataFrame.from_native(result, context=self)
+
+ def cum_count(self, *, reverse: bool) -> Self:
+ if self._backend_version < (0, 20, 4):
+ not_null_series = ~self.native.is_null()
+ result = not_null_series.cum_sum(reverse=reverse)
+ else:
+ result = self.native.cum_count(reverse=reverse)
+
+ return self._with_native(result)
+
+ def __contains__(self, other: Any) -> bool:
+ try:
+ return self.native.__contains__(other)
+ except Exception as e: # noqa: BLE001
+ raise catch_polars_exception(e, self._backend_version) from None
+
+ def hist( # noqa: C901, PLR0912
+ self,
+ bins: list[float | int] | None,
+ *,
+ bin_count: int | None,
+ include_breakpoint: bool,
+ ) -> PolarsDataFrame:
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ if (bins is not None and len(bins) <= 1) or (bin_count == 0): # pragma: no cover
+ data: list[pl.Series] = []
+ if include_breakpoint:
+ data.append(pl.Series("breakpoint", [], dtype=pl.Float64))
+ data.append(pl.Series("count", [], dtype=pl.UInt32))
+ return PolarsDataFrame.from_native(pl.DataFrame(data), context=self)
+
+ if self.native.count() < 1:
+ data_dict: dict[str, Sequence[Any] | pl.Series]
+ if bins is not None:
+ data_dict = {
+ "breakpoint": bins[1:],
+ "count": pl.zeros(n=len(bins) - 1, dtype=pl.Int64, eager=True),
+ }
+ elif (bin_count is not None) and bin_count == 1:
+ data_dict = {"breakpoint": [1.0], "count": [0]}
+ elif (bin_count is not None) and bin_count > 1:
+ data_dict = {
+ "breakpoint": pl.int_range(1, bin_count + 1, eager=True) / bin_count,
+ "count": pl.zeros(n=bin_count, dtype=pl.Int64, eager=True),
+ }
+ else: # pragma: no cover
+ msg = (
+ "congratulations, you entered unreachable code - please report a bug"
+ )
+ raise AssertionError(msg)
+ if not include_breakpoint:
+ del data_dict["breakpoint"]
+ return PolarsDataFrame.from_native(pl.DataFrame(data_dict), context=self)
+
+ # polars <1.15 does not adjust the bins when they have equivalent min/max
+ # polars <1.5 with bin_count=...
+ # returns bins that range from -inf to +inf and has bin_count + 1 bins.
+ # for compat: convert `bin_count=` call to `bins=`
+ if (self._backend_version < (1, 15)) and (
+ bin_count is not None
+ ): # pragma: no cover
+ lower = cast("float", self.native.min())
+ upper = cast("float", self.native.max())
+ if lower == upper:
+ width = 1 / bin_count
+ lower -= 0.5
+ upper += 0.5
+ else:
+ width = (upper - lower) / bin_count
+
+ bins = (pl.int_range(0, bin_count + 1, eager=True) * width + lower).to_list()
+ bin_count = None
+
+ # Polars inconsistently handles NaN values when computing histograms
+ # against predefined bins: https://github.com/pola-rs/polars/issues/21082
+ series = self.native
+ if self._backend_version < (1, 15) or bins is not None:
+ series = series.set(series.is_nan(), None)
+
+ df = series.hist(
+ bins,
+ bin_count=bin_count,
+ include_category=False,
+ include_breakpoint=include_breakpoint,
+ )
+
+ if not include_breakpoint:
+ df.columns = ["count"]
+
+ if self._backend_version < (1, 0) and include_breakpoint:
+ df = df.rename({"break_point": "breakpoint"})
+
+ # polars<1.15 implicitly adds -inf and inf to either end of bins
+ if self._backend_version < (1, 15) and bins is not None: # pragma: no cover
+ r = pl.int_range(0, len(df))
+ df = df.filter((r > 0) & (r < len(df) - 1))
+
+ # polars<1.27 makes the lowest bin a left/right closed interval.
+ if self._backend_version < (1, 27) and bins is not None:
+ df[0, "count"] += (series == bins[0]).sum()
+
+ return PolarsDataFrame.from_native(df, context=self)
+
+ def to_polars(self) -> pl.Series:
+ return self.native
+
+ @property
+ def dt(self) -> PolarsSeriesDateTimeNamespace:
+ return PolarsSeriesDateTimeNamespace(self)
+
+ @property
+ def str(self) -> PolarsSeriesStringNamespace:
+ return PolarsSeriesStringNamespace(self)
+
+ @property
+ def cat(self) -> PolarsSeriesCatNamespace:
+ return PolarsSeriesCatNamespace(self)
+
+ @property
+ def struct(self) -> PolarsSeriesStructNamespace:
+ return PolarsSeriesStructNamespace(self)
+
+ __add__: Method[Self]
+ __and__: Method[Self]
+ __floordiv__: Method[Self]
+ __invert__: Method[Self]
+ __iter__: Method[Iterator[Any]]
+ __mod__: Method[Self]
+ __mul__: Method[Self]
+ __or__: Method[Self]
+ __pow__: Method[Self]
+ __radd__: Method[Self]
+ __rand__: Method[Self]
+ __rfloordiv__: Method[Self]
+ __rmod__: Method[Self]
+ __rmul__: Method[Self]
+ __ror__: Method[Self]
+ __rsub__: Method[Self]
+ __rtruediv__: Method[Self]
+ __sub__: Method[Self]
+ __truediv__: Method[Self]
+ abs: Method[Self]
+ all: Method[bool]
+ any: Method[bool]
+ arg_max: Method[int]
+ arg_min: Method[int]
+ arg_true: Method[Self]
+ clip: Method[Self]
+ count: Method[int]
+ cum_max: Method[Self]
+ cum_min: Method[Self]
+ cum_prod: Method[Self]
+ cum_sum: Method[Self]
+ diff: Method[Self]
+ drop_nulls: Method[Self]
+ exp: Method[Self]
+ fill_null: Method[Self]
+ filter: Method[Self]
+ gather_every: Method[Self]
+ head: Method[Self]
+ is_between: Method[Self]
+ is_finite: Method[Self]
+ is_first_distinct: Method[Self]
+ is_in: Method[Self]
+ is_last_distinct: Method[Self]
+ is_null: Method[Self]
+ is_sorted: Method[bool]
+ is_unique: Method[Self]
+ item: Method[Any]
+ len: Method[int]
+ log: Method[Self]
+ max: Method[Any]
+ mean: Method[float]
+ min: Method[Any]
+ mode: Method[Self]
+ n_unique: Method[int]
+ null_count: Method[int]
+ quantile: Method[float]
+ rank: Method[Self]
+ round: Method[Self]
+ sample: Method[Self]
+ shift: Method[Self]
+ skew: Method[float | None]
+ std: Method[float]
+ sum: Method[float]
+ tail: Method[Self]
+ to_arrow: Method[pa.Array[Any]]
+ to_frame: Method[PolarsDataFrame]
+ to_list: Method[list[Any]]
+ to_pandas: Method[pd.Series[Any]]
+ unique: Method[Self]
+ var: Method[float]
+ zip_with: Method[Self]
+
+ @property
+ def list(self) -> PolarsSeriesListNamespace:
+ return PolarsSeriesListNamespace(self)
+
+
+class PolarsSeriesDateTimeNamespace:
+ def __init__(self, series: PolarsSeries) -> None:
+ self._compliant_series = series
+
+ def __getattr__(self, attr: str) -> Any:
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._compliant_series._with_native(
+ getattr(self._compliant_series.native.dt, attr)(*pos, **kwds)
+ )
+
+ return func
+
+
+class PolarsSeriesStringNamespace:
+ def __init__(self, series: PolarsSeries) -> None:
+ self._compliant_series = series
+
+ def __getattr__(self, attr: str) -> Any:
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._compliant_series._with_native(
+ getattr(self._compliant_series.native.str, attr)(*pos, **kwds)
+ )
+
+ return func
+
+
+class PolarsSeriesCatNamespace:
+ def __init__(self, series: PolarsSeries) -> None:
+ self._compliant_series = series
+
+ def __getattr__(self, attr: str) -> Any:
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._compliant_series._with_native(
+ getattr(self._compliant_series.native.cat, attr)(*pos, **kwds)
+ )
+
+ return func
+
+
+class PolarsSeriesListNamespace:
+ def __init__(self, series: PolarsSeries) -> None:
+ self._series = series
+
+ def len(self) -> PolarsSeries:
+ native_series = self._series.native
+ native_result = native_series.list.len()
+
+ if self._series._backend_version < (1, 16): # pragma: no cover
+ native_result = pl.select(
+ pl.when(~native_series.is_null()).then(native_result).otherwise(None)
+ )[native_series.name].cast(pl.UInt32())
+
+ elif self._series._backend_version < (1, 17): # pragma: no cover
+ native_result = native_series.cast(pl.UInt32())
+
+ return self._series._with_native(native_result)
+
+ # TODO(FBruzzesi): Remove `pragma: no cover` once other namespace methods are added
+ def __getattr__(self, attr: str) -> Any: # pragma: no cover
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._series._with_native(
+ getattr(self._series.native.list, attr)(*pos, **kwds)
+ )
+
+ return func
+
+
+class PolarsSeriesStructNamespace:
+ def __init__(self, series: PolarsSeries) -> None:
+ self._compliant_series = series
+
+ def __getattr__(self, attr: str) -> Any:
+ def func(*args: Any, **kwargs: Any) -> Any:
+ pos, kwds = extract_args_kwargs(args, kwargs)
+ return self._compliant_series._with_native(
+ getattr(self._compliant_series.native.struct, attr)(*pos, **kwds)
+ )
+
+ return func
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/typing.py b/venv/lib/python3.8/site-packages/narwhals/_polars/typing.py
new file mode 100644
index 0000000..88a6f75
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/typing.py
@@ -0,0 +1,22 @@
+from __future__ import annotations # pragma: no cover
+
+from typing import (
+ TYPE_CHECKING, # pragma: no cover
+ Union, # pragma: no cover
+)
+
+if TYPE_CHECKING:
+ import sys
+ from typing import TypeVar
+
+ if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+ else:
+ from typing_extensions import TypeAlias
+
+ from narwhals._polars.dataframe import PolarsDataFrame, PolarsLazyFrame
+ from narwhals._polars.expr import PolarsExpr
+ from narwhals._polars.series import PolarsSeries
+
+ IntoPolarsExpr: TypeAlias = Union[PolarsExpr, PolarsSeries]
+ FrameT = TypeVar("FrameT", PolarsDataFrame, PolarsLazyFrame)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_polars/utils.py b/venv/lib/python3.8/site-packages/narwhals/_polars/utils.py
new file mode 100644
index 0000000..bb15dfb
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_polars/utils.py
@@ -0,0 +1,249 @@
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Iterable,
+ Iterator,
+ Mapping,
+ TypeVar,
+ cast,
+ overload,
+)
+
+import polars as pl
+
+from narwhals._utils import Version, _DeferredIterable, isinstance_or_issubclass
+from narwhals.exceptions import (
+ ColumnNotFoundError,
+ ComputeError,
+ DuplicateError,
+ InvalidOperationError,
+ NarwhalsError,
+ ShapeError,
+)
+
+if TYPE_CHECKING:
+ from typing_extensions import TypeIs
+
+ from narwhals._utils import _StoresNative
+ from narwhals.dtypes import DType
+ from narwhals.typing import IntoDType
+
+ T = TypeVar("T")
+ NativeT = TypeVar(
+ "NativeT", bound="pl.DataFrame | pl.LazyFrame | pl.Series | pl.Expr"
+ )
+
+
+@overload
+def extract_native(obj: _StoresNative[NativeT]) -> NativeT: ...
+@overload
+def extract_native(obj: T) -> T: ...
+def extract_native(obj: _StoresNative[NativeT] | T) -> NativeT | T:
+ return obj.native if _is_compliant_polars(obj) else obj
+
+
+def _is_compliant_polars(
+ obj: _StoresNative[NativeT] | Any,
+) -> TypeIs[_StoresNative[NativeT]]:
+ from narwhals._polars.dataframe import PolarsDataFrame, PolarsLazyFrame
+ from narwhals._polars.expr import PolarsExpr
+ from narwhals._polars.series import PolarsSeries
+
+ return isinstance(obj, (PolarsDataFrame, PolarsLazyFrame, PolarsSeries, PolarsExpr))
+
+
+def extract_args_kwargs(
+ args: Iterable[Any], kwds: Mapping[str, Any], /
+) -> tuple[Iterator[Any], dict[str, Any]]:
+ it_args = (extract_native(arg) for arg in args)
+ return it_args, {k: extract_native(v) for k, v in kwds.items()}
+
+
+@lru_cache(maxsize=16)
+def native_to_narwhals_dtype( # noqa: C901, PLR0912
+ dtype: pl.DataType, version: Version, backend_version: tuple[int, ...]
+) -> DType:
+ dtypes = version.dtypes
+ if dtype == pl.Float64:
+ return dtypes.Float64()
+ if dtype == pl.Float32:
+ return dtypes.Float32()
+ if hasattr(pl, "Int128") and dtype == pl.Int128: # pragma: no cover
+ # Not available for Polars pre 1.8.0
+ return dtypes.Int128()
+ if dtype == pl.Int64:
+ return dtypes.Int64()
+ if dtype == pl.Int32:
+ return dtypes.Int32()
+ if dtype == pl.Int16:
+ return dtypes.Int16()
+ if dtype == pl.Int8:
+ return dtypes.Int8()
+ if hasattr(pl, "UInt128") and dtype == pl.UInt128: # pragma: no cover
+ # Not available for Polars pre 1.8.0
+ return dtypes.UInt128()
+ if dtype == pl.UInt64:
+ return dtypes.UInt64()
+ if dtype == pl.UInt32:
+ return dtypes.UInt32()
+ if dtype == pl.UInt16:
+ return dtypes.UInt16()
+ if dtype == pl.UInt8:
+ return dtypes.UInt8()
+ if dtype == pl.String:
+ return dtypes.String()
+ if dtype == pl.Boolean:
+ return dtypes.Boolean()
+ if dtype == pl.Object:
+ return dtypes.Object()
+ if dtype == pl.Categorical:
+ return dtypes.Categorical()
+ if isinstance_or_issubclass(dtype, pl.Enum):
+ if version is Version.V1:
+ return dtypes.Enum() # type: ignore[call-arg]
+ categories = _DeferredIterable(
+ dtype.categories.to_list
+ if backend_version >= (0, 20, 4)
+ else lambda: cast("list[str]", dtype.categories)
+ )
+ return dtypes.Enum(categories)
+ if dtype == pl.Date:
+ return dtypes.Date()
+ if isinstance_or_issubclass(dtype, pl.Datetime):
+ return (
+ dtypes.Datetime()
+ if dtype is pl.Datetime
+ else dtypes.Datetime(dtype.time_unit, dtype.time_zone)
+ )
+ if isinstance_or_issubclass(dtype, pl.Duration):
+ return (
+ dtypes.Duration()
+ if dtype is pl.Duration
+ else dtypes.Duration(dtype.time_unit)
+ )
+ if isinstance_or_issubclass(dtype, pl.Struct):
+ fields = [
+ dtypes.Field(name, native_to_narwhals_dtype(tp, version, backend_version))
+ for name, tp in dtype
+ ]
+ return dtypes.Struct(fields)
+ if isinstance_or_issubclass(dtype, pl.List):
+ return dtypes.List(
+ native_to_narwhals_dtype(dtype.inner, version, backend_version)
+ )
+ if isinstance_or_issubclass(dtype, pl.Array):
+ outer_shape = dtype.width if backend_version < (0, 20, 30) else dtype.size
+ return dtypes.Array(
+ native_to_narwhals_dtype(dtype.inner, version, backend_version), outer_shape
+ )
+ if dtype == pl.Decimal:
+ return dtypes.Decimal()
+ if dtype == pl.Time:
+ return dtypes.Time()
+ if dtype == pl.Binary:
+ return dtypes.Binary()
+ return dtypes.Unknown()
+
+
+def narwhals_to_native_dtype( # noqa: C901, PLR0912
+ dtype: IntoDType, version: Version, backend_version: tuple[int, ...]
+) -> pl.DataType:
+ dtypes = version.dtypes
+ if dtype == dtypes.Float64:
+ return pl.Float64()
+ if dtype == dtypes.Float32:
+ return pl.Float32()
+ if dtype == dtypes.Int128 and hasattr(pl, "Int128"):
+ # Not available for Polars pre 1.8.0
+ return pl.Int128()
+ if dtype == dtypes.Int64:
+ return pl.Int64()
+ if dtype == dtypes.Int32:
+ return pl.Int32()
+ if dtype == dtypes.Int16:
+ return pl.Int16()
+ if dtype == dtypes.Int8:
+ return pl.Int8()
+ if dtype == dtypes.UInt64:
+ return pl.UInt64()
+ if dtype == dtypes.UInt32:
+ return pl.UInt32()
+ if dtype == dtypes.UInt16:
+ return pl.UInt16()
+ if dtype == dtypes.UInt8:
+ return pl.UInt8()
+ if dtype == dtypes.String:
+ return pl.String()
+ if dtype == dtypes.Boolean:
+ return pl.Boolean()
+ if dtype == dtypes.Object: # pragma: no cover
+ return pl.Object()
+ if dtype == dtypes.Categorical:
+ return pl.Categorical()
+ if isinstance_or_issubclass(dtype, dtypes.Enum):
+ if version is Version.V1:
+ msg = "Converting to Enum is not supported in narwhals.stable.v1"
+ raise NotImplementedError(msg)
+ if isinstance(dtype, dtypes.Enum):
+ return pl.Enum(dtype.categories)
+ msg = "Can not cast / initialize Enum without categories present"
+ raise ValueError(msg)
+ if dtype == dtypes.Date:
+ return pl.Date()
+ if dtype == dtypes.Time:
+ return pl.Time()
+ if dtype == dtypes.Binary:
+ return pl.Binary()
+ if dtype == dtypes.Decimal:
+ msg = "Casting to Decimal is not supported yet."
+ raise NotImplementedError(msg)
+ if isinstance_or_issubclass(dtype, dtypes.Datetime):
+ return pl.Datetime(dtype.time_unit, dtype.time_zone) # type: ignore[arg-type]
+ if isinstance_or_issubclass(dtype, dtypes.Duration):
+ return pl.Duration(dtype.time_unit) # type: ignore[arg-type]
+ if isinstance_or_issubclass(dtype, dtypes.List):
+ return pl.List(narwhals_to_native_dtype(dtype.inner, version, backend_version))
+ if isinstance_or_issubclass(dtype, dtypes.Struct):
+ fields = [
+ pl.Field(
+ field.name,
+ narwhals_to_native_dtype(field.dtype, version, backend_version),
+ )
+ for field in dtype.fields
+ ]
+ return pl.Struct(fields)
+ if isinstance_or_issubclass(dtype, dtypes.Array): # pragma: no cover
+ size = dtype.size
+ kwargs = {"width": size} if backend_version < (0, 20, 30) else {"shape": size}
+ return pl.Array(
+ narwhals_to_native_dtype(dtype.inner, version, backend_version), **kwargs
+ )
+ return pl.Unknown() # pragma: no cover
+
+
+def catch_polars_exception(
+ exception: Exception, backend_version: tuple[int, ...]
+) -> NarwhalsError | Exception:
+ if isinstance(exception, pl.exceptions.ColumnNotFoundError):
+ return ColumnNotFoundError(str(exception))
+ elif isinstance(exception, pl.exceptions.ShapeError):
+ return ShapeError(str(exception))
+ elif isinstance(exception, pl.exceptions.InvalidOperationError):
+ return InvalidOperationError(str(exception))
+ elif isinstance(exception, pl.exceptions.DuplicateError):
+ return DuplicateError(str(exception))
+ elif isinstance(exception, pl.exceptions.ComputeError):
+ return ComputeError(str(exception))
+ if backend_version >= (1,) and isinstance(exception, pl.exceptions.PolarsError):
+ # Old versions of Polars didn't have PolarsError.
+ return NarwhalsError(str(exception)) # pragma: no cover
+ elif backend_version < (1,) and "polars.exceptions" in str(
+ type(exception)
+ ): # pragma: no cover
+ # Last attempt, for old Polars versions.
+ return NarwhalsError(str(exception))
+ # Just return exception as-is.
+ return exception
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/__init__.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/__init__.py
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/dataframe.py
new file mode 100644
index 0000000..c4ea73f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/dataframe.py
@@ -0,0 +1,531 @@
+from __future__ import annotations
+
+import warnings
+from functools import reduce
+from operator import and_
+from typing import TYPE_CHECKING, Any, Iterator, Mapping, Sequence
+
+from narwhals._namespace import is_native_spark_like
+from narwhals._spark_like.utils import (
+ evaluate_exprs,
+ import_functions,
+ import_native_dtypes,
+ import_window,
+ native_to_narwhals_dtype,
+)
+from narwhals._utils import (
+ Implementation,
+ find_stacklevel,
+ generate_temporary_column_name,
+ not_implemented,
+ parse_columns_to_drop,
+ parse_version,
+ validate_backend_version,
+)
+from narwhals.exceptions import InvalidOperationError
+from narwhals.typing import CompliantLazyFrame
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import pyarrow as pa
+ from sqlframe.base.column import Column
+ from sqlframe.base.dataframe import BaseDataFrame
+ from sqlframe.base.window import Window
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+ from narwhals._compliant.typing import CompliantDataFrameAny
+ from narwhals._spark_like.expr import SparkLikeExpr
+ from narwhals._spark_like.group_by import SparkLikeLazyGroupBy
+ from narwhals._spark_like.namespace import SparkLikeNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.dataframe import LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.typing import JoinStrategy, LazyUniqueKeepStrategy
+
+ SQLFrameDataFrame = BaseDataFrame[Any, Any, Any, Any, Any]
+
+Incomplete: TypeAlias = Any # pragma: no cover
+"""Marker for working code that fails type checking."""
+
+
+class SparkLikeLazyFrame(
+ CompliantLazyFrame[
+ "SparkLikeExpr", "SQLFrameDataFrame", "LazyFrame[SQLFrameDataFrame]"
+ ]
+):
+ def __init__(
+ self,
+ native_dataframe: SQLFrameDataFrame,
+ *,
+ backend_version: tuple[int, ...],
+ version: Version,
+ implementation: Implementation,
+ ) -> None:
+ self._native_frame: SQLFrameDataFrame = native_dataframe
+ self._backend_version = backend_version
+ self._implementation = implementation
+ self._version = version
+ self._cached_schema: dict[str, DType] | None = None
+ self._cached_columns: list[str] | None = None
+ validate_backend_version(self._implementation, self._backend_version)
+
+ @property
+ def _F(self): # type: ignore[no-untyped-def] # noqa: ANN202, N802
+ if TYPE_CHECKING:
+ from sqlframe.base import functions
+
+ return functions
+ else:
+ return import_functions(self._implementation)
+
+ @property
+ def _native_dtypes(self): # type: ignore[no-untyped-def] # noqa: ANN202
+ if TYPE_CHECKING:
+ from sqlframe.base import types
+
+ return types
+ else:
+ return import_native_dtypes(self._implementation)
+
+ @property
+ def _Window(self) -> type[Window]: # noqa: N802
+ if TYPE_CHECKING:
+ from sqlframe.base.window import Window
+
+ return Window
+ else:
+ return import_window(self._implementation)
+
+ @staticmethod
+ def _is_native(obj: SQLFrameDataFrame | Any) -> TypeIs[SQLFrameDataFrame]:
+ return is_native_spark_like(obj)
+
+ @classmethod
+ def from_native(cls, data: SQLFrameDataFrame, /, *, context: _FullContext) -> Self:
+ return cls(
+ data,
+ backend_version=context._backend_version,
+ version=context._version,
+ implementation=context._implementation,
+ )
+
+ def to_narwhals(self) -> LazyFrame[SQLFrameDataFrame]:
+ return self._version.lazyframe(self, level="lazy")
+
+ def __native_namespace__(self) -> ModuleType: # pragma: no cover
+ return self._implementation.to_native_namespace()
+
+ def __narwhals_namespace__(self) -> SparkLikeNamespace:
+ from narwhals._spark_like.namespace import SparkLikeNamespace
+
+ return SparkLikeNamespace(
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def __narwhals_lazyframe__(self) -> Self:
+ return self
+
+ def _with_version(self, version: Version) -> Self:
+ return self.__class__(
+ self.native,
+ backend_version=self._backend_version,
+ version=version,
+ implementation=self._implementation,
+ )
+
+ def _with_native(self, df: SQLFrameDataFrame) -> Self:
+ return self.__class__(
+ df,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def _to_arrow_schema(self) -> pa.Schema: # pragma: no cover
+ import pyarrow as pa # ignore-banned-import
+
+ from narwhals._arrow.utils import narwhals_to_native_dtype
+
+ schema: list[tuple[str, pa.DataType]] = []
+ nw_schema = self.collect_schema()
+ native_schema = self.native.schema
+ for key, value in nw_schema.items():
+ try:
+ native_dtype = narwhals_to_native_dtype(value, self._version)
+ except Exception as exc: # noqa: BLE001,PERF203
+ native_spark_dtype = native_schema[key].dataType # type: ignore[index]
+ # If we can't convert the type, just set it to `pa.null`, and warn.
+ # Avoid the warning if we're starting from PySpark's void type.
+ # We can avoid the check when we introduce `nw.Null` dtype.
+ null_type = self._native_dtypes.NullType # pyright: ignore[reportAttributeAccessIssue]
+ if not isinstance(native_spark_dtype, null_type):
+ warnings.warn(
+ f"Could not convert dtype {native_spark_dtype} to PyArrow dtype, {exc!r}",
+ stacklevel=find_stacklevel(),
+ )
+ schema.append((key, pa.null()))
+ else:
+ schema.append((key, native_dtype))
+ return pa.schema(schema)
+
+ def _collect_to_arrow(self) -> pa.Table:
+ if self._implementation.is_pyspark() and self._backend_version < (4,):
+ import pyarrow as pa # ignore-banned-import
+
+ try:
+ return pa.Table.from_batches(self.native._collect_as_arrow())
+ except ValueError as exc:
+ if "at least one RecordBatch" in str(exc):
+ # Empty dataframe
+
+ data: dict[str, list[Any]] = {k: [] for k in self.columns}
+ pa_schema = self._to_arrow_schema()
+ return pa.Table.from_pydict(data, schema=pa_schema)
+ else: # pragma: no cover
+ raise
+ elif self._implementation.is_pyspark_connect() and self._backend_version < (4,):
+ import pyarrow as pa # ignore-banned-import
+
+ pa_schema = self._to_arrow_schema()
+ return pa.Table.from_pandas(self.native.toPandas(), schema=pa_schema)
+ else:
+ return self.native.toArrow()
+
+ def _iter_columns(self) -> Iterator[Column]:
+ for col in self.columns:
+ yield self._F.col(col)
+
+ @property
+ def columns(self) -> list[str]:
+ if self._cached_columns is None:
+ self._cached_columns = (
+ list(self.schema)
+ if self._cached_schema is not None
+ else self.native.columns
+ )
+ return self._cached_columns
+
+ def collect(
+ self, backend: ModuleType | Implementation | str | None, **kwargs: Any
+ ) -> CompliantDataFrameAny:
+ if backend is Implementation.PANDAS:
+ import pandas as pd # ignore-banned-import
+
+ from narwhals._pandas_like.dataframe import PandasLikeDataFrame
+
+ return PandasLikeDataFrame(
+ self.native.toPandas(),
+ implementation=Implementation.PANDAS,
+ backend_version=parse_version(pd),
+ version=self._version,
+ validate_column_names=True,
+ )
+
+ elif backend is None or backend is Implementation.PYARROW:
+ import pyarrow as pa # ignore-banned-import
+
+ from narwhals._arrow.dataframe import ArrowDataFrame
+
+ return ArrowDataFrame(
+ self._collect_to_arrow(),
+ backend_version=parse_version(pa),
+ version=self._version,
+ validate_column_names=True,
+ )
+
+ elif backend is Implementation.POLARS:
+ import polars as pl # ignore-banned-import
+ import pyarrow as pa # ignore-banned-import
+
+ from narwhals._polars.dataframe import PolarsDataFrame
+
+ return PolarsDataFrame(
+ pl.from_arrow(self._collect_to_arrow()), # type: ignore[arg-type]
+ backend_version=parse_version(pl),
+ version=self._version,
+ )
+
+ msg = f"Unsupported `backend` value: {backend}" # pragma: no cover
+ raise ValueError(msg) # pragma: no cover
+
+ def simple_select(self, *column_names: str) -> Self:
+ return self._with_native(self.native.select(*column_names))
+
+ def aggregate(self, *exprs: SparkLikeExpr) -> Self:
+ new_columns = evaluate_exprs(self, *exprs)
+
+ new_columns_list = [col.alias(col_name) for col_name, col in new_columns]
+ return self._with_native(self.native.agg(*new_columns_list))
+
+ def select(self, *exprs: SparkLikeExpr) -> Self:
+ new_columns = evaluate_exprs(self, *exprs)
+ new_columns_list = [col.alias(col_name) for (col_name, col) in new_columns]
+ return self._with_native(self.native.select(*new_columns_list))
+
+ def with_columns(self, *exprs: SparkLikeExpr) -> Self:
+ new_columns = evaluate_exprs(self, *exprs)
+ return self._with_native(self.native.withColumns(dict(new_columns)))
+
+ def filter(self, predicate: SparkLikeExpr) -> Self:
+ # `[0]` is safe as the predicate's expression only returns a single column
+ condition = predicate._call(self)[0]
+ spark_df = self.native.where(condition)
+ return self._with_native(spark_df)
+
+ @property
+ def schema(self) -> dict[str, DType]:
+ if self._cached_schema is None:
+ self._cached_schema = {
+ field.name: native_to_narwhals_dtype(
+ field.dataType,
+ self._version,
+ self._native_dtypes,
+ self.native.sparkSession,
+ )
+ for field in self.native.schema
+ }
+ return self._cached_schema
+
+ def collect_schema(self) -> dict[str, DType]:
+ return self.schema
+
+ def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
+ columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
+ return self._with_native(self.native.drop(*columns_to_drop))
+
+ def head(self, n: int) -> Self:
+ return self._with_native(self.native.limit(n))
+
+ def group_by(
+ self, keys: Sequence[str] | Sequence[SparkLikeExpr], *, drop_null_keys: bool
+ ) -> SparkLikeLazyGroupBy:
+ from narwhals._spark_like.group_by import SparkLikeLazyGroupBy
+
+ return SparkLikeLazyGroupBy(self, keys, drop_null_keys=drop_null_keys)
+
+ def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
+ if isinstance(descending, bool):
+ descending = [descending] * len(by)
+
+ if nulls_last:
+ sort_funcs = (
+ self._F.desc_nulls_last if d else self._F.asc_nulls_last
+ for d in descending
+ )
+ else:
+ sort_funcs = (
+ self._F.desc_nulls_first if d else self._F.asc_nulls_first
+ for d in descending
+ )
+
+ sort_cols = [sort_f(col) for col, sort_f in zip(by, sort_funcs)]
+ return self._with_native(self.native.sort(*sort_cols))
+
+ def drop_nulls(self, subset: Sequence[str] | None) -> Self:
+ subset = list(subset) if subset else None
+ return self._with_native(self.native.dropna(subset=subset))
+
+ def rename(self, mapping: Mapping[str, str]) -> Self:
+ rename_mapping = {
+ colname: mapping.get(colname, colname) for colname in self.columns
+ }
+ return self._with_native(
+ self.native.select(
+ [self._F.col(old).alias(new) for old, new in rename_mapping.items()]
+ )
+ )
+
+ def unique(
+ self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
+ ) -> Self:
+ if subset and (error := self._check_columns_exist(subset)):
+ raise error
+ subset = list(subset) if subset else None
+ if keep == "none":
+ tmp = generate_temporary_column_name(8, self.columns)
+ window = self._Window.partitionBy(subset or self.columns)
+ df = (
+ self.native.withColumn(tmp, self._F.count("*").over(window))
+ .filter(self._F.col(tmp) == self._F.lit(1))
+ .drop(self._F.col(tmp))
+ )
+ return self._with_native(df)
+ return self._with_native(self.native.dropDuplicates(subset=subset))
+
+ def join(
+ self,
+ other: Self,
+ how: JoinStrategy,
+ left_on: Sequence[str] | None,
+ right_on: Sequence[str] | None,
+ suffix: str,
+ ) -> Self:
+ left_columns = self.columns
+ right_columns = other.columns
+
+ right_on_: list[str] = list(right_on) if right_on is not None else []
+ left_on_: list[str] = list(left_on) if left_on is not None else []
+
+ # create a mapping for columns on other
+ # `right_on` columns will be renamed as `left_on`
+ # the remaining columns will be either added the suffix or left unchanged.
+ right_cols_to_rename = (
+ [c for c in right_columns if c not in right_on_]
+ if how != "full"
+ else right_columns
+ )
+
+ rename_mapping = {
+ **dict(zip(right_on_, left_on_)),
+ **{
+ colname: f"{colname}{suffix}" if colname in left_columns else colname
+ for colname in right_cols_to_rename
+ },
+ }
+ other_native = other.native.select(
+ [self._F.col(old).alias(new) for old, new in rename_mapping.items()]
+ )
+
+ # If how in {"semi", "anti"}, then resulting columns are same as left columns
+ # Otherwise, we add the right columns with the new mapping, while keeping the
+ # original order of right_columns.
+ col_order = left_columns.copy()
+
+ if how in {"inner", "left", "cross"}:
+ col_order.extend(
+ rename_mapping[colname]
+ for colname in right_columns
+ if colname not in right_on_
+ )
+ elif how == "full":
+ col_order.extend(rename_mapping.values())
+
+ right_on_remapped = [rename_mapping[c] for c in right_on_]
+ on_ = (
+ reduce(
+ and_,
+ (
+ getattr(self.native, left_key) == getattr(other_native, right_key)
+ for left_key, right_key in zip(left_on_, right_on_remapped)
+ ),
+ )
+ if how == "full"
+ else None
+ if how == "cross"
+ else left_on_
+ )
+ how_native = "full_outer" if how == "full" else how
+ return self._with_native(
+ self.native.join(other_native, on=on_, how=how_native).select(col_order)
+ )
+
+ def explode(self, columns: Sequence[str]) -> Self:
+ dtypes = self._version.dtypes
+
+ schema = self.collect_schema()
+ for col_to_explode in columns:
+ dtype = schema[col_to_explode]
+
+ if dtype != dtypes.List:
+ msg = (
+ f"`explode` operation not supported for dtype `{dtype}`, "
+ "expected List type"
+ )
+ raise InvalidOperationError(msg)
+
+ column_names = self.columns
+
+ if len(columns) != 1:
+ msg = (
+ "Exploding on multiple columns is not supported with SparkLike backend since "
+ "we cannot guarantee that the exploded columns have matching element counts."
+ )
+ raise NotImplementedError(msg)
+
+ if self._implementation.is_pyspark() or self._implementation.is_pyspark_connect():
+ return self._with_native(
+ self.native.select(
+ *[
+ self._F.col(col_name).alias(col_name)
+ if col_name != columns[0]
+ else self._F.explode_outer(col_name).alias(col_name)
+ for col_name in column_names
+ ]
+ )
+ )
+ elif self._implementation.is_sqlframe():
+ # Not every sqlframe dialect supports `explode_outer` function
+ # (see https://github.com/eakmanrq/sqlframe/blob/3cb899c515b101ff4c197d84b34fae490d0ed257/sqlframe/base/functions.py#L2288-L2289)
+ # therefore we simply explode the array column which will ignore nulls and
+ # zero sized arrays, and append these specific condition with nulls (to
+ # match polars behavior).
+
+ def null_condition(col_name: str) -> Column:
+ return self._F.isnull(col_name) | (self._F.array_size(col_name) == 0)
+
+ return self._with_native(
+ self.native.select(
+ *[
+ self._F.col(col_name).alias(col_name)
+ if col_name != columns[0]
+ else self._F.explode(col_name).alias(col_name)
+ for col_name in column_names
+ ]
+ ).union(
+ self.native.filter(null_condition(columns[0])).select(
+ *[
+ self._F.col(col_name).alias(col_name)
+ if col_name != columns[0]
+ else self._F.lit(None).alias(col_name)
+ for col_name in column_names
+ ]
+ )
+ )
+ )
+ else: # pragma: no cover
+ msg = "Unreachable code, please report an issue at https://github.com/narwhals-dev/narwhals/issues"
+ raise AssertionError(msg)
+
+ def unpivot(
+ self,
+ on: Sequence[str] | None,
+ index: Sequence[str] | None,
+ variable_name: str,
+ value_name: str,
+ ) -> Self:
+ if self._implementation.is_sqlframe():
+ if variable_name == "":
+ msg = "`variable_name` cannot be empty string for sqlframe backend."
+ raise NotImplementedError(msg)
+
+ if value_name == "":
+ msg = "`value_name` cannot be empty string for sqlframe backend."
+ raise NotImplementedError(msg)
+ else: # pragma: no cover
+ pass
+
+ ids = tuple(index) if index else ()
+ values = (
+ tuple(set(self.columns).difference(set(ids))) if on is None else tuple(on)
+ )
+ unpivoted_native_frame = self.native.unpivot(
+ ids=ids,
+ values=values,
+ variableColumnName=variable_name,
+ valueColumnName=value_name,
+ )
+ if index is None:
+ unpivoted_native_frame = unpivoted_native_frame.drop(*ids)
+ return self._with_native(unpivoted_native_frame)
+
+ gather_every = not_implemented.deprecated(
+ "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
+ )
+ join_asof = not_implemented()
+ tail = not_implemented.deprecated(
+ "`LazyFrame.tail` is deprecated and will be removed in a future version."
+ )
+ with_row_index = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr.py
new file mode 100644
index 0000000..5c42dbb
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr.py
@@ -0,0 +1,930 @@
+from __future__ import annotations
+
+import operator
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ ClassVar,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+)
+
+from narwhals._compliant import LazyExpr
+from narwhals._compliant.window import WindowInputs
+from narwhals._expression_parsing import ExprKind
+from narwhals._spark_like.expr_dt import SparkLikeExprDateTimeNamespace
+from narwhals._spark_like.expr_list import SparkLikeExprListNamespace
+from narwhals._spark_like.expr_str import SparkLikeExprStringNamespace
+from narwhals._spark_like.expr_struct import SparkLikeExprStructNamespace
+from narwhals._spark_like.utils import (
+ import_functions,
+ import_native_dtypes,
+ import_window,
+ narwhals_to_native_dtype,
+)
+from narwhals._utils import Implementation, not_implemented, parse_version
+from narwhals.dependencies import get_pyspark
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column
+ from sqlframe.base.window import Window, WindowSpec
+ from typing_extensions import Self, TypeAlias
+
+ from narwhals._compliant.typing import (
+ AliasNames,
+ EvalNames,
+ EvalSeries,
+ WindowFunction,
+ )
+ from narwhals._expression_parsing import ExprMetadata
+ from narwhals._spark_like.dataframe import SparkLikeLazyFrame
+ from narwhals._spark_like.namespace import SparkLikeNamespace
+ from narwhals._utils import Version, _FullContext
+ from narwhals.typing import (
+ FillNullStrategy,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ TemporalLiteral,
+ )
+
+ NativeRankMethod: TypeAlias = Literal["rank", "dense_rank", "row_number"]
+ SparkWindowFunction = WindowFunction[SparkLikeLazyFrame, Column]
+ SparkWindowInputs = WindowInputs[Column]
+
+
+class SparkLikeExpr(LazyExpr["SparkLikeLazyFrame", "Column"]):
+ _REMAP_RANK_METHOD: ClassVar[Mapping[RankMethod, NativeRankMethod]] = {
+ "min": "rank",
+ "max": "rank",
+ "average": "rank",
+ "dense": "dense_rank",
+ "ordinal": "row_number",
+ }
+
+ def __init__(
+ self,
+ call: EvalSeries[SparkLikeLazyFrame, Column],
+ window_function: SparkWindowFunction | None = None,
+ *,
+ evaluate_output_names: EvalNames[SparkLikeLazyFrame],
+ alias_output_names: AliasNames | None,
+ backend_version: tuple[int, ...],
+ version: Version,
+ implementation: Implementation,
+ ) -> None:
+ self._call = call
+ self._evaluate_output_names = evaluate_output_names
+ self._alias_output_names = alias_output_names
+ self._backend_version = backend_version
+ self._version = version
+ self._implementation = implementation
+ self._metadata: ExprMetadata | None = None
+ self._window_function: SparkWindowFunction | None = window_function
+
+ @property
+ def window_function(self) -> SparkWindowFunction:
+ def default_window_func(
+ df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
+ ) -> list[Column]:
+ assert not window_inputs.order_by # noqa: S101
+ return [
+ expr.over(self.partition_by(*window_inputs.partition_by))
+ for expr in self(df)
+ ]
+
+ return self._window_function or default_window_func
+
+ def __call__(self, df: SparkLikeLazyFrame) -> Sequence[Column]:
+ return self._call(df)
+
+ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
+ if kind is ExprKind.LITERAL:
+ return self
+ return self.over([self._F.lit(1)], [])
+
+ @property
+ def _F(self): # type: ignore[no-untyped-def] # noqa: ANN202, N802
+ if TYPE_CHECKING:
+ from sqlframe.base import functions
+
+ return functions
+ else:
+ return import_functions(self._implementation)
+
+ @property
+ def _native_dtypes(self): # type: ignore[no-untyped-def] # noqa: ANN202
+ if TYPE_CHECKING:
+ from sqlframe.base import types
+
+ return types
+ else:
+ return import_native_dtypes(self._implementation)
+
+ @property
+ def _Window(self) -> type[Window]: # noqa: N802
+ if TYPE_CHECKING:
+ from sqlframe.base.window import Window
+
+ return Window
+ else:
+ return import_window(self._implementation)
+
+ def _sort(
+ self, *cols: Column | str, descending: bool = False, nulls_last: bool = False
+ ) -> Iterator[Column]:
+ F = self._F # noqa: N806
+ mapping = {
+ (False, False): F.asc_nulls_first,
+ (False, True): F.asc_nulls_last,
+ (True, False): F.desc_nulls_first,
+ (True, True): F.desc_nulls_last,
+ }
+ sort = mapping[(descending, nulls_last)]
+ yield from (sort(col) for col in cols)
+
+ def partition_by(self, *cols: Column | str) -> WindowSpec:
+ """Wraps `Window().paritionBy`, with default and `WindowInputs` handling."""
+ return self._Window.partitionBy(*cols or [self._F.lit(1)])
+
+ def __narwhals_expr__(self) -> None: ...
+
+ def __narwhals_namespace__(self) -> SparkLikeNamespace: # pragma: no cover
+ # Unused, just for compatibility with PandasLikeExpr
+ from narwhals._spark_like.namespace import SparkLikeNamespace
+
+ return SparkLikeNamespace(
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def _with_window_function(self, window_function: SparkWindowFunction) -> Self:
+ return self.__class__(
+ self._call,
+ window_function,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ @classmethod
+ def _alias_native(cls, expr: Column, name: str) -> Column:
+ return expr.alias(name)
+
+ def _cum_window_func(
+ self,
+ *,
+ reverse: bool,
+ func_name: Literal["sum", "max", "min", "count", "product"],
+ ) -> SparkWindowFunction:
+ def func(df: SparkLikeLazyFrame, inputs: SparkWindowInputs) -> Sequence[Column]:
+ window = (
+ self.partition_by(*inputs.partition_by)
+ .orderBy(
+ *self._sort(*inputs.order_by, descending=reverse, nulls_last=reverse)
+ )
+ .rowsBetween(self._Window.unboundedPreceding, 0)
+ )
+ return [
+ getattr(self._F, func_name)(expr).over(window) for expr in self._call(df)
+ ]
+
+ return func
+
+ def _rolling_window_func(
+ self,
+ *,
+ func_name: Literal["sum", "mean", "std", "var"],
+ center: bool,
+ window_size: int,
+ min_samples: int,
+ ddof: int | None = None,
+ ) -> SparkWindowFunction:
+ supported_funcs = ["sum", "mean", "std", "var"]
+ if center:
+ half = (window_size - 1) // 2
+ remainder = (window_size - 1) % 2
+ start = self._Window.currentRow - half - remainder
+ end = self._Window.currentRow + half
+ else:
+ start = self._Window.currentRow - window_size + 1
+ end = self._Window.currentRow
+
+ def func(df: SparkLikeLazyFrame, inputs: SparkWindowInputs) -> Sequence[Column]:
+ window = (
+ self.partition_by(*inputs.partition_by)
+ .orderBy(*self._sort(*inputs.order_by))
+ .rowsBetween(start, end)
+ )
+ if func_name in {"sum", "mean"}:
+ func_: str = func_name
+ elif func_name == "var" and ddof == 0:
+ func_ = "var_pop"
+ elif func_name in "var" and ddof == 1:
+ func_ = "var_samp"
+ elif func_name == "std" and ddof == 0:
+ func_ = "stddev_pop"
+ elif func_name == "std" and ddof == 1:
+ func_ = "stddev_samp"
+ elif func_name in {"var", "std"}: # pragma: no cover
+ msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}."
+ raise ValueError(msg)
+ else: # pragma: no cover
+ msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}."
+ raise ValueError(msg)
+ return [
+ self._F.when(
+ self._F.count(expr).over(window) >= min_samples,
+ getattr(self._F, func_)(expr).over(window),
+ )
+ for expr in self._call(df)
+ ]
+
+ return func
+
+ @classmethod
+ def from_column_names(
+ cls: type[Self],
+ evaluate_column_names: EvalNames[SparkLikeLazyFrame],
+ /,
+ *,
+ context: _FullContext,
+ ) -> Self:
+ def func(df: SparkLikeLazyFrame) -> list[Column]:
+ return [df._F.col(col_name) for col_name in evaluate_column_names(df)]
+
+ return cls(
+ func,
+ evaluate_output_names=evaluate_column_names,
+ alias_output_names=None,
+ backend_version=context._backend_version,
+ version=context._version,
+ implementation=context._implementation,
+ )
+
+ @classmethod
+ def from_column_indices(cls, *column_indices: int, context: _FullContext) -> Self:
+ def func(df: SparkLikeLazyFrame) -> list[Column]:
+ columns = df.columns
+ return [df._F.col(columns[i]) for i in column_indices]
+
+ return cls(
+ func,
+ evaluate_output_names=cls._eval_names_indices(column_indices),
+ alias_output_names=None,
+ backend_version=context._backend_version,
+ version=context._version,
+ implementation=context._implementation,
+ )
+
+ def _callable_to_eval_series(
+ self, call: Callable[..., Column], /, **expressifiable_args: Self | Any
+ ) -> EvalSeries[SparkLikeLazyFrame, Column]:
+ def func(df: SparkLikeLazyFrame) -> list[Column]:
+ native_series_list = self(df)
+ other_native_series = {
+ key: df._evaluate_expr(value)
+ if self._is_expr(value)
+ else self._F.lit(value)
+ for key, value in expressifiable_args.items()
+ }
+ return [
+ call(native_series, **other_native_series)
+ for native_series in native_series_list
+ ]
+
+ return func
+
+ def _push_down_window_function(
+ self, call: Callable[..., Column], /, **expressifiable_args: Self | Any
+ ) -> SparkWindowFunction:
+ def window_f(
+ df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ # If a function `f` is elementwise, and `g` is another function, then
+ # - `f(g) over (window)`
+ # - `f(g over (window))
+ # are equivalent.
+ # Make sure to only use with if `call` is elementwise!
+ native_series_list = self.window_function(df, window_inputs)
+ other_native_series = {
+ key: df._evaluate_window_expr(value, window_inputs)
+ if self._is_expr(value)
+ else self._F.lit(value)
+ for key, value in expressifiable_args.items()
+ }
+ return [
+ call(native_series, **other_native_series)
+ for native_series in native_series_list
+ ]
+
+ return window_f
+
+ def _with_callable(
+ self, call: Callable[..., Column], /, **expressifiable_args: Self | Any
+ ) -> Self:
+ return self.__class__(
+ self._callable_to_eval_series(call, **expressifiable_args),
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def _with_elementwise(
+ self, call: Callable[..., Column], /, **expressifiable_args: Self | Any
+ ) -> Self:
+ return self.__class__(
+ self._callable_to_eval_series(call, **expressifiable_args),
+ self._push_down_window_function(call, **expressifiable_args),
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def _with_binary(self, op: Callable[..., Column], other: Self | Any) -> Self:
+ return self.__class__(
+ self._callable_to_eval_series(op, other=other),
+ self._push_down_window_function(op, other=other),
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def _with_alias_output_names(self, func: AliasNames | None, /) -> Self:
+ return type(self)(
+ self._call,
+ self._window_function,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=func,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def __eq__(self, other: SparkLikeExpr) -> Self: # type: ignore[override]
+ return self._with_binary(lambda expr, other: expr.__eq__(other), other)
+
+ def __ne__(self, other: SparkLikeExpr) -> Self: # type: ignore[override]
+ return self._with_binary(lambda expr, other: expr.__ne__(other), other)
+
+ def __add__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__add__(other), other)
+
+ def __sub__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__sub__(other), other)
+
+ def __rsub__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: other.__sub__(expr), other).alias(
+ "literal"
+ )
+
+ def __mul__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__mul__(other), other)
+
+ def __truediv__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__truediv__(other), other)
+
+ def __rtruediv__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(
+ lambda expr, other: other.__truediv__(expr), other
+ ).alias("literal")
+
+ def __floordiv__(self, other: SparkLikeExpr) -> Self:
+ def _floordiv(expr: Column, other: Column) -> Column:
+ return self._F.floor(expr / other)
+
+ return self._with_binary(_floordiv, other)
+
+ def __rfloordiv__(self, other: SparkLikeExpr) -> Self:
+ def _rfloordiv(expr: Column, other: Column) -> Column:
+ return self._F.floor(other / expr)
+
+ return self._with_binary(_rfloordiv, other).alias("literal")
+
+ def __pow__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__pow__(other), other)
+
+ def __rpow__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: other.__pow__(expr), other).alias(
+ "literal"
+ )
+
+ def __mod__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__mod__(other), other)
+
+ def __rmod__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: other.__mod__(expr), other).alias(
+ "literal"
+ )
+
+ def __ge__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__ge__(other), other)
+
+ def __gt__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr > other, other)
+
+ def __le__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__le__(other), other)
+
+ def __lt__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__lt__(other), other)
+
+ def __and__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__and__(other), other)
+
+ def __or__(self, other: SparkLikeExpr) -> Self:
+ return self._with_binary(lambda expr, other: expr.__or__(other), other)
+
+ def __invert__(self) -> Self:
+ invert = cast("Callable[..., Column]", operator.invert)
+ return self._with_elementwise(invert)
+
+ def abs(self) -> Self:
+ return self._with_elementwise(self._F.abs)
+
+ def all(self) -> Self:
+ def f(expr: Column) -> Column:
+ return self._F.coalesce(self._F.bool_and(expr), self._F.lit(True)) # noqa: FBT003
+
+ def window_f(
+ df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ return [
+ self._F.coalesce(
+ self._F.bool_and(expr).over(
+ self.partition_by(*window_inputs.partition_by)
+ ),
+ self._F.lit(True), # noqa: FBT003
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_callable(f)._with_window_function(window_f)
+
+ def any(self) -> Self:
+ def f(expr: Column) -> Column:
+ return self._F.coalesce(self._F.bool_or(expr), self._F.lit(False)) # noqa: FBT003
+
+ def window_f(
+ df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ return [
+ self._F.coalesce(
+ self._F.bool_or(expr).over(
+ self.partition_by(*window_inputs.partition_by)
+ ),
+ self._F.lit(False), # noqa: FBT003
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_callable(f)._with_window_function(window_f)
+
+ def cast(self, dtype: IntoDType) -> Self:
+ def _cast(expr: Column) -> Column:
+ spark_dtype = narwhals_to_native_dtype(
+ dtype, self._version, self._native_dtypes
+ )
+ return expr.cast(spark_dtype)
+
+ return self._with_elementwise(_cast)
+
+ def count(self) -> Self:
+ return self._with_callable(self._F.count)
+
+ def max(self) -> Self:
+ return self._with_callable(self._F.max)
+
+ def mean(self) -> Self:
+ return self._with_callable(self._F.mean)
+
+ def median(self) -> Self:
+ def _median(expr: Column) -> Column:
+ if (
+ self._implementation
+ in {Implementation.PYSPARK, Implementation.PYSPARK_CONNECT}
+ and (pyspark := get_pyspark()) is not None
+ and parse_version(pyspark) < (3, 4)
+ ): # pragma: no cover
+ # Use percentile_approx with default accuracy parameter (10000)
+ return self._F.percentile_approx(expr.cast("double"), 0.5)
+
+ return self._F.median(expr)
+
+ return self._with_callable(_median)
+
+ def min(self) -> Self:
+ return self._with_callable(self._F.min)
+
+ def null_count(self) -> Self:
+ def _null_count(expr: Column) -> Column:
+ return self._F.count_if(self._F.isnull(expr))
+
+ return self._with_callable(_null_count)
+
+ def sum(self) -> Self:
+ def f(expr: Column) -> Column:
+ return self._F.coalesce(self._F.sum(expr), self._F.lit(0))
+
+ def window_f(
+ df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ return [
+ self._F.coalesce(
+ self._F.sum(expr).over(
+ self.partition_by(*window_inputs.partition_by)
+ ),
+ self._F.lit(0),
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_callable(f)._with_window_function(window_f)
+
+ def std(self, ddof: int) -> Self:
+ F = self._F # noqa: N806
+ if ddof == 0:
+ return self._with_callable(F.stddev_pop)
+ if ddof == 1:
+ return self._with_callable(F.stddev_samp)
+
+ def func(expr: Column) -> Column:
+ n_rows = F.count(expr)
+ return F.stddev_samp(expr) * F.sqrt((n_rows - 1) / (n_rows - ddof))
+
+ return self._with_callable(func)
+
+ def var(self, ddof: int) -> Self:
+ F = self._F # noqa: N806
+ if ddof == 0:
+ return self._with_callable(F.var_pop)
+ if ddof == 1:
+ return self._with_callable(F.var_samp)
+
+ def func(expr: Column) -> Column:
+ n_rows = F.count(expr)
+ return F.var_samp(expr) * (n_rows - 1) / (n_rows - ddof)
+
+ return self._with_callable(func)
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None = None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None = None,
+ ) -> Self:
+ def _clip_lower(expr: Column, lower_bound: Column) -> Column:
+ result = expr
+ return self._F.when(result < lower_bound, lower_bound).otherwise(result)
+
+ def _clip_upper(expr: Column, upper_bound: Column) -> Column:
+ result = expr
+ return self._F.when(result > upper_bound, upper_bound).otherwise(result)
+
+ def _clip_both(expr: Column, lower_bound: Column, upper_bound: Column) -> Column:
+ return (
+ self._F.when(expr < lower_bound, lower_bound)
+ .when(expr > upper_bound, upper_bound)
+ .otherwise(expr)
+ )
+
+ if lower_bound is None:
+ return self._with_elementwise(_clip_upper, upper_bound=upper_bound)
+ if upper_bound is None:
+ return self._with_elementwise(_clip_lower, lower_bound=lower_bound)
+ return self._with_elementwise(
+ _clip_both, lower_bound=lower_bound, upper_bound=upper_bound
+ )
+
+ def is_finite(self) -> Self:
+ def _is_finite(expr: Column) -> Column:
+ # A value is finite if it's not NaN, and not infinite, while NULLs should be
+ # preserved
+ is_finite_condition = (
+ ~self._F.isnan(expr)
+ & (expr != self._F.lit(float("inf")))
+ & (expr != self._F.lit(float("-inf")))
+ )
+ return self._F.when(~self._F.isnull(expr), is_finite_condition).otherwise(
+ None
+ )
+
+ return self._with_elementwise(_is_finite)
+
+ def is_in(self, values: Sequence[Any]) -> Self:
+ def _is_in(expr: Column) -> Column:
+ return expr.isin(values) if values else self._F.lit(False) # noqa: FBT003
+
+ return self._with_elementwise(_is_in)
+
+ def is_unique(self) -> Self:
+ def _is_unique(expr: Column, *partition_by: str | Column) -> Column:
+ return self._F.count("*").over(self.partition_by(expr, *partition_by)) == 1
+
+ def _unpartitioned_is_unique(expr: Column) -> Column:
+ return _is_unique(expr)
+
+ def _partitioned_is_unique(
+ df: SparkLikeLazyFrame, inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ assert not inputs.order_by # noqa: S101
+ return [_is_unique(expr, *inputs.partition_by) for expr in self(df)]
+
+ return self._with_callable(_unpartitioned_is_unique)._with_window_function(
+ _partitioned_is_unique
+ )
+
+ def len(self) -> Self:
+ def _len(_expr: Column) -> Column:
+ # Use count(*) to count all rows including nulls
+ return self._F.count("*")
+
+ return self._with_callable(_len)
+
+ def round(self, decimals: int) -> Self:
+ def _round(expr: Column) -> Column:
+ return self._F.round(expr, decimals)
+
+ return self._with_elementwise(_round)
+
+ def skew(self) -> Self:
+ return self._with_callable(self._F.skewness)
+
+ def n_unique(self) -> Self:
+ def _n_unique(expr: Column) -> Column:
+ return self._F.count_distinct(expr) + self._F.max(
+ self._F.isnull(expr).cast(self._native_dtypes.IntegerType())
+ )
+
+ return self._with_callable(_n_unique)
+
+ def over(self, partition_by: Sequence[str | Column], order_by: Sequence[str]) -> Self:
+ def func(df: SparkLikeLazyFrame) -> Sequence[Column]:
+ return self.window_function(df, WindowInputs(partition_by, order_by))
+
+ return self.__class__(
+ func,
+ evaluate_output_names=self._evaluate_output_names,
+ alias_output_names=self._alias_output_names,
+ backend_version=self._backend_version,
+ version=self._version,
+ implementation=self._implementation,
+ )
+
+ def is_null(self) -> Self:
+ return self._with_elementwise(self._F.isnull)
+
+ def is_nan(self) -> Self:
+ def _is_nan(expr: Column) -> Column:
+ return self._F.when(self._F.isnull(expr), None).otherwise(self._F.isnan(expr))
+
+ return self._with_elementwise(_is_nan)
+
+ def shift(self, n: int) -> Self:
+ def func(df: SparkLikeLazyFrame, inputs: SparkWindowInputs) -> Sequence[Column]:
+ window = self.partition_by(*inputs.partition_by).orderBy(
+ *self._sort(*inputs.order_by)
+ )
+ return [self._F.lag(expr, n).over(window) for expr in self(df)]
+
+ return self._with_window_function(func)
+
+ def is_first_distinct(self) -> Self:
+ def func(df: SparkLikeLazyFrame, inputs: SparkWindowInputs) -> Sequence[Column]:
+ return [
+ self._F.row_number().over(
+ self.partition_by(*inputs.partition_by, expr).orderBy(
+ *self._sort(*inputs.order_by)
+ )
+ )
+ == 1
+ for expr in self(df)
+ ]
+
+ return self._with_window_function(func)
+
+ def is_last_distinct(self) -> Self:
+ def func(df: SparkLikeLazyFrame, inputs: SparkWindowInputs) -> Sequence[Column]:
+ return [
+ self._F.row_number().over(
+ self.partition_by(*inputs.partition_by, expr).orderBy(
+ *self._sort(*inputs.order_by, descending=True, nulls_last=True)
+ )
+ )
+ == 1
+ for expr in self(df)
+ ]
+
+ return self._with_window_function(func)
+
+ def diff(self) -> Self:
+ def func(df: SparkLikeLazyFrame, inputs: SparkWindowInputs) -> Sequence[Column]:
+ window = self.partition_by(*inputs.partition_by).orderBy(
+ *self._sort(*inputs.order_by)
+ )
+ return [expr - self._F.lag(expr).over(window) for expr in self(df)]
+
+ return self._with_window_function(func)
+
+ def cum_sum(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="sum")
+ )
+
+ def cum_max(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="max")
+ )
+
+ def cum_min(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="min")
+ )
+
+ def cum_count(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="count")
+ )
+
+ def cum_prod(self, *, reverse: bool) -> Self:
+ return self._with_window_function(
+ self._cum_window_func(reverse=reverse, func_name="product")
+ )
+
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral,
+ strategy: FillNullStrategy | None,
+ limit: int | None,
+ ) -> Self:
+ if strategy is not None:
+
+ def _fill_with_strategy(
+ df: SparkLikeLazyFrame, inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ fn = self._F.last_value if strategy == "forward" else self._F.first_value
+ if strategy == "forward":
+ start = self._Window.unboundedPreceding if limit is None else -limit
+ end = self._Window.currentRow
+ else:
+ start = self._Window.currentRow
+ end = self._Window.unboundedFollowing if limit is None else limit
+ return [
+ fn(expr, ignoreNulls=True).over(
+ self.partition_by(*inputs.partition_by)
+ .orderBy(*self._sort(*inputs.order_by))
+ .rowsBetween(start, end)
+ )
+ for expr in self(df)
+ ]
+
+ return self._with_window_function(_fill_with_strategy)
+
+ def _fill_constant(expr: Column, value: Column) -> Column:
+ return self._F.ifnull(expr, value)
+
+ return self._with_elementwise(_fill_constant, value=value)
+
+ def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="sum",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ )
+ )
+
+ def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="mean",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ )
+ )
+
+ def rolling_var(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="var",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ ddof=ddof,
+ )
+ )
+
+ def rolling_std(
+ self, window_size: int, *, min_samples: int, center: bool, ddof: int
+ ) -> Self:
+ return self._with_window_function(
+ self._rolling_window_func(
+ func_name="std",
+ center=center,
+ window_size=window_size,
+ min_samples=min_samples,
+ ddof=ddof,
+ )
+ )
+
+ def rank(self, method: RankMethod, *, descending: bool) -> Self:
+ func_name = self._REMAP_RANK_METHOD[method]
+
+ def _rank(
+ expr: Column,
+ *,
+ descending: bool,
+ partition_by: Sequence[str | Column] | None = None,
+ ) -> Column:
+ order_by = self._sort(expr, descending=descending, nulls_last=True)
+ if partition_by is not None:
+ window = self.partition_by(*partition_by).orderBy(*order_by)
+ count_window = self.partition_by(*partition_by, expr)
+ else:
+ window = self.partition_by().orderBy(*order_by)
+ count_window = self.partition_by(expr)
+ if method == "max":
+ rank_expr = (
+ getattr(self._F, func_name)().over(window)
+ + self._F.count(expr).over(count_window)
+ - self._F.lit(1)
+ )
+
+ elif method == "average":
+ rank_expr = getattr(self._F, func_name)().over(window) + (
+ self._F.count(expr).over(count_window) - self._F.lit(1)
+ ) / self._F.lit(2)
+
+ else:
+ rank_expr = getattr(self._F, func_name)().over(window)
+
+ return self._F.when(expr.isNotNull(), rank_expr)
+
+ def _unpartitioned_rank(expr: Column) -> Column:
+ return _rank(expr, descending=descending)
+
+ def _partitioned_rank(
+ df: SparkLikeLazyFrame, inputs: SparkWindowInputs
+ ) -> Sequence[Column]:
+ assert not inputs.order_by # noqa: S101
+ return [
+ _rank(expr, descending=descending, partition_by=inputs.partition_by)
+ for expr in self(df)
+ ]
+
+ return self._with_callable(_unpartitioned_rank)._with_window_function(
+ _partitioned_rank
+ )
+
+ def log(self, base: float) -> Self:
+ def _log(expr: Column) -> Column:
+ return (
+ self._F.when(expr < 0, self._F.lit(float("nan")))
+ .when(expr == 0, self._F.lit(float("-inf")))
+ .otherwise(self._F.log(float(base), expr))
+ )
+
+ return self._with_elementwise(_log)
+
+ def exp(self) -> Self:
+ def _exp(expr: Column) -> Column:
+ return self._F.exp(expr)
+
+ return self._with_elementwise(_exp)
+
+ @property
+ def str(self) -> SparkLikeExprStringNamespace:
+ return SparkLikeExprStringNamespace(self)
+
+ @property
+ def dt(self) -> SparkLikeExprDateTimeNamespace:
+ return SparkLikeExprDateTimeNamespace(self)
+
+ @property
+ def list(self) -> SparkLikeExprListNamespace:
+ return SparkLikeExprListNamespace(self)
+
+ @property
+ def struct(self) -> SparkLikeExprStructNamespace:
+ return SparkLikeExprStructNamespace(self)
+
+ drop_nulls = not_implemented()
+ unique = not_implemented()
+ quantile = not_implemented()
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_dt.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_dt.py
new file mode 100644
index 0000000..c5c76e3
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_dt.py
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._duration import parse_interval_string
+from narwhals._spark_like.utils import (
+ UNITS_DICT,
+ fetch_session_time_zone,
+ strptime_to_pyspark_format,
+)
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column
+
+ from narwhals._spark_like.dataframe import SparkLikeLazyFrame
+ from narwhals._spark_like.expr import SparkLikeExpr
+
+
class SparkLikeExprDateTimeNamespace:
    """Datetime accessor (`expr.dt`) for Spark-like expressions."""

    def __init__(self, expr: SparkLikeExpr) -> None:
        self._compliant_expr = expr

    def to_string(self, format: str) -> SparkLikeExpr:
        """Format datetimes as strings using a strptime-style format."""
        F = self._compliant_expr._F  # noqa: N806

        def _to_string(_input: Column) -> Column:
            # ISO week formats have no `date_format` equivalent and are
            # assembled by hand.
            if format == "%G-W%V":
                return self._format_iso_week(_input)
            if format == "%G-W%V-%u":
                return self._format_iso_week_with_day(_input)

            format_, suffix = self._format_microseconds(_input, format)
            pyspark_fmt = strptime_to_pyspark_format(format_)
            result = F.date_format(_input, pyspark_fmt)
            if "T" in format_:
                # `strptime_to_pyspark_format` replaces "T" with " " since pyspark
                # does not support the literal "T" in `date_format`.
                # If no other spaces are in the given format, then we can revert this
                # operation, otherwise we raise an exception.
                if " " in format_:  # pragma: no cover
                    msg = (
                        "`dt.to_string` with a format that contains both spaces and "
                        " the literal 'T' is not supported for spark-like backends."
                    )
                    raise NotImplementedError(msg)
                result = F.replace(result, F.lit(" "), F.lit("T"))
            return F.concat(result, *suffix)

        return self._compliant_expr._with_callable(_to_string)

    def date(self) -> SparkLikeExpr:
        """Calendar date component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.to_date)

    def year(self) -> SparkLikeExpr:
        """Year component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.year)

    def month(self) -> SparkLikeExpr:
        """Month component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.month)

    def day(self) -> SparkLikeExpr:
        """Day-of-month component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.day)

    def hour(self) -> SparkLikeExpr:
        """Hour component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.hour)

    def minute(self) -> SparkLikeExpr:
        """Minute component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.minute)

    def second(self) -> SparkLikeExpr:
        """Second component."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.second)

    def millisecond(self) -> SparkLikeExpr:
        """Milliseconds within the current second."""
        F = self._compliant_expr._F  # noqa: N806

        def _millisecond(expr: Column) -> Column:
            return F.floor((F.unix_micros(expr) % 1_000_000) / 1000)

        return self._compliant_expr._with_callable(_millisecond)

    def microsecond(self) -> SparkLikeExpr:
        """Microseconds within the current second."""
        F = self._compliant_expr._F  # noqa: N806

        def _microsecond(expr: Column) -> Column:
            return F.unix_micros(expr) % 1_000_000

        return self._compliant_expr._with_callable(_microsecond)

    def nanosecond(self) -> SparkLikeExpr:
        """Nanoseconds within the current second (microsecond precision scaled up)."""
        F = self._compliant_expr._F  # noqa: N806

        def _nanosecond(expr: Column) -> Column:
            return (F.unix_micros(expr) % 1_000_000) * 1000

        return self._compliant_expr._with_callable(_nanosecond)

    def ordinal_day(self) -> SparkLikeExpr:
        """Day of the year (via `dayofyear`)."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.dayofyear)

    def weekday(self) -> SparkLikeExpr:
        F = self._compliant_expr._F  # noqa: N806

        def _weekday(expr: Column) -> Column:
            # PySpark's dayofweek returns 1-7 for Sunday-Saturday
            return (F.dayofweek(expr) + 6) % 7

        return self._compliant_expr._with_callable(_weekday)

    def truncate(self, every: str) -> SparkLikeExpr:
        """Truncate datetimes down to the given interval (multiple must be 1)."""
        multiple, unit = parse_interval_string(every)
        if multiple != 1:
            msg = f"Only multiple 1 is currently supported for Spark-like.\nGot {multiple!s}."
            raise ValueError(msg)
        if unit == "ns":
            msg = "Truncating to nanoseconds is not yet supported for Spark-like."
            raise NotImplementedError(msg)
        format = UNITS_DICT[unit]
        F = self._compliant_expr._F  # noqa: N806

        def _truncate(expr: Column) -> Column:
            return F.date_trunc(format, expr)

        return self._compliant_expr._with_callable(_truncate)

    def _no_op_time_zone(self, time_zone: str) -> SparkLikeExpr:  # pragma: no cover
        """Pass values through unchanged, erroring unless `time_zone` matches the session."""

        def func(df: SparkLikeLazyFrame) -> Sequence[Column]:
            columns = self._compliant_expr(df)
            conn_time_zone = fetch_session_time_zone(df.native.sparkSession)
            if conn_time_zone != time_zone:
                msg = (
                    "PySpark stores the time zone in the session, rather than in the "
                    f"data type, so changing the timezone to anything other than {conn_time_zone} "
                    " (the current session time zone) is not supported."
                )
                raise NotImplementedError(msg)
            return columns

        expr = self._compliant_expr
        return expr.__class__(
            func,
            evaluate_output_names=expr._evaluate_output_names,
            alias_output_names=expr._alias_output_names,
            backend_version=expr._backend_version,
            version=expr._version,
            implementation=expr._implementation,
        )

    def convert_time_zone(self, time_zone: str) -> SparkLikeExpr:  # pragma: no cover
        return self._no_op_time_zone(time_zone)

    def replace_time_zone(
        self, time_zone: str | None
    ) -> SparkLikeExpr:  # pragma: no cover
        if time_zone is not None:
            return self._no_op_time_zone(time_zone)
        # Dropping the time zone casts to a timezone-naive timestamp.
        return self._compliant_expr._with_callable(
            lambda _input: _input.cast("timestamp_ntz")
        )

    def _format_iso_week_with_day(self, _input: Column) -> Column:
        """Format datetime as ISO week string with day."""
        F = self._compliant_expr._F  # noqa: N806
        year = F.date_format(_input, "yyyy")
        week = F.lpad(F.weekofyear(_input).cast("string"), 2, "0")
        # Spark's dayofweek has Sunday == 1; shift so Sunday becomes 7.
        raw_day = F.dayofweek(_input)
        day = F.when(raw_day == 1, 7).otherwise(raw_day - 1)
        return F.concat(year, F.lit("-W"), week, F.lit("-"), day.cast("string"))

    def _format_iso_week(self, _input: Column) -> Column:
        """Format datetime as ISO week string."""
        F = self._compliant_expr._F  # noqa: N806
        year = F.date_format(_input, "yyyy")
        week = F.lpad(F.weekofyear(_input).cast("string"), 2, "0")
        return F.concat(year, F.lit("-W"), week)

    def _format_microseconds(
        self, _input: Column, format: str
    ) -> tuple[str, tuple[Column, ...]]:
        """Format microseconds if present in format, else it's a no-op."""
        import re

        F = self._compliant_expr._F  # noqa: N806
        if not format.endswith((".%f", "%.f")):
            return format, ()
        micros = F.unix_micros(_input) % 1_000_000
        micros_str = F.lpad(micros.cast("string"), 6, "0")
        stripped = re.sub(r"(.%|%.)f$", "", format)
        return stripped, (F.lit("."), micros_str)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_list.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_list.py
new file mode 100644
index 0000000..b59eb83
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_list.py
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from narwhals._spark_like.expr import SparkLikeExpr
+
+
class SparkLikeExprListNamespace:
    """List accessor (`expr.list`) for Spark-like expressions."""

    def __init__(self, expr: SparkLikeExpr) -> None:
        self._compliant_expr = expr

    def len(self) -> SparkLikeExpr:
        """Number of elements in each array value."""
        expr = self._compliant_expr
        return expr._with_callable(expr._F.array_size)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_str.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_str.py
new file mode 100644
index 0000000..7c65952
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_str.py
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+from functools import partial
+from typing import TYPE_CHECKING
+
+from narwhals._spark_like.utils import strptime_to_pyspark_format
+from narwhals._utils import _is_naive_format
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column
+
+ from narwhals._spark_like.expr import SparkLikeExpr
+
+
class SparkLikeExprStringNamespace:
    """String accessor (`expr.str`) for Spark-like expressions."""

    def __init__(self, expr: SparkLikeExpr) -> None:
        self._compliant_expr = expr

    def len_chars(self) -> SparkLikeExpr:
        """Number of characters in each string."""
        expr = self._compliant_expr
        return expr._with_callable(expr._F.char_length)

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> SparkLikeExpr:
        """Replace every occurrence of `pattern` (regex unless `literal`)."""
        F = self._compliant_expr._F  # noqa: N806
        replace = F.replace if literal else F.regexp_replace

        def _replace_all(expr: Column) -> Column:
            return replace(expr, F.lit(pattern), F.lit(value))

        return self._compliant_expr._with_callable(_replace_all)

    def strip_chars(self, characters: str | None) -> SparkLikeExpr:
        """Strip the given characters (whitespace when None) from both ends."""
        import string

        F = self._compliant_expr._F  # noqa: N806
        to_remove = string.whitespace if characters is None else characters

        def _strip(expr: Column) -> Column:
            return F.btrim(expr, F.lit(to_remove))

        return self._compliant_expr._with_callable(_strip)

    def starts_with(self, prefix: str) -> SparkLikeExpr:
        """True where the string starts with `prefix`."""
        F = self._compliant_expr._F  # noqa: N806

        def _starts_with(expr: Column) -> Column:
            return F.startswith(expr, F.lit(prefix))

        return self._compliant_expr._with_callable(_starts_with)

    def ends_with(self, suffix: str) -> SparkLikeExpr:
        """True where the string ends with `suffix`."""
        F = self._compliant_expr._F  # noqa: N806

        def _ends_with(expr: Column) -> Column:
            return F.endswith(expr, F.lit(suffix))

        return self._compliant_expr._with_callable(_ends_with)

    def contains(self, pattern: str, *, literal: bool) -> SparkLikeExpr:
        """True where the string contains `pattern` (regex unless `literal`)."""
        F = self._compliant_expr._F  # noqa: N806
        matcher = F.contains if literal else F.regexp

        def _contains(expr: Column) -> Column:
            return matcher(expr, F.lit(pattern))

        return self._compliant_expr._with_callable(_contains)

    def slice(self, offset: int, length: int | None) -> SparkLikeExpr:
        """Substring starting at `offset` (negative counts from the end)."""
        F = self._compliant_expr._F  # noqa: N806

        def _slice(expr: Column) -> Column:
            # `substr` is 1-based, not 0-based:
            # https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.substring.html
            col_length = F.char_length(expr)
            if offset < 0:
                start = col_length + F.lit(offset + 1)
            else:
                start = F.lit(offset + 1)
            count = F.lit(length) if length is not None else col_length
            return expr.substr(start, count)

        return self._compliant_expr._with_callable(_slice)

    def split(self, by: str) -> SparkLikeExpr:
        """Split each string on the separator `by`."""
        F = self._compliant_expr._F  # noqa: N806
        return self._compliant_expr._with_callable(lambda expr: F.split(expr, by))

    def to_uppercase(self) -> SparkLikeExpr:
        """Upper-case each string."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.upper)

    def to_lowercase(self) -> SparkLikeExpr:
        """Lower-case each string."""
        return self._compliant_expr._with_callable(self._compliant_expr._F.lower)

    def to_datetime(self, format: str | None) -> SparkLikeExpr:
        """Parse strings to timestamps, optionally with a strptime-style format."""
        F = self._compliant_expr._F  # noqa: N806
        if not format:
            function = F.to_timestamp
        elif _is_naive_format(format):
            function = partial(
                F.to_timestamp_ntz, format=F.lit(strptime_to_pyspark_format(format))
            )
        else:
            function = partial(F.to_timestamp, format=strptime_to_pyspark_format(format))

        def _to_datetime(expr: Column) -> Column:
            # Normalise the ISO "T" separator before parsing.
            return function(F.replace(expr, F.lit("T"), F.lit(" ")))

        return self._compliant_expr._with_callable(_to_datetime)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_struct.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_struct.py
new file mode 100644
index 0000000..03e6d71
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/expr_struct.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column
+
+ from narwhals._spark_like.expr import SparkLikeExpr
+
+
class SparkLikeExprStructNamespace:
    """`Expr.struct` namespace for Spark-like backends."""

    def __init__(self, expr: SparkLikeExpr) -> None:
        self._compliant_expr = expr

    def field(self, name: str) -> SparkLikeExpr:
        """Extract struct field `name`, aliasing the result to `name`."""

        def extract(expr: Column) -> Column:
            return expr.getField(name)

        return self._compliant_expr._with_callable(extract).alias(name)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/group_by.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/group_by.py
new file mode 100644
index 0000000..4c63d77
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/group_by.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
+
+from narwhals._compliant import LazyGroupBy
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column # noqa: F401
+
+ from narwhals._spark_like.dataframe import SparkLikeLazyFrame
+ from narwhals._spark_like.expr import SparkLikeExpr
+
+
class SparkLikeLazyGroupBy(LazyGroupBy["SparkLikeLazyFrame", "SparkLikeExpr", "Column"]):
    """Lazy group-by implementation for Spark-like frames."""

    def __init__(
        self,
        df: SparkLikeLazyFrame,
        keys: Sequence[SparkLikeExpr] | Sequence[str],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        # `_parse_keys` (from the shared base class) resolves key expressions
        # into temporary column names plus the user-facing output names.
        frame, self._keys, self._output_key_names = self._parse_keys(df, keys=keys)
        if drop_null_keys:
            frame = frame.drop_nulls(self._keys)
        self._compliant_frame = frame

    def agg(self, *exprs: SparkLikeExpr) -> SparkLikeLazyFrame:
        """Aggregate `exprs` per group; with no aggregations, return the distinct keys."""
        agg_columns = list(self._evaluate_exprs(exprs))
        if agg_columns:
            native_result = self.compliant.native.groupBy(*self._keys).agg(*agg_columns)
        else:
            native_result = self.compliant.native.select(*self._keys).dropDuplicates()

        # Restore the user-facing key names on the way out.
        rename_map = dict(zip(self._keys, self._output_key_names))
        return self.compliant._with_native(native_result).rename(rename_map)
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/namespace.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/namespace.py
new file mode 100644
index 0000000..7ad42ff
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/namespace.py
@@ -0,0 +1,290 @@
+from __future__ import annotations
+
+import operator
+from functools import reduce
+from typing import TYPE_CHECKING, Callable, Iterable, Sequence
+
+from narwhals._compliant import LazyNamespace, LazyThen, LazyWhen
+from narwhals._expression_parsing import (
+ combine_alias_output_names,
+ combine_evaluate_output_names,
+)
+from narwhals._spark_like.dataframe import SparkLikeLazyFrame
+from narwhals._spark_like.expr import SparkLikeExpr
+from narwhals._spark_like.selectors import SparkLikeSelectorNamespace
+from narwhals._spark_like.utils import (
+ import_functions,
+ import_native_dtypes,
+ narwhals_to_native_dtype,
+)
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column
+
+ from narwhals._spark_like.dataframe import SQLFrameDataFrame # noqa: F401
+ from narwhals._spark_like.expr import SparkWindowInputs
+ from narwhals._utils import Implementation, Version
+ from narwhals.typing import ConcatMethod, IntoDType, NonNestedLiteral
+
+
class SparkLikeNamespace(
    LazyNamespace[SparkLikeLazyFrame, SparkLikeExpr, "SQLFrameDataFrame"]
):
    """Top-level namespace of `nw.*` functions for Spark-like backends.

    Covers PySpark, PySpark Connect and SQLFrame; the concrete backend is
    selected via `implementation`.
    """

    def __init__(
        self,
        *,
        backend_version: tuple[int, ...],
        version: Version,
        implementation: Implementation,
    ) -> None:
        self._backend_version = backend_version
        self._version = version
        self._implementation = implementation

    @property
    def selectors(self) -> SparkLikeSelectorNamespace:
        """Selector namespace (`ncs.*`) bound to this backend."""
        return SparkLikeSelectorNamespace.from_namespace(self)

    @property
    def _expr(self) -> type[SparkLikeExpr]:
        # Concrete expression class used by the shared LazyNamespace machinery.
        return SparkLikeExpr

    @property
    def _lazyframe(self) -> type[SparkLikeLazyFrame]:
        # Concrete lazyframe class used by the shared LazyNamespace machinery.
        return SparkLikeLazyFrame

    @property
    def _F(self):  # type: ignore[no-untyped-def] # noqa: ANN202, N802
        """Backend-native `functions` module (resolved lazily at runtime)."""
        if TYPE_CHECKING:
            from sqlframe.base import functions

            return functions
        else:
            return import_functions(self._implementation)

    @property
    def _native_dtypes(self):  # type: ignore[no-untyped-def] # noqa: ANN202
        """Backend-native `types` module (resolved lazily at runtime)."""
        if TYPE_CHECKING:
            from sqlframe.base import types

            return types
        else:
            return import_native_dtypes(self._implementation)

    def _with_elementwise(
        self, func: Callable[[Iterable[Column]], Column], *exprs: SparkLikeExpr
    ) -> SparkLikeExpr:
        """Build an expression that folds the columns of `exprs` into one via `func`.

        Both the plain call path and the window-function path evaluate every
        input expression and feed all resulting columns to `func`.
        """

        def call(df: SparkLikeLazyFrame) -> list[Column]:
            cols = (col for _expr in exprs for col in _expr(df))
            return [func(cols)]

        def window_function(
            df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
        ) -> list[Column]:
            cols = (
                col for _expr in exprs for col in _expr.window_function(df, window_inputs)
            )
            return [func(cols)]

        return self._expr(
            call=call,
            window_function=window_function,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            backend_version=self._backend_version,
            version=self._version,
            implementation=self._implementation,
        )

    def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> SparkLikeExpr:
        """Literal expression named "literal", optionally cast to `dtype`."""

        def _lit(df: SparkLikeLazyFrame) -> list[Column]:
            column = df._F.lit(value)
            if dtype:
                native_dtype = narwhals_to_native_dtype(
                    dtype, version=self._version, spark_types=df._native_dtypes
                )
                column = column.cast(native_dtype)

            return [column]

        return self._expr(
            call=_lit,
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            backend_version=self._backend_version,
            version=self._version,
            implementation=self._implementation,
        )

    def len(self) -> SparkLikeExpr:
        """Row count expression, named "len"."""

        def func(df: SparkLikeLazyFrame) -> list[Column]:
            return [df._F.count("*")]

        return self._expr(
            func,
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            backend_version=self._backend_version,
            version=self._version,
            implementation=self._implementation,
        )

    def all_horizontal(self, *exprs: SparkLikeExpr) -> SparkLikeExpr:
        """Boolean AND across `exprs`, row-wise."""

        def func(cols: Iterable[Column]) -> Column:
            return reduce(operator.and_, cols)

        return self._with_elementwise(func, *exprs)

    def any_horizontal(self, *exprs: SparkLikeExpr) -> SparkLikeExpr:
        """Boolean OR across `exprs`, row-wise."""

        def func(cols: Iterable[Column]) -> Column:
            return reduce(operator.or_, cols)

        return self._with_elementwise(func, *exprs)

    def max_horizontal(self, *exprs: SparkLikeExpr) -> SparkLikeExpr:
        """Row-wise maximum across `exprs`."""

        def func(cols: Iterable[Column]) -> Column:
            return self._F.greatest(*cols)

        return self._with_elementwise(func, *exprs)

    def min_horizontal(self, *exprs: SparkLikeExpr) -> SparkLikeExpr:
        """Row-wise minimum across `exprs`."""

        def func(cols: Iterable[Column]) -> Column:
            return self._F.least(*cols)

        return self._with_elementwise(func, *exprs)

    def sum_horizontal(self, *exprs: SparkLikeExpr) -> SparkLikeExpr:
        """Row-wise sum across `exprs`; nulls count as 0."""

        def func(cols: Iterable[Column]) -> Column:
            return reduce(
                operator.add, (self._F.coalesce(col, self._F.lit(0)) for col in cols)
            )

        return self._with_elementwise(func, *exprs)

    def mean_horizontal(self, *exprs: SparkLikeExpr) -> SparkLikeExpr:
        """Row-wise mean across `exprs`; nulls are excluded from both sum and count."""

        def func(cols: Iterable[Column]) -> Column:
            cols = list(cols)
            F = exprs[0]._F  # noqa: N806
            # PySpark before 3.5 doesn't have `try_divide`, SQLFrame doesn't have it.
            divide = getattr(F, "try_divide", operator.truediv)
            # numerator: null-safe sum; denominator: count of non-null values.
            return divide(
                reduce(
                    operator.add, (self._F.coalesce(col, self._F.lit(0)) for col in cols)
                ),
                reduce(
                    operator.add,
                    (
                        col.isNotNull().cast(self._native_dtypes.IntegerType())
                        for col in cols
                    ),
                ),
            )

        return self._with_elementwise(func, *exprs)

    def concat(
        self, items: Iterable[SparkLikeLazyFrame], *, how: ConcatMethod
    ) -> SparkLikeLazyFrame:
        """Concatenate frames vertically or diagonally.

        Raises:
            TypeError: for `how="vertical"` when column names don't match.
            NotImplementedError: for any other `how`.
        """
        dfs = [item._native_frame for item in items]
        if how == "vertical":
            cols_0 = dfs[0].columns
            for i, df in enumerate(dfs[1:], start=1):
                cols_current = df.columns
                if not ((len(cols_current) == len(cols_0)) and (cols_current == cols_0)):
                    msg = (
                        "unable to vstack, column names don't match:\n"
                        f"   - dataframe 0: {cols_0}\n"
                        f"   - dataframe {i}: {cols_current}\n"
                    )
                    raise TypeError(msg)

            return SparkLikeLazyFrame(
                native_dataframe=reduce(lambda x, y: x.union(y), dfs),
                backend_version=self._backend_version,
                version=self._version,
                implementation=self._implementation,
            )

        if how == "diagonal":
            # Missing columns are filled with nulls by `unionByName`.
            return SparkLikeLazyFrame(
                native_dataframe=reduce(
                    lambda x, y: x.unionByName(y, allowMissingColumns=True), dfs
                ),
                backend_version=self._backend_version,
                version=self._version,
                implementation=self._implementation,
            )
        raise NotImplementedError

    def concat_str(
        self, *exprs: SparkLikeExpr, separator: str, ignore_nulls: bool
    ) -> SparkLikeExpr:
        """Row-wise string concatenation of `exprs` joined by `separator`.

        With `ignore_nulls=False` any null input yields a null result; with
        `ignore_nulls=True` null values (and their separators) are skipped.
        """

        def func(df: SparkLikeLazyFrame) -> list[Column]:
            cols = [s for _expr in exprs for s in _expr(df)]
            cols_casted = [s.cast(df._native_dtypes.StringType()) for s in cols]
            null_mask = [df._F.isnull(s) for s in cols]

            if not ignore_nulls:
                # Any null anywhere -> whole result is null.
                null_mask_result = reduce(operator.or_, null_mask)
                result = df._F.when(
                    ~null_mask_result,
                    reduce(
                        lambda x, y: df._F.format_string(f"%s{separator}%s", x, y),
                        cols_casted,
                    ),
                ).otherwise(df._F.lit(None))
            else:
                # Replace nulls with "" and blank out the separator that
                # would have preceded a null value.
                init_value, *values = [
                    df._F.when(~nm, col).otherwise(df._F.lit(""))
                    for col, nm in zip(cols_casted, null_mask)
                ]

                separators = (
                    df._F.when(nm, df._F.lit("")).otherwise(df._F.lit(separator))
                    for nm in null_mask[:-1]
                )
                result = reduce(
                    lambda x, y: df._F.format_string("%s%s", x, y),
                    (
                        df._F.format_string("%s%s", s, v)
                        for s, v in zip(separators, values)
                    ),
                    init_value,
                )

            return [result]

        return self._expr(
            call=func,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            backend_version=self._backend_version,
            version=self._version,
            implementation=self._implementation,
        )

    def when(self, predicate: SparkLikeExpr) -> SparkLikeWhen:
        """Start a `when/then/otherwise` conditional expression."""
        return SparkLikeWhen.from_expr(predicate, context=self)
+
+
class SparkLikeWhen(LazyWhen[SparkLikeLazyFrame, "Column", SparkLikeExpr]):
    """`when/then/otherwise` builder for Spark-like backends."""

    @property
    def _then(self) -> type[SparkLikeThen]:
        return SparkLikeThen

    def _bind_functions(self, df: SparkLikeLazyFrame) -> None:
        # The native `when`/`lit` live on the frame's backend module, so they
        # can only be bound once a concrete frame is available.
        self.when = df._F.when
        self.lit = df._F.lit

    def __call__(self, df: SparkLikeLazyFrame) -> Sequence[Column]:
        self._bind_functions(df)
        return super().__call__(df)

    def _window_function(
        self, df: SparkLikeLazyFrame, window_inputs: SparkWindowInputs
    ) -> Sequence[Column]:
        self._bind_functions(df)
        return super()._window_function(df, window_inputs)
+
+
# `Then` expression for Spark-like backends: behaviour comes entirely from
# `LazyThen` and `SparkLikeExpr`; no additional members are needed.
class SparkLikeThen(
    LazyThen[SparkLikeLazyFrame, "Column", SparkLikeExpr], SparkLikeExpr
): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/selectors.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/selectors.py
new file mode 100644
index 0000000..013bb9d
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/selectors.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._compliant import CompliantSelector, LazySelectorNamespace
+from narwhals._spark_like.expr import SparkLikeExpr
+
+if TYPE_CHECKING:
+ from sqlframe.base.column import Column # noqa: F401
+
+ from narwhals._spark_like.dataframe import SparkLikeLazyFrame # noqa: F401
+
+
class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]):
    """Selector namespace (`ncs.*`) for Spark-like backends."""

    @property
    def _selector(self) -> type[SparkLikeSelector]:
        # Concrete selector class used by the shared LazySelectorNamespace logic.
        return SparkLikeSelector
+
+
class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr):  # type: ignore[misc]
    """Column selector for Spark-like backends."""

    def _to_expr(self) -> SparkLikeExpr:
        # Demote this selector to a plain expression carrying the same
        # evaluation machinery and backend configuration.
        return SparkLikeExpr(
            self._call,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            backend_version=self._backend_version,
            version=self._version,
            implementation=self._implementation,
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/_spark_like/utils.py b/venv/lib/python3.8/site-packages/narwhals/_spark_like/utils.py
new file mode 100644
index 0000000..95fdc96
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_spark_like/utils.py
@@ -0,0 +1,285 @@
+from __future__ import annotations
+
+from functools import lru_cache
+from importlib import import_module
+from typing import TYPE_CHECKING, Any, overload
+
+from narwhals._utils import Implementation, isinstance_or_issubclass
+from narwhals.exceptions import UnsupportedDTypeError
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import sqlframe.base.types as sqlframe_types
+ from sqlframe.base.column import Column
+ from sqlframe.base.session import _BaseSession as Session
+ from typing_extensions import TypeAlias
+
+ from narwhals._spark_like.dataframe import SparkLikeLazyFrame
+ from narwhals._spark_like.expr import SparkLikeExpr
+ from narwhals._utils import Version
+ from narwhals.dtypes import DType
+ from narwhals.typing import IntoDType
+
+ _NativeDType: TypeAlias = sqlframe_types.DataType
+ SparkSession = Session[Any, Any, Any, Any, Any, Any, Any]
+
# Short duration-unit aliases (as used in narwhals APIs) -> spelled-out
# unit names understood by Spark's datetime functions.
UNITS_DICT = {
    "y": "year",
    "q": "quarter",
    "mo": "month",
    "d": "day",
    "h": "hour",
    "m": "minute",
    "s": "second",
    "ms": "millisecond",
    "us": "microsecond",
    "ns": "nanosecond",
}

# see https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
# and https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
# Python strptime directives -> Spark datetime-pattern letters; used by
# `strptime_to_pyspark_format` via straight substring replacement.
DATETIME_PATTERNS_MAPPING = {
    "%Y": "yyyy",  # Year with century (4 digits)
    "%y": "yy",  # Year without century (2 digits)
    "%m": "MM",  # Month (01-12)
    "%d": "dd",  # Day of the month (01-31)
    "%H": "HH",  # Hour (24-hour clock) (00-23)
    "%I": "hh",  # Hour (12-hour clock) (01-12)
    "%M": "mm",  # Minute (00-59)
    "%S": "ss",  # Second (00-59)
    "%f": "S",  # Microseconds -> Milliseconds
    "%p": "a",  # AM/PM
    "%a": "E",  # Abbreviated weekday name
    "%A": "E",  # Full weekday name
    "%j": "D",  # Day of the year
    "%z": "Z",  # Timezone offset
    "%s": "X",  # Unix timestamp
}
+
+
# NOTE: don't lru_cache this as `ModuleType` isn't hashable
def native_to_narwhals_dtype(  # noqa: C901, PLR0912
    dtype: _NativeDType, version: Version, spark_types: ModuleType, session: SparkSession
) -> DType:
    """Convert a native Spark/SQLFrame data type into a narwhals `DType`.

    Nested types (`ArrayType`, `StructType`) are converted recursively;
    anything unrecognised maps to `Unknown`.
    """
    dtypes = version.dtypes
    # At type-check time pretend the module is sqlframe's so attribute
    # access is checked; at runtime use whichever module was passed in.
    if TYPE_CHECKING:
        native = sqlframe_types
    else:
        native = spark_types

    if isinstance(dtype, native.DoubleType):
        return dtypes.Float64()
    if isinstance(dtype, native.FloatType):
        return dtypes.Float32()
    if isinstance(dtype, native.LongType):
        return dtypes.Int64()
    if isinstance(dtype, native.IntegerType):
        return dtypes.Int32()
    if isinstance(dtype, native.ShortType):
        return dtypes.Int16()
    if isinstance(dtype, native.ByteType):
        return dtypes.Int8()
    if isinstance(dtype, (native.StringType, native.VarcharType, native.CharType)):
        return dtypes.String()
    if isinstance(dtype, native.BooleanType):
        return dtypes.Boolean()
    if isinstance(dtype, native.DateType):
        return dtypes.Date()
    # NTZ must be tested before TimestampType so naive timestamps don't pick
    # up the session time zone.
    if isinstance(dtype, native.TimestampNTZType):
        # TODO(marco): cover this
        return dtypes.Datetime()  # pragma: no cover
    if isinstance(dtype, native.TimestampType):
        return dtypes.Datetime(time_zone=fetch_session_time_zone(session))
    if isinstance(dtype, native.DecimalType):
        # TODO(marco): cover this
        return dtypes.Decimal()  # pragma: no cover
    if isinstance(dtype, native.ArrayType):
        return dtypes.List(
            inner=native_to_narwhals_dtype(
                dtype.elementType, version, spark_types, session
            )
        )
    if isinstance(dtype, native.StructType):
        return dtypes.Struct(
            fields=[
                dtypes.Field(
                    name=field.name,
                    dtype=native_to_narwhals_dtype(
                        field.dataType, version, spark_types, session
                    ),
                )
                for field in dtype
            ]
        )
    if isinstance(dtype, native.BinaryType):
        return dtypes.Binary()
    return dtypes.Unknown()  # pragma: no cover
+
+
@lru_cache(maxsize=4)
def fetch_session_time_zone(session: SparkSession) -> str:
    """Return the session's configured time zone, or "<unknown>" when unavailable."""
    # Timezone can't be changed in PySpark session, so this can be cached.
    try:
        tz = session.conf.get("spark.sql.session.timeZone")  # type: ignore[attr-defined]
    except Exception:  # noqa: BLE001
        # https://github.com/eakmanrq/sqlframe/issues/406
        tz = "<unknown>"
    return tz
+
+
def narwhals_to_native_dtype(  # noqa: C901, PLR0912
    dtype: IntoDType, version: Version, spark_types: ModuleType
) -> _NativeDType:
    """Convert a narwhals dtype into the corresponding native Spark data type.

    Raises:
        ValueError: for a `Datetime` with a non-UTC time zone.
        UnsupportedDTypeError: for unsigned-integer, Enum, Categorical and
            Time dtypes, which this backend does not support.
    """
    dtypes = version.dtypes
    # At type-check time pretend the module is sqlframe's so attribute
    # access is checked; at runtime use whichever module was passed in.
    if TYPE_CHECKING:
        native = sqlframe_types
    else:
        native = spark_types

    if isinstance_or_issubclass(dtype, dtypes.Float64):
        return native.DoubleType()
    if isinstance_or_issubclass(dtype, dtypes.Float32):
        return native.FloatType()
    if isinstance_or_issubclass(dtype, dtypes.Int64):
        return native.LongType()
    if isinstance_or_issubclass(dtype, dtypes.Int32):
        return native.IntegerType()
    if isinstance_or_issubclass(dtype, dtypes.Int16):
        return native.ShortType()
    if isinstance_or_issubclass(dtype, dtypes.Int8):
        return native.ByteType()
    if isinstance_or_issubclass(dtype, dtypes.String):
        return native.StringType()
    if isinstance_or_issubclass(dtype, dtypes.Boolean):
        return native.BooleanType()
    if isinstance_or_issubclass(dtype, dtypes.Date):
        return native.DateType()
    if isinstance_or_issubclass(dtype, dtypes.Datetime):
        dt_time_zone = dtype.time_zone
        if dt_time_zone is None:
            # Naive datetimes map to the no-time-zone timestamp type.
            return native.TimestampNTZType()
        if dt_time_zone != "UTC":  # pragma: no cover
            msg = f"Only UTC time zone is supported for PySpark, got: {dt_time_zone}"
            raise ValueError(msg)
        return native.TimestampType()
    if isinstance_or_issubclass(dtype, (dtypes.List, dtypes.Array)):
        # Nested element types are converted recursively.
        return native.ArrayType(
            elementType=narwhals_to_native_dtype(
                dtype.inner, version=version, spark_types=native
            )
        )
    if isinstance_or_issubclass(dtype, dtypes.Struct):  # pragma: no cover
        return native.StructType(
            fields=[
                native.StructField(
                    name=field.name,
                    dataType=narwhals_to_native_dtype(
                        field.dtype, version=version, spark_types=native
                    ),
                )
                for field in dtype.fields
            ]
        )
    if isinstance_or_issubclass(dtype, dtypes.Binary):
        return native.BinaryType()

    if isinstance_or_issubclass(
        dtype,
        (
            dtypes.UInt64,
            dtypes.UInt32,
            dtypes.UInt16,
            dtypes.UInt8,
            dtypes.Enum,
            dtypes.Categorical,
            dtypes.Time,
        ),
    ):  # pragma: no cover
        msg = "Unsigned integer, Enum, Categorical and Time types are not supported by spark-like backend"
        raise UnsupportedDTypeError(msg)

    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
    raise AssertionError(msg)
+
+
def evaluate_exprs(
    df: SparkLikeLazyFrame, /, *exprs: SparkLikeExpr
) -> list[tuple[str, Column]]:
    """Evaluate each expression against `df`, pairing output names with columns.

    Aliases (from e.g. `.alias`/`.name`) are applied to the evaluated names.
    """
    results: list[tuple[str, Column]] = []
    for expr in exprs:
        native_series_list = expr._call(df)
        output_names = expr._evaluate_output_names(df)
        aliaser = expr._alias_output_names
        if aliaser is not None:
            output_names = aliaser(output_names)
        if len(output_names) != len(native_series_list):  # pragma: no cover
            msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results"
            raise AssertionError(msg)
        results.extend(zip(output_names, native_series_list))
    return results
+
+
def import_functions(implementation: Implementation, /) -> ModuleType:
    """Return the backend-native `functions` module for `implementation`."""
    if implementation is Implementation.PYSPARK:
        from pyspark.sql import functions as pyspark_functions

        return pyspark_functions
    if implementation is Implementation.PYSPARK_CONNECT:
        from pyspark.sql.connect import functions as connect_functions

        return connect_functions
    # SQLFrame: the module path depends on the session's execution dialect.
    from sqlframe.base.session import _BaseSession

    return import_module(f"sqlframe.{_BaseSession().execution_dialect_name}.functions")
+
+
def import_native_dtypes(implementation: Implementation, /) -> ModuleType:
    """Return the backend-native `types` module for `implementation`."""
    if implementation is Implementation.PYSPARK:
        from pyspark.sql import types as pyspark_types

        return pyspark_types
    if implementation is Implementation.PYSPARK_CONNECT:
        from pyspark.sql.connect import types as connect_types

        return connect_types
    # SQLFrame: the module path depends on the session's execution dialect.
    from sqlframe.base.session import _BaseSession

    return import_module(f"sqlframe.{_BaseSession().execution_dialect_name}.types")
+
+
def import_window(implementation: Implementation, /) -> type[Any]:
    """Return the backend-native `Window` class for `implementation`."""
    if implementation is Implementation.PYSPARK:
        from pyspark.sql import Window as PySparkWindow

        return PySparkWindow

    if implementation is Implementation.PYSPARK_CONNECT:
        from pyspark.sql.connect.window import Window as ConnectWindow

        return ConnectWindow
    # SQLFrame: the module path depends on the session's execution dialect.
    from sqlframe.base.session import _BaseSession

    window_module = import_module(
        f"sqlframe.{_BaseSession().execution_dialect_name}.window"
    )
    return window_module.Window
+
+
@overload
def strptime_to_pyspark_format(format: None) -> None: ...


@overload
def strptime_to_pyspark_format(format: str) -> str: ...


def strptime_to_pyspark_format(format: str | None) -> str | None:
    """Converts a Python strptime datetime format string to a PySpark datetime format string."""
    if format is None:  # pragma: no cover
        return None

    # Substitute each Python directive with its PySpark counterpart.
    result = format
    for py_directive, spark_directive in DATETIME_PATTERNS_MAPPING.items():
        result = result.replace(py_directive, spark_directive)
    # A literal "T" date/time separator becomes a space.
    return result.replace("T", " ")
diff --git a/venv/lib/python3.8/site-packages/narwhals/_translate.py b/venv/lib/python3.8/site-packages/narwhals/_translate.py
new file mode 100644
index 0000000..0c72396
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_translate.py
@@ -0,0 +1,185 @@
+"""[Protocols] defining conversion methods between representations.
+
+These come in 3 flavors and are [generic] to promote reuse.
+
+The following examples use the placeholder types `Narwhal` and `Other`:
+- `Narwhal`: some class written in `narwhals`.
+- `Other`: any other class, could be native, compliant, or a builtin.
+
+## `To<Other>`
+When we want to convert or unwrap a `Narwhal` into an `Other`,
+we provide an **instance** method:
+
+ ToOtherT_co = TypeVar("ToOtherT_co", covariant=True)
+
+ class ToOther(Protocol[ToOtherT_co]):
+ def to_other(self, *args: Any, **kwds: Any) -> ToOtherT_co: ...
+
+- `*args`, `**kwds` are defined to be *permissive* and allow a wider set of signatures when implementing.
+ - In most cases, they are unused.
+ - But come in handy when adapting an [upstream signature].
+- We use a **covariant** `TypeVar`.
+
+## `From<Other>`
+But what if we have `Other` and want to do the reverse?
+
+Our `Narwhal` will need to provide a `@classmethod`:
+
+ FromOtherT_contra = TypeVar("FromOtherT_contra", contravariant=True)
+
+ class FromOther(Protocol[FromOtherT_contra]):
+ @classmethod
+ def from_other(cls, data: FromOtherT_contra, *args: Any, **kwds: Any) -> Self: ...
+
+- `*args`, `**kwds` serve a similar purpose as before, but are much more frequently used.
+- We've added a **required** [positional-only] parameter `data` which will always be passed `Other`.
+ - This removes the name from the contract of the protocol.
+ - Implementations are free to use something more descriptive for documentation purposes.
+- We use a **contravariant** `TypeVar`.
+
+## `<Other>Convertible`
+Combining our `to_` and `from_` methods allows us to convert in both directions `Narwhal` <-> `Other`:
+
+ class OtherConvertible(
+ ToOther[ToOtherT_co],
+ FromOther[FromOtherT_contra],
+ Protocol[ToOtherT_co, FromOtherT_contra],
+ ): ...
+
+## See Also
+Variance of `TypeVar`(s) can be tricky to wrap your head around.
+
+To learn more see [moist], [dry], or [even drier] - depending on how deep you wanna go.
+
+[Protocols]: https://typing.python.org/en/latest/spec/protocol.html
+[generic]: https://typing.python.org/en/latest/spec/generics.html
+[upstream signature]: https://numpy.org/doc/stable/user/basics.interoperability.html#the-array-method
+[positional-only]: https://peps.python.org/pep-0570/
+[moist]: https://mypy.readthedocs.io/en/stable/generics.html#variance-of-generic-types
+[dry]: https://typing.python.org/en/latest/spec/generics.html#variance
+[even drier]: https://en.wikipedia.org/wiki/Covariance_and_contravariance_%28computer_science%29
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Iterable, Mapping, Protocol
+
+from narwhals._typing_compat import TypeVar
+
+if TYPE_CHECKING:
+ import pyarrow as pa
+ from typing_extensions import Self, TypeAlias, TypeIs
+
+
class ArrowStreamExportable(Protocol):
    """Exports data via the Arrow PyCapsule Interface (`__arrow_c_stream__`)."""

    def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
+
+
# numpy-conversion TypeVars: covariant on the produced type, contravariant
# on the accepted type (see the module docstring for the rationale).
ToNumpyT_co = TypeVar("ToNumpyT_co", covariant=True)
FromNumpyDT_contra = TypeVar(
    "FromNumpyDT_contra", contravariant=True, default=ToNumpyT_co
)
FromNumpyT_contra = TypeVar("FromNumpyT_contra", contravariant=True)


class ToNumpy(Protocol[ToNumpyT_co]):
    """Can be unwrapped into a numpy representation."""

    def to_numpy(self, *args: Any, **kwds: Any) -> ToNumpyT_co: ...


class FromNumpy(Protocol[FromNumpyT_contra]):
    """Can be constructed from a numpy representation."""

    @classmethod
    def from_numpy(cls, data: FromNumpyT_contra, *args: Any, **kwds: Any) -> Self: ...


class NumpyConvertible(
    ToNumpy[ToNumpyT_co],
    FromNumpy[FromNumpyDT_contra],
    Protocol[ToNumpyT_co, FromNumpyDT_contra],
):
    """Convertible both to and from numpy; narrows `to_numpy` to `(dtype, copy)`."""

    def to_numpy(self, dtype: Any, *, copy: bool | None) -> ToNumpyT_co: ...
+
+
FromIterableT_contra = TypeVar("FromIterableT_contra", contravariant=True, default=Any)


class FromIterable(Protocol[FromIterableT_contra]):
    """Can be constructed from an iterable of elements."""

    @classmethod
    def from_iterable(
        cls, data: Iterable[FromIterableT_contra], *args: Any, **kwds: Any
    ) -> Self: ...
+
+
# dict-conversion TypeVars, bounded to mappings with string keys.
ToDictDT_co = TypeVar(
    "ToDictDT_co", bound=Mapping[str, Any], covariant=True, default="dict[str, Any]"
)
FromDictDT_contra = TypeVar(
    "FromDictDT_contra",
    bound=Mapping[str, Any],
    contravariant=True,
    default=Mapping[str, Any],
)


class ToDict(Protocol[ToDictDT_co]):
    """Can be unwrapped into a mapping of column name to values."""

    def to_dict(self, *args: Any, **kwds: Any) -> ToDictDT_co: ...


class FromDict(Protocol[FromDictDT_contra]):
    """Can be constructed from a mapping of column name to values."""

    @classmethod
    def from_dict(cls, data: FromDictDT_contra, *args: Any, **kwds: Any) -> Self: ...


class DictConvertible(
    ToDict[ToDictDT_co],
    FromDict[FromDictDT_contra],
    Protocol[ToDictDT_co, FromDictDT_contra],
):
    """Convertible both to and from a mapping."""

    ...
+
+
IntoArrowTable: TypeAlias = "ArrowStreamExportable | pa.Table"
"""An object supporting the [Arrow PyCapsule Interface], or a native [`pyarrow.Table`].

[Arrow PyCapsule Interface]: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html#arrowstream-export
[`pyarrow.Table`]: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html
"""
# arrow-conversion TypeVars; `from_arrow` defaults to accepting `IntoArrowTable`.
ToArrowT_co = TypeVar("ToArrowT_co", covariant=True)
FromArrowDT_contra = TypeVar(
    "FromArrowDT_contra", contravariant=True, default=IntoArrowTable
)


class ToArrow(Protocol[ToArrowT_co]):
    """Can be unwrapped into an Arrow representation."""

    def to_arrow(self, *args: Any, **kwds: Any) -> ToArrowT_co: ...


class FromArrow(Protocol[FromArrowDT_contra]):
    """Can be constructed from an Arrow representation."""

    @classmethod
    def from_arrow(cls, data: FromArrowDT_contra, *args: Any, **kwds: Any) -> Self: ...


class ArrowConvertible(
    ToArrow[ToArrowT_co],
    FromArrow[FromArrowDT_contra],
    Protocol[ToArrowT_co, FromArrowDT_contra],
):
    """Convertible both to and from Arrow."""

    ...
+
+
FromNativeT = TypeVar("FromNativeT")


class FromNative(Protocol[FromNativeT]):
    """Can wrap a native (backend) object, with a matching runtime guard."""

    @classmethod
    def from_native(cls, data: FromNativeT, *args: Any, **kwds: Any) -> Self: ...
    @staticmethod
    def _is_native(obj: FromNativeT | Any, /) -> TypeIs[FromNativeT]:
        """Return `True` if `obj` can be passed to `from_native`."""
        ...


ToNarwhalsT_co = TypeVar("ToNarwhalsT_co", covariant=True)


class ToNarwhals(Protocol[ToNarwhalsT_co]):
    """Can be converted into the public narwhals representation."""

    def to_narwhals(self) -> ToNarwhalsT_co:
        """Convert into public representation."""
        ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/_typing_compat.py b/venv/lib/python3.8/site-packages/narwhals/_typing_compat.py
new file mode 100644
index 0000000..9e1194d
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_typing_compat.py
@@ -0,0 +1,76 @@
+"""Backward compatibility for newer/less buggy typing features.
+
+## Important
+Import from here to avoid introducing a runtime dependency on [`typing_extensions`]
+
+## Notes
+- `Protocol38`
+ - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386
+ - https://github.com/narwhals-dev/narwhals/pull/2294#discussion_r2014534830
+- `TypeVar` defaults
+ - https://typing.python.org/en/latest/spec/generics.html#type-parameter-defaults
+ - https://peps.python.org/pep-0696/
+- `@deprecated`
+ - https://docs.python.org/3/library/warnings.html#warnings.deprecated
+ - https://typing.python.org/en/latest/spec/directives.html#deprecated
+ - https://peps.python.org/pep-0702/
+
+[`typing_extensions`]: https://github.com/python/typing_extensions
+"""
+
+from __future__ import annotations
+
+# ruff: noqa: ARG001, ANN202, N802
+import sys
+from typing import TYPE_CHECKING, Any
+
if TYPE_CHECKING:
    # Type-checkers see the fully-featured implementations (PEP 696 / 702).
    from typing import Callable, Protocol as Protocol38

    if sys.version_info >= (3, 13):
        from typing import TypeVar
        from warnings import deprecated
    else:
        from typing_extensions import TypeVar, deprecated

    _Fn = TypeVar("_Fn", bound=Callable[..., Any])


else:  # pragma: no cover
    if sys.version_info >= (3, 13):
        from typing import TypeVar
        from warnings import deprecated
    else:
        from typing import TypeVar as _TypeVar

        # Runtime shim: plain `typing.TypeVar` doesn't understand PEP 696
        # keyword arguments, so extras such as `default=` are accepted via
        # `**kwds` but silently discarded (they only matter to type checkers).
        def TypeVar(
            name: str,
            *constraints: Any,
            bound: Any | None = None,
            covariant: bool = False,
            contravariant: bool = False,
            **kwds: Any,
        ):
            return _TypeVar(
                name,
                *constraints,
                bound=bound,
                covariant=covariant,
                contravariant=contravariant,
            )

        # Runtime shim: `@deprecated` is a no-op decorator.
        def deprecated(message: str, /) -> Callable[[_Fn], _Fn]:
            def wrapper(func: _Fn, /) -> _Fn:
                return func

            return wrapper

    # TODO @dangotbanned: Remove after dropping `3.8` (#2084)
    # - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386
    if sys.version_info >= (3, 9):
        from typing import Protocol as Protocol38
    else:
        from typing import Generic as Protocol38


__all__ = ["Protocol38", "TypeVar", "deprecated"]
diff --git a/venv/lib/python3.8/site-packages/narwhals/_utils.py b/venv/lib/python3.8/site-packages/narwhals/_utils.py
new file mode 100644
index 0000000..3d2c89a
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/_utils.py
@@ -0,0 +1,2010 @@
+from __future__ import annotations
+
+import os
+import re
+from datetime import timezone
+from enum import Enum, auto
+from functools import wraps
+from importlib.util import find_spec
+from inspect import getattr_static, getdoc
+from secrets import token_hex
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Container,
+ Generic,
+ Iterable,
+ Iterator,
+ Literal,
+ Protocol,
+ Sequence,
+ TypeVar,
+ Union,
+ cast,
+ overload,
+)
+from warnings import warn
+
+from narwhals._enum import NoAutoEnum
+from narwhals._typing_compat import deprecated
+from narwhals.dependencies import (
+ get_cudf,
+ get_dask,
+ get_dask_dataframe,
+ get_duckdb,
+ get_ibis,
+ get_modin,
+ get_pandas,
+ get_polars,
+ get_pyarrow,
+ get_pyspark,
+ get_pyspark_connect,
+ get_pyspark_sql,
+ get_sqlframe,
+ is_narwhals_series,
+ is_narwhals_series_int,
+ is_numpy_array_1d,
+ is_numpy_array_1d_int,
+ is_pandas_dataframe,
+ is_pandas_like_dataframe,
+ is_pandas_like_series,
+ is_pandas_series,
+ is_polars_series,
+ is_pyarrow_chunked_array,
+)
+from narwhals.exceptions import ColumnNotFoundError, DuplicateError, InvalidOperationError
+
+if TYPE_CHECKING:
+ from types import ModuleType
+ from typing import AbstractSet as Set
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import (
+ Concatenate,
+ LiteralString,
+ ParamSpec,
+ Self,
+ TypeAlias,
+ TypeIs,
+ )
+
+ from narwhals._compliant import (
+ CompliantExpr,
+ CompliantExprT,
+ CompliantFrameT,
+ CompliantSeriesOrNativeExprT_co,
+ CompliantSeriesT,
+ NativeFrameT_co,
+ NativeSeriesT_co,
+ )
+ from narwhals._compliant.typing import EvalNames
+ from narwhals._namespace import EagerAllowedImplementation, Namespace
+ from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.series import Series
+ from narwhals.typing import (
+ CompliantDataFrame,
+ CompliantLazyFrame,
+ CompliantSeries,
+ DataFrameLike,
+ DTypes,
+ IntoSeriesT,
+ MultiIndexSelector,
+ SingleIndexSelector,
+ SizedMultiIndexSelector,
+ SizeUnit,
+ SupportsNativeNamespace,
+ TimeUnit,
+ _1DArray,
+ _SliceIndex,
+ _SliceName,
+ _SliceNone,
+ )
+
+ FrameOrSeriesT = TypeVar(
+ "FrameOrSeriesT", bound=Union[LazyFrame[Any], DataFrame[Any], Series[Any]]
+ )
+
+ _T1 = TypeVar("_T1")
+ _T2 = TypeVar("_T2")
+ _T3 = TypeVar("_T3")
+ _Fn = TypeVar("_Fn", bound="Callable[..., Any]")
+ P = ParamSpec("P")
+ R = TypeVar("R")
+ R1 = TypeVar("R1")
+ R2 = TypeVar("R2")
+
    class _SupportsVersion(Protocol):
        # Any object exposing a `__version__` string (e.g. an imported module).
        __version__: str

    class _SupportsGet(Protocol):  # noqa: PYI046
        # Descriptor `__get__` protocol.
        def __get__(self, instance: Any, owner: Any | None = None, /) -> Any: ...

    class _StoresImplementation(Protocol):
        _implementation: Implementation
        """Implementation of native object (pandas, Polars, PyArrow, ...)."""

    class _StoresBackendVersion(Protocol):
        _backend_version: tuple[int, ...]
        """Version tuple for a native package."""

    class _StoresVersion(Protocol):
        _version: Version
        """Narwhals API version (V1 or MAIN)."""

    class _LimitedContext(_StoresBackendVersion, _StoresVersion, Protocol):
        """Provides 2 attributes.

        - `_backend_version`
        - `_version`
        """

    class _FullContext(_StoresImplementation, _LimitedContext, Protocol):
        """Provides 3 attributes.

        - `_implementation`
        - `_backend_version`
        - `_version`
        """

    class _StoresColumns(Protocol):
        @property
        def columns(self) -> Sequence[str]: ...
+
+
# Shared type variables and aliases for this module.
_T = TypeVar("_T")
NativeT_co = TypeVar("NativeT_co", covariant=True)
CompliantT_co = TypeVar("CompliantT_co", covariant=True)
_ContextT = TypeVar("_ContextT", bound="_FullContext")
# A method whose `self` provides the full context (implementation + versions).
_Method: TypeAlias = "Callable[Concatenate[_ContextT, P], R]"
_Constructor: TypeAlias = "Callable[Concatenate[_T, P], R2]"
+
class _StoresNative(Protocol[NativeT_co]):  # noqa: PYI046
    """Provides access to a native object.

    Native objects have types like:

    >>> from pandas import Series
    >>> from pyarrow import Table
    """

    @property
    def native(self) -> NativeT_co:
        """Return the native object."""
        ...
+
+
class _StoresCompliant(Protocol[CompliantT_co]):  # noqa: PYI046
    """Provides access to a compliant object.

    Compliant objects have types like:

    >>> from narwhals._pandas_like.series import PandasLikeSeries
    >>> from narwhals._arrow.dataframe import ArrowDataFrame
    """

    @property
    def compliant(self) -> CompliantT_co:
        """Return the compliant object."""
        ...
+
+
class Version(Enum):
    """Which Narwhals API surface an object belongs to: stable ``V1`` or ``MAIN``."""

    V1 = auto()
    MAIN = auto()

    @property
    def namespace(self) -> type[Namespace[Any]]:
        """Return the ``Namespace`` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1._namespace import Namespace

            return Namespace
        from narwhals._namespace import Namespace

        return Namespace

    @property
    def dtypes(self) -> DTypes:
        """Return the dtypes module for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import dtypes as v1_dtypes

            return v1_dtypes
        from narwhals import dtypes

        return dtypes

    @property
    def dataframe(self) -> type[DataFrame[Any]]:
        """Return the ``DataFrame`` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import DataFrame as DataFrameV1

            return DataFrameV1
        from narwhals.dataframe import DataFrame

        return DataFrame

    @property
    def lazyframe(self) -> type[LazyFrame[Any]]:
        """Return the ``LazyFrame`` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import LazyFrame as LazyFrameV1

            return LazyFrameV1
        from narwhals.dataframe import LazyFrame

        return LazyFrame

    @property
    def series(self) -> type[Series[Any]]:
        """Return the ``Series`` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import Series as SeriesV1

            return SeriesV1
        from narwhals.series import Series

        return Series
+
+
class Implementation(NoAutoEnum):
    """Implementation of native object (pandas, Polars, PyArrow, ...)."""

    PANDAS = "pandas"
    """pandas implementation."""
    MODIN = "modin"
    """Modin implementation."""
    CUDF = "cudf"
    """cuDF implementation."""
    PYARROW = "pyarrow"
    """PyArrow implementation."""
    PYSPARK = "pyspark"
    """PySpark implementation."""
    POLARS = "polars"
    """Polars implementation."""
    DASK = "dask"
    """Dask implementation."""
    DUCKDB = "duckdb"
    """DuckDB implementation."""
    IBIS = "ibis"
    """Ibis implementation."""
    SQLFRAME = "sqlframe"
    """SQLFrame implementation."""
    PYSPARK_CONNECT = "pyspark[connect]"
    """PySpark Connect implementation."""
    UNKNOWN = "unknown"
    """Unknown implementation."""

    def __str__(self) -> str:
        # Render as the backend name (the enum value), e.g. "pandas",
        # rather than "Implementation.PANDAS".
        return str(self.value)

    @classmethod
    def from_native_namespace(
        cls: type[Self], native_namespace: ModuleType
    ) -> Implementation:  # pragma: no cover
        """Instantiate Implementation object from a native namespace module.

        Arguments:
            native_namespace: Native namespace.

        Returns:
            Implementation.
        """
        # Keys are the backend modules as returned by `narwhals.dependencies`;
        # NOTE(review): presumably each `get_*` returns `None` when the package
        # is not installed, so missing backends collapse onto one harmless
        # `None` key -- confirm in `narwhals.dependencies`.
        mapping = {
            get_pandas(): Implementation.PANDAS,
            get_modin(): Implementation.MODIN,
            get_cudf(): Implementation.CUDF,
            get_pyarrow(): Implementation.PYARROW,
            get_pyspark_sql(): Implementation.PYSPARK,
            get_polars(): Implementation.POLARS,
            get_dask_dataframe(): Implementation.DASK,
            get_duckdb(): Implementation.DUCKDB,
            get_ibis(): Implementation.IBIS,
            get_sqlframe(): Implementation.SQLFRAME,
            get_pyspark_connect(): Implementation.PYSPARK_CONNECT,
        }
        return mapping.get(native_namespace, Implementation.UNKNOWN)

    @classmethod
    def from_string(
        cls: type[Self], backend_name: str
    ) -> Implementation:  # pragma: no cover
        """Instantiate Implementation object from a backend name string.

        Arguments:
            backend_name: Name of backend, expressed as string.

        Returns:
            Implementation, with `UNKNOWN` for unrecognised names.
        """
        try:
            return cls(backend_name)
        except ValueError:
            # Unrecognised names map to UNKNOWN rather than raising.
            return Implementation.UNKNOWN

    @classmethod
    def from_backend(
        cls: type[Self], backend: str | Implementation | ModuleType
    ) -> Implementation:
        """Instantiate from native namespace module, string, or Implementation.

        Arguments:
            backend: Backend to instantiate Implementation from.

        Returns:
            Implementation.
        """
        # Dispatch on the input kind: string name, already an Implementation,
        # or a native module.
        return (
            cls.from_string(backend)
            if isinstance(backend, str)
            else backend
            if isinstance(backend, Implementation)
            else cls.from_native_namespace(backend)
        )

    def to_native_namespace(self) -> ModuleType:  # noqa: C901, PLR0911
        """Return the native namespace module corresponding to Implementation.

        Returns:
            Native module.

        Raises:
            AssertionError: If called on an unsupported member (e.g. `UNKNOWN`).
        """
        if self is Implementation.PANDAS:
            import pandas as pd  # ignore-banned-import

            return pd
        if self is Implementation.MODIN:
            import modin.pandas

            return modin.pandas
        if self is Implementation.CUDF:  # pragma: no cover
            import cudf  # ignore-banned-import

            return cudf
        if self is Implementation.PYARROW:
            import pyarrow as pa  # ignore-banned-import

            return pa
        if self is Implementation.PYSPARK:  # pragma: no cover
            import pyspark.sql

            return pyspark.sql
        if self is Implementation.POLARS:
            import polars as pl  # ignore-banned-import

            return pl
        if self is Implementation.DASK:
            import dask.dataframe  # ignore-banned-import

            return dask.dataframe

        if self is Implementation.DUCKDB:
            import duckdb  # ignore-banned-import

            return duckdb

        if self is Implementation.SQLFRAME:
            import sqlframe  # ignore-banned-import

            return sqlframe

        if self is Implementation.IBIS:
            import ibis  # ignore-banned-import

            return ibis

        if self is Implementation.PYSPARK_CONNECT:  # pragma: no cover
            import pyspark.sql.connect  # ignore-banned-import

            return pyspark.sql.connect

        msg = "Not supported Implementation"  # pragma: no cover
        raise AssertionError(msg)

    def is_pandas(self) -> bool:
        """Return whether implementation is pandas.

        Returns:
            Boolean.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pandas()
            True
        """
        return self is Implementation.PANDAS

    def is_pandas_like(self) -> bool:
        """Return whether implementation is pandas, Modin, or cuDF.

        Returns:
            Boolean.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pandas_like()
            True
        """
        return self in {Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF}

    def is_spark_like(self) -> bool:
        """Return whether implementation is pyspark or sqlframe.

        Returns:
            Boolean.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_spark_like()
            False
        """
        return self in {
            Implementation.PYSPARK,
            Implementation.SQLFRAME,
            Implementation.PYSPARK_CONNECT,
        }

    def is_polars(self) -> bool:
        """Return whether implementation is Polars.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_polars()
            True
        """
        return self is Implementation.POLARS

    def is_cudf(self) -> bool:
        """Return whether implementation is cuDF.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_cudf()
            False
        """
        return self is Implementation.CUDF  # pragma: no cover

    def is_modin(self) -> bool:
        """Return whether implementation is Modin.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_modin()
            False
        """
        return self is Implementation.MODIN  # pragma: no cover

    def is_pyspark(self) -> bool:
        """Return whether implementation is PySpark.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyspark()
            False
        """
        return self is Implementation.PYSPARK  # pragma: no cover

    def is_pyspark_connect(self) -> bool:
        """Return whether implementation is PySpark Connect.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyspark_connect()
            False
        """
        return self is Implementation.PYSPARK_CONNECT  # pragma: no cover

    def is_pyarrow(self) -> bool:
        """Return whether implementation is PyArrow.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyarrow()
            False
        """
        return self is Implementation.PYARROW  # pragma: no cover

    def is_dask(self) -> bool:
        """Return whether implementation is Dask.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_dask()
            False
        """
        return self is Implementation.DASK  # pragma: no cover

    def is_duckdb(self) -> bool:
        """Return whether implementation is DuckDB.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_duckdb()
            False
        """
        return self is Implementation.DUCKDB  # pragma: no cover

    def is_ibis(self) -> bool:
        """Return whether implementation is Ibis.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_ibis()
            False
        """
        return self is Implementation.IBIS  # pragma: no cover

    def is_sqlframe(self) -> bool:
        """Return whether implementation is SQLFrame.

        Returns:
            Boolean.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_sqlframe()
            False
        """
        return self is Implementation.SQLFRAME  # pragma: no cover

    def _backend_version(self) -> tuple[int, ...]:
        """Return the parsed version of the installed backend package."""
        native = self.to_native_namespace()
        into_version: Any
        if self not in {
            Implementation.PYSPARK,
            Implementation.PYSPARK_CONNECT,
            Implementation.DASK,
            Implementation.SQLFRAME,
        }:
            # Most backends expose `__version__` on their top-level module.
            into_version = native
        elif self in {Implementation.PYSPARK, Implementation.PYSPARK_CONNECT}:
            into_version = get_pyspark()  # pragma: no cover
        elif self is Implementation.DASK:
            into_version = get_dask()
        else:
            # SQLFrame keeps `__version__` in a private `_version` module.
            import sqlframe._version

            into_version = sqlframe._version
        return parse_version(into_version)
+
+
# Oldest backend versions Narwhals supports, enforced by
# `validate_backend_version`. `UNKNOWN` has no entry here.
MIN_VERSIONS: dict[Implementation, tuple[int, ...]] = {
    Implementation.PANDAS: (0, 25, 3),
    Implementation.MODIN: (0, 25, 3),
    Implementation.CUDF: (24, 10),
    Implementation.PYARROW: (11,),
    Implementation.PYSPARK: (3, 5),
    Implementation.PYSPARK_CONNECT: (3, 5),
    Implementation.POLARS: (0, 20, 3),
    Implementation.DASK: (2024, 8),
    Implementation.DUCKDB: (1,),
    Implementation.IBIS: (6,),
    Implementation.SQLFRAME: (3, 22, 0),
}
+
+
def validate_backend_version(
    implementation: Implementation, backend_version: tuple[int, ...]
) -> None:
    """Raise ``ValueError`` if the installed backend is older than Narwhals supports."""
    min_version = MIN_VERSIONS[implementation]
    if backend_version >= min_version:
        return
    msg = f"Minimum version of {implementation} supported by Narwhals is {min_version}, found: {backend_version}"
    raise ValueError(msg)
+
+
def remove_prefix(text: str, prefix: str) -> str:  # pragma: no cover
    """Return ``text`` with ``prefix`` stripped from the front, if present."""
    return text[len(prefix) :] if text.startswith(prefix) else text
+
+
def remove_suffix(text: str, suffix: str) -> str:  # pragma: no cover
    """Return ``text`` with ``suffix`` stripped from the end, if present."""
    return text[: -len(suffix)] if text.endswith(suffix) else text
+
+
def flatten(args: Any) -> list[Any]:
    """Normalise ``*args``-style input: a lone iterable argument is unwrapped."""
    if len(args) == 1 and _is_iterable(args[0]):
        return list(args[0])
    return list(args)
+
+
def tupleify(arg: Any) -> Any:
    """Pass lists/tuples through; wrap anything else in a 1-tuple."""
    if isinstance(arg, (list, tuple)):
        return arg
    return (arg,)  # pragma: no cover
+
+
def _is_iterable(arg: Any | Iterable[Any]) -> bool:
    """Return whether `arg` should be treated as an iterable of values.

    Strings, bytes, and narwhals `Series` are *not* treated as iterables here.

    Raises:
        TypeError: If `arg` is a raw pandas or Polars object - a hint that the
            caller forgot `nw.from_native` (or used `pl.col` instead of `nw.col`).
    """
    from narwhals.series import Series

    if is_pandas_dataframe(arg) or is_pandas_series(arg):
        msg = f"Expected Narwhals class or scalar, got: {qualified_type_name(arg)!r}. Perhaps you forgot a `nw.from_native` somewhere?"
        raise TypeError(msg)
    # Polars is optional: only check if it is installed.
    if (pl := get_polars()) is not None and isinstance(
        arg, (pl.Series, pl.Expr, pl.DataFrame, pl.LazyFrame)
    ):
        msg = (
            f"Expected Narwhals class or scalar, got: {qualified_type_name(arg)!r}.\n\n"
            "Hint: Perhaps you\n"
            "- forgot a `nw.from_native` somewhere?\n"
            "- used `pl.col` instead of `nw.col`?"
        )
        raise TypeError(msg)

    return isinstance(arg, Iterable) and not isinstance(arg, (str, bytes, Series))
+
+
def parse_version(version: str | ModuleType | _SupportsVersion) -> tuple[int, ...]:
    """Simple version parser; split into a tuple of ints for comparison.

    Arguments:
        version: Version string, or object with a ``__version__``, to parse.

    Returns:
        Parsed version number.
    """
    # lifted from Polars
    raw = version if isinstance(version, str) else version.__version__
    # Strip pre-release suffixes, e.g. DuckDB's `-dev4108` and pandas'
    # `.dev0+618.gb552dc95c9`, before splitting on dots.
    cleaned = re.sub(r"(\D?dev.*$)", "", raw)
    parts = cleaned.split(".")
    return tuple(int(re.sub(r"\D", "", part)) for part in parts)
+
+
@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: type[_T]
) -> TypeIs[type[_T]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: type[_T]
) -> TypeIs[_T | type[_T]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2]]
) -> TypeIs[type[_T1 | _T2]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2]]
) -> TypeIs[_T1 | _T2 | type[_T1 | _T2]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3]]
) -> TypeIs[type[_T1 | _T2 | _T3]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3]]
) -> TypeIs[_T1 | _T2 | _T3 | type[_T1 | _T2 | _T3]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: Any, cls_or_tuple: tuple[type, ...]
) -> TypeIs[Any]: ...


def isinstance_or_issubclass(obj_or_cls: Any, cls_or_tuple: Any) -> bool:
    """Check `isinstance`, additionally accepting a class via `issubclass`.

    `DType` instances take the `isinstance`-only fast path.
    """
    from narwhals.dtypes import DType

    # NOTE(review): DType instances are special-cased before the generic
    # branch below; presumably this avoids the `isinstance(obj_or_cls, type)`
    # check interacting with DType's comparison semantics - confirm against
    # `narwhals.dtypes`.
    if isinstance(obj_or_cls, DType):
        return isinstance(obj_or_cls, cls_or_tuple)
    return isinstance(obj_or_cls, cls_or_tuple) or (
        isinstance(obj_or_cls, type) and issubclass(obj_or_cls, cls_or_tuple)
    )
+
+
def validate_laziness(items: Iterable[Any]) -> None:
    """Ensure `items` are all eager (`DataFrame`) or all lazy (`LazyFrame`)."""
    from narwhals.dataframe import DataFrame, LazyFrame

    all_eager = all(isinstance(item, DataFrame) for item in items)
    all_lazy = all(isinstance(item, LazyFrame) for item in items)
    if not (all_eager or all_lazy):
        msg = f"The items to concatenate should either all be eager, or all lazy, got: {[type(item) for item in items]}"
        raise TypeError(msg)
+
+
def maybe_align_index(
    lhs: FrameOrSeriesT, rhs: Series[Any] | DataFrame[Any] | LazyFrame[Any]
) -> FrameOrSeriesT:
    """Align `lhs` to the Index of `rhs`, if they're both pandas-like.

    Arguments:
        lhs: Dataframe or Series.
        rhs: Dataframe or Series to align with.

    Returns:
        Same type as input.

    Raises:
        ValueError: If either index is non-unique, or if the inputs are not
            pandas-like and their lengths differ.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this only checks that `lhs` and `rhs`
        are the same length.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
        >>> s_pd = pd.Series([6, 7], index=[4, 3])
        >>> df = nw.from_native(df_pd)
        >>> s = nw.from_native(s_pd, series_only=True)
        >>> nw.to_native(nw.maybe_align_index(df, s))
           a
        4  2
        3  1
    """
    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
    from narwhals._pandas_like.series import PandasLikeSeries

    def _validate_index(index: Any) -> None:
        # Reindexing via `.loc` is only well-defined for unique indices.
        if not index.is_unique:
            msg = "given index doesn't have a unique index"
            raise ValueError(msg)

    def _pandas_like_compliant(obj: Any) -> Any:
        # Return the pandas-like compliant frame/series backing `obj`, or None.
        # A narwhals object carries either `_compliant_frame` or
        # `_compliant_series`; any other backend yields None here.
        frame = getattr(obj, "_compliant_frame", None)
        if isinstance(frame, PandasLikeDataFrame):
            return frame
        series = getattr(obj, "_compliant_series", None)
        if isinstance(series, PandasLikeSeries):
            return series
        return None

    lhs_any = cast("Any", lhs)
    rhs_any = cast("Any", rhs)
    # The previous implementation spelled out the four frame/series
    # combinations with identical logic; one helper covers them all.
    lhs_compliant = _pandas_like_compliant(lhs_any)
    rhs_compliant = _pandas_like_compliant(rhs_any)
    if lhs_compliant is not None and rhs_compliant is not None:
        _validate_index(lhs_compliant.native.index)
        _validate_index(rhs_compliant.native.index)
        return lhs_any._with_compliant(
            lhs_compliant._with_native(
                lhs_compliant.native.loc[rhs_compliant.native.index]
            )
        )
    # Non-pandas-like: the best we can do is check the lengths agree.
    if len(lhs_any) != len(rhs_any):
        msg = f"Expected `lhs` and `rhs` to have the same length, got {len(lhs_any)} and {len(rhs_any)}"
        raise ValueError(msg)
    return lhs
+
+
def maybe_get_index(obj: DataFrame[Any] | LazyFrame[Any] | Series[Any]) -> Any | None:
    """Get the index of a DataFrame or a Series, if it's pandas-like.

    Arguments:
        obj: Dataframe or Series.

    Returns:
        The native index for pandas-like input, otherwise `None`.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this returns `None`.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
        >>> df = nw.from_native(df_pd)
        >>> nw.maybe_get_index(df)
        RangeIndex(start=0, stop=2, step=1)
        >>> series_pd = pd.Series([1, 2])
        >>> series = nw.from_native(series_pd, series_only=True)
        >>> nw.maybe_get_index(series)
        RangeIndex(start=0, stop=2, step=1)
    """
    native = cast("Any", obj).to_native()
    is_pandas_like = is_pandas_like_dataframe(native) or is_pandas_like_series(native)
    return native.index if is_pandas_like else None
+
+
def maybe_set_index(
    obj: FrameOrSeriesT,
    column_names: str | list[str] | None = None,
    *,
    index: Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None = None,
) -> FrameOrSeriesT:
    """Set the index of a DataFrame or a Series, if it's pandas-like.

    Arguments:
        obj: object for which maybe set the index (can be either a Narwhals `DataFrame`
            or `Series`).
        column_names: name or list of names of the columns to set as index.
            For dataframes, only one of `column_names` and `index` can be specified but
            not both. If `column_names` is passed and `df` is a Series, then a
            `ValueError` is raised.
        index: series or list of series to set as index.

    Returns:
        Same type as input.

    Raises:
        ValueError: If one of the following conditions happens

            - none of `column_names` and `index` are provided
            - both `column_names` and `index` are provided
            - `column_names` is provided and `df` is a Series

    Notes:
        This is only really intended for backwards-compatibility purposes, for example if
        your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.

        For non-pandas-like inputs, this is a no-op.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_set_index(df, "b"))  # doctest: +NORMALIZE_WHITESPACE
           a
        b
        4  1
        5  2
    """
    from narwhals.translate import to_native

    df_any = cast("Any", obj)
    native_obj = df_any.to_native()

    # Validate mutually-exclusive arguments before touching the data.
    if column_names is not None and index is not None:
        msg = "Only one of `column_names` or `index` should be provided"
        raise ValueError(msg)

    if not column_names and index is None:
        msg = "Either `column_names` or `index` should be provided"
        raise ValueError(msg)

    if index is not None:
        # Unwrap narwhals Series to their native counterparts; `pass_through`
        # leaves anything that is not a narwhals object untouched.
        keys = (
            [to_native(idx, pass_through=True) for idx in index]
            if _is_iterable(index)
            else to_native(index, pass_through=True)
        )
    else:
        keys = column_names

    if is_pandas_like_dataframe(native_obj):
        return df_any._with_compliant(
            df_any._compliant_frame._with_native(native_obj.set_index(keys))
        )
    elif is_pandas_like_series(native_obj):
        from narwhals._pandas_like.utils import set_index

        # A Series has no columns, so `column_names` cannot apply here.
        if column_names:
            msg = "Cannot set index using column names on a Series"
            raise ValueError(msg)

        native_obj = set_index(
            native_obj,
            keys,
            implementation=obj._compliant_series._implementation,  # type: ignore[union-attr]
            backend_version=obj._compliant_series._backend_version,  # type: ignore[union-attr]
        )
        return df_any._with_compliant(df_any._compliant_series._with_native(native_obj))
    else:
        # Non-pandas-like input: no Index concept, so this is a no-op.
        return df_any
+
+
def maybe_reset_index(obj: FrameOrSeriesT) -> FrameOrSeriesT:
    """Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.

    Arguments:
        obj: Dataframe or Series.

    Returns:
        Same type as input.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already resets the index for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this is a no-op.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7]))
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_reset_index(df))
           a  b
        0  1  4
        1  2  5
        >>> series_pd = pd.Series([1, 2])
        >>> series = nw.from_native(series_pd, series_only=True)
        >>> nw.maybe_get_index(series)
        RangeIndex(start=0, stop=2, step=1)
    """
    obj_any = cast("Any", obj)
    native_obj = obj_any.to_native()
    if is_pandas_like_dataframe(native_obj):
        compliant = obj_any._compliant_frame
    elif is_pandas_like_series(native_obj):
        compliant = obj_any._compliant_series
    else:
        # Not pandas-like: nothing to reset.
        return obj_any
    # Skip the copy when the index is already the trivial 0..n-1 range.
    if _has_default_index(native_obj, obj_any.__native_namespace__()):
        return obj_any
    return obj_any._with_compliant(
        compliant._with_native(native_obj.reset_index(drop=True))
    )
+
+
+def _is_range_index(obj: Any, native_namespace: Any) -> TypeIs[pd.RangeIndex]:
+ return isinstance(obj, native_namespace.RangeIndex)
+
+
def _has_default_index(
    native_frame_or_series: pd.Series[Any] | pd.DataFrame, native_namespace: Any
) -> bool:
    """Return whether the index is the trivial `RangeIndex(0, len, 1)`."""
    index = native_frame_or_series.index
    if not _is_range_index(index, native_namespace):
        return False
    return index.start == 0 and index.step == 1 and index.stop == len(index)
+
+
def maybe_convert_dtypes(
    obj: FrameOrSeriesT, *args: bool, **kwargs: bool | str
) -> FrameOrSeriesT:
    """Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like.

    Arguments:
        obj: DataFrame or Series.
        *args: Additional arguments which gets passed through.
        **kwargs: Additional arguments which gets passed through.

    Returns:
        Same type as input.

    Notes:
        For non-pandas-like inputs, this is a no-op.
        Also, `args` and `kwargs` just get passed down to the underlying library as-is.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> import numpy as np
        >>> df_pd = pd.DataFrame(
        ...     {
        ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        ...         "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
        ...     }
        ... )
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(
        ...     nw.maybe_convert_dtypes(df)
        ... ).dtypes  # doctest: +NORMALIZE_WHITESPACE
        a          Int32
        b        boolean
        dtype: object
    """
    obj_any = cast("Any", obj)
    native_obj = obj_any.to_native()
    if is_pandas_like_dataframe(native_obj):
        compliant = obj_any._compliant_frame
    elif is_pandas_like_series(native_obj):
        compliant = obj_any._compliant_series
    else:
        # Non-pandas-like input: nothing to convert.
        return obj_any
    return obj_any._with_compliant(
        compliant._with_native(native_obj.convert_dtypes(*args, **kwargs))
    )
+
+
def scale_bytes(sz: int, unit: SizeUnit) -> int | float:
    """Scale size in bytes to other size units (eg: "kb", "mb", "gb", "tb").

    Arguments:
        sz: original size in bytes
        unit: size unit to convert into

    Returns:
        Integer or float.

    Raises:
        ValueError: If `unit` is not a recognised size unit.
    """
    # Each unit maps to its power of 1024; bytes stay unscaled.
    exponents = {
        "b": 0,
        "bytes": 0,
        "kb": 1,
        "kilobytes": 1,
        "mb": 2,
        "megabytes": 2,
        "gb": 3,
        "gigabytes": 3,
        "tb": 4,
        "terabytes": 4,
    }
    if unit not in exponents:
        msg = f"`unit` must be one of {{'b', 'kb', 'mb', 'gb', 'tb'}}, got {unit!r}"
        raise ValueError(msg)
    exponent = exponents[unit]
    return sz if exponent == 0 else sz / 1024**exponent
+
+
def is_ordered_categorical(series: Series[Any]) -> bool:
    """Return whether indices of categories are semantically meaningful.

    This is a convenience function to accessing what would otherwise be
    the `is_ordered` property from the DataFrame Interchange Protocol,
    see https://data-apis.org/dataframe-protocol/latest/API.html.

    - For Polars:
      - Enums are always ordered.
      - Categoricals are ordered if `dtype.ordering == "physical"`.
    - For pandas-like APIs:
      - Categoricals are ordered if `dtype.cat.ordered == True`.
    - For PyArrow table:
      - Categoricals are ordered if `dtype.type.ordered == True`.

    Arguments:
        series: Input Series.

    Returns:
        Whether the Series is an ordered categorical.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = ["x", "y"]
        >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
        >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))

        Let's define a library-agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return nw.is_ordered_categorical(s)

        Then, we can pass any supported library to `func`:

        >>> func(s_pd)
        True
        >>> func(s_pl)
        True
    """
    from narwhals._interchange.series import InterchangeSeries

    dtypes = series._compliant_series._version.dtypes
    compliant = series._compliant_series
    # If it doesn't match any branches, let's just play it safe and return False.
    result: bool = False
    if isinstance(compliant, InterchangeSeries) and isinstance(
        series.dtype, dtypes.Categorical
    ):
        # Interchange-level series: read the protocol's categorical metadata.
        result = compliant.native.describe_categorical["is_ordered"]
    elif series.dtype == dtypes.Enum:
        # Enums are ordered by definition.
        result = True
    elif series.dtype != dtypes.Categorical:
        result = False
    else:
        # Categorical: inspect the native object, per-backend.
        native = series.to_native()
        if is_polars_series(native):
            result = cast("pl.Categorical", native.dtype).ordering == "physical"
        elif is_pandas_like_series(native):
            result = bool(native.cat.ordered)
        elif is_pyarrow_chunked_array(native):
            from narwhals._arrow.utils import is_dictionary

            result = is_dictionary(native.type) and native.type.ordered
    return result
+
+
def generate_unique_token(
    n_bytes: int, columns: Sequence[str]
) -> str:  # pragma: no cover
    """Deprecated alias for `generate_temporary_column_name`."""
    issue_deprecation_warning(
        "Use `generate_temporary_column_name` instead. `generate_unique_token` is "
        "deprecated and it will be removed in future versions",
        _version="1.13.0",
    )
    return generate_temporary_column_name(n_bytes=n_bytes, columns=columns)
+
+
def generate_temporary_column_name(n_bytes: int, columns: Sequence[str]) -> str:
    """Generates a unique column name that is not present in the given list of columns.

    Candidates come from [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex),
    each being the hex encoding of `n_bytes` random bytes.

    Arguments:
        n_bytes: The number of bytes to generate for the token.
        columns: The list of columns to check for uniqueness.

    Returns:
        A unique token that is not present in the given list of columns.

    Raises:
        AssertionError: If a unique token cannot be generated after 100 attempts.
    """
    # 101 attempts mirrors the previous `while`-loop with its `counter > 100` cutoff.
    for _ in range(101):
        candidate = token_hex(n_bytes)
        if candidate not in columns:
            return candidate
    msg = (
        "Internal Error: Narwhals was not able to generate a column name with "
        f"{n_bytes=} and not in {columns}"
    )
    raise AssertionError(msg)
+
+
def parse_columns_to_drop(
    frame: _StoresColumns, subset: Iterable[str], /, *, strict: bool
) -> list[str]:
    """Resolve `subset` into the column names to drop from `frame`.

    In strict mode, raise when a requested name is missing; otherwise
    silently keep only the names actually present.
    """
    if strict:
        requested = list(subset)
        error = check_columns_exist(requested, available=frame.columns)
        if error is not None:
            raise error
        return requested
    return list(set(frame.columns).intersection(subset))
+
+
def is_sequence_but_not_str(sequence: Sequence[_T] | Any) -> TypeIs[Sequence[_T]]:
    """`str` is technically a `Sequence`; treat it as scalar-like instead."""
    if isinstance(sequence, str):
        return False
    return isinstance(sequence, Sequence)
+
+
def is_slice_none(obj: Any) -> TypeIs[_SliceNone]:
    """Detect the "select everything" slice, i.e. `[:]`."""
    if not isinstance(obj, slice):
        return False
    return obj == slice(None, None, None)
+
+
def is_sized_multi_index_selector(
    obj: Any,
) -> TypeIs[SizedMultiIndexSelector[Series[Any] | CompliantSeries[Any]]]:
    """Positional selector holding many indices: int sequence, int array, or int series."""
    if is_sequence_but_not_str(obj):
        # Sniff only the first element; an empty sequence still qualifies.
        if len(obj) == 0 or isinstance(obj[0], int):
            return True
    return (
        is_numpy_array_1d_int(obj)
        or is_narwhals_series_int(obj)
        or is_compliant_series_int(obj)
    )
+
+
def is_sequence_like(
    obj: Sequence[_T] | Any,
) -> TypeIs[Sequence[_T] | Series[Any] | _1DArray]:
    """Anything 1D-like: a (non-str) sequence, 1D numpy array, or series object."""
    predicates = (
        is_sequence_but_not_str,
        is_numpy_array_1d,
        is_narwhals_series,
        is_compliant_series,
    )
    return any(predicate(obj) for predicate in predicates)
+
+
def is_slice_index(obj: Any) -> TypeIs[_SliceIndex]:
    """A positional slice: an int bound, or an int step with both bounds open."""
    if not isinstance(obj, slice):
        return False
    if isinstance(obj.start, int) or isinstance(obj.stop, int):
        return True
    return isinstance(obj.step, int) and obj.start is None and obj.stop is None
+
+
def is_range(obj: Any) -> TypeIs[range]:
    """Plain `isinstance` guard, kept as a function for a uniform selector API."""
    return isinstance(obj, range)
+
+
def is_single_index_selector(obj: Any) -> TypeIs[SingleIndexSelector]:
    """A lone integer index; `bool` is excluded despite being an `int` subclass."""
    if isinstance(obj, bool):
        return False
    return isinstance(obj, int)
+
+
def is_index_selector(
    obj: Any,
) -> TypeIs[SingleIndexSelector | MultiIndexSelector[Series[Any] | CompliantSeries[Any]]]:
    """Union of the positional guards: single int, sized collection, or int slice."""
    checks = (
        is_single_index_selector,
        is_sized_multi_index_selector,
        is_slice_index,
    )
    return any(check(obj) for check in checks)
+
+
def is_list_of(obj: Any, tp: type[_T]) -> TypeIs[list[_T]]:
    """Check if an object is a non-empty list of `tp`, only sniffing the first element."""
    if not isinstance(obj, list) or not obj:
        return False
    return isinstance(obj[0], tp)
+
+
def is_sequence_of(obj: Any, tp: type[_T]) -> TypeIs[Sequence[_T]]:
    """Check if an object is a sequence of `tp`, only sniffing the first element.

    Returns False for empty sequences and for `str`.

    Note: the previous implementation tested the first element for *truthiness*
    (`(first := next(iter(obj), None)) and isinstance(first, tp)`), so a falsy
    first element of the right type — e.g. `0` for `int` or `""` for `str` —
    was wrongly rejected. A sentinel distinguishes "empty" from "falsy" instead.
    """
    if not is_sequence_but_not_str(obj):
        return False
    sentinel = object()
    first = next(iter(obj), sentinel)
    return first is not sentinel and isinstance(first, tp)
+
+
def find_stacklevel() -> int:
    """Find the first place in the stack that is not inside narwhals.

    Returns:
        Stacklevel.

    Taken from:
        https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
    """
    # Lazy imports: this only runs when a warning is actually emitted.
    import inspect
    from pathlib import Path

    import narwhals as nw

    pkg_dir = str(Path(nw.__file__).parent)

    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame = inspect.currentframe()
    n = 0
    try:
        # Walk outward until the first frame that lives outside the narwhals
        # package (and is not a singledispatch wrapper); `n` counts the hops.
        while frame:
            fname = inspect.getfile(frame)
            if fname.startswith(pkg_dir) or (
                # `co_qualname` may be absent on older Pythons — TODO confirm;
                # `getattr` with a None default keeps the check safe either way.
                (qualname := getattr(frame.f_code, "co_qualname", None))
                # ignore @singledispatch wrappers
                and qualname.startswith("singledispatch.")
            ):
                frame = frame.f_back
                n += 1
            else:  # pragma: no cover
                break
        else:  # pragma: no cover
            pass
    finally:
        # https://docs.python.org/3/library/inspect.html
        # > Though the cycle detector will catch these, destruction of the frames
        # > (and local variables) can be made deterministic by removing the cycle
        # > in a finally clause.
        del frame
    return n
+
+
def issue_deprecation_warning(message: str, _version: str) -> None:
    """Issue a deprecation warning.

    Arguments:
        message: The message associated with the warning.
        _version: Narwhals version when the warning was introduced. Just used for internal
            bookkeeping.
    """
    # Point the warning at the first caller outside narwhals.
    stacklevel = find_stacklevel()
    warn(message=message, category=DeprecationWarning, stacklevel=stacklevel)
+
+
def validate_strict_and_pass_though(
    strict: bool | None,  # noqa: FBT001
    pass_through: bool | None,  # noqa: FBT001
    *,
    pass_through_default: bool,
    emit_deprecation_warning: bool,
) -> bool:
    """Reconcile the deprecated `strict` flag with its replacement `pass_through`.

    Raises:
        ValueError: If both flags are provided at once.
    """
    if strict is not None and pass_through is not None:
        msg = "Cannot pass both `strict` and `pass_through`"
        raise ValueError(msg)
    if strict is None:
        return pass_through_default if pass_through is None else pass_through
    # Only `strict` was given: translate it, optionally warning about the deprecation.
    if emit_deprecation_warning:
        msg = (
            "`strict` in `from_native` is deprecated, please use `pass_through` instead.\n\n"
            "Note: `strict` will remain available in `narwhals.stable.v1`.\n"
            "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
        )
        issue_deprecation_warning(msg, _version="1.13.0")
    return not strict
+
+
def deprecate_native_namespace(
    *, warn_version: str = "", required: bool = False
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Decorator to transition from `native_namespace` to `backend` argument.

    Arguments:
        warn_version: Emit a deprecation warning from this version.
        required: Raise when both `native_namespace`, `backend` are `None`.

    Returns:
        Wrapped function, with `native_namespace` **removed**.
    """

    def decorate(fn: Callable[P, R], /) -> Callable[P, R]:
        @wraps(fn)
        def wrapper(*args: P.args, **kwds: P.kwargs) -> R:
            backend = kwds.pop("backend", None)
            native_namespace = kwds.pop("native_namespace", None)
            if native_namespace is not None:
                if backend is not None:
                    msg = "Can't pass both `native_namespace` and `backend`"
                    raise ValueError(msg)
                if warn_version:
                    msg = (
                        "`native_namespace` is deprecated, please use `backend` instead.\n\n"
                        "Note: `native_namespace` will remain available in `narwhals.stable.v1`.\n"
                        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
                    )
                    issue_deprecation_warning(msg, _version=warn_version)
                backend = native_namespace
            elif backend is None and required:
                msg = f"`backend` must be specified in `{fn.__name__}`."
                raise ValueError(msg)
            # Forward under the new name only.
            kwds["backend"] = backend
            return fn(*args, **kwds)

        return wrapper

    return decorate
+
+
def _validate_rolling_arguments(
    window_size: int, min_samples: int | None
) -> tuple[int, int]:
    """Validate rolling-window parameters.

    Returns:
        `(window_size, min_samples)`, with `min_samples` defaulting to
        `window_size` when not provided.
    """
    ensure_type(window_size, int, param_name="window_size")
    ensure_type(min_samples, int, type(None), param_name="min_samples")

    if window_size < 1:
        msg = "window_size must be greater or equal than 1"
        raise ValueError(msg)

    if min_samples is None:
        return window_size, window_size

    if min_samples < 1:
        msg = "min_samples must be greater or equal than 1"
        raise ValueError(msg)
    if min_samples > window_size:
        msg = "`min_samples` must be less or equal than `window_size`"
        raise InvalidOperationError(msg)
    return window_size, min_samples
+
+
def generate_repr(header: str, native_repr: str) -> str:
    """Frame `native_repr` in a box with `header` centered at the top.

    Falls back to a fixed 39-column placeholder box when the native repr is
    too wide for the current terminal.

    Note: the previous implementation crashed with `ValueError` on an empty
    `native_repr` (`max()` over no lines); `default=0` guards that case.
    """
    try:
        terminal_width = os.get_terminal_size().columns
    except OSError:
        terminal_width = int(os.getenv("COLUMNS", 80))  # noqa: PLW1508
    native_lines = native_repr.expandtabs().splitlines()
    # `default=0` handles an empty native repr (no lines at all).
    max_native_width = max((len(line) for line in native_lines), default=0)

    if max_native_width + 2 <= terminal_width:
        length = max(max_native_width, len(header))
        lines = [f"┌{'─' * length}┐"]
        header_extra = length - len(header)
        left, right = header_extra // 2, header_extra // 2 + header_extra % 2
        lines.append(f"|{' ' * left}{header}{' ' * right}|")
        lines.append(f"|{'-' * length}|")
        start_extra = (length - max_native_width) // 2
        end_extra = start_extra + (length - max_native_width) % 2
        for line in native_lines:
            pad = end_extra + max_native_width - len(line)
            lines.append(f"|{' ' * start_extra}{line}{' ' * pad}|")
        lines.append(f"└{'─' * length}┘")
        return "\n".join(lines)

    diff = 39 - len(header)
    return (
        f"┌{'─' * (39)}┐\n"
        f"|{' ' * (diff // 2)}{header}{' ' * (diff // 2 + diff % 2)}|\n"
        "| Use `.to_native` to see native output |\n└"
        f"{'─' * 39}┘"
    )
+
+
def check_columns_exist(
    subset: Sequence[str], /, *, available: Sequence[str]
) -> ColumnNotFoundError | None:
    """Return an error describing names in `subset` absent from `available`, else None."""
    missing = set(subset) - set(available)
    if not missing:
        return None
    return ColumnNotFoundError.from_missing_and_available_column_names(
        missing, available
    )
+
+
def check_column_names_are_unique(columns: Sequence[str]) -> None:
    """Raise `DuplicateError` listing each repeated column name and its count."""
    if len(set(columns)) == len(columns):
        return
    # Only build the detailed report when duplicates actually exist.
    from collections import Counter

    repeated = {name: count for name, count in Counter(columns).items() if count > 1}
    details = "".join(f"\n- '{k}' {v} times" for k, v in repeated.items())
    msg = f"Expected unique column names, got:{details}"
    raise DuplicateError(msg)
+
+
+def _parse_time_unit_and_time_zone(
+ time_unit: TimeUnit | Iterable[TimeUnit] | None,
+ time_zone: str | timezone | Iterable[str | timezone | None] | None,
+) -> tuple[Set[TimeUnit], Set[str | None]]:
+ time_units: Set[TimeUnit] = (
+ {"ms", "us", "ns", "s"}
+ if time_unit is None
+ else {time_unit}
+ if isinstance(time_unit, str)
+ else set(time_unit)
+ )
+ time_zones: Set[str | None] = (
+ {None}
+ if time_zone is None
+ else {str(time_zone)}
+ if isinstance(time_zone, (str, timezone))
+ else {str(tz) if tz is not None else None for tz in time_zone}
+ )
+ return time_units, time_zones
+
+
def dtype_matches_time_unit_and_time_zone(
    dtype: DType, dtypes: DTypes, time_units: Set[TimeUnit], time_zones: Set[str | None]
) -> bool:
    """True for a Datetime dtype whose unit and zone match the requested sets.

    A literal `"*"` in `time_zones` matches any *aware* zone (not naive).
    """
    if not isinstance(dtype, dtypes.Datetime):
        return False
    if dtype.time_unit not in time_units:
        return False
    return dtype.time_zone in time_zones or (
        "*" in time_zones and dtype.time_zone is not None
    )
+
+
def get_column_names(frame: _StoresColumns, /) -> Sequence[str]:
    """Free-function accessor for `frame.columns`, usable as a callback."""
    return frame.columns
+
+
def exclude_column_names(frame: _StoresColumns, names: Container[str]) -> Sequence[str]:
    """Return `frame.columns` with every name in `names` filtered out, order kept."""
    kept = [column for column in frame.columns if column not in names]
    return kept
+
+
def passthrough_column_names(names: Sequence[str], /) -> EvalNames[Any]:
    """Wrap a fixed list of names as a frame-ignoring callback."""

    def constant_names(_frame: Any, /) -> Sequence[str]:
        # The frame argument is accepted for interface compatibility and ignored.
        return names

    return constant_names
+
+
+def _hasattr_static(obj: Any, attr: str) -> bool:
+ sentinel = object()
+ return getattr_static(obj, attr, sentinel) is not sentinel
+
+
def is_compliant_dataframe(
    obj: CompliantDataFrame[
        CompliantSeriesT, CompliantExprT, NativeFrameT_co, ToNarwhalsT_co
    ]
    | Any,
) -> TypeIs[
    CompliantDataFrame[CompliantSeriesT, CompliantExprT, NativeFrameT_co, ToNarwhalsT_co]
]:
    """True when `obj` exposes the `__narwhals_dataframe__` protocol hook (static lookup)."""
    return _hasattr_static(obj, "__narwhals_dataframe__")
+
+
def is_compliant_lazyframe(
    obj: CompliantLazyFrame[CompliantExprT, NativeFrameT_co, ToNarwhalsT_co] | Any,
) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeFrameT_co, ToNarwhalsT_co]]:
    """True when `obj` exposes the `__narwhals_lazyframe__` protocol hook (static lookup)."""
    return _hasattr_static(obj, "__narwhals_lazyframe__")
+
+
def is_compliant_series(
    obj: CompliantSeries[NativeSeriesT_co] | Any,
) -> TypeIs[CompliantSeries[NativeSeriesT_co]]:
    """True when `obj` exposes the `__narwhals_series__` protocol hook (static lookup)."""
    return _hasattr_static(obj, "__narwhals_series__")
+
+
def is_compliant_series_int(
    obj: CompliantSeries[NativeSeriesT_co] | Any,
) -> TypeIs[CompliantSeries[NativeSeriesT_co]]:
    """A compliant series whose dtype reports itself as an integer type."""
    if not is_compliant_series(obj):
        return False
    return obj.dtype.is_integer()
+
+
def is_compliant_expr(
    obj: CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | Any,
) -> TypeIs[CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co]]:
    """True when `obj` exposes the `__narwhals_expr__` hook.

    NOTE(review): unlike the dataframe/series guards, this is a dynamic lookup,
    so `__getattr__`-provided hooks count — presumably intentional; confirm.
    """
    marker = object()
    return getattr(obj, "__narwhals_expr__", marker) is not marker
+
+
def is_eager_allowed(obj: Implementation) -> TypeIs[EagerAllowedImplementation]:
    """Whether this backend implementation supports eager (in-memory) execution."""
    eager_backends = (
        Implementation.PANDAS,
        Implementation.MODIN,
        Implementation.CUDF,
        Implementation.POLARS,
        Implementation.PYARROW,
    )
    return obj in eager_backends
+
+
def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]:
    """True when `obj` exposes `__native_namespace__` (dynamic lookup, like `hasattr`)."""
    marker = object()
    return getattr(obj, "__native_namespace__", marker) is not marker
+
+
+def _supports_dataframe_interchange(obj: Any) -> TypeIs[DataFrameLike]:
+ return hasattr(obj, "__dataframe__")
+
+
def supports_arrow_c_stream(obj: Any) -> TypeIs[ArrowStreamExportable]:
    """True when `obj` exports the Arrow C stream protocol (`__arrow_c_stream__`).

    Uses a static lookup, so `__getattr__` fallbacks are not triggered.
    """
    return _hasattr_static(obj, "__arrow_c_stream__")
+
+
+def _remap_full_join_keys(
+ left_on: Sequence[str], right_on: Sequence[str], suffix: str
+) -> dict[str, str]:
+ """Remap join keys to avoid collisions.
+
+ If left keys collide with the right keys, append the suffix.
+ If there's no collision, let the right keys be.
+
+ Arguments:
+ left_on: Left keys.
+ right_on: Right keys.
+ suffix: Suffix to append to right keys.
+
+ Returns:
+ A map of old to new right keys.
+ """
+ right_keys_suffixed = (
+ f"{key}{suffix}" if key in left_on else key for key in right_on
+ )
+ return dict(zip(right_on, right_keys_suffixed))
+
+
def _into_arrow_table(data: IntoArrowTable, context: _FullContext, /) -> pa.Table:
    """Guards `ArrowDataFrame.from_arrow` w/ safer imports.

    Arguments:
        data: Object which implements `__arrow_c_stream__`.
        context: Initialized compliant object.

    Returns:
        A PyArrow Table.

    Raises:
        ModuleNotFoundError: If `pyarrow` is not installed.
    """
    # `find_spec` probes for pyarrow without importing it, keeping it optional.
    if find_spec("pyarrow"):
        import pyarrow as pa  # ignore-banned-import

        from narwhals._arrow.namespace import ArrowNamespace

        version = context._version
        ns = ArrowNamespace(backend_version=parse_version(pa), version=version)
        return ns._dataframe.from_arrow(data, context=ns).native
    else:  # pragma: no cover
        msg = f"'pyarrow>=14.0.0' is required for `from_arrow` for object of type {qualified_type_name(data)!r}."
        raise ModuleNotFoundError(msg)
+
+
+# TODO @dangotbanned: Extend with runtime behavior for `v1.*`
+# See `narwhals.exceptions.NarwhalsUnstableWarning`
def unstable(fn: _Fn, /) -> _Fn:
    """Visual-only marker for unstable functionality.

    The decorated object is returned untouched — no wrapper, no runtime cost.

    Arguments:
        fn: Function to decorate.

    Returns:
        Decorated function (unchanged).

    Examples:
        >>> from narwhals._utils import unstable
        >>> @unstable
        ... def a_work_in_progress_feature(*args):
        ...     return args
        >>>
        >>> a_work_in_progress_feature.__name__
        'a_work_in_progress_feature'
        >>> a_work_in_progress_feature(1, 2, 3)
        (1, 2, 3)
    """
    return fn
+
+
+def _is_naive_format(format: str) -> bool:
+ """Determines if a datetime format string is 'naive', i.e., does not include timezone information.
+
+ A format is considered naive if it does not contain any of the following
+
+ - '%s': Unix timestamp
+ - '%z': UTC offset
+ - 'Z' : UTC timezone designator
+
+ Arguments:
+ format: The datetime format string to check.
+
+ Returns:
+ bool: True if the format is naive (does not include timezone info), False otherwise.
+ """
+ return not any(x in format for x in ("%s", "%z", "Z"))
+
+
class not_implemented:  # noqa: N801
    """Mark some functionality as unsupported.

    Arguments:
        alias: optional name used instead of the data model hook [`__set_name__`].

    Returns:
        An exception-raising [descriptor].

    Notes:
        - Attribute/method name *doesn't* need to be declared twice
        - Allows different behavior when looked up on the class vs instance
        - Allows us to use `isinstance(...)` instead of monkeypatching an attribute to the function

    Examples:
        >>> from narwhals._utils import not_implemented
        >>> class Thing:
        ...     def totally_ready(self) -> str:
        ...         return "I'm ready!"
        ...
        ...     not_ready_yet = not_implemented()
        >>>
        >>> thing = Thing()
        >>> thing.totally_ready()
        "I'm ready!"
        >>> thing.not_ready_yet()
        Traceback (most recent call last):
        ...
        NotImplementedError: 'not_ready_yet' is not implemented for: 'Thing'.
        ...
        >>> isinstance(Thing.not_ready_yet, not_implemented)
        True

    [`__set_name__`]: https://docs.python.org/3/reference/datamodel.html#object.__set_name__
    [descriptor]: https://docs.python.org/3/howto/descriptor.html
    """

    def __init__(self, alias: str | None = None, /) -> None:
        # NOTE: Don't like this
        # Trying to workaround `mypy` requiring `@property` everywhere
        self._alias: str | None = alias

    def __repr__(self) -> str:
        return f"<{type(self).__name__}>: {self._name_owner}.{self._name}"

    def __set_name__(self, owner: type[_T], name: str) -> None:
        # https://docs.python.org/3/howto/descriptor.html#customized-names
        # Called when the owning class body finishes executing; records where
        # this marker lives so the error message can name it.
        self._name_owner: str = owner.__name__
        self._name: str = self._alias or name

    def __get__(
        self, instance: _T | Literal["raise"] | None, owner: type[_T] | None = None, /
    ) -> Any:
        if instance is None:
            # NOTE: Branch for `cls._name`
            # We can check that to see if an instance of `type(self)` for
            # https://narwhals-dev.github.io/narwhals/api-completeness/expr/
            return self
        # NOTE: Prefer not exposing the actual class we're defining in
        # `_implementation` may not be available everywhere
        who = getattr(instance, "_implementation", self._name_owner)
        raise _not_implemented_error(self._name, who)

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        # NOTE: Purely to duck-type as assignable to **any** instance method
        # Wouldn't be reachable through *regular* attribute access
        # `"raise"` is a non-None sentinel, forcing `__get__` to raise.
        return self.__get__("raise")

    @classmethod
    def deprecated(cls, message: LiteralString, /) -> Self:
        """Alt constructor, wraps with `@deprecated`.

        Arguments:
            message: **Static-only** deprecation message, emitted in an IDE.

        Returns:
            An exception-raising [descriptor].

        [descriptor]: https://docs.python.org/3/howto/descriptor.html
        """
        obj = cls()
        return deprecated(message)(obj)
+
+
+def _not_implemented_error(what: str, who: str, /) -> NotImplementedError:
+ msg = (
+ f"{what!r} is not implemented for: {who!r}.\n\n"
+ "If you would like to see this functionality in `narwhals`, "
+ "please open an issue at: https://github.com/narwhals-dev/narwhals/issues"
+ )
+ return NotImplementedError(msg)
+
+
class requires:  # noqa: N801
    """Method decorator for raising under certain constraints.

    Attributes:
        _min_version: Minimum backend version.
        _hint: Optional suggested alternative.

    Examples:
        >>> from narwhals._utils import requires, Implementation
        >>> class SomeBackend:
        ...     _implementation = Implementation.PYARROW
        ...     _backend_version = 20, 0, 0
        ...
        ...     @requires.backend_version((9000, 0, 0))
        ...     def really_complex_feature(self) -> str:
        ...         return "hello"
        >>> backend = SomeBackend()
        >>> backend.really_complex_feature()
        Traceback (most recent call last):
        ...
        NotImplementedError: `really_complex_feature` is only available in 'pyarrow>=9000.0.0', found version '20.0.0'.
    """

    _min_version: tuple[int, ...]
    _hint: str

    @classmethod
    def backend_version(cls, minimum: tuple[int, ...], /, hint: str = "") -> Self:
        """Method decorator for raising below a minimum `_backend_version`.

        Arguments:
            minimum: Minimum backend version.
            hint: Optional suggested alternative.

        Returns:
            An exception-raising decorator.
        """
        # `__new__` bypasses `__init__` (none is defined); attributes are set directly.
        obj = cls.__new__(cls)
        obj._min_version = minimum
        obj._hint = hint
        return obj

    @staticmethod
    def _unparse_version(backend_version: tuple[int, ...], /) -> str:
        # (1, 2, 3) -> "1.2.3"
        return ".".join(f"{d}" for d in backend_version)

    def _ensure_version(self, instance: _FullContext, /) -> None:
        # Raise unless `instance._backend_version` meets the decorated minimum.
        if instance._backend_version >= self._min_version:
            return
        method = self._wrapped_name
        backend = instance._implementation
        minimum = self._unparse_version(self._min_version)
        found = self._unparse_version(instance._backend_version)
        msg = f"`{method}` is only available in '{backend}>={minimum}', found version {found!r}."
        if self._hint:
            msg = f"{msg}\n{self._hint}"
        raise NotImplementedError(msg)

    def __call__(self, fn: _Method[_ContextT, P, R], /) -> _Method[_ContextT, P, R]:
        # Stored at decoration time on this (shared) decorator instance,
        # used later to name the method in the error message.
        self._wrapped_name = fn.__name__

        @wraps(fn)
        def wrapper(instance: _ContextT, *args: P.args, **kwds: P.kwargs) -> R:
            self._ensure_version(instance)
            return fn(instance, *args, **kwds)

        # NOTE: Only getting a complaint from `mypy`
        return wrapper  # type: ignore[return-value]
+
+
def convert_str_slice_to_int_slice(
    str_slice: _SliceName, columns: Sequence[str]
) -> tuple[int | None, int | None, Any]:
    """Translate a label-based slice into positional `(start, stop, step)`.

    The label `stop` is inclusive, so its position is incremented by one to
    produce an exclusive positional bound.
    """
    start = None if str_slice.start is None else columns.index(str_slice.start)
    stop = None if str_slice.stop is None else columns.index(str_slice.stop) + 1
    return start, stop, str_slice.step
+
+
def inherit_doc(
    tp_parent: Callable[P, R1], /
) -> Callable[[_Constructor[_T, P, R2]], _Constructor[_T, P, R2]]:
    """Steal the class-level docstring from parent and attach to child `__init__`.

    Returns:
        Decorated constructor.

    Raises:
        TypeError: When decorating anything other than an `__init__`, or when
            `tp_parent` is not a class.
    """

    def decorate(init_child: _Constructor[_T, P, R2], /) -> _Constructor[_T, P, R2]:
        is_init = init_child.__name__ == "__init__"
        if is_init and issubclass(type(tp_parent), type):
            # `getdoc` cleans up indentation for us.
            init_child.__doc__ = getdoc(tp_parent)
            return init_child
        # pragma: no cover
        msg = (
            f"`@{inherit_doc.__name__}` is only allowed to decorate an `__init__` with a class-level doc.\n"
            f"Method: {init_child.__qualname__!r}\n"
            f"Parent: {tp_parent!r}"
        )
        raise TypeError(msg)

    return decorate
+
+
def qualified_type_name(obj: object | type[Any], /) -> str:
    """Return `module.TypeName` for an object or type, omitting the `builtins.` prefix."""
    tp = obj if isinstance(obj, type) else type(obj)
    module = tp.__module__
    if module == "builtins" or not module:
        return tp.__name__
    return f"{module}.{tp.__name__}"
+
+
def ensure_type(obj: Any, /, *valid_types: type[Any], param_name: str = "") -> None:
    """Validate that an object is an instance of one or more specified types.

    Parameters:
        obj: The object to validate.
        *valid_types: One or more valid types that `obj` is expected to match.
        param_name: The name of the parameter being validated.
            Used to improve error message clarity by underlining the value.

    Raises:
        TypeError: If `obj` is not an instance of any of the provided `valid_types`.
    """
    if isinstance(obj, valid_types):
        return
    # pragma: no cover - error-reporting path
    tp_names = " | ".join(qualified_type_name(tp) for tp in valid_types)
    msg = f"Expected {tp_names!r}, got: {qualified_type_name(obj)!r}"
    if param_name:
        left_pad = " " * 4
        val = repr(obj)
        if len(val) > 40:  # truncate long reprs
            val = f"{qualified_type_name(obj)}(...)"
        assign = f"{left_pad}{param_name}="
        underline = (" " * len(assign)) + ("^" * len(val))
        msg = f"{msg}\n{assign}{val}\n{underline}"
    raise TypeError(msg)
+
+
class _DeferredIterable(Generic[_T]):
    """Store a callable producing an iterable to defer collection until we need it."""

    def __init__(self, into_iter: Callable[[], Iterable[_T]], /) -> None:
        # Held uncalled: the producer runs lazily on iteration / `to_tuple`.
        self._into_iter: Callable[[], Iterable[_T]] = into_iter

    def __iter__(self) -> Iterator[_T]:
        yield from self._into_iter()

    def to_tuple(self) -> tuple[_T, ...]:
        """Collect and return as a `tuple` (no copy when the producer yields one)."""
        collected = self._into_iter()
        if isinstance(collected, tuple):
            return collected
        return tuple(collected)
diff --git a/venv/lib/python3.8/site-packages/narwhals/dataframe.py b/venv/lib/python3.8/site-packages/narwhals/dataframe.py
new file mode 100644
index 0000000..b0ff471
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/dataframe.py
@@ -0,0 +1,3234 @@
+from __future__ import annotations
+
+from abc import abstractmethod
+from itertools import chain
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Generic,
+ Iterable,
+ Iterator,
+ Literal,
+ NoReturn,
+ Sequence,
+ TypeVar,
+ overload,
+)
+from warnings import warn
+
+from narwhals._expression_parsing import (
+ ExprKind,
+ all_exprs_are_scalar_like,
+ check_expressions_preserve_length,
+ is_scalar_like,
+)
+from narwhals._utils import (
+ Implementation,
+ find_stacklevel,
+ flatten,
+ generate_repr,
+ is_compliant_dataframe,
+ is_compliant_lazyframe,
+ is_index_selector,
+ is_list_of,
+ is_sequence_like,
+ is_slice_none,
+ issue_deprecation_warning,
+ parse_version,
+ supports_arrow_c_stream,
+)
+from narwhals.dependencies import get_polars, is_numpy_array
+from narwhals.exceptions import (
+ InvalidIntoExprError,
+ LengthChangingExprError,
+ OrderDependentExprError,
+)
+from narwhals.schema import Schema
+from narwhals.series import Series
+from narwhals.translate import to_native
+
+if TYPE_CHECKING:
+ from io import BytesIO
+ from pathlib import Path
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import Concatenate, ParamSpec, Self, TypeAlias
+
+ from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame
+ from narwhals._compliant.typing import CompliantExprAny, EagerNamespaceAny
+ from narwhals.group_by import GroupBy, LazyGroupBy
+ from narwhals.typing import (
+ AsofJoinStrategy,
+ IntoDataFrame,
+ IntoExpr,
+ IntoFrame,
+ JoinStrategy,
+ LazyUniqueKeepStrategy,
+ MultiColSelector as _MultiColSelector,
+ MultiIndexSelector as _MultiIndexSelector,
+ PivotAgg,
+ SingleColSelector,
+ SingleIndexSelector,
+ SizeUnit,
+ UniqueKeepStrategy,
+ _2DArray,
+ )
+
+ PS = ParamSpec("PS")
+
+# Type variables binding the wrapped native frame type for Base/Data/LazyFrame.
+_FrameT = TypeVar("_FrameT", bound="IntoFrame")
+FrameT = TypeVar("FrameT", bound="IntoFrame")
+DataFrameT = TypeVar("DataFrameT", bound="IntoDataFrame")
+R = TypeVar("R")
+
+# Public aliases specialising the generic selector types to Narwhals `Series`.
+MultiColSelector: TypeAlias = "_MultiColSelector[Series[Any]]"
+MultiIndexSelector: TypeAlias = "_MultiIndexSelector[Series[Any]]"
+
+
+class BaseFrame(Generic[_FrameT]):
+    """Shared base for ``DataFrame`` and ``LazyFrame``.
+
+    A thin wrapper around a backend-specific "compliant" frame: most methods
+    delegate to ``self._compliant_frame`` and re-wrap the result via
+    ``_with_compliant`` so that the level and class are preserved.
+    """
+
+    # Backend-specific frame object all operations are delegated to.
+    _compliant_frame: Any
+    # API support level of the wrapped backend.
+    _level: Literal["full", "lazy", "interchange"]
+
+    def __native_namespace__(self) -> ModuleType:
+        return self._compliant_frame.__native_namespace__() # type: ignore[no-any-return]
+
+    def __narwhals_namespace__(self) -> Any:
+        return self._compliant_frame.__narwhals_namespace__()
+
+    def _with_compliant(self, df: Any) -> Self:
+        # construct, preserving properties
+        return self.__class__(df, level=self._level) # type: ignore[call-arg]
+
+    def _flatten_and_extract(
+        self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
+    ) -> tuple[list[CompliantExprAny], list[ExprKind]]:
+        """Process `args` and `kwargs`, extracting underlying objects as we go, interpreting strings as column names."""
+        out_exprs = []
+        out_kinds = []
+        for expr in flatten(exprs):
+            compliant_expr = self._extract_compliant(expr)
+            out_exprs.append(compliant_expr)
+            out_kinds.append(ExprKind.from_into_expr(expr, str_as_lit=False))
+        for alias, expr in named_exprs.items():
+            # Keyword arguments name the resulting column via `.alias`.
+            compliant_expr = self._extract_compliant(expr).alias(alias)
+            out_exprs.append(compliant_expr)
+            out_kinds.append(ExprKind.from_into_expr(expr, str_as_lit=False))
+        return out_exprs, out_kinds
+
+    @abstractmethod
+    def _extract_compliant(self, arg: Any) -> Any:
+        # Implemented by subclasses: convert `arg` to a compliant expression/frame.
+        raise NotImplementedError
+
+    @property
+    def schema(self) -> Schema:
+        return Schema(self._compliant_frame.schema.items())
+
+    def collect_schema(self) -> Schema:
+        native_schema = dict(self._compliant_frame.collect_schema())
+
+        return Schema(native_schema)
+
+    def pipe(
+        self,
+        function: Callable[Concatenate[Self, PS], R],
+        *args: PS.args,
+        **kwargs: PS.kwargs,
+    ) -> R:
+        # Apply `function` to `self`, forwarding any extra arguments.
+        return function(self, *args, **kwargs)
+
+    def with_row_index(self, name: str = "index") -> Self:
+        return self._with_compliant(self._compliant_frame.with_row_index(name))
+
+    def drop_nulls(self, subset: str | list[str] | None) -> Self:
+        # Normalise a single column name to a one-element list.
+        subset = [subset] if isinstance(subset, str) else subset
+        return self._with_compliant(self._compliant_frame.drop_nulls(subset=subset))
+
+    @property
+    def columns(self) -> list[str]:
+        return self._compliant_frame.columns # type: ignore[no-any-return]
+
+    def with_columns(
+        self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
+    ) -> Self:
+        compliant_exprs, kinds = self._flatten_and_extract(*exprs, **named_exprs)
+        # Scalar-like expressions get broadcast so they match the frame's length.
+        compliant_exprs = [
+            compliant_expr.broadcast(kind) if is_scalar_like(kind) else compliant_expr
+            for compliant_expr, kind in zip(compliant_exprs, kinds)
+        ]
+        return self._with_compliant(self._compliant_frame.with_columns(*compliant_exprs))
+
+    def select(
+        self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
+    ) -> Self:
+        flat_exprs = tuple(flatten(exprs))
+        if flat_exprs and all(isinstance(x, str) for x in flat_exprs) and not named_exprs:
+            # fast path!
+            try:
+                return self._with_compliant(
+                    self._compliant_frame.simple_select(*flat_exprs)
+                )
+            except Exception as e:
+                # Column not found is the only thing that can realistically be raised here.
+                if error := self._compliant_frame._check_columns_exist(flat_exprs):
+                    raise error from e
+                raise
+        compliant_exprs, kinds = self._flatten_and_extract(*flat_exprs, **named_exprs)
+        if compliant_exprs and all_exprs_are_scalar_like(*flat_exprs, **named_exprs):
+            # All expressions are scalar-like: this is an aggregation.
+            return self._with_compliant(self._compliant_frame.aggregate(*compliant_exprs))
+        compliant_exprs = [
+            compliant_expr.broadcast(kind) if is_scalar_like(kind) else compliant_expr
+            for compliant_expr, kind in zip(compliant_exprs, kinds)
+        ]
+        return self._with_compliant(self._compliant_frame.select(*compliant_exprs))
+
+    def rename(self, mapping: dict[str, str]) -> Self:
+        return self._with_compliant(self._compliant_frame.rename(mapping))
+
+    def head(self, n: int) -> Self:
+        return self._with_compliant(self._compliant_frame.head(n))
+
+    def tail(self, n: int) -> Self:
+        return self._with_compliant(self._compliant_frame.tail(n))
+
+    def drop(self, *columns: Iterable[str], strict: bool) -> Self:
+        return self._with_compliant(self._compliant_frame.drop(columns, strict=strict))
+
+    def filter(
+        self, *predicates: IntoExpr | Iterable[IntoExpr] | list[bool], **constraints: Any
+    ) -> Self:
+        # A single boolean list is passed through to the backend as a mask.
+        if len(predicates) == 1 and is_list_of(predicates[0], bool):
+            predicate = predicates[0]
+        else:
+            from narwhals.functions import col
+
+            flat_predicates = flatten(predicates)
+            check_expressions_preserve_length(*flat_predicates, function_name="filter")
+            plx = self.__narwhals_namespace__()
+            compliant_predicates, _kinds = self._flatten_and_extract(*flat_predicates)
+            # Keyword constraints become equality predicates: `name == value`.
+            compliant_constraints = (
+                (col(name) == v)._to_compliant_expr(plx)
+                for name, v in constraints.items()
+            )
+            # All predicates and constraints must hold simultaneously.
+            predicate = plx.all_horizontal(
+                *chain(compliant_predicates, compliant_constraints)
+            )
+        return self._with_compliant(self._compliant_frame.filter(predicate))
+
+    def sort(
+        self,
+        by: str | Iterable[str],
+        *more_by: str,
+        descending: bool | Sequence[bool] = False,
+        nulls_last: bool = False,
+    ) -> Self:
+        by = flatten([*flatten([by]), *more_by])
+        return self._with_compliant(
+            self._compliant_frame.sort(*by, descending=descending, nulls_last=nulls_last)
+        )
+
+    def join(
+        self,
+        other: Self,
+        on: str | list[str] | None = None,
+        how: JoinStrategy = "inner",
+        *,
+        left_on: str | list[str] | None = None,
+        right_on: str | list[str] | None = None,
+        suffix: str = "_right",
+    ) -> Self:
+        # Normalise single key names to lists before validation.
+        on = [on] if isinstance(on, str) else on
+        left_on = [left_on] if isinstance(left_on, str) else left_on
+        right_on = [right_on] if isinstance(right_on, str) else right_on
+
+        if how not in (
+            _supported_joins := ("inner", "left", "full", "cross", "anti", "semi")
+        ):
+            msg = f"Only the following join strategies are supported: {_supported_joins}; found '{how}'."
+            raise NotImplementedError(msg)
+
+        # Cross joins take no keys; every other strategy needs exactly one of
+        # `on` or the (`left_on`, `right_on`) pair.
+        if how == "cross" and (
+            left_on is not None or right_on is not None or on is not None
+        ):
+            msg = "Can not pass `left_on`, `right_on` or `on` keys for cross join"
+            raise ValueError(msg)
+
+        if how != "cross" and (on is None and (left_on is None or right_on is None)):
+            msg = f"Either (`left_on` and `right_on`) or `on` keys should be specified for {how}."
+            raise ValueError(msg)
+
+        if how != "cross" and (
+            on is not None and (left_on is not None or right_on is not None)
+        ):
+            msg = f"If `on` is specified, `left_on` and `right_on` should be None for {how}."
+            raise ValueError(msg)
+
+        if on is not None:
+            left_on = right_on = on
+
+        if (isinstance(left_on, list) and isinstance(right_on, list)) and (
+            len(left_on) != len(right_on)
+        ):
+            msg = "`left_on` and `right_on` must have the same length."
+            raise ValueError(msg)
+
+        return self._with_compliant(
+            self._compliant_frame.join(
+                self._extract_compliant(other),
+                how=how,
+                left_on=left_on,
+                right_on=right_on,
+                suffix=suffix,
+            )
+        )
+
+    def gather_every(self, n: int, offset: int = 0) -> Self:
+        return self._with_compliant(
+            self._compliant_frame.gather_every(n=n, offset=offset)
+        )
+
+    def join_asof( # noqa: C901
+        self,
+        other: Self,
+        *,
+        left_on: str | None = None,
+        right_on: str | None = None,
+        on: str | None = None,
+        by_left: str | list[str] | None = None,
+        by_right: str | list[str] | None = None,
+        by: str | list[str] | None = None,
+        strategy: AsofJoinStrategy = "backward",
+        suffix: str = "_right",
+    ) -> Self:
+        _supported_strategies = ("backward", "forward", "nearest")
+
+        if strategy not in _supported_strategies:
+            msg = f"Only the following strategies are supported: {_supported_strategies}; found '{strategy}'."
+            raise NotImplementedError(msg)
+
+        # Exactly one of `on` or the (`left_on`, `right_on`) pair must be given;
+        # likewise for `by` vs the (`by_left`, `by_right`) pair.
+        if (on is None) and (left_on is None or right_on is None):
+            msg = "Either (`left_on` and `right_on`) or `on` keys should be specified."
+            raise ValueError(msg)
+        if (on is not None) and (left_on is not None or right_on is not None):
+            msg = "If `on` is specified, `left_on` and `right_on` should be None."
+            raise ValueError(msg)
+        if (by is None) and (
+            (by_left is None and by_right is not None)
+            or (by_left is not None and by_right is None)
+        ):
+            msg = (
+                "Can not specify only `by_left` or `by_right`, you need to specify both."
+            )
+            raise ValueError(msg)
+        if (by is not None) and (by_left is not None or by_right is not None):
+            msg = "If `by` is specified, `by_left` and `by_right` should be None."
+            raise ValueError(msg)
+        if on is not None:
+            left_on = right_on = on
+        if by is not None:
+            by_left = by_right = by
+        if isinstance(by_left, str):
+            by_left = [by_left]
+        if isinstance(by_right, str):
+            by_right = [by_right]
+
+        if (isinstance(by_left, list) and isinstance(by_right, list)) and (
+            len(by_left) != len(by_right)
+        ):
+            msg = "`by_left` and `by_right` must have the same length."
+            raise ValueError(msg)
+
+        return self._with_compliant(
+            self._compliant_frame.join_asof(
+                self._extract_compliant(other),
+                left_on=left_on,
+                right_on=right_on,
+                by_left=by_left,
+                by_right=by_right,
+                strategy=strategy,
+                suffix=suffix,
+            )
+        )
+
+    def unpivot(
+        self,
+        on: str | list[str] | None,
+        *,
+        index: str | list[str] | None,
+        variable_name: str,
+        value_name: str,
+    ) -> Self:
+        # Normalise single column names to lists.
+        on = [on] if isinstance(on, str) else on
+        index = [index] if isinstance(index, str) else index
+
+        return self._with_compliant(
+            self._compliant_frame.unpivot(
+                on=on, index=index, variable_name=variable_name, value_name=value_name
+            )
+        )
+
+    def __neq__(self, other: object) -> NoReturn:
+        # NOTE(review): `__neq__` is not a Python special method (`!=` dispatches
+        # to `__ne__`), so the operator never invokes this directly; `df != 0`
+        # reaches the default `__ne__`, which delegates to `__eq__` below.
+        # Possibly intended to be named `__ne__` — verify against upstream.
+        msg = (
+            "DataFrame.__neq__ and LazyFrame.__neq__ are not implemented, please "
+            "use expressions instead.\n\n"
+            "Hint: instead of\n"
+            "    df != 0\n"
+            "you may want to use\n"
+            "    df.select(nw.all() != 0)"
+        )
+        raise NotImplementedError(msg)
+
+    def __eq__(self, other: object) -> NoReturn:
+        # Deliberately unsupported: comparisons should go through expressions.
+        msg = (
+            "DataFrame.__eq__ and LazyFrame.__eq__ are not implemented, please "
+            "use expressions instead.\n\n"
+            "Hint: instead of\n"
+            "    df == 0\n"
+            "you may want to use\n"
+            "    df.select(nw.all() == 0)"
+        )
+        raise NotImplementedError(msg)
+
+    def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self:
+        to_explode = (
+            [columns, *more_columns]
+            if isinstance(columns, str)
+            else [*columns, *more_columns]
+        )
+
+        return self._with_compliant(self._compliant_frame.explode(columns=to_explode))
+
+
+class DataFrame(BaseFrame[DataFrameT]):
+    """Narwhals DataFrame, backed by a native eager dataframe.
+
+    Warning:
+        This class is not meant to be instantiated directly - instead:
+
+        - If the native object is an eager dataframe from one of the supported
+            backends (e.g. pandas.DataFrame, polars.DataFrame, pyarrow.Table),
+            you can use [`narwhals.from_native`][]:
+            ```py
+            narwhals.from_native(native_dataframe)
+            narwhals.from_native(native_dataframe, eager_only=True)
+            ```
+
+        - If the object is a dictionary of column names and generic sequences mapping
+            (e.g. `dict[str, list]`), you can create a DataFrame via
+            [`narwhals.from_dict`][]:
+            ```py
+            narwhals.from_dict(
+                data={"a": [1, 2, 3]},
+                backend=narwhals.get_native_namespace(another_object),
+            )
+            ```
+    """
+
+ def _extract_compliant(self, arg: Any) -> Any:
+ from narwhals.expr import Expr
+ from narwhals.series import Series
+
+ plx: EagerNamespaceAny = self.__narwhals_namespace__()
+ if isinstance(arg, BaseFrame):
+ return arg._compliant_frame
+ if isinstance(arg, Series):
+ return arg._compliant_series._to_expr()
+ if isinstance(arg, Expr):
+ return arg._to_compliant_expr(self.__narwhals_namespace__())
+ if isinstance(arg, str):
+ return plx.col(arg)
+ if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover
+ msg = (
+ f"Expected Narwhals object, got: {type(arg)}.\n\n"
+ "Perhaps you:\n"
+ "- Forgot a `nw.from_native` somewhere?\n"
+ "- Used `pl.col` instead of `nw.col`?"
+ )
+ raise TypeError(msg)
+ if is_numpy_array(arg):
+ return plx._series.from_numpy(arg, context=plx)._to_expr()
+ raise InvalidIntoExprError.from_invalid_type(type(arg))
+
+    @property
+    def _series(self) -> type[Series[Any]]:
+        # Series class used to wrap columns (see e.g. `get_column`).
+        return Series
+
+    @property
+    def _lazyframe(self) -> type[LazyFrame[Any]]:
+        # LazyFrame class used by `lazy()` to wrap the lazy result.
+        return LazyFrame
+
+    def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
+        """Wrap a compliant dataframe; raises if `df` lacks `__narwhals_dataframe__`."""
+        self._level: Literal["full", "lazy", "interchange"] = level
+        # NOTE: Interchange support (`DataFrameLike`) is the source of the error
+        self._compliant_frame: CompliantDataFrame[Any, Any, DataFrameT, Self] # type: ignore[type-var]
+        if is_compliant_dataframe(df):
+            self._compliant_frame = df.__narwhals_dataframe__()
+        else: # pragma: no cover
+            msg = f"Expected an object which implements `__narwhals_dataframe__`, got: {type(df)}"
+            raise AssertionError(msg)
+
+    @property
+    def implementation(self) -> Implementation:
+        """Return implementation of native frame.
+
+        This can be useful when you need to use special-casing for features outside of
+        Narwhals' scope - for example, when dealing with pandas' Period Dtype.
+
+        Returns:
+            Implementation.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
+            >>> df = nw.from_native(df_native)
+            >>> df.implementation
+            <Implementation.PANDAS: 'pandas'>
+            >>> df.implementation.is_pandas()
+            True
+            >>> df.implementation.is_pandas_like()
+            True
+            >>> df.implementation.is_polars()
+            False
+        """
+        return self._compliant_frame._implementation
+
+    def __len__(self) -> int:
+        # Delegates to the compliant frame's `__len__`.
+        return self._compliant_frame.__len__()
+
+    def __array__(self, dtype: Any = None, copy: bool | None = None) -> _2DArray: # noqa: FBT001
+        # NumPy protocol hook: convert the frame to a 2D array (see `to_numpy`).
+        return self._compliant_frame.__array__(dtype, copy=copy)
+
+    def __repr__(self) -> str: # pragma: no cover
+        # Wrap the native frame's repr in a Narwhals banner box.
+        return generate_repr("Narwhals DataFrame", self.to_native().__repr__())
+
+    def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
+        """Export a DataFrame via the Arrow PyCapsule Interface.
+
+        - if the underlying dataframe implements the interface, it'll return that
+        - else, it'll call `to_arrow` and then defer to PyArrow's implementation
+
+        See [PyCapsule Interface](https://arrow.apache.org/docs/dev/format/CDataInterface/PyCapsuleInterface.html)
+        for more.
+        """
+        native_frame = self._compliant_frame._native_frame
+        if supports_arrow_c_stream(native_frame):
+            # Fast path: the native frame speaks the PyCapsule protocol itself.
+            return native_frame.__arrow_c_stream__(requested_schema=requested_schema)
+        try:
+            import pyarrow as pa # ignore-banned-import
+        except ModuleNotFoundError as exc: # pragma: no cover
+            msg = f"'pyarrow>=14.0.0' is required for `DataFrame.__arrow_c_stream__` for object of type {type(native_frame)}"
+            raise ModuleNotFoundError(msg) from exc
+        # pyarrow gained PyCapsule stream support in 14.0.
+        if parse_version(pa) < (14, 0): # pragma: no cover
+            msg = f"'pyarrow>=14.0.0' is required for `DataFrame.__arrow_c_stream__` for object of type {type(native_frame)}"
+            raise ModuleNotFoundError(msg) from None
+        pa_table = self.to_arrow()
+        return pa_table.__arrow_c_stream__(requested_schema=requested_schema) # type: ignore[no-untyped-call]
+
+    def lazy(
+        self, backend: ModuleType | Implementation | str | None = None
+    ) -> LazyFrame[Any]:
+        """Restrict available API methods to lazy-only ones.
+
+        If `backend` is specified, then a conversion between different backends
+        might be triggered.
+
+        If a library does not support lazy execution and `backend` is not specified,
+        then this will only restrict the API to lazy-only operations. This is useful
+        if you want to ensure that you write dataframe-agnostic code which all has
+        the possibility of running entirely lazily.
+
+        Arguments:
+            backend: Which lazy backend to collect to. This will be the underlying
+                backend for the resulting Narwhals LazyFrame. If not specified, and the
+                given library does not support lazy execution, then this will restrict
+                the API to lazy-only operations.
+
+                `backend` can be specified in various ways
+
+                - As `Implementation.<BACKEND>` with `BACKEND` being `DASK`, `DUCKDB`
+                    or `POLARS`.
+                - As a string: `"dask"`, `"duckdb"` or `"polars"`
+                - Directly as a module `dask.dataframe`, `duckdb` or `polars`.
+
+        Returns:
+            A new LazyFrame.
+
+        Examples:
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame({"a": [1, 2], "b": [4, 6]})
+            >>> df = nw.from_native(df_native)
+
+            If we call `df.lazy`, we get a `narwhals.LazyFrame` backed by a Polars
+            LazyFrame.
+
+            >>> df.lazy() # doctest: +SKIP
+            ┌─────────────────────────────┐
+            | Narwhals LazyFrame |
+            |-----------------------------|
+            |<LazyFrame at 0x7F52B9937230>|
+            └─────────────────────────────┘
+
+            We can also pass DuckDB as the backend, and then we'll get a
+            `narwhals.LazyFrame` backed by a `duckdb.DuckDBPyRelation`.
+
+            >>> df.lazy(backend=nw.Implementation.DUCKDB)
+            ┌──────────────────┐
+            |Narwhals LazyFrame|
+            |------------------|
+            |┌───────┬───────┐ |
+            |│ a │ b │ |
+            |│ int64 │ int64 │ |
+            |├───────┼───────┤ |
+            |│ 1 │ 4 │ |
+            |│ 2 │ 6 │ |
+            |└───────┴───────┘ |
+            └──────────────────┘
+        """
+        # Normalise the requested backend, then check it supports lazy execution.
+        lazy_backend = None if backend is None else Implementation.from_backend(backend)
+        supported_lazy_backends = (
+            Implementation.DASK,
+            Implementation.DUCKDB,
+            Implementation.POLARS,
+        )
+        if lazy_backend is not None and lazy_backend not in supported_lazy_backends:
+            msg = (
+                "Not-supported backend."
+                f"\n\nExpected one of {supported_lazy_backends} or `None`, got {lazy_backend}"
+            )
+            raise ValueError(msg)
+        return self._lazyframe(
+            self._compliant_frame.lazy(backend=lazy_backend), level="lazy"
+        )
+
+    def to_native(self) -> DataFrameT:
+        """Convert Narwhals DataFrame to native one.
+
+        Returns:
+            Object of class that user started with.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame(
+            ...     {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+            ... )
+
+            Calling `to_native` on a Narwhals DataFrame returns the native object:
+
+            >>> nw.from_native(df_native).to_native()
+               foo  bar ham
+            0    1  6.0   a
+            1    2  7.0   b
+            2    3  8.0   c
+        """
+        # The wrapped native object is stored on the compliant frame.
+        return self._compliant_frame._native_frame
+
+    def to_pandas(self) -> pd.DataFrame:
+        """Convert this DataFrame to a pandas DataFrame.
+
+        Returns:
+            A pandas DataFrame.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df_native = pl.DataFrame(
+            ...     {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+            ... )
+            >>> df = nw.from_native(df_native)
+            >>> df.to_pandas()
+               foo  bar ham
+            0    1  6.0   a
+            1    2  7.0   b
+            2    3  8.0   c
+        """
+        # Conversion is implemented by the backend's compliant frame.
+        return self._compliant_frame.to_pandas()
+
+    def to_polars(self) -> pl.DataFrame:
+        """Convert this DataFrame to a polars DataFrame.
+
+        Returns:
+            A polars DataFrame.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
+            >>> df = nw.from_native(df_native)
+            >>> df.to_polars()
+            shape: (2, 2)
+            ┌─────┬─────┐
+            │ foo ┆ bar │
+            │ --- ┆ --- │
+            │ i64 ┆ f64 │
+            ╞═════╪═════╡
+            │ 1 ┆ 6.0 │
+            │ 2 ┆ 7.0 │
+            └─────┴─────┘
+        """
+        # Conversion is implemented by the backend's compliant frame.
+        return self._compliant_frame.to_polars()
+
+    @overload
+    def write_csv(self, file: None = None) -> str: ...
+
+    @overload
+    def write_csv(self, file: str | Path | BytesIO) -> None: ...
+
+    def write_csv(self, file: str | Path | BytesIO | None = None) -> str | None:
+        r"""Write dataframe to comma-separated values (CSV) file.
+
+        Arguments:
+            file: String, path object or file-like object to which the dataframe will be
+                written. If None, the resulting csv format is returned as a string.
+
+        Returns:
+            String or None.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame(
+            ...     {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+            ... )
+            >>> df = nw.from_native(df_native)
+            >>> df.write_csv()
+            'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+
+            If we had passed a file name to `write_csv`, it would have been
+            written to that file.
+        """
+        # Serialisation is delegated to the backend's compliant frame.
+        return self._compliant_frame.write_csv(file)
+
+    def write_parquet(self, file: str | Path | BytesIO) -> None:
+        """Write dataframe to parquet file.
+
+        Arguments:
+            file: String, path object or file-like object to which the dataframe will be
+                written.
+
+        Returns:
+            None.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
+            >>> df = nw.from_native(df_native)
+            >>> df.write_parquet("out.parquet") # doctest:+SKIP
+        """
+        # Serialisation is delegated to the backend's compliant frame.
+        self._compliant_frame.write_parquet(file)
+
+    def to_numpy(self) -> _2DArray:
+        """Convert this DataFrame to a NumPy ndarray.
+
+        Returns:
+            A NumPy ndarray array.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame({"foo": [1, 2], "bar": [6.5, 7.0]})
+            >>> df = nw.from_native(df_native)
+            >>> df.to_numpy()
+            array([[1. , 6.5],
+                   [2. , 7. ]])
+        """
+        # `dtype=None, copy=None` lets the backend pick the most natural dtype/copy.
+        return self._compliant_frame.to_numpy(None, copy=None)
+
+    @property
+    def shape(self) -> tuple[int, int]:
+        """Get the shape of the DataFrame.
+
+        Returns:
+            The shape of the dataframe as a tuple.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame({"foo": [1, 2]})
+            >>> df = nw.from_native(df_native)
+            >>> df.shape
+            (2, 1)
+        """
+        return self._compliant_frame.shape
+
+    def get_column(self, name: str) -> Series[Any]:
+        """Get a single column by name.
+
+        Arguments:
+            name: The column name as a string.
+
+        Returns:
+            A Narwhals Series, backed by a native series.
+
+        Notes:
+            Although `name` is typed as `str`, pandas does allow non-string column
+            names, and they will work when passed to this function if the
+            `narwhals.DataFrame` is backed by a pandas dataframe with non-string
+            columns. This function can only be used to extract a column by name, so
+            there is no risk of ambiguity.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame({"a": [1, 2]})
+            >>> df = nw.from_native(df_native)
+            >>> df.get_column("a").to_native()
+            0    1
+            1    2
+            Name: a, dtype: int64
+        """
+        # Wrap the compliant column in a Narwhals Series at the same level.
+        return self._series(self._compliant_frame.get_column(name), level=self._level)
+
+    def estimated_size(self, unit: SizeUnit = "b") -> int | float:
+        """Return an estimation of the total (heap) allocated size of the `DataFrame`.
+
+        Estimated size is given in the specified unit (bytes by default).
+
+        Arguments:
+            unit: 'b', 'kb', 'mb', 'gb', 'tb', 'bytes', 'kilobytes', 'megabytes',
+                'gigabytes', or 'terabytes'.
+
+        Returns:
+            Integer or Float.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
+            >>> df = nw.from_native(df_native)
+            >>> df.estimated_size()
+            32
+        """
+        return self._compliant_frame.estimated_size(unit=unit)
+
+    # `str` overlaps with `Sequence[str]`
+    # We can ignore this but we must keep this overload ordering
+    @overload
+    def __getitem__(self, item: tuple[SingleIndexSelector, SingleColSelector]) -> Any: ...
+
+    @overload
+    def __getitem__( # type: ignore[overload-overlap]
+        self, item: str | tuple[MultiIndexSelector, SingleColSelector]
+    ) -> Series[Any]: ...
+
+    @overload
+    def __getitem__(
+        self,
+        item: (
+            SingleIndexSelector
+            | MultiIndexSelector
+            | MultiColSelector
+            | tuple[SingleIndexSelector, MultiColSelector]
+            | tuple[MultiIndexSelector, MultiColSelector]
+        ),
+    ) -> Self: ...
+    def __getitem__( # noqa: C901, PLR0912
+        self,
+        item: (
+            SingleIndexSelector
+            | SingleColSelector
+            | MultiColSelector
+            | MultiIndexSelector
+            | tuple[SingleIndexSelector, SingleColSelector]
+            | tuple[SingleIndexSelector, MultiColSelector]
+            | tuple[MultiIndexSelector, SingleColSelector]
+            | tuple[MultiIndexSelector, MultiColSelector]
+        ),
+    ) -> Series[Any] | Self | Any:
+        """Extract column or slice of DataFrame.
+
+        Arguments:
+            item: How to slice dataframe. What happens depends on what is passed. It's easiest
+                to explain by example. Suppose we have a Dataframe `df`
+
+                - `df['a']` extracts column `'a'` and returns a `Series`.
+                - `df[0:2]` extracts the first two rows and returns a `DataFrame`.
+                - `df[0:2, 'a']` extracts the first two rows from column `'a'` and returns
+                    a `Series`.
+                - `df[0:2, 0]` extracts the first two rows from the first column and returns
+                    a `Series`.
+                - `df[[0, 1], [0, 1, 2]]` extracts the first two rows and the first three columns
+                    and returns a `DataFrame`
+                - `df[:, [0, 1, 2]]` extracts all rows from the first three columns and returns a
+                    `DataFrame`.
+                - `df[:, ['a', 'c']]` extracts all rows and columns `'a'` and `'c'` and returns a
+                    `DataFrame`.
+                - `df[['a', 'c']]` extracts all rows and columns `'a'` and `'c'` and returns a
+                    `DataFrame`.
+                - `df[0: 2, ['a', 'c']]` extracts the first two rows and columns `'a'` and `'c'` and
+                    returns a `DataFrame`
+                - `df[:, 0: 2]` extracts all rows from the first two columns and returns a `DataFrame`
+                - `df[:, 'a': 'c']` extracts all rows and all columns positioned between `'a'` and `'c'`
+                    _inclusive_ and returns a `DataFrame`. For example, if the columns are
+                    `'a', 'd', 'c', 'b'`, then that would extract columns `'a'`, `'d'`, and `'c'`.
+
+        Returns:
+            A Narwhals Series, a Narwhals DataFrame, or a single element,
+            depending on the row and column selectors used (see above).
+
+        Notes:
+            - Integers are always interpreted as positions
+            - Strings are always interpreted as column names.
+
+            In contrast with Polars, pandas allows non-string column names.
+            If you don't know whether the column name you're trying to extract
+            is definitely a string (e.g. `df[df.columns[0]]`) then you should
+            use `DataFrame.get_column` instead.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame({"a": [1, 2]})
+            >>> df = nw.from_native(df_native)
+            >>> df["a"].to_native()
+            0    1
+            1    2
+            Name: a, dtype: int64
+        """
+        from narwhals.series import Series
+
+        msg = (
+            f"Unexpected type for `DataFrame.__getitem__`, got: {type(item)}.\n\n"
+            "Hints:\n"
+            "- use `df.item` to select a single item.\n"
+            "- Use `df[indices, :]` to select rows positionally.\n"
+            "- Use `df.filter(mask)` to filter rows based on a boolean mask."
+        )
+
+        # Step 1: normalise `item` into separate `rows` and `columns` selectors
+        # (either may be None, meaning "select everything along that axis").
+        if isinstance(item, tuple):
+            if len(item) > 2:
+                tuple_msg = (
+                    "Tuples cannot be passed to DataFrame.__getitem__ directly.\n\n"
+                    "Hint: instead of `df[indices]`, did you mean `df[indices, :]`?"
+                )
+                raise TypeError(tuple_msg)
+            rows = None if not item or is_slice_none(item[0]) else item[0]
+            columns = None if len(item) < 2 or is_slice_none(item[1]) else item[1]
+            if rows is None and columns is None:
+                return self
+        elif is_index_selector(item):
+            rows = item
+            columns = None
+        elif is_sequence_like(item) or isinstance(item, (slice, str)):
+            rows = None
+            columns = item
+        else:
+            raise TypeError(msg)
+
+        # A bare string is a column selector, never a row selector.
+        if isinstance(rows, str):
+            raise TypeError(msg)
+
+        compliant = self._compliant_frame
+
+        # Step 2: single-column selection yields a Series (or a scalar when the
+        # row selector is a single integer).
+        if isinstance(columns, (int, str)):
+            if isinstance(rows, int):
+                return self.item(rows, columns)
+            col_name = columns if isinstance(columns, str) else self.columns[columns]
+            series = self.get_column(col_name)
+            return series[rows] if rows is not None else series
+        # Step 3: unwrap Narwhals Series selectors, then delegate slicing.
+        if isinstance(rows, Series):
+            rows = rows._compliant_series
+        if isinstance(columns, Series):
+            columns = columns._compliant_series
+        if rows is None:
+            return self._with_compliant(compliant[:, columns])
+        if columns is None:
+            return self._with_compliant(compliant[rows, :])
+        return self._with_compliant(compliant[rows, columns])
+
+    def __contains__(self, key: str) -> bool:
+        # Membership is checked against the column names.
+        return key in self.columns
+
+    @overload
+    def to_dict(self, *, as_series: Literal[True] = ...) -> dict[str, Series[Any]]: ...
+    @overload
+    def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
+    @overload
+    def to_dict(
+        self, *, as_series: bool
+    ) -> dict[str, Series[Any]] | dict[str, list[Any]]: ...
+    def to_dict(
+        self, *, as_series: bool = True
+    ) -> dict[str, Series[Any]] | dict[str, list[Any]]:
+        """Convert DataFrame to a dictionary mapping column name to values.
+
+        Arguments:
+            as_series: If set to ``True``, then the values are Narwhals Series,
+                otherwise the values are Python lists.
+
+        Returns:
+            A mapping from column name to values / Series.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_native = pa.table({"A": [1, 2], "fruits": ["banana", "apple"]})
+            >>> df = nw.from_native(df_native)
+            >>> df.to_dict(as_series=False)
+            {'A': [1, 2], 'fruits': ['banana', 'apple']}
+        """
+        if as_series:
+            # Re-wrap each compliant column as a Narwhals Series.
+            return {
+                key: self._series(value, level=self._level)
+                for key, value in self._compliant_frame.to_dict(
+                    as_series=as_series
+                ).items()
+            }
+        return self._compliant_frame.to_dict(as_series=as_series)
+
+    def row(self, index: int) -> tuple[Any, ...]:
+        """Get values at given row.
+
+        Warning:
+            You should NEVER use this method to iterate over a DataFrame;
+            if you require row-iteration you should strongly prefer use of iter_rows()
+            instead.
+
+        Arguments:
+            index: Row number.
+
+        Returns:
+            A tuple of the values in the selected row.
+
+        Notes:
+            cuDF doesn't support this method.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> df_native = pa.table({"a": [1, 2], "b": [4, 5]})
+            >>> nw.from_native(df_native).row(1)
+            (<pyarrow.Int64Scalar: 2>, <pyarrow.Int64Scalar: 5>)
+        """
+        return self._compliant_frame.row(index)
+
+    # inherited
+    def pipe(
+        self,
+        function: Callable[Concatenate[Self, PS], R],
+        *args: PS.args,
+        **kwargs: PS.kwargs,
+    ) -> R:
+        """Pipe function call.
+
+        Arguments:
+            function: Function to apply.
+            args: Positional arguments to pass to function.
+            kwargs: Keyword arguments to pass to function.
+
+        Returns:
+            The original object with the function applied.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> df_native = pd.DataFrame({"a": [1, 2], "ba": [4, 5]})
+            >>> nw.from_native(df_native).pipe(
+            ...     lambda _df: _df.select(
+            ...         [x for x in _df.columns if len(x) == 1]
+            ...     ).to_native()
+            ... )
+               a
+            0  1
+            1  2
+        """
+        # Redefined only to attach DataFrame-specific docs; behavior is inherited.
+        return super().pipe(function, *args, **kwargs)
+
    def drop_nulls(self, subset: str | list[str] | None = None) -> Self:
        """Drop rows that contain null values.

        Arguments:
            subset: Column name(s) for which null values are considered. If set to None
                (default), use all columns.

        Returns:
            The original object with the rows removed that contained the null values.

        Notes:
            pandas handles null values differently from Polars and PyArrow.
            See [null_handling](../concepts/null_handling.md)
            for reference.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"a": [1.0, None], "ba": [1.0, 2.0]})
            >>> nw.from_native(df_native).drop_nulls().to_native()
            pyarrow.Table
            a: double
            ba: double
            ----
            a: [[1]]
            ba: [[1]]
        """
        # Delegates to the shared base-class implementation (keyword form
        # keeps `subset` explicit for readability).
        return super().drop_nulls(subset=subset)
+
    def with_row_index(self, name: str = "index") -> Self:
        """Insert column which enumerates rows.

        Arguments:
            name: The name of the column as a string. The default is "index".

        Returns:
            The original object with the column added.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"a": [1, 2], "b": [4, 5]})
            >>> nw.from_native(df_native).with_row_index().to_native()
            pyarrow.Table
            index: int64
            a: int64
            b: int64
            ----
            index: [[0,1]]
            a: [[1,2]]
            b: [[4,5]]
        """
        # Delegates to the shared base-class implementation; the index column
        # is prepended (see the example above).
        return super().with_row_index(name)
+
    @property
    def schema(self) -> Schema:
        r"""Get an ordered mapping of column names to their data type.

        Returns:
            A Narwhals Schema object that displays the mapping of column names.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
            >>> nw.from_native(df_native).schema
            Schema({'foo': Int64, 'bar': Float64})
        """
        # Delegates to the shared base-class property.
        return super().schema
+
    def collect_schema(self) -> Schema:
        r"""Get an ordered mapping of column names to their data type.

        Returns:
            A Narwhals Schema object that displays the mapping of column names.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
            >>> nw.from_native(df_native).collect_schema()
            Schema({'foo': Int64, 'bar': Float64})
        """
        # For an eager frame this is equivalent to `.schema`; the method form
        # mirrors the lazy API where schema resolution may require work.
        return super().collect_schema()
+
    @property
    def columns(self) -> list[str]:
        """Get column names.

        Returns:
            The column names stored in a list.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
            >>> nw.from_native(df_native).columns
            ['foo', 'bar']
        """
        # Delegates to the shared base-class property.
        return super().columns
+
    @overload
    def rows(self, *, named: Literal[False] = False) -> list[tuple[Any, ...]]: ...

    @overload
    def rows(self, *, named: Literal[True]) -> list[dict[str, Any]]: ...

    @overload
    def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ...

    def rows(
        self, *, named: bool = False
    ) -> list[tuple[Any, ...]] | list[dict[str, Any]]:
        """Returns all data in the DataFrame as a list of rows of python-native values.

        Arguments:
            named: By default, each row is returned as a tuple of values given
                in the same order as the frame columns. Setting named=True will
                return rows of dictionaries instead.

        Returns:
            The data as a list of rows.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
            >>> nw.from_native(df_native).rows()
            [(1, 6.0), (2, 7.0)]
        """
        # The compliant frame's annotation is wider than the overloads above,
        # hence the targeted ignore.
        return self._compliant_frame.rows(named=named)  # type: ignore[return-value]
+
+ def iter_columns(self) -> Iterator[Series[Any]]:
+ """Returns an iterator over the columns of this DataFrame.
+
+ Yields:
+ A Narwhals Series, backed by a native series.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": [1, 2], "bar": [6.0, 7.0]})
+ >>> iter_columns = nw.from_native(df_native).iter_columns()
+ >>> next(iter_columns)
+ ┌───────────────────────┐
+ | Narwhals Series |
+ |-----------------------|
+ |0 1 |
+ |1 2 |
+ |Name: foo, dtype: int64|
+ └───────────────────────┘
+ >>> next(iter_columns)
+ ┌─────────────────────────┐
+ | Narwhals Series |
+ |-------------------------|
+ |0 6.0 |
+ |1 7.0 |
+ |Name: bar, dtype: float64|
+ └─────────────────────────┘
+ """
+ for series in self._compliant_frame.iter_columns():
+ yield self._series(series, level=self._level)
+
    @overload
    def iter_rows(
        self, *, named: Literal[False], buffer_size: int = ...
    ) -> Iterator[tuple[Any, ...]]: ...

    @overload
    def iter_rows(
        self, *, named: Literal[True], buffer_size: int = ...
    ) -> Iterator[dict[str, Any]]: ...

    @overload
    def iter_rows(
        self, *, named: bool, buffer_size: int = ...
    ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]: ...

    def iter_rows(
        self, *, named: bool = False, buffer_size: int = 512
    ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]:
        """Returns an iterator over the DataFrame of rows of python-native values.

        Arguments:
            named: By default, each row is returned as a tuple of values given
                in the same order as the frame columns. Setting named=True will
                return rows of dictionaries instead.
            buffer_size: Determines the number of rows that are buffered
                internally while iterating over the data.
                See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html

        Returns:
            An iterator over the DataFrame of rows.

        Notes:
            cuDF doesn't support this method.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, 2], "bar": [6.0, 7.0]})
            >>> iter_rows = nw.from_native(df_native).iter_rows()
            >>> next(iter_rows)
            (1, 6.0)
            >>> next(iter_rows)
            (2, 7.0)
        """
        # The compliant frame's annotation is wider than the overloads above,
        # hence the targeted ignore.
        return self._compliant_frame.iter_rows(named=named, buffer_size=buffer_size)  # type: ignore[return-value]
+
    def with_columns(
        self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
    ) -> Self:
        r"""Add columns to this DataFrame.

        Added columns will replace existing columns with the same name.

        Arguments:
            *exprs: Column(s) to add, specified as positional arguments.
                Accepts expression input. Strings are parsed as column names, other
                non-expression inputs are parsed as literals.

            **named_exprs: Additional columns to add, specified as keyword arguments.
                The columns will be renamed to the keyword used.

        Returns:
            DataFrame: A new DataFrame with the columns added.

        Note:
            Creating a new DataFrame using this method does not create a new copy of
            existing data.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2], "b": [0.5, 4.0]})
            >>> (
            ...     nw.from_native(df_native)
            ...     .with_columns((nw.col("a") * 2).alias("a*2"))
            ...     .to_native()
            ... )
               a    b  a*2
            0  1  0.5    2
            1  2  4.0    4
        """
        # Delegates to the shared base-class implementation.
        return super().with_columns(*exprs, **named_exprs)
+
    def select(
        self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
    ) -> Self:
        r"""Select columns from this DataFrame.

        Arguments:
            *exprs: Column(s) to select, specified as positional arguments.
                Accepts expression input. Strings are parsed as column names,
                other non-expression inputs are parsed as literals.

            **named_exprs: Additional columns to select, specified as keyword arguments.
                The columns will be renamed to the keyword used.

        Returns:
            The dataframe containing only the selected columns.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"a": [1, 2], "b": [3, 4]})
            >>> nw.from_native(df_native).select("a", a_plus_1=nw.col("a") + 1)
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |pyarrow.Table     |
            |a: int64          |
            |a_plus_1: int64   |
            |----              |
            |a: [[1,2]]        |
            |a_plus_1: [[2,3]] |
            └──────────────────┘
        """
        # Delegates to the shared base-class implementation.
        return super().select(*exprs, **named_exprs)
+
    def rename(self, mapping: dict[str, str]) -> Self:
        """Rename column names.

        Arguments:
            mapping: Key value pairs that map from old name to new name.

        Returns:
            The dataframe with the specified columns renamed.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, 2], "bar": [6, 7]})
            >>> nw.from_native(df_native).rename({"foo": "apple"}).to_native()
            pyarrow.Table
            apple: int64
            bar: int64
            ----
            apple: [[1,2]]
            bar: [[6,7]]
        """
        # Delegates to the shared base-class implementation.
        return super().rename(mapping)
+
    def head(self, n: int = 5) -> Self:
        """Get the first `n` rows.

        Arguments:
            n: Number of rows to return. If a negative value is passed, return all rows
                except the last `abs(n)`.

        Returns:
            A subset of the dataframe of shape (n, n_columns).

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2], "b": [0.5, 4.0]})
            >>> nw.from_native(df_native).head(1).to_native()
               a    b
            0  1  0.5
        """
        # Delegates to the shared base-class implementation.
        return super().head(n)
+
    def tail(self, n: int = 5) -> Self:
        """Get the last `n` rows.

        Arguments:
            n: Number of rows to return. If a negative value is passed, return all rows
                except the first `abs(n)`.

        Returns:
            A subset of the dataframe of shape (n, n_columns).

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2], "b": [0.5, 4.0]})
            >>> nw.from_native(df_native).tail(1)
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |     a    b       |
            |  1  2  4.0       |
            └──────────────────┘
        """
        # Delegates to the shared base-class implementation.
        return super().tail(n)
+
    def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self:
        """Remove columns from the dataframe.

        Returns:
            The dataframe with the specified columns removed.

        Arguments:
            *columns: Names of the columns that should be removed from the dataframe.
            strict: Validate that all column names exist in the schema and throw an
                exception if a column name does not exist in the schema.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame(
            ...     {"foo": [1, 2], "bar": [6.0, 7.0], "ham": ["a", "b"]}
            ... )
            >>> nw.from_native(df_native).drop("ham").to_native()
               foo  bar
            0    1  6.0
            1    2  7.0
        """
        # `flatten` accepts both varargs (`drop("a", "b")`) and a single
        # iterable (`drop(["a", "b"])`), normalising to a flat list of names.
        return super().drop(*flatten(columns), strict=strict)
+
+ def unique(
+ self,
+ subset: str | list[str] | None = None,
+ *,
+ keep: UniqueKeepStrategy = "any",
+ maintain_order: bool = False,
+ ) -> Self:
+ """Drop duplicate rows from this dataframe.
+
+ Arguments:
+ subset: Column name(s) to consider when identifying duplicate rows.
+ keep: {'first', 'last', 'any', 'none'}
+ Which of the duplicate rows to keep.
+
+ * 'any': Does not give any guarantee of which row is kept.
+ This allows more optimizations.
+ * 'none': Don't keep duplicate rows.
+ * 'first': Keep first unique row.
+ * 'last': Keep last unique row.
+ maintain_order: Keep the same order as the original DataFrame. This may be more
+ expensive to compute.
+
+ Returns:
+ The dataframe with the duplicate rows removed.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"foo": [1, 2], "bar": ["a", "a"], "ham": ["b", "b"]}
+ ... )
+ >>> nw.from_native(df_native).unique(["bar", "ham"]).to_native()
+ foo bar ham
+ 0 1 a b
+ """
+ if keep not in {"any", "none", "first", "last"}:
+ msg = f"Expected {'any', 'none', 'first', 'last'}, got: {keep}"
+ raise ValueError(msg)
+ if isinstance(subset, str):
+ subset = [subset]
+ return self._with_compliant(
+ self._compliant_frame.unique(subset, keep=keep, maintain_order=maintain_order)
+ )
+
    def filter(
        self, *predicates: IntoExpr | Iterable[IntoExpr] | list[bool], **constraints: Any
    ) -> Self:
        r"""Filter the rows in the DataFrame based on one or more predicate expressions.

        The original order of the remaining rows is preserved.

        Arguments:
            *predicates: Expression(s) that evaluates to a boolean Series. Can
                also be a (single!) boolean list.
            **constraints: Column filters; use `name = value` to filter columns by the supplied value.
                Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly
                joined with the other filter conditions using &.

        Returns:
            The filtered dataframe.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame(
            ...     {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
            ... )

            Filter on one condition

            >>> nw.from_native(df_native).filter(nw.col("foo") > 1).to_native()
               foo  bar ham
            1    2    7   b
            2    3    8   c

            Filter on multiple conditions with implicit `&`

            >>> nw.from_native(df_native).filter(
            ...     nw.col("foo") < 3, nw.col("ham") == "a"
            ... ).to_native()
               foo  bar ham
            0    1    6   a

            Filter on multiple conditions with `|`

            >>> nw.from_native(df_native).filter(
            ...     (nw.col("foo") == 1) | (nw.col("ham") == "c")
            ... ).to_native()
               foo  bar ham
            0    1    6   a
            2    3    8   c

            Filter using `**kwargs` syntax

            >>> nw.from_native(df_native).filter(foo=2, ham="b").to_native()
               foo  bar ham
            1    2    7   b
        """
        # Delegates to the shared base-class implementation.
        return super().filter(*predicates, **constraints)
+
    @overload
    def group_by(
        self, *keys: IntoExpr | Iterable[IntoExpr], drop_null_keys: Literal[False] = ...
    ) -> GroupBy[Self]: ...

    @overload
    def group_by(
        self, *keys: str | Iterable[str], drop_null_keys: Literal[True]
    ) -> GroupBy[Self]: ...

    def group_by(
        self, *keys: IntoExpr | Iterable[IntoExpr], drop_null_keys: bool = False
    ) -> GroupBy[Self]:
        r"""Start a group by operation.

        Arguments:
            *keys: Column(s) to group by. Accepts expression input. Strings are parsed as
                column names.
            drop_null_keys: if True, then groups where any key is null won't be included
                in the result.

        Returns:
            GroupBy: Object which can be used to perform aggregations.

        Raises:
            NotImplementedError: If `drop_null_keys` is True and any key is an
                Expr or Series.
            ComputeError: If any expression key is not elementwise.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame(
            ...     {
            ...         "a": ["a", "b", "a", "b", "c"],
            ...         "b": [1, 2, 1, 3, 3],
            ...         "c": [5, 4, 3, 2, 1],
            ...     }
            ... )

            Group by one column and compute the sum of another column

            >>> nw.from_native(df_native, eager_only=True).group_by("a").agg(
            ...     nw.col("b").sum()
            ... ).sort("a").to_native()
               a  b
            0  a  2
            1  b  5
            2  c  3

            Group by multiple columns and compute the max of another column

            >>> (
            ...     nw.from_native(df_native, eager_only=True)
            ...     .group_by(["a", "b"])
            ...     .agg(nw.max("c"))
            ...     .sort("a", "b")
            ...     .to_native()
            ... )
               a  b  c
            0  a  1  5
            1  b  2  4
            2  b  3  2
            3  c  3  1

            Expressions are also accepted.

            >>> nw.from_native(df_native, eager_only=True).group_by(
            ...     "a", nw.col("b") // 2
            ... ).agg(nw.col("c").mean()).to_native()
               a  b    c
            0  a  0  4.0
            1  b  1  3.0
            2  c  1  1.0
        """
        # Imported locally to avoid an import cycle at module load time.
        from narwhals.group_by import GroupBy

        flat_keys = flatten(keys)

        # Fast path: plain column names need no expression machinery.
        if all(isinstance(key, str) for key in flat_keys):
            return GroupBy(self, flat_keys, drop_null_keys=drop_null_keys)

        from narwhals import col
        from narwhals.expr import Expr
        from narwhals.series import Series

        key_is_expr_or_series = tuple(isinstance(k, (Expr, Series)) for k in flat_keys)

        # drop_null_keys is only implemented for string keys (see overloads).
        if drop_null_keys and any(key_is_expr_or_series):
            msg = "drop_null_keys cannot be True when keys contains Expr or Series"
            raise NotImplementedError(msg)

        # Promote remaining string keys to column expressions so all keys
        # go through the same compliant-expression path.
        _keys = [
            k if is_expr else col(k)
            for k, is_expr in zip(flat_keys, key_is_expr_or_series)
        ]
        expr_flat_keys, kinds = self._flatten_and_extract(*_keys)

        # Aggregating or length-changing keys would make group membership
        # ill-defined; only elementwise expressions are valid keys.
        if not all(kind is ExprKind.ELEMENTWISE for kind in kinds):
            from narwhals.exceptions import ComputeError

            msg = (
                "Group by is not supported with keys that are not elementwise expressions"
            )
            raise ComputeError(msg)

        return GroupBy(self, expr_flat_keys, drop_null_keys=drop_null_keys)
+
    def sort(
        self,
        by: str | Iterable[str],
        *more_by: str,
        descending: bool | Sequence[bool] = False,
        nulls_last: bool = False,
    ) -> Self:
        r"""Sort the dataframe by the given columns.

        Arguments:
            by: Column(s) names to sort by.
            *more_by: Additional columns to sort by, specified as positional arguments.
            descending: Sort in descending order. When sorting by multiple columns, can be
                specified per column by passing a sequence of booleans.
            nulls_last: Place null values last.

        Returns:
            The sorted dataframe.

        Note:
            Unlike Polars, it is not possible to specify a sequence of booleans for
            `nulls_last` in order to control per-column behaviour. Instead a single
            boolean is applied for all `by` columns.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame(
            ...     {"foo": [2, 1], "bar": [6.0, 7.0], "ham": ["a", "b"]}
            ... )
            >>> nw.from_native(df_native).sort("foo")
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |   foo  bar ham   |
            |1    1  7.0   b   |
            |0    2  6.0   a   |
            └──────────────────┘
        """
        # Delegates to the shared base-class implementation.
        return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last)
+
    def join(
        self,
        other: Self,
        on: str | list[str] | None = None,
        how: JoinStrategy = "inner",
        *,
        left_on: str | list[str] | None = None,
        right_on: str | list[str] | None = None,
        suffix: str = "_right",
    ) -> Self:
        r"""Join in SQL-like fashion.

        Arguments:
            other: DataFrame to join with.
            on: Name(s) of the join columns in both DataFrames. If set, `left_on` and
                `right_on` should be None.
            how: Join strategy.

                * *inner*: Returns rows that have matching values in both tables.
                * *left*: Returns all rows from the left table, and the matched rows from the right table.
                * *full*: Returns all rows in both dataframes, with the suffix appended to the right join keys.
                * *cross*: Returns the Cartesian product of rows from both tables.
                * *semi*: Filter rows that have a match in the right table.
                * *anti*: Filter rows that do not have a match in the right table.
            left_on: Join column of the left DataFrame.
            right_on: Join column of the right DataFrame.
            suffix: Suffix to append to columns with a duplicate name.

        Returns:
            A new joined DataFrame

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_1_native = pd.DataFrame({"id": ["a", "b"], "price": [6.0, 7.0]})
            >>> df_2_native = pd.DataFrame({"id": ["a", "b", "c"], "qty": [1, 2, 3]})
            >>> nw.from_native(df_1_native).join(nw.from_native(df_2_native), on="id")
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |  id  price  qty  |
            |0  a    6.0    1  |
            |1  b    7.0    2  |
            └──────────────────┘
        """
        # Delegates to the shared base-class implementation; argument
        # validation (e.g. `on` vs `left_on`/`right_on`) happens there.
        return super().join(
            other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix
        )
+
    def join_asof(
        self,
        other: Self,
        *,
        left_on: str | None = None,
        right_on: str | None = None,
        on: str | None = None,
        by_left: str | list[str] | None = None,
        by_right: str | list[str] | None = None,
        by: str | list[str] | None = None,
        strategy: AsofJoinStrategy = "backward",
        suffix: str = "_right",
    ) -> Self:
        """Perform an asof join.

        This is similar to a left-join except that we match on nearest key rather than equal keys.

        For Polars, both DataFrames must be sorted by the `on` key (within each `by` group
        if specified).

        Arguments:
            other: DataFrame to join with.
            left_on: Name(s) of the left join column(s).
            right_on: Name(s) of the right join column(s).
            on: Join column of both DataFrames. If set, left_on and right_on should be None.
            by_left: join on these columns before doing asof join.
            by_right: join on these columns before doing asof join.
            by: join on these columns before doing asof join.
            strategy: Join strategy. The default is "backward".
            suffix: Suffix to append to columns with a duplicate name.

                * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key.
                * *forward*: selects the first row in the right DataFrame whose "on" key is greater than or equal to the left's key.
                * *nearest*: search selects the last row in the right DataFrame whose value is nearest to the left's key.

        Returns:
            A new joined DataFrame

        Examples:
            >>> from datetime import datetime
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> data_gdp = {
            ...     "datetime": [
            ...         datetime(2016, 1, 1),
            ...         datetime(2017, 1, 1),
            ...         datetime(2018, 1, 1),
            ...         datetime(2019, 1, 1),
            ...         datetime(2020, 1, 1),
            ...     ],
            ...     "gdp": [4164, 4411, 4566, 4696, 4827],
            ... }
            >>> data_population = {
            ...     "datetime": [
            ...         datetime(2016, 3, 1),
            ...         datetime(2018, 8, 1),
            ...         datetime(2019, 1, 1),
            ...     ],
            ...     "population": [82.19, 82.66, 83.12],
            ... }
            >>> gdp_native = pd.DataFrame(data_gdp)
            >>> population_native = pd.DataFrame(data_population)
            >>> gdp = nw.from_native(gdp_native)
            >>> population = nw.from_native(population_native)
            >>> population.join_asof(gdp, on="datetime", strategy="backward")
            ┌──────────────────────────────┐
            |      Narwhals DataFrame      |
            |------------------------------|
            |    datetime  population   gdp|
            |0 2016-03-01       82.19  4164|
            |1 2018-08-01       82.66  4566|
            |2 2019-01-01       83.12  4696|
            └──────────────────────────────┘
        """
        # Delegates to the shared base-class implementation; mutually
        # exclusive argument checks (`on` vs `left_on`/`right_on`, `by` vs
        # `by_left`/`by_right`) happen there.
        return super().join_asof(
            other,
            left_on=left_on,
            right_on=right_on,
            on=on,
            by_left=by_left,
            by_right=by_right,
            by=by,
            strategy=strategy,
            suffix=suffix,
        )
+
    # --- descriptive ---
    def is_duplicated(self) -> Series[Any]:
        r"""Get a mask of all duplicated rows in this DataFrame.

        Returns:
            A new Series.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"foo": [2, 2, 2], "bar": [6.0, 6.0, 7.0]})
            >>> nw.from_native(df_native).is_duplicated()
            ┌───────────────┐
            |Narwhals Series|
            |---------------|
            |  0     True   |
            |  1     True   |
            |  2    False   |
            |  dtype: bool  |
            └───────────────┘
        """
        # A row is duplicated exactly when it is not unique, so this is
        # simply the boolean complement of `is_unique()`.
        return ~self.is_unique()
+
+ def is_empty(self) -> bool:
+ r"""Check if the dataframe is empty.
+
+ Returns:
+ A boolean indicating whether the dataframe is empty (True) or not (False).
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": [2, 2, 2], "bar": [6.0, 6.0, 7.0]})
+ >>> nw.from_native(df_native).is_empty()
+ False
+ """
+ return len(self) == 0
+
    def is_unique(self) -> Series[Any]:
        r"""Get a mask of all unique rows in this DataFrame.

        Returns:
            A new Series.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"foo": [2, 2, 2], "bar": [6.0, 6.0, 7.0]})
            >>> nw.from_native(df_native).is_unique()
            ┌───────────────┐
            |Narwhals Series|
            |---------------|
            |  0    False   |
            |  1    False   |
            |  2     True   |
            |  dtype: bool  |
            └───────────────┘
        """
        # The backend computes the mask; wrap it in a public Series at this
        # frame's API level.
        return self._series(self._compliant_frame.is_unique(), level=self._level)
+
    def null_count(self) -> Self:
        r"""Create a new DataFrame that shows the null counts per column.

        Returns:
            A dataframe of shape (1, n_columns).

        Notes:
            pandas handles null values differently from Polars and PyArrow.
            See [null_handling](../concepts/null_handling.md/)
            for reference.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, None], "bar": [2, 3]})
            >>> nw.from_native(df_native).null_count()
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |  pyarrow.Table   |
            |  foo: int64      |
            |  bar: int64      |
            |  ----            |
            |  foo: [[1]]      |
            |  bar: [[0]]      |
            └──────────────────┘
        """
        # Express the operation as `all().null_count()` in the backend's own
        # expression namespace, then evaluate it with a select.
        plx = self._compliant_frame.__narwhals_namespace__()
        result = self._compliant_frame.select(plx.all().null_count())
        return self._with_compliant(result)
+
    def item(self, row: int | None = None, column: int | str | None = None) -> Any:
        r"""Return the DataFrame as a scalar, or return the element at the given row/column.

        Arguments:
            row: The *n*-th row.
            column: The column selected via an integer or a string (column name).

        Returns:
            A scalar or the specified element in the dataframe.

        Notes:
            If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1).
            With row/col, this is equivalent to df[row,col].

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, None], "bar": [2, 3]})
            >>> nw.from_native(df_native).item(0, 1)
            2
        """
        # Scalar extraction (including the shape-(1,1) check when no
        # coordinates are given) is handled by the compliant frame.
        return self._compliant_frame.item(row=row, column=column)
+
    def clone(self) -> Self:
        r"""Create a copy of this DataFrame.

        Returns:
            An identical copy of the original dataframe.
        """
        # Copy semantics are backend-defined; wrap the cloned compliant frame.
        return self._with_compliant(self._compliant_frame.clone())
+
    def gather_every(self, n: int, offset: int = 0) -> Self:
        r"""Take every nth row in the DataFrame and return as a new DataFrame.

        Arguments:
            n: Gather every *n*-th row.
            offset: Starting index.

        Returns:
            The dataframe containing only the selected rows.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>> df_native = pa.table({"foo": [1, None, 2, 3]})
            >>> nw.from_native(df_native).gather_every(2)
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |  pyarrow.Table   |
            |  foo: int64      |
            |  ----            |
            |  foo: [[1,2]]    |
            └──────────────────┘
        """
        # Delegates to the shared base-class implementation.
        return super().gather_every(n=n, offset=offset)
+
    def pivot(
        self,
        on: str | list[str],
        *,
        index: str | list[str] | None = None,
        values: str | list[str] | None = None,
        aggregate_function: PivotAgg | None = None,
        maintain_order: bool | None = None,
        sort_columns: bool = False,
        separator: str = "_",
    ) -> Self:
        r"""Create a spreadsheet-style pivot table as a DataFrame.

        Arguments:
            on: Name of the column(s) whose values will be used as the header of the
                output DataFrame.
            index: One or multiple keys to group by. If None, all remaining columns not
                specified on `on` and `values` will be used. At least one of `index` and
                `values` must be specified.
            values: One or multiple keys to group by. If None, all remaining columns not
                specified on `on` and `index` will be used. At least one of `index` and
                `values` must be specified.
            aggregate_function: Choose from

                - None: no aggregation takes place, will raise error if multiple values
                    are in group.
                - A predefined aggregate function string, one of
                    {'min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'}
            maintain_order: Has no effect and is kept around only for backwards-compatibility.
            sort_columns: Sort the transposed columns by name. Default is by order of
                discovery.
            separator: Used as separator/delimiter in generated column names in case of
                multiple `values` columns.

        Returns:
            A new dataframe.

        Raises:
            ValueError: If both `values` and `index` are None.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> data = {
            ...     "ix": [1, 1, 2, 2, 1, 2],
            ...     "col": ["a", "a", "a", "a", "b", "b"],
            ...     "foo": [0, 1, 2, 2, 7, 1],
            ...     "bar": [0, 2, 0, 0, 9, 4],
            ... }
            >>> df_native = pd.DataFrame(data)
            >>> nw.from_native(df_native).pivot(
            ...     "col", index="ix", aggregate_function="sum"
            ... )
            ┌─────────────────────────────────┐
            |        Narwhals DataFrame       |
            |---------------------------------|
            |   ix  foo_a  foo_b  bar_a  bar_b|
            |0   1      1      7      2      9|
            |1   2      4      1      0      4|
            └─────────────────────────────────┘
        """
        # Validate first: at least one of `values`/`index` must pin down the
        # pivot layout.
        if values is None and index is None:
            msg = "At least one of `values` and `index` must be passed"
            raise ValueError(msg)
        # `maintain_order` is deprecated-in-effect: warn (once per call site)
        # but otherwise ignore it.
        if maintain_order is not None:
            msg = (
                "`maintain_order` has no effect and is only kept around for backwards-compatibility. "
                "You can safely remove this argument."
            )
            warn(message=msg, category=UserWarning, stacklevel=find_stacklevel())
        # Normalise each str argument to the list form expected by the
        # compliant frames.
        on = [on] if isinstance(on, str) else on
        values = [values] if isinstance(values, str) else values
        index = [index] if isinstance(index, str) else index

        # Note: `maintain_order` is intentionally NOT forwarded (see above).
        return self._with_compliant(
            self._compliant_frame.pivot(
                on=on,
                index=index,
                values=values,
                aggregate_function=aggregate_function,
                sort_columns=sort_columns,
                separator=separator,
            )
        )
+
    def to_arrow(self) -> pa.Table:
        r"""Convert to arrow table.

        Returns:
            A new PyArrow table.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"foo": [1, None], "bar": [2, 3]})
            >>> nw.from_native(df_native).to_arrow()
            pyarrow.Table
            foo: double
            bar: int64
            ----
            foo: [[1,null]]
            bar: [[2,3]]
        """
        # Conversion is backend-specific (zero-copy where the backend allows
        # it — presumably; confirm per backend) and handled by the compliant frame.
        return self._compliant_frame.to_arrow()
+
    def sample(
        self,
        n: int | None = None,
        *,
        fraction: float | None = None,
        with_replacement: bool = False,
        seed: int | None = None,
    ) -> Self:
        r"""Sample from this DataFrame.

        Arguments:
            n: Number of items to return. Cannot be used with fraction.
            fraction: Fraction of items to return. Cannot be used with n.
            with_replacement: Allow values to be sampled more than once.
            seed: Seed for the random number generator. If set to None (default), a random
                seed is generated for each sample operation.

        Returns:
            A new dataframe.

        Notes:
            The results may not be consistent across libraries.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [19, 32, 4]})
            >>> nw.from_native(df_native).sample(n=2)  # doctest:+SKIP
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            |     foo  bar     |
            |  2    3    4     |
            |  1    2   32     |
            └──────────────────┘
        """
        # Randomness lives in the backend; even with a fixed `seed` the rows
        # drawn may differ between backends (see Notes above).
        return self._with_compliant(
            self._compliant_frame.sample(
                n=n, fraction=fraction, with_replacement=with_replacement, seed=seed
            )
        )
+
    def unpivot(
        self,
        on: str | list[str] | None = None,
        *,
        index: str | list[str] | None = None,
        variable_name: str = "variable",
        value_name: str = "value",
    ) -> Self:
        r"""Unpivot a DataFrame from wide to long format.

        Optionally leaves identifiers set.

        This function is useful to massage a DataFrame into a format where one or more
        columns are identifier variables (index) while all other columns, considered
        measured variables (on), are "unpivoted" to the row axis leaving just
        two non-identifier columns, 'variable' and 'value'.

        Arguments:
            on: Column(s) to use as values variables; if `on` is empty all columns that
                are not in `index` will be used.
            index: Column(s) to use as identifier variables.
            variable_name: Name to give to the `variable` column. Defaults to "variable".
            value_name: Name to give to the `value` column. Defaults to "value".

        Returns:
            The unpivoted dataframe.

        Notes:
            If you're coming from pandas, this is similar to `pandas.DataFrame.melt`,
            but with `index` replacing `id_vars` and `on` replacing `value_vars`.
            In other frameworks, you might know this operation as `pivot_longer`.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> data = {"a": ["x", "y", "z"], "b": [1, 3, 5], "c": [2, 4, 6]}
            >>> df_native = pd.DataFrame(data)
            >>> nw.from_native(df_native).unpivot(["b", "c"], index="a")
            ┌────────────────────┐
            | Narwhals DataFrame |
            |--------------------|
            |   a variable  value|
            |0  x        b      1|
            |1  y        b      3|
            |2  z        b      5|
            |3  x        c      2|
            |4  y        c      4|
            |5  z        c      6|
            └────────────────────┘
        """
        # Delegates to the shared base-class implementation.
        return super().unpivot(
            on=on, index=index, variable_name=variable_name, value_name=value_name
        )
+
    def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self:
        """Explode the dataframe to long format by exploding the given columns.

        Notes:
            It is possible to explode multiple columns only if these columns must have
            matching element counts.

        Arguments:
            columns: Column names. The underlying columns being exploded must be of the `List` data type.
            *more_columns: Additional names of columns to explode, specified as positional arguments.

        Returns:
            New DataFrame

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> data = {"a": ["x", "y"], "b": [[1, 2], [3]]}
            >>> df_native = pl.DataFrame(data)
            >>> nw.from_native(df_native).explode("b").to_native()
            shape: (3, 2)
            ┌─────┬─────┐
            │ a   ┆ b   │
            │ --- ┆ --- │
            │ str ┆ i64 │
            ╞═════╪═════╡
            │ x   ┆ 1   │
            │ x   ┆ 2   │
            │ y   ┆ 3   │
            └─────┴─────┘
        """
        # Delegates to the shared base-class implementation.
        return super().explode(columns, *more_columns)
+
+
+class LazyFrame(BaseFrame[FrameT]):
+ """Narwhals LazyFrame, backed by a native lazyframe.
+
+ Warning:
+ This class is not meant to be instantiated directly - instead use
+ [`narwhals.from_native`][] with a native
+ object that is a lazy dataframe from one of the supported
+ backend (e.g. polars.LazyFrame, dask_expr._collection.DataFrame):
+ ```py
+ narwhals.from_native(native_lazyframe)
+ ```
+ """
+
    def _extract_compliant(self, arg: Any) -> Any:
        """Lower `arg` to the backend-level ("compliant") object this frame delegates to.

        Frames unwrap to their compliant frame, bare strings become column
        expressions, and expressions are validated before lowering: lazy
        frames make no row-order guarantees, so order-dependent and
        length-changing expressions are rejected with actionable messages.

        Raises:
            TypeError: For Series operands, or raw Polars objects that were
                not wrapped with `nw.from_native`.
            OrderDependentExprError: For expressions containing orderable ops.
            LengthChangingExprError: For filtration (length-changing) expressions.
        """
        from narwhals.expr import Expr
        from narwhals.series import Series

        if isinstance(arg, BaseFrame):
            return arg._compliant_frame
        if isinstance(arg, Series):  # pragma: no cover
            msg = "Binary operations between Series and LazyFrame are not supported."
            raise TypeError(msg)
        if isinstance(arg, str):  # pragma: no cover
            # A bare string is shorthand for a column reference.
            plx = self.__narwhals_namespace__()
            return plx.col(arg)
        if isinstance(arg, Expr):
            # Order-dependent ops (cum_sum, diff, ...) need an explicit
            # `.over(order_by=...)` anchor to be meaningful on a LazyFrame.
            if arg._metadata.n_orderable_ops:
                msg = (
                    "Order-dependent expressions are not supported for use in LazyFrame.\n\n"
                    "Hint: To make the expression valid, use `.over` with `order_by` specified.\n\n"
                    "For example, if you wrote `nw.col('price').cum_sum()` and you have a column\n"
                    "`'date'` which orders your data, then replace:\n\n"
                    "    nw.col('price').cum_sum()\n\n"
                    " with:\n\n"
                    "    nw.col('price').cum_sum().over(order_by='date')\n"
                    "                             ^^^^^^^^^^^^^^^^^^^^^^\n\n"
                    "See https://narwhals-dev.github.io/narwhals/concepts/order_dependence/."
                )
                raise OrderDependentExprError(msg)
            # Length-changing expressions (head, drop_nulls, ...) are only
            # valid when followed by an aggregation.
            if arg._metadata.is_filtration:
                msg = (
                    "Length-changing expressions are not supported for use in LazyFrame, unless\n"
                    "followed by an aggregation.\n\n"
                    "Hints:\n"
                    "- Instead of `lf.select(nw.col('a').head())`, use `lf.select('a').head()\n"
                    "- Instead of `lf.select(nw.col('a').drop_nulls()).select(nw.sum('a'))`,\n"
                    "  use `lf.select(nw.col('a').drop_nulls().sum())\n"
                )
                raise LengthChangingExprError(msg)
            return arg._to_compliant_expr(self.__narwhals_namespace__())
        # Catch the common mistake of passing a raw Polars object.
        if get_polars() is not None and "polars" in str(type(arg)):  # pragma: no cover
            msg = (
                f"Expected Narwhals object, got: {type(arg)}.\n\n"
                "Perhaps you:\n"
                "- Forgot a `nw.from_native` somewhere?\n"
                "- Used `pl.col` instead of `nw.col`?"
            )
            raise TypeError(msg)
        raise InvalidIntoExprError.from_invalid_type(type(arg))  # pragma: no cover
+
    @property
    def _dataframe(self) -> type[DataFrame[Any]]:
        # Wrapper class used for the eager result of `collect`.
        return DataFrame
+
    def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
        """Wrap a compliant native lazyframe; see the class docstring for usage."""
        # API level this wrapper was created at ("full", "lazy" or "interchange").
        self._level = level
        self._compliant_frame: CompliantLazyFrame[Any, FrameT, Self]  # type: ignore[type-var]
        if is_compliant_lazyframe(df):
            # All operations are delegated to this backend-specific object.
            self._compliant_frame = df.__narwhals_lazyframe__()
        else:  # pragma: no cover
            msg = f"Expected Polars LazyFrame or an object that implements `__narwhals_lazyframe__`, got: {type(df)}"
            raise AssertionError(msg)
+
    def __repr__(self) -> str:  # pragma: no cover
        # Render a boxed "Narwhals LazyFrame" frame around the native repr.
        return generate_repr("Narwhals LazyFrame", self.to_native().__repr__())
+
+ @property
+ def implementation(self) -> Implementation:
+ """Return implementation of native frame.
+
+ This can be useful when you need to use special-casing for features outside of
+ Narwhals' scope - for example, when dealing with pandas' Period Dtype.
+
+ Returns:
+ Implementation.
+
+ Examples:
+ >>> import narwhals as nw
+ >>> import dask.dataframe as dd
+ >>> lf_native = dd.from_dict({"a": [1, 2]}, npartitions=1)
+ >>> nw.from_native(lf_native).implementation
+ <Implementation.DASK: 'dask'>
+ """
+ return self._compliant_frame._implementation
+
    def __getitem__(self, item: str | slice) -> NoReturn:
        """Always raise: indexing/slicing is an eager operation.

        A LazyFrame has no materialized rows; call `.collect()` first and
        index the resulting DataFrame instead.
        """
        msg = "Slicing is not supported on LazyFrame"
        raise TypeError(msg)
+
+ def collect(
+ self, backend: ModuleType | Implementation | str | None = None, **kwargs: Any
+ ) -> DataFrame[Any]:
+ r"""Materialize this LazyFrame into a DataFrame.
+
+ As each underlying lazyframe has different arguments to set when materializing
+ the lazyframe into a dataframe, we allow to pass them as kwargs (see examples
+ below for how to generalize the specification).
+
+ Arguments:
+ backend: specifies which eager backend collect to. This will be the underlying
+ backend for the resulting Narwhals DataFrame. If None, then the following
+ default conversions will be applied
+
+ - `polars.LazyFrame` -> `polars.DataFrame`
+ - `dask.DataFrame` -> `pandas.DataFrame`
+ - `duckdb.PyRelation` -> `pyarrow.Table`
+ - `pyspark.DataFrame` -> `pyarrow.Table`
+
+ `backend` can be specified in various ways
+
+ - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`
+ or `POLARS`.
+ - As a string: `"pandas"`, `"pyarrow"` or `"polars"`
+ - Directly as a module `pandas`, `pyarrow` or `polars`.
+ kwargs: backend specific kwargs to pass along. To know more please check the
+ backend specific documentation
+
+ - [polars.LazyFrame.collect](https://docs.pola.rs/api/python/dev/reference/lazyframe/api/polars.LazyFrame.collect.html)
+ - [dask.dataframe.DataFrame.compute](https://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.compute.html)
+
+ Returns:
+ DataFrame
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 2), (3, 4) df(a, b)")
+ >>> lf = nw.from_native(lf_native)
+ >>> lf
+ ┌──────────────────┐
+ |Narwhals LazyFrame|
+ |------------------|
+ |┌───────┬───────┐ |
+ |│ a │ b │ |
+ |│ int32 │ int32 │ |
+ |├───────┼───────┤ |
+ |│ 1 │ 2 │ |
+ |│ 3 │ 4 │ |
+ |└───────┴───────┘ |
+ └──────────────────┘
+ >>> lf.collect()
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | pyarrow.Table |
+ | a: int32 |
+ | b: int32 |
+ | ---- |
+ | a: [[1,3]] |
+ | b: [[2,4]] |
+ └──────────────────┘
+ """
+ eager_backend = None if backend is None else Implementation.from_backend(backend)
+ supported_eager_backends = (
+ Implementation.POLARS,
+ Implementation.PANDAS,
+ Implementation.PYARROW,
+ )
+ if eager_backend is not None and eager_backend not in supported_eager_backends:
+ msg = f"Unsupported `backend` value.\nExpected one of {supported_eager_backends} or None, got: {eager_backend}."
+ raise ValueError(msg)
+ return self._dataframe(
+ self._compliant_frame.collect(backend=eager_backend, **kwargs), level="full"
+ )
+
+ def to_native(self) -> FrameT:
+ """Convert Narwhals LazyFrame to native one.
+
+ Returns:
+ Object of class that user started with.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 2), (3, 4) df(a, b)")
+ >>> nw.from_native(lf_native).to_native()
+ ┌───────┬───────┐
+ │ a │ b │
+ │ int32 │ int32 │
+ ├───────┼───────┤
+ │ 1 │ 2 │
+ │ 3 │ 4 │
+ └───────┴───────┘
+ <BLANKLINE>
+ """
+ return to_native(narwhals_object=self, pass_through=False)
+
+ # inherited
+ def pipe(
+ self,
+ function: Callable[Concatenate[Self, PS], R],
+ *args: PS.args,
+ **kwargs: PS.kwargs,
+ ) -> R:
+ """Pipe function call.
+
+ Arguments:
+ function: Function to apply.
+ args: Positional arguments to pass to function.
+ kwargs: Keyword arguments to pass to function.
+
+ Returns:
+ The original object with the function applied.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 2), (3, 4) df(a, b)")
+ >>> nw.from_native(lf_native).pipe(lambda x: x.select("a")).to_native()
+ ┌───────┐
+ │ a │
+ │ int32 │
+ ├───────┤
+ │ 1 │
+ │ 3 │
+ └───────┘
+ <BLANKLINE>
+ """
+ return super().pipe(function, *args, **kwargs)
+
+ def drop_nulls(self, subset: str | list[str] | None = None) -> Self:
+ """Drop rows that contain null values.
+
+ Arguments:
+ subset: Column name(s) for which null values are considered. If set to None
+ (default), use all columns.
+
+ Returns:
+ The original object with the rows removed that contained the null values.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, NULL), (3, 4) df(a, b)")
+ >>> nw.from_native(lf_native).drop_nulls()
+ ┌──────────────────┐
+ |Narwhals LazyFrame|
+ |------------------|
+ |┌───────┬───────┐ |
+ |│ a │ b │ |
+ |│ int32 │ int32 │ |
+ |├───────┼───────┤ |
+ |│ 3 │ 4 │ |
+ |└───────┴───────┘ |
+ └──────────────────┘
+ """
+ return super().drop_nulls(subset=subset)
+
+ def with_row_index(self, name: str = "index") -> Self:
+ """Insert column which enumerates rows.
+
+ Arguments:
+ name: The name of the column as a string. The default is "index".
+
+ Returns:
+ The original object with the column added.
+
+ Examples:
+ >>> import dask.dataframe as dd
+ >>> import narwhals as nw
+ >>> lf_native = dd.from_dict({"a": [1, 2], "b": [4, 5]}, npartitions=1)
+ >>> nw.from_native(lf_native).with_row_index().collect()
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | index a b |
+ | 0 0 1 4 |
+ | 1 1 2 5 |
+ └──────────────────┘
+ """
+ return super().with_row_index(name)
+
+ @property
+ def schema(self) -> Schema:
+ r"""Get an ordered mapping of column names to their data type.
+
+ Returns:
+ A Narwhals Schema object that displays the mapping of column names.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 4.5), (3, 2.) df(a, b)")
+ >>> nw.from_native(lf_native).schema
+ Schema({'a': Int32, 'b': Decimal})
+ """
+ return super().schema
+
+ def collect_schema(self) -> Schema:
+ r"""Get an ordered mapping of column names to their data type.
+
+ Returns:
+ A Narwhals Schema object that displays the mapping of column names.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 4.5), (3, 2.) df(a, b)")
+ >>> nw.from_native(lf_native).collect_schema()
+ Schema({'a': Int32, 'b': Decimal})
+ """
+ return super().collect_schema()
+
+ @property
+ def columns(self) -> list[str]:
+ r"""Get column names.
+
+ Returns:
+ The column names stored in a list.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 4.5), (3, 2.) df(a, b)")
+ >>> nw.from_native(lf_native).columns
+ ['a', 'b']
+ """
+ return super().columns
+
+ def with_columns(
+ self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
+ ) -> Self:
+ r"""Add columns to this LazyFrame.
+
+ Added columns will replace existing columns with the same name.
+
+ Arguments:
+ *exprs: Column(s) to add, specified as positional arguments.
+ Accepts expression input. Strings are parsed as column names, other
+ non-expression inputs are parsed as literals.
+
+ **named_exprs: Additional columns to add, specified as keyword arguments.
+ The columns will be renamed to the keyword used.
+
+ Returns:
+ LazyFrame: A new LazyFrame with the columns added.
+
+ Note:
+ Creating a new LazyFrame using this method does not create a new copy of
+ existing data.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 4.5), (3, 2.) df(a, b)")
+ >>> nw.from_native(lf_native).with_columns(c=nw.col("a") + 1)
+ ┌────────────────────────────────┐
+ | Narwhals LazyFrame |
+ |--------------------------------|
+ |┌───────┬──────────────┬───────┐|
+ |│ a │ b │ c │|
+ |│ int32 │ decimal(2,1) │ int32 │|
+ |├───────┼──────────────┼───────┤|
+ |│ 1 │ 4.5 │ 2 │|
+ |│ 3 │ 2.0 │ 4 │|
+ |└───────┴──────────────┴───────┘|
+ └────────────────────────────────┘
+ """
+ if not exprs and not named_exprs:
+ msg = "At least one expression must be passed to LazyFrame.with_columns"
+ raise ValueError(msg)
+ return super().with_columns(*exprs, **named_exprs)
+
+ def select(
+ self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
+ ) -> Self:
+ r"""Select columns from this LazyFrame.
+
+ Arguments:
+ *exprs: Column(s) to select, specified as positional arguments.
+ Accepts expression input. Strings are parsed as column names.
+ **named_exprs: Additional columns to select, specified as keyword arguments.
+ The columns will be renamed to the keyword used.
+
+ Returns:
+ The LazyFrame containing only the selected columns.
+
+ Notes:
+ If you'd like to select a column whose name isn't a string (for example,
+ if you're working with pandas) then you should explicitly use `nw.col` instead
+ of just passing the column name. For example, to select a column named
+ `0` use `df.select(nw.col(0))`, not `df.select(0)`.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 4.5), (3, 2.) df(a, b)")
+ >>> nw.from_native(lf_native).select("a", a_plus_1=nw.col("a") + 1)
+ ┌────────────────────┐
+ | Narwhals LazyFrame |
+ |--------------------|
+ |┌───────┬──────────┐|
+ |│ a │ a_plus_1 │|
+ |│ int32 │ int32 │|
+ |├───────┼──────────┤|
+ |│ 1 │ 2 │|
+ |│ 3 │ 4 │|
+ |└───────┴──────────┘|
+ └────────────────────┘
+ """
+ if not exprs and not named_exprs:
+ msg = "At least one expression must be passed to LazyFrame.select"
+ raise ValueError(msg)
+ return super().select(*exprs, **named_exprs)
+
+ def rename(self, mapping: dict[str, str]) -> Self:
+ r"""Rename column names.
+
+ Arguments:
+ mapping: Key value pairs that map from old name to new name, or a
+ function that takes the old name as input and returns the
+ new name.
+
+ Returns:
+ The LazyFrame with the specified columns renamed.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 4.5), (3, 2.) df(a, b)")
+ >>> nw.from_native(lf_native).rename({"a": "c"})
+ ┌────────────────────────┐
+ | Narwhals LazyFrame |
+ |------------------------|
+ |┌───────┬──────────────┐|
+ |│ c │ b │|
+ |│ int32 │ decimal(2,1) │|
+ |├───────┼──────────────┤|
+ |│ 1 │ 4.5 │|
+ |│ 3 │ 2.0 │|
+ |└───────┴──────────────┘|
+ └────────────────────────┘
+ """
+ return super().rename(mapping)
+
+ def head(self, n: int = 5) -> Self:
+ r"""Get `n` rows.
+
+ Arguments:
+ n: Number of rows to return.
+
+ Returns:
+ A subset of the LazyFrame of shape (n, n_columns).
+
+ Examples:
+ >>> import dask.dataframe as dd
+ >>> import narwhals as nw
+ >>> lf_native = dd.from_dict({"a": [1, 2, 3], "b": [4, 5, 6]}, npartitions=1)
+ >>> nw.from_native(lf_native).head(2).collect()
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 4 |
+ | 1 2 5 |
+ └──────────────────┘
+ """
+ return super().head(n)
+
+ def tail(self, n: int = 5) -> Self: # pragma: no cover
+ r"""Get the last `n` rows.
+
+ Warning:
+ `LazyFrame.tail` is deprecated and will be removed in a future version.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ n: Number of rows to return.
+
+ Returns:
+ A subset of the LazyFrame of shape (n, n_columns).
+ """
+ return super().tail(n)
+
+ def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self:
+ r"""Remove columns from the LazyFrame.
+
+ Arguments:
+ *columns: Names of the columns that should be removed from the dataframe.
+ strict: Validate that all column names exist in the schema and throw an
+ exception if a column name does not exist in the schema.
+
+ Returns:
+ The LazyFrame with the specified columns removed.
+
+ Warning:
+ `strict` argument is ignored for `polars<1.0.0`.
+
+ Please consider upgrading to a newer version or pass to eager mode.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 2), (3, 4) df(a, b)")
+ >>> nw.from_native(lf_native).drop("a").to_native()
+ ┌───────┐
+ │ b │
+ │ int32 │
+ ├───────┤
+ │ 2 │
+ │ 4 │
+ └───────┘
+ <BLANKLINE>
+ """
+ return super().drop(*flatten(columns), strict=strict)
+
+ def unique(
+ self,
+ subset: str | list[str] | None = None,
+ *,
+ keep: LazyUniqueKeepStrategy = "any",
+ ) -> Self:
+ """Drop duplicate rows from this LazyFrame.
+
+ Arguments:
+ subset: Column name(s) to consider when identifying duplicate rows.
+ If set to `None`, use all columns.
+ keep: {'any', 'none'}
+ Which of the duplicate rows to keep.
+
+ * 'any': Does not give any guarantee of which row is kept.
+ * 'none': Don't keep duplicate rows.
+
+ Returns:
+ The LazyFrame with unique rows.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> lf_native = duckdb.sql("SELECT * FROM VALUES (1, 1), (3, 4) df(a, b)")
+ >>> nw.from_native(lf_native).unique("a").sort("a", descending=True)
+ ┌──────────────────┐
+ |Narwhals LazyFrame|
+ |------------------|
+ |┌───────┬───────┐ |
+ |│ a │ b │ |
+ |│ int32 │ int32 │ |
+ |├───────┼───────┤ |
+ |│ 3 │ 4 │ |
+ |│ 1 │ 1 │ |
+ |└───────┴───────┘ |
+ └──────────────────┘
+ """
+ if keep not in {"any", "none"}:
+ msg = (
+ "narwhals.LazyFrame makes no assumptions about row order, so only "
+ f"'any' and 'none' are supported for `keep` in `unique`. Got: {keep}."
+ )
+ raise ValueError(msg)
+ if isinstance(subset, str):
+ subset = [subset]
+ return self._with_compliant(
+ self._compliant_frame.unique(subset=subset, keep=keep)
+ )
+
+ def filter(
+ self, *predicates: IntoExpr | Iterable[IntoExpr] | list[bool], **constraints: Any
+ ) -> Self:
+ r"""Filter the rows in the LazyFrame based on a predicate expression.
+
+ The original order of the remaining rows is preserved.
+
+ Arguments:
+ *predicates: Expression that evaluates to a boolean Series. Can
+ also be a (single!) boolean list.
+ **constraints: Column filters; use `name = value` to filter columns by the supplied value.
+ Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly
+ joined with the other filter conditions using &.
+
+ Returns:
+ The filtered LazyFrame.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql('''
+ ... SELECT * FROM VALUES
+ ... (1, 6, 'a'),
+ ... (2, 7, 'b'),
+ ... (3, 8, 'c')
+ ... df(foo, bar, ham)
+ ... ''')
+
+ Filter on one condition
+
+ >>> nw.from_native(df_native).filter(nw.col("foo") > 1).to_native()
+ ┌───────┬───────┬─────────┐
+ │ foo │ bar │ ham │
+ │ int32 │ int32 │ varchar │
+ ├───────┼───────┼─────────┤
+ │ 2 │ 7 │ b │
+ │ 3 │ 8 │ c │
+ └───────┴───────┴─────────┘
+ <BLANKLINE>
+
+ Filter on multiple conditions with implicit `&`
+
+ >>> nw.from_native(df_native).filter(
+ ... nw.col("foo") < 3, nw.col("ham") == "a"
+ ... ).to_native()
+ ┌───────┬───────┬─────────┐
+ │ foo │ bar │ ham │
+ │ int32 │ int32 │ varchar │
+ ├───────┼───────┼─────────┤
+ │ 1 │ 6 │ a │
+ └───────┴───────┴─────────┘
+ <BLANKLINE>
+
+ Filter on multiple conditions with `|`
+
+ >>> nw.from_native(df_native).filter(
+ ... (nw.col("foo") == 1) | (nw.col("ham") == "c")
+ ... ).to_native()
+ ┌───────┬───────┬─────────┐
+ │ foo │ bar │ ham │
+ │ int32 │ int32 │ varchar │
+ ├───────┼───────┼─────────┤
+ │ 1 │ 6 │ a │
+ │ 3 │ 8 │ c │
+ └───────┴───────┴─────────┘
+ <BLANKLINE>
+
+ Filter using `**kwargs` syntax
+
+ >>> nw.from_native(df_native).filter(foo=2, ham="b").to_native()
+ ┌───────┬───────┬─────────┐
+ │ foo │ bar │ ham │
+ │ int32 │ int32 │ varchar │
+ ├───────┼───────┼─────────┤
+ │ 2 │ 7 │ b │
+ └───────┴───────┴─────────┘
+ <BLANKLINE>
+ """
+ if (
+ len(predicates) == 1 and is_list_of(predicates[0], bool) and not constraints
+ ): # pragma: no cover
+ msg = "`LazyFrame.filter` is not supported with Python boolean masks - use expressions instead."
+ raise TypeError(msg)
+
+ return super().filter(*predicates, **constraints)
+
+ @overload
+ def group_by(
+ self, *keys: IntoExpr | Iterable[IntoExpr], drop_null_keys: Literal[False] = ...
+ ) -> LazyGroupBy[Self]: ...
+
+ @overload
+ def group_by(
+ self, *keys: str | Iterable[str], drop_null_keys: Literal[True]
+ ) -> LazyGroupBy[Self]: ...
+
+ def group_by(
+ self, *keys: IntoExpr | Iterable[IntoExpr], drop_null_keys: bool = False
+ ) -> LazyGroupBy[Self]:
+ r"""Start a group by operation.
+
+ Arguments:
+ *keys: Column(s) to group by. Accepts expression input. Strings are parsed as
+ column names.
+ drop_null_keys: if True, then groups where any key is null won't be
+ included in the result.
+
+ Returns:
+ Object which can be used to perform aggregations.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql(
+ ... "SELECT * FROM VALUES (1, 'a'), (2, 'b'), (3, 'a') df(a, b)"
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.group_by("b").agg(nw.col("a").sum()).sort("b").to_native()
+ ┌─────────┬────────┐
+ │ b │ a │
+ │ varchar │ int128 │
+ ├─────────┼────────┤
+ │ a │ 4 │
+ │ b │ 2 │
+ └─────────┴────────┘
+ <BLANKLINE>
+
+ Expressions are also accepted.
+
+ >>> df.group_by(nw.col("b").str.len_chars()).agg(
+ ... nw.col("a").sum()
+ ... ).to_native()
+ ┌───────┬────────┐
+ │ b │ a │
+ │ int64 │ int128 │
+ ├───────┼────────┤
+ │ 1 │ 6 │
+ └───────┴────────┘
+ <BLANKLINE>
+ """
+ from narwhals.group_by import LazyGroupBy
+
+ flat_keys = flatten(keys)
+
+ if all(isinstance(key, str) for key in flat_keys):
+ return LazyGroupBy(self, flat_keys, drop_null_keys=drop_null_keys)
+
+ from narwhals import col
+ from narwhals.expr import Expr
+
+ key_is_expr = tuple(isinstance(k, Expr) for k in flat_keys)
+
+ if drop_null_keys and any(key_is_expr):
+ msg = "drop_null_keys cannot be True when keys contains Expr"
+ raise NotImplementedError(msg)
+
+ _keys = [k if is_expr else col(k) for k, is_expr in zip(flat_keys, key_is_expr)]
+ expr_flat_keys, kinds = self._flatten_and_extract(*_keys)
+
+ if not all(kind is ExprKind.ELEMENTWISE for kind in kinds):
+ from narwhals.exceptions import ComputeError
+
+ msg = (
+ "Group by is not supported with keys that are not elementwise expressions"
+ )
+ raise ComputeError(msg)
+
+ return LazyGroupBy(self, expr_flat_keys, drop_null_keys=drop_null_keys)
+
+ def sort(
+ self,
+ by: str | Iterable[str],
+ *more_by: str,
+ descending: bool | Sequence[bool] = False,
+ nulls_last: bool = False,
+ ) -> Self:
+ r"""Sort the LazyFrame by the given columns.
+
+ Arguments:
+ by: Column(s) names to sort by.
+ *more_by: Additional columns to sort by, specified as positional arguments.
+ descending: Sort in descending order. When sorting by multiple columns, can be
+ specified per column by passing a sequence of booleans.
+ nulls_last: Place null values last; can specify a single boolean applying to
+ all columns or a sequence of booleans for per-column control.
+
+ Returns:
+ The sorted LazyFrame.
+
+ Warning:
+ Unlike Polars, it is not possible to specify a sequence of booleans for
+ `nulls_last` in order to control per-column behaviour. Instead a single
+ boolean is applied for all `by` columns.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql(
+ ... "SELECT * FROM VALUES (1, 6.0, 'a'), (2, 5.0, 'c'), (NULL, 4.0, 'b') df(a, b, c)"
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.sort("a")
+ ┌──────────────────────────────────┐
+ | Narwhals LazyFrame |
+ |----------------------------------|
+ |┌───────┬──────────────┬─────────┐|
+ |│ a │ b │ c │|
+ |│ int32 │ decimal(2,1) │ varchar │|
+ |├───────┼──────────────┼─────────┤|
+ |│ NULL │ 4.0 │ b │|
+ |│ 1 │ 6.0 │ a │|
+ |│ 2 │ 5.0 │ c │|
+ |└───────┴──────────────┴─────────┘|
+ └──────────────────────────────────┘
+ """
+ return super().sort(by, *more_by, descending=descending, nulls_last=nulls_last)
+
+ def join(
+ self,
+ other: Self,
+ on: str | list[str] | None = None,
+ how: JoinStrategy = "inner",
+ *,
+ left_on: str | list[str] | None = None,
+ right_on: str | list[str] | None = None,
+ suffix: str = "_right",
+ ) -> Self:
+ r"""Add a join operation to the Logical Plan.
+
+ Arguments:
+ other: Lazy DataFrame to join with.
+ on: Name(s) of the join columns in both DataFrames. If set, `left_on` and
+ `right_on` should be None.
+ how: Join strategy.
+
+ * *inner*: Returns rows that have matching values in both tables.
+ * *left*: Returns all rows from the left table, and the matched rows from the right table.
+ * *full*: Returns all rows in both dataframes, with the suffix appended to the right join keys.
+ * *cross*: Returns the Cartesian product of rows from both tables.
+ * *semi*: Filter rows that have a match in the right table.
+ * *anti*: Filter rows that do not have a match in the right table.
+ left_on: Join column of the left DataFrame.
+ right_on: Join column of the right DataFrame.
+ suffix: Suffix to append to columns with a duplicate name.
+
+ Returns:
+ A new joined LazyFrame.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native1 = duckdb.sql(
+ ... "SELECT * FROM VALUES (1, 'a'), (2, 'b') df(a, b)"
+ ... )
+ >>> df_native2 = duckdb.sql(
+ ... "SELECT * FROM VALUES (1, 'x'), (3, 'y') df(a, c)"
+ ... )
+ >>> df1 = nw.from_native(df_native1)
+ >>> df2 = nw.from_native(df_native2)
+ >>> df1.join(df2, on="a")
+ ┌─────────────────────────────┐
+ | Narwhals LazyFrame |
+ |-----------------------------|
+ |┌───────┬─────────┬─────────┐|
+ |│ a │ b │ c │|
+ |│ int32 │ varchar │ varchar │|
+ |├───────┼─────────┼─────────┤|
+ |│ 1 │ a │ x │|
+ |└───────┴─────────┴─────────┘|
+ └─────────────────────────────┘
+ """
+ return super().join(
+ other, how=how, left_on=left_on, right_on=right_on, on=on, suffix=suffix
+ )
+
+ def join_asof(
+ self,
+ other: Self,
+ *,
+ left_on: str | None = None,
+ right_on: str | None = None,
+ on: str | None = None,
+ by_left: str | list[str] | None = None,
+ by_right: str | list[str] | None = None,
+ by: str | list[str] | None = None,
+ strategy: AsofJoinStrategy = "backward",
+ suffix: str = "_right",
+ ) -> Self:
+ """Perform an asof join.
+
+ This is similar to a left-join except that we match on nearest key rather than equal keys.
+
+ For Polars, both DataFrames must be sorted by the `on` key (within each `by` group
+ if specified).
+
+ Arguments:
+ other: DataFrame to join with.
+ left_on: Name(s) of the left join column(s).
+ right_on: Name(s) of the right join column(s).
+ on: Join column of both DataFrames. If set, left_on and right_on should be None.
+ by_left: join on these columns before doing asof join
+ by_right: join on these columns before doing asof join
+ by: join on these columns before doing asof join
+ strategy: Join strategy. The default is "backward".
+
+ * *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key.
+ * *forward*: selects the first row in the right DataFrame whose "on" key is greater than or equal to the left's key.
+ * *nearest*: search selects the last row in the right DataFrame whose value is nearest to the left's key.
+
+ suffix: Suffix to append to columns with a duplicate name.
+
+ Returns:
+ A new joined LazyFrame.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> data_gdp = {
+ ... "datetime": [
+ ... datetime(2016, 1, 1),
+ ... datetime(2017, 1, 1),
+ ... datetime(2018, 1, 1),
+ ... datetime(2019, 1, 1),
+ ... datetime(2020, 1, 1),
+ ... ],
+ ... "gdp": [4164, 4411, 4566, 4696, 4827],
+ ... }
+ >>> data_population = {
+ ... "datetime": [
+ ... datetime(2016, 3, 1),
+ ... datetime(2018, 8, 1),
+ ... datetime(2019, 1, 1),
+ ... ],
+ ... "population": [82.19, 82.66, 83.12],
+ ... }
+ >>> gdp_native = pl.DataFrame(data_gdp)
+ >>> population_native = pl.DataFrame(data_population)
+ >>> gdp = nw.from_native(gdp_native)
+ >>> population = nw.from_native(population_native)
+ >>> population.join_asof(gdp, on="datetime", strategy="backward").to_native()
+ shape: (3, 3)
+ ┌─────────────────────┬────────────┬──────┐
+ │ datetime ┆ population ┆ gdp │
+ │ --- ┆ --- ┆ --- │
+ │ datetime[μs] ┆ f64 ┆ i64 │
+ ╞═════════════════════╪════════════╪══════╡
+ │ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+ │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+ │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+ └─────────────────────┴────────────┴──────┘
+ """
+ return super().join_asof(
+ other,
+ left_on=left_on,
+ right_on=right_on,
+ on=on,
+ by_left=by_left,
+ by_right=by_right,
+ by=by,
+ strategy=strategy,
+ suffix=suffix,
+ )
+
+ def lazy(self) -> Self:
+ """Restrict available API methods to lazy-only ones.
+
+ This is a no-op, and exists only for compatibility with `DataFrame.lazy`.
+
+ Returns:
+ A LazyFrame.
+ """
+ return self
+
+ def gather_every(self, n: int, offset: int = 0) -> Self:
+ r"""Take every nth row in the DataFrame and return as a new DataFrame.
+
+ Warning:
+ `LazyFrame.gather_every` is deprecated and will be removed in a future version.
+ Note: this will remain available in `narwhals.stable.v1`.
+ See [stable api](../backcompat.md/) for more information.
+
+ Arguments:
+ n: Gather every *n*-th row.
+ offset: Starting index.
+
+ Returns:
+ The LazyFrame containing only the selected rows.
+ """
+ msg = (
+ "`LazyFrame.gather_every` is deprecated and will be removed in a future version.\n\n"
+ "Note: this will remain available in `narwhals.stable.v1`.\n"
+ "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
+ )
+ issue_deprecation_warning(msg, _version="1.29.0")
+
+ return super().gather_every(n=n, offset=offset)
+
+ def unpivot(
+ self,
+ on: str | list[str] | None = None,
+ *,
+ index: str | list[str] | None = None,
+ variable_name: str = "variable",
+ value_name: str = "value",
+ ) -> Self:
+ r"""Unpivot a DataFrame from wide to long format.
+
+ Optionally leaves identifiers set.
+
+ This function is useful to massage a DataFrame into a format where one or more
+ columns are identifier variables (index) while all other columns, considered
+ measured variables (on), are "unpivoted" to the row axis leaving just
+ two non-identifier columns, 'variable' and 'value'.
+
+ Arguments:
+ on: Column(s) to use as values variables; if `on` is empty all columns that
+ are not in `index` will be used.
+ index: Column(s) to use as identifier variables.
+ variable_name: Name to give to the `variable` column. Defaults to "variable".
+ value_name: Name to give to the `value` column. Defaults to "value".
+
+ Returns:
+ The unpivoted LazyFrame.
+
+ Notes:
+ If you're coming from pandas, this is similar to `pandas.DataFrame.melt`,
+ but with `index` replacing `id_vars` and `on` replacing `value_vars`.
+ In other frameworks, you might know this operation as `pivot_longer`.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql(
+ ... "SELECT * FROM VALUES ('x', 1, 2), ('y', 3, 4), ('z', 5, 6) df(a, b, c)"
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.unpivot(on=["b", "c"], index="a").sort("a", "variable").to_native()
+ ┌─────────┬──────────┬───────┐
+ │ a │ variable │ value │
+ │ varchar │ varchar │ int32 │
+ ├─────────┼──────────┼───────┤
+ │ x │ b │ 1 │
+ │ x │ c │ 2 │
+ │ y │ b │ 3 │
+ │ y │ c │ 4 │
+ │ z │ b │ 5 │
+ │ z │ c │ 6 │
+ └─────────┴──────────┴───────┘
+ <BLANKLINE>
+ """
+ return super().unpivot(
+ on=on, index=index, variable_name=variable_name, value_name=value_name
+ )
+
+ def explode(self, columns: str | Sequence[str], *more_columns: str) -> Self:
+ """Explode the dataframe to long format by exploding the given columns.
+
+ Notes:
+ It is possible to explode multiple columns only if these columns have
+ matching element counts.
+
+ Arguments:
+ columns: Column names. The underlying columns being exploded must be of the `List` data type.
+ *more_columns: Additional names of columns to explode, specified as positional arguments.
+
+ Returns:
+ New LazyFrame
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql(
+ ... "SELECT * FROM VALUES ('x', [1, 2]), ('y', [3, 4]), ('z', [5, 6]) df(a, b)"
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.explode("b").to_native()
+ ┌─────────┬───────┐
+ │ a │ b │
+ │ varchar │ int32 │
+ ├─────────┼───────┤
+ │ x │ 1 │
+ │ x │ 2 │
+ │ y │ 3 │
+ │ y │ 4 │
+ │ z │ 5 │
+ │ z │ 6 │
+ └─────────┴───────┘
+ <BLANKLINE>
+ """
+ return super().explode(columns, *more_columns)
diff --git a/venv/lib/python3.8/site-packages/narwhals/dependencies.py b/venv/lib/python3.8/site-packages/narwhals/dependencies.py
new file mode 100644
index 0000000..d775677
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/dependencies.py
@@ -0,0 +1,472 @@
+# pandas / Polars / etc. : if a user passes a dataframe from one of these
+# libraries, it means they must already have imported the given module.
+# So, we can just check sys.modules.
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+ import cudf
+ import dask.dataframe as dd
+ import duckdb
+ import ibis
+ import modin.pandas as mpd
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ import pyspark.sql as pyspark_sql
+ from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame
+ from typing_extensions import TypeGuard, TypeIs
+
+ from narwhals._spark_like.dataframe import SQLFrameDataFrame
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.series import Series
+ from narwhals.typing import (
+ FrameT,
+ IntoDataFrameT,
+ IntoSeriesT,
+ _1DArray,
+ _1DArrayInt,
+ _2DArray,
+ _NDArray,
+ _NumpyScalar,
+ _ShapeT,
+ )
+
+
+# We silently allow these but - given that they claim
+# to be drop-in replacements for pandas - testing is
+# their responsibility.
+IMPORT_HOOKS = frozenset(["fireducks"])
+
+
+def get_polars() -> Any:
+ """Get Polars module (if already imported - else return None)."""
+ return sys.modules.get("polars", None)
+
+
+def get_pandas() -> Any:
+ """Get pandas module (if already imported - else return None)."""
+ return sys.modules.get("pandas", None)
+
+
+def get_modin() -> Any: # pragma: no cover
+ """Get modin.pandas module (if already imported - else return None)."""
+ if (modin := sys.modules.get("modin", None)) is not None:
+ return modin.pandas
+ return None
+
+
+def get_cudf() -> Any:
+ """Get cudf module (if already imported - else return None)."""
+ return sys.modules.get("cudf", None)
+
+
+def get_cupy() -> Any:
+ """Get cupy module (if already imported - else return None)."""
+ return sys.modules.get("cupy", None)
+
+
+def get_pyarrow() -> Any: # pragma: no cover
+ """Get pyarrow module (if already imported - else return None)."""
+ return sys.modules.get("pyarrow", None)
+
+
+def get_numpy() -> Any:
+ """Get numpy module (if already imported - else return None)."""
+ return sys.modules.get("numpy", None)
+
+
+def get_dask() -> Any:
+ """Get dask (if already imported - else return None)."""
+ return sys.modules.get("dask", None)
+
+
+def get_dask_dataframe() -> Any:
+ """Get dask.dataframe module (if already imported - else return None)."""
+ return sys.modules.get("dask.dataframe", None)
+
+
+def get_duckdb() -> Any:
+ """Get duckdb module (if already imported - else return None)."""
+ return sys.modules.get("duckdb", None)
+
+
+def get_ibis() -> Any:
+ """Get ibis module (if already imported - else return None)."""
+ return sys.modules.get("ibis", None)
+
+
+def get_dask_expr() -> Any: # pragma: no cover
+ """Get dask_expr module (if already imported - else return None)."""
+ if (dd := get_dask_dataframe()) is not None and hasattr(dd, "dask_expr"):
+ return dd.dask_expr
+ return sys.modules.get("dask_expr", None)
+
+
+def get_pyspark() -> Any: # pragma: no cover
+ """Get pyspark module (if already imported - else return None)."""
+ return sys.modules.get("pyspark", None)
+
+
+def get_pyspark_sql() -> Any:
+ """Get pyspark.sql module (if already imported - else return None)."""
+ return sys.modules.get("pyspark.sql", None)
+
+
+def get_pyspark_connect() -> Any:
+ """Get pyspark.sql.connect module (if already imported - else return None)."""
+ return sys.modules.get("pyspark.sql.connect", None)
+
+
+def get_sqlframe() -> Any:
+ """Get sqlframe module (if already imported - else return None)."""
+ return sys.modules.get("sqlframe", None)
+
+
+def is_pandas_dataframe(df: Any) -> TypeIs[pd.DataFrame]:
+ """Check whether `df` is a pandas DataFrame without importing pandas."""
+ return ((pd := get_pandas()) is not None and isinstance(df, pd.DataFrame)) or any(
+ (mod := sys.modules.get(module_name, None)) is not None
+ and isinstance(df, mod.pandas.DataFrame)
+ for module_name in IMPORT_HOOKS
+ )
+
+
+def is_pandas_series(ser: Any) -> TypeIs[pd.Series[Any]]:
+ """Check whether `ser` is a pandas Series without importing pandas."""
+ return ((pd := get_pandas()) is not None and isinstance(ser, pd.Series)) or any(
+ (mod := sys.modules.get(module_name, None)) is not None
+ and isinstance(ser, mod.pandas.Series)
+ for module_name in IMPORT_HOOKS
+ )
+
+
+def is_pandas_index(index: Any) -> TypeIs[pd.Index[Any]]:
+ """Check whether `index` is a pandas Index without importing pandas."""
+ return ((pd := get_pandas()) is not None and isinstance(index, pd.Index)) or any(
+ (mod := sys.modules.get(module_name, None)) is not None
+ and isinstance(index, mod.pandas.Index)
+ for module_name in IMPORT_HOOKS
+ )
+
+
+def is_modin_dataframe(df: Any) -> TypeIs[mpd.DataFrame]:
+ """Check whether `df` is a modin DataFrame without importing modin."""
+ return (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame)
+
+
+def is_modin_series(ser: Any) -> TypeIs[mpd.Series]:
+ """Check whether `ser` is a modin Series without importing modin."""
+ return (mpd := get_modin()) is not None and isinstance(ser, mpd.Series)
+
+
+def is_modin_index(index: Any) -> TypeIs[mpd.Index[Any]]: # pragma: no cover
+ """Check whether `index` is a modin Index without importing modin."""
+ return (mpd := get_modin()) is not None and isinstance(index, mpd.Index)
+
+
+def is_cudf_dataframe(df: Any) -> TypeIs[cudf.DataFrame]:
+ """Check whether `df` is a cudf DataFrame without importing cudf."""
+ return (cudf := get_cudf()) is not None and isinstance(df, cudf.DataFrame)
+
+
+def is_cudf_series(ser: Any) -> TypeIs[cudf.Series[Any]]:
+ """Check whether `ser` is a cudf Series without importing cudf."""
+ return (cudf := get_cudf()) is not None and isinstance(ser, cudf.Series)
+
+
+def is_cudf_index(index: Any) -> TypeIs[cudf.Index]:
+ """Check whether `index` is a cudf Index without importing cudf."""
+ return (cudf := get_cudf()) is not None and isinstance(
+ index, cudf.Index
+ ) # pragma: no cover
+
+
+def is_cupy_scalar(obj: Any) -> bool:
+ return (
+ (cupy := get_cupy()) is not None
+ and isinstance(obj, cupy.ndarray)
+ and obj.size == 1
+ ) # pragma: no cover
+
+
+def is_dask_dataframe(df: Any) -> TypeIs[dd.DataFrame]:
+ """Check whether `df` is a Dask DataFrame without importing Dask."""
+ return (dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame)
+
+
+def is_duckdb_relation(df: Any) -> TypeIs[duckdb.DuckDBPyRelation]:
+ """Check whether `df` is a DuckDB Relation without importing DuckDB."""
+ return (duckdb := get_duckdb()) is not None and isinstance(
+ df, duckdb.DuckDBPyRelation
+ )
+
+
+def is_ibis_table(df: Any) -> TypeIs[ibis.Table]:
+    """Check whether `df` is an Ibis Table without importing Ibis."""
+ return (ibis := get_ibis()) is not None and isinstance(df, ibis.expr.types.Table)
+
+
+def is_polars_dataframe(df: Any) -> TypeIs[pl.DataFrame]:
+ """Check whether `df` is a Polars DataFrame without importing Polars."""
+ return (pl := get_polars()) is not None and isinstance(df, pl.DataFrame)
+
+
+def is_polars_lazyframe(df: Any) -> TypeIs[pl.LazyFrame]:
+ """Check whether `df` is a Polars LazyFrame without importing Polars."""
+ return (pl := get_polars()) is not None and isinstance(df, pl.LazyFrame)
+
+
+def is_polars_series(ser: Any) -> TypeIs[pl.Series]:
+ """Check whether `ser` is a Polars Series without importing Polars."""
+ return (pl := get_polars()) is not None and isinstance(ser, pl.Series)
+
+
+def is_pyarrow_chunked_array(ser: Any) -> TypeIs[pa.ChunkedArray[Any]]:
+ """Check whether `ser` is a PyArrow ChunkedArray without importing PyArrow."""
+ return (pa := get_pyarrow()) is not None and isinstance(ser, pa.ChunkedArray)
+
+
+def is_pyarrow_table(df: Any) -> TypeIs[pa.Table]:
+ """Check whether `df` is a PyArrow Table without importing PyArrow."""
+ return (pa := get_pyarrow()) is not None and isinstance(df, pa.Table)
+
+
+def is_pyarrow_scalar(obj: Any) -> TypeIs[pa.Scalar[Any]]:
+ return (pa := get_pyarrow()) is not None and isinstance(obj, pa.Scalar)
+
+
+def is_pyspark_dataframe(df: Any) -> TypeIs[pyspark_sql.DataFrame]:
+ """Check whether `df` is a PySpark DataFrame without importing PySpark."""
+ return bool(
+ (pyspark_sql := get_pyspark_sql()) is not None
+ and isinstance(df, pyspark_sql.DataFrame)
+ )
+
+
+def is_pyspark_connect_dataframe(df: Any) -> TypeIs[PySparkConnectDataFrame]:
+ """Check whether `df` is a PySpark Connect DataFrame without importing PySpark."""
+ if get_pyspark_connect() is not None: # pragma: no cover
+ try:
+ from pyspark.sql.connect.dataframe import DataFrame
+ except ImportError:
+ return False
+ return isinstance(df, DataFrame)
+ return False
+
+
+def is_sqlframe_dataframe(df: Any) -> TypeIs[SQLFrameDataFrame]:
+ """Check whether `df` is a SQLFrame DataFrame without importing SQLFrame."""
+ if get_sqlframe() is not None:
+ from sqlframe.base.dataframe import BaseDataFrame
+
+ return isinstance(df, BaseDataFrame)
+ return False # pragma: no cover
+
+
+def is_numpy_array(arr: Any | _NDArray[_ShapeT]) -> TypeIs[_NDArray[_ShapeT]]:
+ """Check whether `arr` is a NumPy Array without importing NumPy."""
+ return (np := get_numpy()) is not None and isinstance(arr, np.ndarray)
+
+
+def is_numpy_array_1d(arr: Any) -> TypeIs[_1DArray]:
+ """Check whether `arr` is a 1D NumPy Array without importing NumPy."""
+ return is_numpy_array(arr) and arr.ndim == 1
+
+
+def is_numpy_array_1d_int(arr: Any) -> TypeIs[_1DArrayInt]:
+ return (
+ (np := get_numpy())
+ and is_numpy_array_1d(arr)
+ and np.issubdtype(arr.dtype, np.integer)
+ )
+
+
+def is_numpy_array_2d(arr: Any) -> TypeIs[_2DArray]:
+ """Check whether `arr` is a 2D NumPy Array without importing NumPy."""
+ return is_numpy_array(arr) and arr.ndim == 2
+
+
+def is_numpy_scalar(scalar: Any) -> TypeGuard[_NumpyScalar]:
+ """Check whether `scalar` is a NumPy Scalar without importing NumPy."""
+ # NOTE: Needs to stay as `TypeGuard`
+ # - Used in `Series.__getitem__`, but not annotated
+ # - `TypeGuard` is *hiding* that the check introduces an intersection
+ return (np := get_numpy()) is not None and isinstance(scalar, np.generic)
+
+
+def is_pandas_like_dataframe(df: Any) -> bool:
+ """Check whether `df` is a pandas-like DataFrame without doing any imports.
+
+ By "pandas-like", we mean: pandas, Modin, cuDF.
+ """
+ return is_pandas_dataframe(df) or is_modin_dataframe(df) or is_cudf_dataframe(df)
+
+
+def is_pandas_like_series(ser: Any) -> bool:
+ """Check whether `ser` is a pandas-like Series without doing any imports.
+
+ By "pandas-like", we mean: pandas, Modin, cuDF.
+ """
+ return is_pandas_series(ser) or is_modin_series(ser) or is_cudf_series(ser)
+
+
+def is_pandas_like_index(index: Any) -> bool:
+ """Check whether `index` is a pandas-like Index without doing any imports.
+
+ By "pandas-like", we mean: pandas, Modin, cuDF.
+ """
+ return (
+ is_pandas_index(index) or is_modin_index(index) or is_cudf_index(index)
+ ) # pragma: no cover
+
+
+def is_into_series(native_series: Any | IntoSeriesT) -> TypeIs[IntoSeriesT]:
+ """Check whether `native_series` can be converted to a Narwhals Series.
+
+ Arguments:
+ native_series: The object to check.
+
+ Returns:
+ `True` if `native_series` can be converted to a Narwhals Series, `False` otherwise.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import numpy as np
+ >>> import narwhals as nw
+
+ >>> s_pd = pd.Series([1, 2, 3])
+ >>> s_pl = pl.Series([1, 2, 3])
+ >>> np_arr = np.array([1, 2, 3])
+
+ >>> nw.dependencies.is_into_series(s_pd)
+ True
+ >>> nw.dependencies.is_into_series(s_pl)
+ True
+ >>> nw.dependencies.is_into_series(np_arr)
+ False
+ """
+ from narwhals.series import Series
+
+ return (
+ isinstance(native_series, Series)
+ or hasattr(native_series, "__narwhals_series__")
+ or is_polars_series(native_series)
+ or is_pyarrow_chunked_array(native_series)
+ or is_pandas_like_series(native_series)
+ )
+
+
+def is_into_dataframe(native_dataframe: Any | IntoDataFrameT) -> TypeIs[IntoDataFrameT]:
+ """Check whether `native_dataframe` can be converted to a Narwhals DataFrame.
+
+ Arguments:
+ native_dataframe: The object to check.
+
+ Returns:
+ `True` if `native_dataframe` can be converted to a Narwhals DataFrame, `False` otherwise.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import numpy as np
+ >>> from narwhals.dependencies import is_into_dataframe
+
+ >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+ >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+ >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
+
+ >>> is_into_dataframe(df_pd)
+ True
+ >>> is_into_dataframe(df_pl)
+ True
+ >>> is_into_dataframe(np_arr)
+ False
+ """
+ from narwhals.dataframe import DataFrame
+
+ return (
+ isinstance(native_dataframe, DataFrame)
+ or hasattr(native_dataframe, "__narwhals_dataframe__")
+ or is_polars_dataframe(native_dataframe)
+ or is_pyarrow_table(native_dataframe)
+ or is_pandas_like_dataframe(native_dataframe)
+ )
+
+
+def is_narwhals_dataframe(
+ df: DataFrame[IntoDataFrameT] | Any,
+) -> TypeIs[DataFrame[IntoDataFrameT]]:
+ """Check whether `df` is a Narwhals DataFrame.
+
+ This is useful if you expect a user to pass in a Narwhals
+ DataFrame directly, and you want to catch both `narwhals.DataFrame`
+ and `narwhals.stable.v1.DataFrame`.
+ """
+ from narwhals.dataframe import DataFrame
+
+ return isinstance(df, DataFrame)
+
+
+def is_narwhals_lazyframe(lf: Any | LazyFrame[FrameT]) -> TypeIs[LazyFrame[FrameT]]:
+ """Check whether `lf` is a Narwhals LazyFrame.
+
+ This is useful if you expect a user to pass in a Narwhals
+ LazyFrame directly, and you want to catch both `narwhals.LazyFrame`
+ and `narwhals.stable.v1.LazyFrame`.
+ """
+ from narwhals.dataframe import LazyFrame
+
+ return isinstance(lf, LazyFrame)
+
+
+def is_narwhals_series(ser: Any | Series[IntoSeriesT]) -> TypeIs[Series[IntoSeriesT]]:
+ """Check whether `ser` is a Narwhals Series.
+
+ This is useful if you expect a user to pass in a Narwhals
+ Series directly, and you want to catch both `narwhals.Series`
+ and `narwhals.stable.v1.Series`.
+ """
+ from narwhals.series import Series
+
+ return isinstance(ser, Series)
+
+
+def is_narwhals_series_int(ser: Any | Series[IntoSeriesT]) -> TypeIs[Series[IntoSeriesT]]:
+ return is_narwhals_series(ser) and ser.dtype.is_integer()
+
+
+__all__ = [
+ "get_cudf",
+ "get_ibis",
+ "get_modin",
+ "get_numpy",
+ "get_pandas",
+ "get_polars",
+ "get_pyarrow",
+ "is_cudf_dataframe",
+ "is_cudf_series",
+ "is_dask_dataframe",
+ "is_ibis_table",
+ "is_into_dataframe",
+ "is_into_series",
+ "is_modin_dataframe",
+ "is_modin_series",
+ "is_narwhals_dataframe",
+ "is_narwhals_lazyframe",
+ "is_narwhals_series",
+ "is_numpy_array",
+ "is_pandas_dataframe",
+ "is_pandas_index",
+ "is_pandas_like_dataframe",
+ "is_pandas_like_series",
+ "is_pandas_series",
+ "is_polars_dataframe",
+ "is_polars_lazyframe",
+ "is_polars_series",
+ "is_pyarrow_chunked_array",
+ "is_pyarrow_table",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/dtypes.py b/venv/lib/python3.8/site-packages/narwhals/dtypes.py
new file mode 100644
index 0000000..3756e9a
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/dtypes.py
@@ -0,0 +1,773 @@
+from __future__ import annotations
+
+import enum
+from collections import OrderedDict
+from datetime import timezone
+from itertools import starmap
+from typing import TYPE_CHECKING, Iterable, Mapping
+
+from narwhals._utils import _DeferredIterable, isinstance_or_issubclass
+
+if TYPE_CHECKING:
+ from typing import Iterator, Sequence
+
+ from typing_extensions import Self
+
+ from narwhals.typing import IntoDType, TimeUnit
+
+
+def _validate_dtype(dtype: DType | type[DType]) -> None:
+ if not isinstance_or_issubclass(dtype, DType):
+ msg = (
+ f"Expected Narwhals dtype, got: {type(dtype)}.\n\n"
+ "Hint: if you were trying to cast to a type, use e.g. nw.Int64 instead of 'int64'."
+ )
+ raise TypeError(msg)
+
+
+class DType:
+ def __repr__(self) -> str: # pragma: no cover
+ return self.__class__.__qualname__
+
+ @classmethod
+ def is_numeric(cls: type[Self]) -> bool:
+ return issubclass(cls, NumericType)
+
+ @classmethod
+ def is_integer(cls: type[Self]) -> bool:
+ return issubclass(cls, IntegerType)
+
+ @classmethod
+ def is_signed_integer(cls: type[Self]) -> bool:
+ return issubclass(cls, SignedIntegerType)
+
+ @classmethod
+ def is_unsigned_integer(cls: type[Self]) -> bool:
+ return issubclass(cls, UnsignedIntegerType)
+
+ @classmethod
+ def is_float(cls: type[Self]) -> bool:
+ return issubclass(cls, FloatType)
+
+ @classmethod
+ def is_decimal(cls: type[Self]) -> bool:
+ return issubclass(cls, Decimal)
+
+ @classmethod
+ def is_temporal(cls: type[Self]) -> bool:
+ return issubclass(cls, TemporalType)
+
+ @classmethod
+ def is_nested(cls: type[Self]) -> bool:
+ return issubclass(cls, NestedType)
+
+ def __eq__(self, other: DType | type[DType]) -> bool: # type: ignore[override]
+ from narwhals._utils import isinstance_or_issubclass
+
+ return isinstance_or_issubclass(other, type(self))
+
+ def __hash__(self) -> int:
+ return hash(self.__class__)
+
+
+class NumericType(DType):
+ """Base class for numeric data types."""
+
+
+class IntegerType(NumericType):
+ """Base class for integer data types."""
+
+
+class SignedIntegerType(IntegerType):
+ """Base class for signed integer data types."""
+
+
+class UnsignedIntegerType(IntegerType):
+ """Base class for unsigned integer data types."""
+
+
+class FloatType(NumericType):
+ """Base class for float data types."""
+
+
+class TemporalType(DType):
+ """Base class for temporal data types."""
+
+
+class NestedType(DType):
+ """Base class for nested data types."""
+
+
+class Decimal(NumericType):
+ """Decimal type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s = pl.Series(["1.5"], dtype=pl.Decimal)
+ >>> nw.from_native(s, series_only=True).dtype
+ Decimal
+ """
+
+
+class Int128(SignedIntegerType):
+ """128-bit signed integer type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> import duckdb
+ >>> s_native = pl.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> df_native = pa.table({"a": [2, 1, 3, 7]})
+ >>> rel = duckdb.sql(" SELECT CAST (a AS INT128) AS a FROM df_native ")
+
+ >>> s.cast(nw.Int128).dtype
+ Int128
+ >>> nw.from_native(rel).schema["a"]
+ Int128
+ """
+
+
+class Int64(SignedIntegerType):
+ """64-bit signed integer type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.Int64).dtype
+ Int64
+ """
+
+
+class Int32(SignedIntegerType):
+ """32-bit signed integer type.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> s_native = pa.chunked_array([[2, 1, 3, 7]])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.Int32).dtype
+ Int32
+ """
+
+
+class Int16(SignedIntegerType):
+ """16-bit signed integer type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.Int16).dtype
+ Int16
+ """
+
+
+class Int8(SignedIntegerType):
+ """8-bit signed integer type.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> s_native = pd.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.Int8).dtype
+ Int8
+ """
+
+
+class UInt128(UnsignedIntegerType):
+ """128-bit unsigned integer type.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> import duckdb
+ >>> df_native = pd.DataFrame({"a": [2, 1, 3, 7]})
+ >>> rel = duckdb.sql(" SELECT CAST (a AS UINT128) AS a FROM df_native ")
+ >>> nw.from_native(rel).schema["a"]
+ UInt128
+ """
+
+
+class UInt64(UnsignedIntegerType):
+ """64-bit unsigned integer type.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> s_native = pd.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.UInt64).dtype
+ UInt64
+ """
+
+
+class UInt32(UnsignedIntegerType):
+ """32-bit unsigned integer type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.UInt32).dtype
+ UInt32
+ """
+
+
+class UInt16(UnsignedIntegerType):
+ """16-bit unsigned integer type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.UInt16).dtype
+ UInt16
+ """
+
+
+class UInt8(UnsignedIntegerType):
+ """8-bit unsigned integer type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([2, 1, 3, 7])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.UInt8).dtype
+ UInt8
+ """
+
+
+class Float64(FloatType):
+ """64-bit floating point type.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> s_native = pa.chunked_array([[0.001, 0.1, 0.01, 0.1]])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.Float64).dtype
+ Float64
+ """
+
+
+class Float32(FloatType):
+ """32-bit floating point type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([0.001, 0.1, 0.01, 0.1])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.cast(nw.Float32).dtype
+ Float32
+ """
+
+
+class String(DType):
+ """UTF-8 encoded string type.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> s_native = pd.Series(["beluga", "narwhal", "orca", "vaquita"])
+ >>> nw.from_native(s_native, series_only=True).dtype
+ String
+ """
+
+
+class Boolean(DType):
+ """Boolean type.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> s_native = pa.chunked_array([[True, False, False, True]])
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Boolean
+ """
+
+
+class Object(DType):
+ """Data type for wrapping arbitrary Python objects.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> class Foo: ...
+ >>> s_native = pd.Series([Foo(), Foo()])
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Object
+ """
+
+
+class Unknown(DType):
+ """Type representing DataType values that could not be determined statically.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> s_native = pd.Series(pd.period_range("2000-01", periods=4, freq="M"))
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Unknown
+ """
+
+
+class _DatetimeMeta(type):
+ @property
+ def time_unit(cls) -> TimeUnit:
+ return "us"
+
+ @property
+ def time_zone(cls) -> str | None:
+ return None
+
+
+class Datetime(TemporalType, metaclass=_DatetimeMeta):
+ """Data type representing a calendar date and time of day.
+
+ Arguments:
+ time_unit: Unit of time. Defaults to `'us'` (microseconds).
+ time_zone: Time zone string, as defined in zoneinfo (to see valid strings run
+ `import zoneinfo; zoneinfo.available_timezones()` for a full list).
+
+ Notes:
+ Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457)
+
+ Examples:
+ >>> from datetime import datetime, timedelta
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = (
+ ... pl.Series([datetime(2024, 12, 9) + timedelta(days=n) for n in range(5)])
+ ... .cast(pl.Datetime("ms"))
+ ... .dt.replace_time_zone("Africa/Accra")
+ ... )
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Datetime(time_unit='ms', time_zone='Africa/Accra')
+ """
+
+ def __init__(
+ self, time_unit: TimeUnit = "us", time_zone: str | timezone | None = None
+ ) -> None:
+ if time_unit not in {"s", "ms", "us", "ns"}:
+ msg = (
+ "invalid `time_unit`"
+ f"\n\nExpected one of {{'ns','us','ms', 's'}}, got {time_unit!r}."
+ )
+ raise ValueError(msg)
+
+ if isinstance(time_zone, timezone):
+ time_zone = str(time_zone)
+
+ self.time_unit: TimeUnit = time_unit
+ self.time_zone: str | None = time_zone
+
+ def __eq__(self, other: object) -> bool:
+ # allow comparing object instances to class
+ if type(other) is _DatetimeMeta:
+ return True
+ elif isinstance(other, self.__class__):
+ return self.time_unit == other.time_unit and self.time_zone == other.time_zone
+ else: # pragma: no cover
+ return False
+
+ def __hash__(self) -> int: # pragma: no cover
+ return hash((self.__class__, self.time_unit, self.time_zone))
+
+ def __repr__(self) -> str: # pragma: no cover
+ class_name = self.__class__.__name__
+ return f"{class_name}(time_unit={self.time_unit!r}, time_zone={self.time_zone!r})"
+
+
+class _DurationMeta(type):
+ @property
+ def time_unit(cls) -> TimeUnit:
+ return "us"
+
+
+class Duration(TemporalType, metaclass=_DurationMeta):
+ """Data type representing a time duration.
+
+ Arguments:
+ time_unit: Unit of time. Defaults to `'us'` (microseconds).
+
+ Notes:
+ Adapted from [Polars implementation](https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502)
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> s_native = pa.chunked_array(
+ ... [[timedelta(seconds=d) for d in range(1, 4)]], type=pa.duration("ms")
+ ... )
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Duration(time_unit='ms')
+ """
+
+ def __init__(self, time_unit: TimeUnit = "us") -> None:
+ if time_unit not in {"s", "ms", "us", "ns"}:
+ msg = (
+ "invalid `time_unit`"
+ f"\n\nExpected one of {{'ns','us','ms', 's'}}, got {time_unit!r}."
+ )
+ raise ValueError(msg)
+
+ self.time_unit: TimeUnit = time_unit
+
+ def __eq__(self, other: object) -> bool:
+ # allow comparing object instances to class
+ if type(other) is _DurationMeta:
+ return True
+ elif isinstance(other, self.__class__):
+ return self.time_unit == other.time_unit
+ else: # pragma: no cover
+ return False
+
+ def __hash__(self) -> int: # pragma: no cover
+ return hash((self.__class__, self.time_unit))
+
+ def __repr__(self) -> str: # pragma: no cover
+ class_name = self.__class__.__name__
+ return f"{class_name}(time_unit={self.time_unit!r})"
+
+
+class Categorical(DType):
+ """A categorical encoding of a set of strings.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series(["beluga", "narwhal", "orca"])
+ >>> nw.from_native(s_native, series_only=True).cast(nw.Categorical).dtype
+ Categorical
+ """
+
+
+class Enum(DType):
+ """A fixed categorical encoding of a unique set of strings.
+
+ Polars has an Enum data type. In pandas, ordered categories get mapped
+ to Enum. PyArrow has no Enum equivalent.
+
+ Examples:
+ >>> import narwhals as nw
+ >>> nw.Enum(["beluga", "narwhal", "orca"])
+ Enum(categories=['beluga', 'narwhal', 'orca'])
+ """
+
+ def __init__(self, categories: Iterable[str] | type[enum.Enum]) -> None:
+ self._delayed_categories: _DeferredIterable[str] | None = None
+ self._cached_categories: tuple[str, ...] | None = None
+
+ if isinstance(categories, _DeferredIterable):
+ self._delayed_categories = categories
+ elif isinstance(categories, type) and issubclass(categories, enum.Enum):
+ self._cached_categories = tuple(member.value for member in categories)
+ else:
+ self._cached_categories = tuple(categories)
+
+ @property
+ def categories(self) -> tuple[str, ...]:
+ if cached := self._cached_categories:
+ return cached
+ elif delayed := self._delayed_categories:
+ self._cached_categories = delayed.to_tuple()
+ return self._cached_categories
+ else: # pragma: no cover
+ msg = f"Internal structure of {type(self).__name__!r} is invalid."
+ raise TypeError(msg)
+
+ def __eq__(self, other: object) -> bool:
+ # allow comparing object instances to class
+ if type(other) is type:
+ return other is Enum
+ return isinstance(other, type(self)) and self.categories == other.categories
+
+ def __hash__(self) -> int:
+ return hash((self.__class__, tuple(self.categories)))
+
+ def __repr__(self) -> str:
+ return f"{type(self).__name__}(categories={list(self.categories)!r})"
+
+
+class Field:
+ """Definition of a single field within a `Struct` DataType.
+
+ Arguments:
+ name: The name of the field within its parent `Struct`.
+ dtype: The `DataType` of the field's values.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> data = [{"a": 1, "b": ["narwhal", "beluga"]}, {"a": 2, "b": ["orca"]}]
+ >>> ser_pa = pa.chunked_array([data])
+ >>> nw.from_native(ser_pa, series_only=True).dtype.fields
+ [Field('a', Int64), Field('b', List(String))]
+ """
+
+ name: str
+ dtype: IntoDType
+
+ def __init__(self, name: str, dtype: IntoDType) -> None:
+ self.name = name
+ self.dtype = dtype
+
+ def __eq__(self, other: Field) -> bool: # type: ignore[override]
+ return (self.name == other.name) & (self.dtype == other.dtype)
+
+ def __hash__(self) -> int:
+ return hash((self.name, self.dtype))
+
+ def __repr__(self) -> str:
+ class_name = self.__class__.__name__
+ return f"{class_name}({self.name!r}, {self.dtype})"
+
+
+class Struct(NestedType):
+ """Struct composite type.
+
+ Arguments:
+ fields: The fields that make up the struct. Can be either a sequence of Field
+ objects or a mapping of column names to data types.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> s_native = pa.chunked_array(
+ ... [[{"a": 1, "b": ["narwhal", "beluga"]}, {"a": 2, "b": ["orca"]}]]
+ ... )
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Struct({'a': Int64, 'b': List(String)})
+ """
+
+ fields: list[Field]
+
+ def __init__(self, fields: Sequence[Field] | Mapping[str, IntoDType]) -> None:
+ if isinstance(fields, Mapping):
+ self.fields = list(starmap(Field, fields.items()))
+ else:
+ self.fields = list(fields)
+
+ def __eq__(self, other: DType | type[DType]) -> bool: # type: ignore[override]
+ # The comparison allows comparing objects to classes, and specific
+ # inner types to those without (eg: inner=None). if one of the
+ # arguments is not specific about its inner type we infer it
+ # as being equal. (See the List type for more info).
+ if type(other) is type and issubclass(other, self.__class__):
+ return True
+ elif isinstance(other, self.__class__):
+ return self.fields == other.fields
+ else:
+ return False
+
+ def __hash__(self) -> int:
+ return hash((self.__class__, tuple(self.fields)))
+
+ def __iter__(self) -> Iterator[tuple[str, IntoDType]]: # pragma: no cover
+ for fld in self.fields:
+ yield fld.name, fld.dtype
+
+ def __reversed__(self) -> Iterator[tuple[str, IntoDType]]:
+ for fld in reversed(self.fields):
+ yield fld.name, fld.dtype
+
+ def __repr__(self) -> str:
+ class_name = self.__class__.__name__
+ return f"{class_name}({dict(self)})"
+
+ def to_schema(self) -> OrderedDict[str, IntoDType]:
+ """Return Struct dtype as a schema dict.
+
+ Returns:
+ Mapping from column name to dtype.
+ """
+ return OrderedDict(self)
+
+
class List(NestedType):
    """Variable length list type.

    Examples:
        >>> import pandas as pd
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> s_native = pd.Series(
        ...     [["narwhal", "orca"], ["beluga", "vaquita"]],
        ...     dtype=pd.ArrowDtype(pa.large_list(pa.large_string())),
        ... )
        >>> nw.from_native(s_native, series_only=True).dtype
        List(String)
    """

    inner: IntoDType

    def __init__(self, inner: IntoDType) -> None:
        self.inner = inner

    def __eq__(self, other: DType | type[DType]) -> bool:  # type: ignore[override]
        # Instances also compare equal to the bare class, so a user can test
        # for "some list" without specifying the inner type:
        #   list[i64] == list[i64] -> True
        #   list[i64] == list[f32] -> False
        #   list[i64] == list      -> True
        if type(other) is type:
            return issubclass(other, type(self))
        if isinstance(other, type(self)):
            return self.inner == other.inner
        return False

    def __hash__(self) -> int:
        return hash((type(self), self.inner))

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.inner!r})"
+
+
class Array(NestedType):
    """Fixed length list type.

    Arguments:
        inner: The datatype of the values within each array.
        shape: The shape of the arrays.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> s_native = pl.Series([[1, 2], [3, 4], [5, 6]], dtype=pl.Array(pl.Int32, 2))
        >>> nw.from_native(s_native, series_only=True).dtype
        Array(Int32, shape=(2,))
    """

    inner: IntoDType
    size: int
    shape: tuple[int, ...]

    def __init__(self, inner: IntoDType, shape: int | tuple[int, ...]) -> None:
        # If ``inner`` is itself an Array, append its shape so the outermost
        # Array always records the full nested shape.
        tail: tuple[int, ...] = inner.shape if isinstance(inner, Array) else ()
        if isinstance(shape, int):
            self.inner = inner
            self.size = shape
            self.shape = (shape, *tail)
            return
        if isinstance(shape, tuple) and shape and isinstance(shape[0], int):
            # Multi-dimensional shapes are normalised into nested Arrays;
            # note ``tail`` was taken from the *original* inner above.
            self.inner = Array(inner, shape[1:]) if len(shape) > 1 else inner
            self.size = shape[0]
            self.shape = shape + tail
            return
        msg = f"invalid input for shape: {shape!r}"
        raise TypeError(msg)

    def __eq__(self, other: DType | type[DType]) -> bool:  # type: ignore[override]
        # Same class-vs-instance semantics as the other nested types:
        #   array[i64] == array[i64] -> True
        #   array[i64] == array[f32] -> False
        #   array[i64] == array      -> True
        if type(other) is type:
            return issubclass(other, type(self))
        if not isinstance(other, type(self)):
            return False
        return self.shape == other.shape and self.inner == other.inner

    def __hash__(self) -> int:
        return hash((type(self), self.inner, self.shape))

    def __repr__(self) -> str:
        # Unwrap nested Arrays down to the leaf dtype, so a 2D array renders
        # as e.g. ``Array(Int32, shape=(2, 3))`` rather than nesting reprs.
        leaf = self
        for _ in self.shape:
            leaf = leaf.inner  # type: ignore[assignment]
        return f"{type(self).__name__}({leaf!r}, shape={self.shape})"
+
+
class Date(TemporalType):
    # Parameter-free dtype: the class body is only documentation; all
    # behavior is inherited from TemporalType.
    """Data type representing a calendar date.

    Examples:
        >>> from datetime import date, timedelta
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> s_native = pa.chunked_array(
        ...     [[date(2024, 12, 1) + timedelta(days=d) for d in range(4)]]
        ... )
        >>> nw.from_native(s_native, series_only=True).dtype
        Date
    """
+
+
class Time(TemporalType):
    # Parameter-free dtype: the class body is only documentation; all
    # behavior is inherited from TemporalType.
    """Data type representing the time of day.

    Examples:
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> import duckdb
        >>> from datetime import time
        >>> data = [time(9, 0), time(9, 1, 10), time(9, 2)]
        >>> ser_pl = pl.Series(data)
        >>> ser_pa = pa.chunked_array([pa.array(data, type=pa.time64("ns"))])
        >>> rel = duckdb.sql(
        ...     " SELECT * FROM (VALUES (TIME '12:00:00'), (TIME '14:30:15')) df(t)"
        ... )

        >>> nw.from_native(ser_pl, series_only=True).dtype
        Time
        >>> nw.from_native(ser_pa, series_only=True).dtype
        Time
        >>> nw.from_native(rel).schema["t"]
        Time
    """
+
+
class Binary(DType):
    # Parameter-free dtype: the class body is only documentation; all
    # behavior is inherited from DType.
    """Binary type.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> import pyarrow as pa
        >>> import duckdb
        >>> data = [b"test1", b"test2"]
        >>> ser_pl = pl.Series(data, dtype=pl.Binary)
        >>> ser_pa = pa.chunked_array([pa.array(data, type=pa.binary())])
        >>> rel = duckdb.sql(
        ...     "SELECT * FROM (VALUES (BLOB 'test1'), (BLOB 'test2')) AS df(t)"
        ... )

        >>> nw.from_native(ser_pl, series_only=True).dtype
        Binary
        >>> nw.from_native(ser_pa, series_only=True).dtype
        Binary
        >>> nw.from_native(rel).schema["t"]
        Binary
    """
diff --git a/venv/lib/python3.8/site-packages/narwhals/exceptions.py b/venv/lib/python3.8/site-packages/narwhals/exceptions.py
new file mode 100644
index 0000000..6a022ec
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/exceptions.py
@@ -0,0 +1,125 @@
+from __future__ import annotations
+
+from typing import Iterable, Sequence
+
+
class NarwhalsError(ValueError):
    """Base class for all Narwhals exceptions.

    Inherits from ``ValueError``, so it can be caught as either type.
    """
+
+
class FormattedKeyError(KeyError):
    """KeyError with formatted error message.

    Python's `KeyError` has special casing around formatting
    (see https://bugs.python.org/issue2651). Use this class when the error
    message has newlines and other special format characters.
    Needed by https://github.com/tensorflow/tensorflow/issues/36857.
    """

    # Stored verbatim so ``__str__`` can bypass KeyError's repr-style quoting.
    message: str

    def __init__(self, message: str) -> None:
        # Deliberately no ``super().__init__(...)`` call here; BaseException's
        # ``__new__`` already records the constructor args.
        self.message = message

    def __str__(self) -> str:
        return str(self.message)
+
+
class ColumnNotFoundError(FormattedKeyError, NarwhalsError):
    """Exception raised when column name isn't present."""

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)

    @classmethod
    def from_missing_and_available_column_names(
        cls, missing_columns: Iterable[str], available_columns: Sequence[str], /
    ) -> ColumnNotFoundError:
        """Build the error from missing and available column names.

        Arguments:
            missing_columns: Names that were requested but not found;
                sorted in the message for deterministic output.
            available_columns: Names actually present, shown as a hint.

        Returns:
            A ready-to-raise error with a formatted message.
        """
        message = (
            f"The following columns were not found: {sorted(missing_columns)}"
            f"\n\nHint: Did you mean one of these columns: {list(available_columns)}?"
        )
        # Use ``cls`` (not the hard-coded class name) so subclasses built
        # through this factory get an instance of their own type.
        return cls(message)
+
+
class ComputeError(NarwhalsError):
    # Marker subclass: inherits ValueError semantics via NarwhalsError.
    """Exception raised when the underlying computation could not be evaluated."""
+
+
class ShapeError(NarwhalsError):
    # Marker subclass: inherits ValueError semantics via NarwhalsError.
    """Exception raised when trying to perform operations on data structures with incompatible shapes."""
+
+
class MultiOutputExpressionError(NarwhalsError):
    # Marker subclass: inherits ValueError semantics via NarwhalsError.
    """Exception raised when using multi-output expression in unsupported context."""
+
+
class DuplicateError(NarwhalsError):
    # Marker subclass: inherits ValueError semantics via NarwhalsError.
    """Exception when duplicate column names are encountered."""
+
+
class InvalidOperationError(NarwhalsError):
    # Marker subclass: inherits ValueError semantics via NarwhalsError.
    """Exception raised during invalid operations."""
+
+
class InvalidIntoExprError(TypeError, NarwhalsError):
    """Exception raised when object can't be converted to expression."""

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)

    @classmethod
    def from_invalid_type(cls, invalid_type: type) -> InvalidIntoExprError:
        """Build the error for an object of ``invalid_type``.

        Arguments:
            invalid_type: The type that could not be converted.

        Returns:
            A ready-to-raise error with a formatted hint message.
        """
        message = (
            f"Expected an object which can be converted into an expression, got {invalid_type}\n\n"
            "Hint:\n"
            "- if you were trying to select a column which does not have a string\n"
            "  column name, then you should explicitly use `nw.col`.\n"
            "  For example, `df.select(nw.col(0))` if you have a column named `0`.\n"
            "- if you were trying to create a new literal column, then you \n"
            "  should explicitly use `nw.lit`.\n"
            "  For example, `df.select(nw.lit(0))` if you want to create a new\n"
            "  column with literal value `0`."
        )
        # Use ``cls`` (not the hard-coded class name) so subclasses built
        # through this factory get an instance of their own type; the
        # ``cls: type`` annotation is dropped in favour of the implicit one.
        return cls(message)
+
+
class AnonymousExprError(NarwhalsError):  # pragma: no cover
    """Exception raised when trying to perform operations on anonymous expressions."""

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)

    @classmethod
    def from_expr_name(cls, expr_name: str) -> AnonymousExprError:
        """Build the error for the operation named ``expr_name``.

        Arguments:
            expr_name: Name of the operation that rejected the anonymous
                expression; interpolated into the message.

        Returns:
            A ready-to-raise error with a formatted hint message.
        """
        message = (
            f"Anonymous expressions are not supported in `{expr_name}`.\n"
            "Instead of `nw.all()`, try using a named expression, such as "
            "`nw.col('a', 'b')`"
        )
        # Use ``cls`` (not the hard-coded class name) so subclasses built
        # through this factory get an instance of their own type.
        return cls(message)
+
+
class OrderDependentExprError(NarwhalsError):
    """Exception raised when trying to use an order-dependent expressions with LazyFrames."""

    def __init__(self, message: str) -> None:
        # Kept on the instance as well as passed to ValueError.__init__.
        self.message = message
        super().__init__(self.message)
+
+
class LengthChangingExprError(NarwhalsError):
    """Exception raised when trying to use an expression which changes length with LazyFrames."""

    def __init__(self, message: str) -> None:
        # Kept on the instance as well as passed to ValueError.__init__.
        self.message = message
        super().__init__(self.message)
+
+
class UnsupportedDTypeError(NarwhalsError):
    # Marker subclass: inherits ValueError semantics via NarwhalsError.
    """Exception raised when trying to convert to a DType which is not supported by the given backend."""
+
+
class NarwhalsUnstableWarning(UserWarning):
    """Warning issued when a method or function is considered unstable in the stable api."""
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr.py b/venv/lib/python3.8/site-packages/narwhals/expr.py
new file mode 100644
index 0000000..fcd48f1
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr.py
@@ -0,0 +1,2544 @@
+from __future__ import annotations
+
+import math
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, Sequence
+
+from narwhals._expression_parsing import (
+ ExprMetadata,
+ apply_n_ary_operation,
+ combine_metadata,
+ extract_compliant,
+)
+from narwhals._utils import (
+ _validate_rolling_arguments,
+ ensure_type,
+ flatten,
+ issue_deprecation_warning,
+)
+from narwhals.dtypes import _validate_dtype
+from narwhals.exceptions import InvalidOperationError
+from narwhals.expr_cat import ExprCatNamespace
+from narwhals.expr_dt import ExprDateTimeNamespace
+from narwhals.expr_list import ExprListNamespace
+from narwhals.expr_name import ExprNameNamespace
+from narwhals.expr_str import ExprStringNamespace
+from narwhals.expr_struct import ExprStructNamespace
+from narwhals.translate import to_native
+
+if TYPE_CHECKING:
+ from typing import TypeVar
+
+ from typing_extensions import Concatenate, ParamSpec, Self, TypeAlias
+
+ from narwhals._compliant import CompliantExpr, CompliantNamespace
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ IntoDType,
+ IntoExpr,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ TemporalLiteral,
+ )
+
+ PS = ParamSpec("PS")
+ R = TypeVar("R")
+ _ToCompliant: TypeAlias = Callable[
+ [CompliantNamespace[Any, Any]], CompliantExpr[Any, Any]
+ ]
+
+
+class Expr:
    def __init__(self, to_compliant_expr: _ToCompliant, metadata: ExprMetadata) -> None:
        """Wrap a compliant-expression factory together with its metadata.

        Arguments:
            to_compliant_expr: Callable from a CompliantNamespace to the
                backend-specific CompliantExpr.
            metadata: Expression metadata attached to every produced
                compliant expression.
        """

        # callable from CompliantNamespace to CompliantExpr
        def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]:
            result = to_compliant_expr(plx)
            # NOTE: reads ``self._metadata`` at *call* time, not the
            # ``metadata`` argument — so later metadata updates are seen.
            result._metadata = self._metadata
            return result

        self._to_compliant_expr: _ToCompliant = func
        self._metadata = metadata
+
+ def _with_elementwise_op(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_elementwise_op())
+
+ def _with_aggregation(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_aggregation())
+
+ def _with_orderable_aggregation(
+ self, to_compliant_expr: Callable[[Any], Any]
+ ) -> Self:
+ return self.__class__(
+ to_compliant_expr, self._metadata.with_orderable_aggregation()
+ )
+
+ def _with_orderable_window(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_orderable_window())
+
+ def _with_unorderable_window(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_unorderable_window())
+
+ def _with_filtration(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(to_compliant_expr, self._metadata.with_filtration())
+
+ def _with_orderable_filtration(self, to_compliant_expr: Callable[[Any], Any]) -> Self:
+ return self.__class__(
+ to_compliant_expr, self._metadata.with_orderable_filtration()
+ )
+
+ def __repr__(self) -> str:
+ return f"Narwhals Expr\nmetadata: {self._metadata}\n"
+
+ def _taxicab_norm(self) -> Self:
+ # This is just used to test out the stable api feature in a realistic-ish way.
+ # It's not intended to be used.
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).abs().sum()
+ )
+
+ # --- convert ---
+ def alias(self, name: str) -> Self:
+ """Rename the expression.
+
+ Arguments:
+ name: The new name.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select((nw.col("b") + 10).alias("c"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | c |
+ | 0 14 |
+ | 1 15 |
+ └──────────────────┘
+ """
+ # Don't use `_with_elementwise_op` so that `_metadata.last_node` is preserved.
+ return self.__class__(
+ lambda plx: self._to_compliant_expr(plx).alias(name), self._metadata
+ )
+
+ def pipe(
+ self,
+ function: Callable[Concatenate[Self, PS], R],
+ *args: PS.args,
+ **kwargs: PS.kwargs,
+ ) -> R:
+ """Pipe function call.
+
+ Arguments:
+ function: Function to apply.
+ args: Positional arguments to pass to function.
+ kwargs: Keyword arguments to pass to function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_piped=nw.col("a").pipe(lambda x: x + 1))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a a_piped |
+ | 0 1 2 |
+ | 1 2 3 |
+ | 2 3 4 |
+ | 3 4 5 |
+ └──────────────────┘
+ """
+ return function(self, *args, **kwargs)
+
+ def cast(self, dtype: IntoDType) -> Self:
+ """Redefine an object's data type.
+
+ Arguments:
+ dtype: Data type that the object will be cast into.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | foo bar |
+ | 0 1.0 6 |
+ | 1 2.0 7 |
+ | 2 3.0 8 |
+ └──────────────────┘
+ """
+ _validate_dtype(dtype)
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).cast(dtype)
+ )
+
+ # --- binary ---
+ def __eq__(self, other: Self | Any) -> Self: # type: ignore[override]
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x == y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __ne__(self, other: Self | Any) -> Self: # type: ignore[override]
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x != y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __and__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x & y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rand__(self, other: Any) -> Self:
+ return (self & other).alias("literal") # type: ignore[no-any-return]
+
+ def __or__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x | y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __ror__(self, other: Any) -> Self:
+ return (self | other).alias("literal") # type: ignore[no-any-return]
+
+ def __add__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x + y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __radd__(self, other: Any) -> Self:
+ return (self + other).alias("literal") # type: ignore[no-any-return]
+
+ def __sub__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x - y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rsub__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rsub__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __truediv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x / y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rtruediv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rtruediv__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __mul__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x * y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rmul__(self, other: Any) -> Self:
+ return (self * other).alias("literal") # type: ignore[no-any-return]
+
+ def __le__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x <= y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __lt__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x < y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __gt__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x > y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __ge__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x >= y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __pow__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x**y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rpow__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rpow__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __floordiv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x // y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rfloordiv__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rfloordiv__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __mod__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x % y, self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ def __rmod__(self, other: Any) -> Self:
+ return self.__class__(
+ lambda plx: apply_n_ary_operation(
+ plx, lambda x, y: x.__rmod__(y), self, other, str_as_lit=True
+ ),
+ ExprMetadata.from_binary_op(self, other),
+ )
+
+ # --- unary ---
+ def __invert__(self) -> Self:
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).__invert__()
+ )
+
+ def any(self) -> Self:
+ """Return whether any of the values in the column are `True`.
+
+ If there are no non-null elements, the result is `False`.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").any())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 True True |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).any())
+
+ def all(self) -> Self:
+ """Return whether all values in the column are `True`.
+
+ If there are no non-null elements, the result is `True`.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").all())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 False True |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).all())
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None = None,
+ span: float | None = None,
+ half_life: float | None = None,
+ alpha: float | None = None,
+ adjust: bool = True,
+ min_samples: int = 1,
+ ignore_nulls: bool = False,
+ ) -> Self:
+ r"""Compute exponentially-weighted moving average.
+
+ Arguments:
+ com: Specify decay in terms of center of mass, $\gamma$, with <br> $\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$
+ span: Specify decay in terms of span, $\theta$, with <br> $\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$
+ half_life: Specify decay in terms of half-life, $\tau$, with <br> $\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$
+ alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$.
+ adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
+
+ - When `adjust=True` (the default) the EW function is calculated
+ using weights $w_i = (1 - \alpha)^i$
+ - When `adjust=False` the EW function is calculated recursively by
+ $$
+ y_0=x_0
+ $$
+ $$
+ y_t = (1 - \alpha)y_{t - 1} + \alpha x_t
+ $$
+ min_samples: Minimum number of observations in window required to have a value, (otherwise result is null).
+ ignore_nulls: Ignore missing values when calculating weights.
+
+ - When `ignore_nulls=False` (default), weights are based on absolute
+ positions.
+ For example, the weights of $x_0$ and $x_2$ used in
+ calculating the final weighted average of $[x_0, None, x_2]$ are
+ $(1-\alpha)^2$ and $1$ if `adjust=True`, and
+ $(1-\alpha)^2$ and $\alpha$ if `adjust=False`.
+ - When `ignore_nulls=True`, weights are based
+ on relative positions. For example, the weights of
+ $x_0$ and $x_2$ used in calculating the final weighted
+ average of $[x_0, None, x_2]$ are
+ $1-\alpha$ and $1$ if `adjust=True`,
+ and $1-\alpha$ and $\alpha$ if `adjust=False`.
+
+ Returns:
+ Expr
+
+ Examples:
+ >>> import pandas as pd
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoFrameT
+ >>>
+ >>> data = {"a": [1, 2, 3]}
+ >>> df_pd = pd.DataFrame(data)
+ >>> df_pl = pl.DataFrame(data)
+
+ We define a library agnostic function:
+
+ >>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
+ ... df = nw.from_native(df_native)
+ ... return df.select(
+ ... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
+ ... ).to_native()
+
+ We can then pass either pandas or Polars to `agnostic_ewm_mean`:
+
+ >>> agnostic_ewm_mean(df_pd)
+ a
+ 0 1.000000
+ 1 1.666667
+ 2 2.428571
+
+ >>> agnostic_ewm_mean(df_pl) # doctest: +NORMALIZE_WHITESPACE
+ shape: (3, 1)
+ ┌──────────┐
+ │ a │
+ │ --- │
+ │ f64 │
+ ╞══════════╡
+ │ 1.0 │
+ │ 1.666667 │
+ │ 2.428571 │
+ └──────────┘
+ """
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).ewm_mean(
+ com=com,
+ span=span,
+ half_life=half_life,
+ alpha=alpha,
+ adjust=adjust,
+ min_samples=min_samples,
+ ignore_nulls=ignore_nulls,
+ )
+ )
+
+ def mean(self) -> Self:
+ """Get mean value.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").mean())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 0.0 4.0 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).mean())
+
+ def median(self) -> Self:
+ """Get median value.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").median())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3.0 4.0 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).median())
+
+ def std(self, *, ddof: int = 1) -> Self:
+ """Get standard deviation.
+
+ Arguments:
+ ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
+ where N represents the number of elements. By default ddof is 1.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").std(ddof=0))
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a b|
+ |0 17.79513 1.265789|
+ └─────────────────────┘
+ """
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).std(ddof=ddof)
+ )
+
+ def var(self, *, ddof: int = 1) -> Self:
+ """Get variance.
+
+ Arguments:
+ ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
+ where N represents the number of elements. By default ddof is 1.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").var(ddof=0))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ | a b|
+ |0 316.666667 1.602222|
+ └───────────────────────┘
+ """
+ return self._with_aggregation(
+ lambda plx: self._to_compliant_expr(plx).var(ddof=ddof)
+ )
+
+ def map_batches(
+ self,
+ function: Callable[[Any], CompliantExpr[Any, Any]],
+ return_dtype: DType | None = None,
+ ) -> Self:
+ """Apply a custom python function to a whole Series or sequence of Series.
+
+ The output of this custom function is presumed to be either a Series,
+ or a NumPy array (in which case it will be automatically converted into
+ a Series).
+
+ Arguments:
+ function: Function to apply to Series.
+ return_dtype: Dtype of the output Series.
+ If not set, the dtype will be inferred based on the first non-null value
+ that is returned by the function.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a", "b")
+ ... .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64)
+ ... .name.suffix("_mapped")
+ ... )
+ ┌───────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------|
+ | a b a_mapped b_mapped|
+ |0 1 4 2.0 5.0|
+ |1 2 5 3.0 6.0|
+ |2 3 6 4.0 7.0|
+ └───────────────────────────┘
+ """
+ # safest assumptions
+ return self._with_orderable_filtration(
+ lambda plx: self._to_compliant_expr(plx).map_batches(
+ function=function, return_dtype=return_dtype
+ )
+ )
+
+ def skew(self) -> Self:
+ """Calculate the sample skewness of a column.
+
+ Returns:
+ An expression representing the sample skewness of the column.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").skew())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 0.0 1.472427 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).skew())
+
+ def sum(self) -> Expr:
+ """Return the sum value.
+
+ If there are no non-null elements, the result is zero.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import duckdb
+ >>> import narwhals as nw
+ >>> df_native = duckdb.sql("SELECT * FROM VALUES (5, 50), (10, 100) df(a, b)")
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").sum())
+ ┌───────────────────┐
+ |Narwhals LazyFrame |
+ |-------------------|
+ |┌────────┬────────┐|
+ |│ a │ b │|
+ |│ int128 │ int128 │|
+ |├────────┼────────┤|
+ |│ 15 │ 150 │|
+ |└────────┴────────┘|
+ └───────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).sum())
+
+ def min(self) -> Self:
+ """Returns the minimum value(s) from a column(s).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.min("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 1 3 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).min())
+
+ def max(self) -> Self:
+ """Returns the maximum value(s) from a column(s).
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.max("a", "b"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 20 100 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).max())
+
+ def arg_min(self) -> Self:
+ """Returns the index of the minimum value.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").arg_min().name.suffix("_arg_min"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ | a_arg_min b_arg_min|
+ |0 0 1|
+ └───────────────────────┘
+ """
+ return self._with_orderable_aggregation(
+ lambda plx: self._to_compliant_expr(plx).arg_min()
+ )
+
+ def arg_max(self) -> Self:
+ """Returns the index of the maximum value.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").arg_max().name.suffix("_arg_max"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ | a_arg_max b_arg_max|
+ |0 1 0|
+ └───────────────────────┘
+ """
+ return self._with_orderable_aggregation(
+ lambda plx: self._to_compliant_expr(plx).arg_max()
+ )
+
+ def count(self) -> Self:
+ """Returns the number of non-null elements in the column.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.all().count())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 3 2 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).count())
+
+ def n_unique(self) -> Self:
+ """Returns count of unique values.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a", "b").n_unique())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a b |
+ | 0 5 3 |
+ └──────────────────┘
+ """
+ return self._with_aggregation(lambda plx: self._to_compliant_expr(plx).n_unique())
+
def unique(self) -> Self:
    """Return unique values of this expression.

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).unique()

    # `unique` changes length, so it is registered as a filtration.
    return self._with_filtration(compliant)
+
def abs(self) -> Self:
    """Return the absolute value of each element.

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).abs()

    return self._with_elementwise_op(compliant)
+
def cum_sum(self, *, reverse: bool = False) -> Self:
    """Return the cumulative sum.

    Info:
        For lazy backends, this operation must be followed by `Expr.over` with
        `order_by` specified, see [order-dependence](../concepts/order_dependence.md).

    Arguments:
        reverse: reverse the operation

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).cum_sum(reverse=reverse)

    return self._with_orderable_window(compliant)
+
def diff(self) -> Self:
    """Return the difference between each element and the previous one.

    Info:
        For lazy backends, this operation must be followed by `Expr.over` with
        `order_by` specified, see [order-dependence](../concepts/order_dependence.md).

    Returns:
        A new expression.

    Notes:
        pandas may change the dtype here, for example when introducing missing
        values in an integer column. To keep the dtype stable, combine with
        `fill_null` and `cast`, e.g.:

            nw.col("a").diff().fill_null(0).cast(nw.Int64)
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).diff()

    return self._with_orderable_window(compliant)
+
def shift(self, n: int) -> Self:
    """Shift values by `n` positions.

    Info:
        For lazy backends, this operation must be followed by `Expr.over` with
        `order_by` specified, see [order-dependence](../concepts/order_dependence.md).

    Arguments:
        n: Number of positions to shift values by.

    Returns:
        A new expression.

    Notes:
        pandas may change the dtype here, for example when introducing missing
        values in an integer column. To keep the dtype stable, combine with
        `fill_null` and `cast`, e.g.:

            nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
    """
    # Validate eagerly so the error points at the user's call site.
    ensure_type(n, int, param_name="n")

    def compliant(plx):
        return self._to_compliant_expr(plx).shift(n)

    return self._with_orderable_window(compliant)
+
def replace_strict(
    self,
    old: Sequence[Any] | Mapping[Any, Any],
    new: Sequence[Any] | None = None,
    *,
    return_dtype: IntoDType | None = None,
) -> Self:
    """Replace all values by different values.

    This function must replace all non-null input values (else it raises an error).

    Arguments:
        old: Sequence of values to replace. It also accepts a mapping of values to
            their replacement as syntactic sugar for
            `replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.
        new: Sequence of values to replace by. Length must match the length of `old`.
        return_dtype: The data type of the resulting expression. If set to `None`
            (default), the data type is determined automatically based on the other
            inputs.

    Returns:
        A new expression.

    Raises:
        TypeError: If `new` is omitted and `old` is not a mapping.
    """
    if new is None:
        if isinstance(old, Mapping):
            # Unpack the mapping form into parallel old/new sequences.
            new = list(old.values())
            old = list(old.keys())
        else:
            msg = "`new` argument is required if `old` argument is not a Mapping type"
            raise TypeError(msg)

    def compliant(plx):
        return self._to_compliant_expr(plx).replace_strict(
            old, new, return_dtype=return_dtype
        )

    return self._with_elementwise_op(compliant)
+
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self:
    """Sort this column. Place null values first.

    Warning:
        `Expr.sort` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').sort())`, use
        `df.select(nw.col('a')).sort()` instead.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        descending: Sort in descending order.
        nulls_last: Place null values last instead of first.

    Returns:
        A new expression.
    """
    msg = (
        "`Expr.sort` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').sort())`, use `df.select(nw.col('a')).sort()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
    )
    issue_deprecation_warning(msg, _version="1.23.0")

    def compliant(plx):
        return self._to_compliant_expr(plx).sort(
            descending=descending, nulls_last=nulls_last
        )

    return self._with_orderable_window(compliant)
+
# --- transform ---
def is_between(
    self,
    lower_bound: Any | IntoExpr,
    upper_bound: Any | IntoExpr,
    closed: ClosedInterval = "both",
) -> Self:
    """Check if this expression is between the given lower and upper bounds.

    Arguments:
        lower_bound: Lower bound value. String literals are interpreted as column names.
        upper_bound: Upper bound value. String literals are interpreted as column names.
        closed: Define which sides of the interval are closed (inclusive).

    Returns:
        A new expression.
    """
    # Which side(s) of the interval are inclusive, keyed by `closed`.
    # Unknown values fall back to "both", matching the if/elif fallthrough.
    inclusivity = {"left": (True, False), "right": (False, True), "none": (False, False)}

    def func(compliant_expr, lb, ub):
        incl_lower, incl_upper = inclusivity.get(closed, (True, True))
        lower_check = compliant_expr >= lb if incl_lower else compliant_expr > lb
        upper_check = compliant_expr <= ub if incl_upper else compliant_expr < ub
        return lower_check & upper_check

    return self.__class__(
        lambda plx: apply_n_ary_operation(
            plx, func, self, lower_bound, upper_bound, str_as_lit=False
        ),
        combine_metadata(
            self,
            lower_bound,
            upper_bound,
            str_as_lit=False,
            allow_multi_output=False,
            to_single_output=False,
        ),
    )
+
def is_in(self, other: Any) -> Self:
    """Check if elements of this expression are present in the other iterable.

    Arguments:
        other: iterable

    Returns:
        A new expression.

    Raises:
        NotImplementedError: If `other` is not an iterable (strings and bytes
            are rejected as well, to avoid surprising character-wise matching).
    """
    # Guard clause: only genuine (non-string) iterables are accepted.
    if not isinstance(other, Iterable) or isinstance(other, (str, bytes)):
        msg = "Narwhals `is_in` doesn't accept expressions as an argument, as opposed to Polars. You should provide an iterable instead."
        raise NotImplementedError(msg)

    def compliant(plx):
        return self._to_compliant_expr(plx).is_in(to_native(other, pass_through=True))

    return self._with_elementwise_op(compliant)
+
def filter(self, *predicates: Any) -> Self:
    """Filter elements based on a condition, returning a new expression.

    Arguments:
        predicates: Conditions to filter by (which get ANDed together).

    Returns:
        A new expression.
    """
    preds = flatten(predicates)
    # Filtration: the output length depends on the data, not just the input length.
    meta = combine_metadata(
        self,
        *preds,
        str_as_lit=False,
        allow_multi_output=True,
        to_single_output=False,
    ).with_filtration()

    def compliant(plx):
        return apply_n_ary_operation(
            plx,
            lambda *exprs: exprs[0].filter(*exprs[1:]),
            self,
            *preds,
            str_as_lit=False,
        )

    return self.__class__(compliant, meta)
+
def is_null(self) -> Self:
    """Return a boolean Series indicating which values are null.

    Returns:
        A new expression.

    Notes:
        pandas handles null values differently from Polars and PyArrow.
        See [null_handling](../concepts/null_handling.md/)
        for reference.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).is_null()

    return self._with_elementwise_op(compliant)
+
def is_nan(self) -> Self:
    """Indicate which values are NaN.

    Returns:
        A new expression.

    Notes:
        pandas handles null values differently from Polars and PyArrow.
        See [null_handling](../concepts/null_handling.md/)
        for reference.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).is_nan()

    return self._with_elementwise_op(compliant)
+
def arg_true(self) -> Self:
    """Find elements where boolean expression is True.

    Returns:
        A new expression.
    """
    msg = (
        "`Expr.arg_true` is deprecated and will be removed in a future version.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
    )
    issue_deprecation_warning(msg, _version="1.23.0")

    def compliant(plx):
        return self._to_compliant_expr(plx).arg_true()

    return self._with_filtration(compliant)
+
def fill_null(
    self,
    value: Expr | NonNestedLiteral = None,
    strategy: FillNullStrategy | None = None,
    limit: int | None = None,
) -> Self:
    """Fill null values with given value.

    Arguments:
        value: Value or expression used to fill null values.
        strategy: Strategy used to fill null values.
        limit: Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.

    Returns:
        A new expression.

    Raises:
        ValueError: If both `value` and `strategy` are given, if neither is
            given, or if `strategy` is not 'forward' or 'backward'.

    Notes:
        pandas handles null values differently from Polars and PyArrow.
        See [null_handling](../concepts/null_handling.md/)
        for reference.
    """
    has_value = value is not None
    has_strategy = strategy is not None
    if has_value and has_strategy:
        msg = "cannot specify both `value` and `strategy`"
        raise ValueError(msg)
    if not has_value and not has_strategy:
        msg = "must specify either a fill `value` or `strategy`"
        raise ValueError(msg)
    if has_strategy and strategy not in {"forward", "backward"}:
        msg = f"strategy not supported: {strategy}"
        raise ValueError(msg)

    def compliant(plx):
        return self._to_compliant_expr(plx).fill_null(
            value=extract_compliant(plx, value, str_as_lit=True),
            strategy=strategy,
            limit=limit,
        )

    # A fill strategy makes the result order-dependent; a plain value does not.
    next_meta = (
        self._metadata.with_orderable_window() if has_strategy else self._metadata
    )
    return self.__class__(compliant, next_meta)
+
# --- partial reduction ---
def drop_nulls(self) -> Self:
    """Drop null values.

    Returns:
        A new expression.

    Notes:
        pandas handles null values differently from Polars and PyArrow.
        See [null_handling](../concepts/null_handling.md/)
        for reference.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).drop_nulls()

    return self._with_filtration(compliant)
+
def sample(
    self,
    n: int | None = None,
    *,
    fraction: float | None = None,
    with_replacement: bool = False,
    seed: int | None = None,
) -> Self:
    """Sample randomly from this expression.

    Warning:
        `Expr.sample` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').sample())`, use
        `df.select(nw.col('a')).sample()` instead.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Number of items to return. Cannot be used with fraction.
        fraction: Fraction of items to return. Cannot be used with n.
        with_replacement: Allow values to be sampled more than once.
        seed: Seed for the random number generator. If set to None (default), a random
            seed is generated for each sample operation.

    Returns:
        A new expression.
    """
    msg = (
        "`Expr.sample` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').sample())`, use `df.select(nw.col('a')).sample()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
    )
    issue_deprecation_warning(msg, _version="1.23.0")

    def compliant(plx):
        return self._to_compliant_expr(plx).sample(
            n, fraction=fraction, with_replacement=with_replacement, seed=seed
        )

    return self._with_filtration(compliant)
+
def over(
    self,
    *partition_by: str | Sequence[str],
    order_by: str | Sequence[str] | None = None,
) -> Self:
    """Compute expressions over the given groups (optionally with given order).

    Arguments:
        partition_by: Names of columns to compute window expression over.
            Must be names of columns, as opposed to expressions -
            so, this is a bit less flexible than Polars' `Expr.over`.
        order_by: Column(s) to order window functions by.
            For lazy backends, this argument is required when `over` is applied
            to order-dependent functions, see [order-dependence](../concepts/order_dependence.md).

    Returns:
        A new expression.

    Raises:
        ValueError: If neither `partition_by` nor `order_by` is given.
    """
    partition_cols = flatten(partition_by)
    order_cols = [order_by] if isinstance(order_by, str) else (order_by or [])
    if not partition_cols and not order_cols:  # pragma: no cover
        msg = "At least one of `partition_by` or `order_by` must be specified."
        raise ValueError(msg)

    if order_cols:
        next_meta = self._metadata.with_ordered_over()
    elif not partition_cols:  # pragma: no cover
        # NOTE(review): unreachable given the check above; kept as a
        # defensive branch to preserve existing behavior.
        msg = "At least one of `partition_by` or `order_by` must be specified."
        raise InvalidOperationError(msg)
    else:
        next_meta = self._metadata.with_partitioned_over()

    def compliant(plx):
        return self._to_compliant_expr(plx).over(partition_cols, order_cols)

    return self.__class__(compliant, next_meta)
+
def is_duplicated(self) -> Self:
    r"""Return a boolean mask indicating duplicated values.

    Returns:
        A new expression.
    """
    # A value is duplicated exactly when it is not unique.
    unique_mask = self.is_unique()
    return ~unique_mask
+
def is_unique(self) -> Self:
    r"""Return a boolean mask indicating unique values.

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).is_unique()

    # Depends on the whole column, but not on row order.
    return self._with_unorderable_window(compliant)
+
def null_count(self) -> Self:
    r"""Count null values.

    Returns:
        A new expression.

    Notes:
        pandas handles null values differently from Polars and PyArrow.
        See [null_handling](../concepts/null_handling.md/)
        for reference.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).null_count()

    return self._with_aggregation(compliant)
+
def is_first_distinct(self) -> Self:
    r"""Return a boolean mask indicating the first occurrence of each distinct value.

    Info:
        For lazy backends, this operation must be followed by `Expr.over` with
        `order_by` specified, see [order-dependence](../concepts/order_dependence.md).

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).is_first_distinct()

    return self._with_orderable_window(compliant)
+
def is_last_distinct(self) -> Self:
    r"""Return a boolean mask indicating the last occurrence of each distinct value.

    Info:
        For lazy backends, this operation must be followed by `Expr.over` with
        `order_by` specified, see [order-dependence](../concepts/order_dependence.md).

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).is_last_distinct()

    return self._with_orderable_window(compliant)
+
def quantile(
    self, quantile: float, interpolation: RollingInterpolationMethod
) -> Self:
    r"""Get quantile value.

    Arguments:
        quantile: Quantile between 0.0 and 1.0.
        interpolation: Interpolation method.

    Returns:
        A new expression.

    Note:
        - pandas and Polars may have implementation differences for a given interpolation method.
        - [dask](https://docs.dask.org/en/stable/generated/dask.dataframe.Series.quantile.html) has
          its own method to approximate quantile and it doesn't implement 'nearest', 'higher',
          'lower', 'midpoint' as interpolation method - use 'linear' which is closest to the
          native 'dask' - method.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).quantile(quantile, interpolation)

    return self._with_aggregation(compliant)
+
def head(self, n: int = 10) -> Self:
    r"""Get the first `n` rows.

    Warning:
        `Expr.head` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').head())`, use
        `df.select(nw.col('a')).head()` instead.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Number of rows to return.

    Returns:
        A new expression.
    """
    msg = (
        "`Expr.head` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
    )
    issue_deprecation_warning(msg, _version="1.23.0")

    def compliant(plx):
        return self._to_compliant_expr(plx).head(n)

    return self._with_orderable_filtration(compliant)
+
def tail(self, n: int = 10) -> Self:
    r"""Get the last `n` rows.

    Warning:
        `Expr.tail` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').tail())`, use
        `df.select(nw.col('a')).tail()` instead.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Number of rows to return.

    Returns:
        A new expression.
    """
    msg = (
        "`Expr.tail` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').tail())`, use `df.select(nw.col('a')).tail()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
    )
    issue_deprecation_warning(msg, _version="1.23.0")

    def compliant(plx):
        return self._to_compliant_expr(plx).tail(n)

    # NOTE(review): `head` uses `_with_orderable_filtration` while `tail` uses
    # `_with_filtration` — asymmetry preserved as-is; confirm it is intended.
    return self._with_filtration(compliant)
+
def round(self, decimals: int = 0) -> Self:
    r"""Round underlying floating point data by `decimals` digits.

    Arguments:
        decimals: Number of decimals to round by.

    Returns:
        A new expression.

    Notes:
        For values exactly halfway between rounded decimal values pandas behaves
        differently than Polars and Arrow: pandas rounds to the nearest even
        value (e.g. -0.5 and 0.5 round to 0.0), while Polars and Arrow round
        away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0).
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).round(decimals)

    return self._with_elementwise_op(compliant)
+
def len(self) -> Self:
    r"""Return the number of elements in the column.

    Null values count towards the total.

    Returns:
        A new expression.
    """

    def compliant(plx):
        return self._to_compliant_expr(plx).len()

    return self._with_aggregation(compliant)
+
def gather_every(self, n: int, offset: int = 0) -> Self:
    r"""Take every nth value in the Series and return as new Series.

    Warning:
        `Expr.gather_every` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').gather_every())`, use
        `df.select(nw.col('a')).gather_every()` instead.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Returns:
        A new expression.
    """
    msg = (
        "`Expr.gather_every` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n"
    )
    issue_deprecation_warning(msg, _version="1.23.0")

    def compliant(plx):
        return self._to_compliant_expr(plx).gather_every(n=n, offset=offset)

    return self._with_filtration(compliant)
+
def clip(
    self,
    lower_bound: IntoExpr | NumericLiteral | TemporalLiteral | None = None,
    upper_bound: IntoExpr | NumericLiteral | TemporalLiteral | None = None,
) -> Self:
    r"""Clip values in the Series.

    Arguments:
        lower_bound: Lower bound value. String literals are treated as column names.
        upper_bound: Upper bound value. String literals are treated as column names.

    Returns:
        A new expression.
    """

    def compliant_clip(*exprs):
        # Bounds that were not supplied are forwarded as None, regardless of
        # how the n-ary machinery materialized them.
        lo = exprs[1] if lower_bound is not None else None
        hi = exprs[2] if upper_bound is not None else None
        return exprs[0].clip(lo, hi)

    return self.__class__(
        lambda plx: apply_n_ary_operation(
            plx, compliant_clip, self, lower_bound, upper_bound, str_as_lit=False
        ),
        combine_metadata(
            self,
            lower_bound,
            upper_bound,
            str_as_lit=False,
            allow_multi_output=False,
            to_single_output=False,
        ),
    )
+
+ def mode(self) -> Self:
+ r"""Compute the most occurring value(s).
+
+ Can return multiple values.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 1, 2, 3], "b": [1, 1, 2, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").mode()).sort("a")
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a |
+ | 0 1 |
+ └──────────────────┘
+ """
+ # Filtration: the output length may differ from the input (ties can
+ # yield multiple modes), so this is not an elementwise/aggregation op.
+ return self._with_filtration(lambda plx: self._to_compliant_expr(plx).mode())
+
+ def is_finite(self) -> Self:
+ """Returns boolean values indicating which original values are finite.
+
+ Warning:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+ `is_finite` will return False for NaN and Null's in the Dask and
+ pandas non-nullable backend, while for Polars, PyArrow and pandas
+ nullable backends null values are kept as such.
+
+ Returns:
+ Expression of `Boolean` data type.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": [float("nan"), float("inf"), 2.0, None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_is_finite=nw.col("a").is_finite())
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ |shape: (4, 2) |
+ |┌──────┬─────────────┐|
+ |│ a ┆ a_is_finite │|
+ |│ --- ┆ --- │|
+ |│ f64 ┆ bool │|
+ |╞══════╪═════════════╡|
+ |│ NaN ┆ false │|
+ |│ inf ┆ false │|
+ |│ 2.0 ┆ true │|
+ |│ null ┆ null │|
+ |└──────┴─────────────┘|
+ └──────────────────────┘
+ """
+ # Elementwise; null/NaN handling is backend-dependent (see Warning above).
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).is_finite()
+ )
+
+ def cum_count(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative count of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": ["x", "k", None, "d"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_count().alias("a_cum_count"),
+ ... nw.col("a").cum_count(reverse=True).alias("a_cum_count_reverse"),
+ ... )
+ ┌─────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------------------|
+ | a a_cum_count a_cum_count_reverse|
+ |0 x 1 3|
+ |1 k 2 2|
+ |2 None 2 1|
+ |3 d 3 1|
+ └─────────────────────────────────────────┘
+ """
+ # Order-dependent cumulative window op; lazy backends must follow with
+ # `.over(order_by=...)` (see Info above).
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_count(reverse=reverse)
+ )
+
+ def cum_min(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative min of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [3, 1, None, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_min().alias("a_cum_min"),
+ ... nw.col("a").cum_min(reverse=True).alias("a_cum_min_reverse"),
+ ... )
+ ┌────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------|
+ | a a_cum_min a_cum_min_reverse|
+ |0 3.0 3.0 1.0|
+ |1 1.0 1.0 1.0|
+ |2 NaN NaN NaN|
+ |3 2.0 1.0 2.0|
+ └────────────────────────────────────┘
+ """
+ # Order-dependent cumulative window op; lazy backends must follow with
+ # `.over(order_by=...)` (see Info above).
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_min(reverse=reverse)
+ )
+
+ def cum_max(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative max of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_max().alias("a_cum_max"),
+ ... nw.col("a").cum_max(reverse=True).alias("a_cum_max_reverse"),
+ ... )
+ ┌────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------|
+ | a a_cum_max a_cum_max_reverse|
+ |0 1.0 1.0 3.0|
+ |1 3.0 3.0 3.0|
+ |2 NaN NaN NaN|
+ |3 2.0 3.0 2.0|
+ └────────────────────────────────────┘
+ """
+ # Order-dependent cumulative window op; lazy backends must follow with
+ # `.over(order_by=...)` (see Info above).
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_max(reverse=reverse)
+ )
+
+ def cum_prod(self, *, reverse: bool = False) -> Self:
+ r"""Return the cumulative product of the non-null values in the column.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ reverse: reverse the operation
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").cum_prod().alias("a_cum_prod"),
+ ... nw.col("a").cum_prod(reverse=True).alias("a_cum_prod_reverse"),
+ ... )
+ ┌──────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |--------------------------------------|
+ | a a_cum_prod a_cum_prod_reverse|
+ |0 1.0 1.0 6.0|
+ |1 3.0 3.0 6.0|
+ |2 NaN NaN NaN|
+ |3 2.0 6.0 2.0|
+ └──────────────────────────────────────┘
+ """
+ # Order-dependent cumulative window op; lazy backends must follow with
+ # `.over(order_by=...)` (see Info above).
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).cum_prod(reverse=reverse)
+ )
+
+ def rolling_sum(
+ self, window_size: int, *, min_samples: int | None = None, center: bool = False
+ ) -> Self:
+ """Apply a rolling sum (moving sum) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their sum.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`
+ center: Set the labels at the center of the window.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_sum=nw.col("a").rolling_sum(window_size=3, min_samples=1)
+ ... )
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a a_rolling_sum|
+ |0 1.0 1.0|
+ |1 2.0 3.0|
+ |2 NaN 3.0|
+ |3 4.0 6.0|
+ └─────────────────────┘
+ """
+ window_size, min_samples_int = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_sum(
+ window_size=window_size, min_samples=min_samples_int, center=center
+ )
+ )
+
+ def rolling_mean(
+ self, window_size: int, *, min_samples: int | None = None, center: bool = False
+ ) -> Self:
+ """Apply a rolling mean (moving mean) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their mean.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`
+ center: Set the labels at the center of the window.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_mean=nw.col("a").rolling_mean(window_size=3, min_samples=1)
+ ... )
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ | a a_rolling_mean|
+ |0 1.0 1.0|
+ |1 2.0 1.5|
+ |2 NaN 1.5|
+ |3 4.0 3.0|
+ └──────────────────────┘
+ """
+ # Validate and normalize window arguments, then delegate as an
+ # order-dependent window operation.
+ window_size, min_samples = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_mean(
+ window_size=window_size, min_samples=min_samples, center=center
+ )
+ )
+
+ def rolling_var(
+ self,
+ window_size: int,
+ *,
+ min_samples: int | None = None,
+ center: bool = False,
+ ddof: int = 1,
+ ) -> Self:
+ """Apply a rolling variance (moving variance) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their variance.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`.
+ center: Set the labels at the center of the window.
+ ddof: Delta Degrees of Freedom; the divisor for a length N window is N - ddof.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_var=nw.col("a").rolling_var(window_size=3, min_samples=1)
+ ... )
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a a_rolling_var|
+ |0 1.0 NaN|
+ |1 2.0 0.5|
+ |2 NaN 0.5|
+ |3 4.0 2.0|
+ └─────────────────────┘
+ """
+ # Validate and normalize window arguments, then delegate as an
+ # order-dependent window operation (`ddof` forwarded unchanged).
+ window_size, min_samples = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_var(
+ window_size=window_size, min_samples=min_samples, center=center, ddof=ddof
+ )
+ )
+
+ def rolling_std(
+ self,
+ window_size: int,
+ *,
+ min_samples: int | None = None,
+ center: bool = False,
+ ddof: int = 1,
+ ) -> Self:
+ """Apply a rolling standard deviation (moving standard deviation) over the values.
+
+ A window of length `window_size` will traverse the values. The resulting values
+ will be aggregated to their standard deviation.
+
+ The window at a given row will include the row itself and the `window_size - 1`
+ elements before it.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ window_size: The length of the window in number of elements. It must be a
+ strictly positive integer.
+ min_samples: The number of values in the window that should be non-null before
+ computing a result. If set to `None` (default), it will be set equal to
+ `window_size`. If provided, it must be a strictly positive integer, and
+ less than or equal to `window_size`.
+ center: Set the labels at the center of the window.
+ ddof: Delta Degrees of Freedom; the divisor for a length N window is N - ddof.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_rolling_std=nw.col("a").rolling_std(window_size=3, min_samples=1)
+ ... )
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ | a a_rolling_std|
+ |0 1.0 NaN|
+ |1 2.0 0.707107|
+ |2 NaN 0.707107|
+ |3 4.0 1.414214|
+ └─────────────────────┘
+ """
+ # Validate and normalize window arguments, then delegate as an
+ # order-dependent window operation (`ddof` forwarded unchanged).
+ window_size, min_samples = _validate_rolling_arguments(
+ window_size=window_size, min_samples=min_samples
+ )
+
+ return self._with_orderable_window(
+ lambda plx: self._to_compliant_expr(plx).rolling_std(
+ window_size=window_size, min_samples=min_samples, center=center, ddof=ddof
+ )
+ )
+
+ def rank(self, method: RankMethod = "average", *, descending: bool = False) -> Self:
+ """Assign ranks to data, dealing with ties appropriately.
+
+ Notes:
+ The resulting dtype may differ between backends.
+
+ Info:
+ For lazy backends, this operation must be followed by `Expr.over` with
+ `order_by` specified, see [order-dependence](../concepts/order_dependence.md).
+
+ Arguments:
+ method: The method used to assign ranks to tied elements.
+ The following methods are available (default is 'average')
+
+ - *"average"*: The average of the ranks that would have been assigned to
+ all the tied values is assigned to each value.
+ - *"min"*: The minimum of the ranks that would have been assigned to all
+ the tied values is assigned to each value. (This is also referred to
+ as "competition" ranking.)
+ - *"max"*: The maximum of the ranks that would have been assigned to all
+ the tied values is assigned to each value.
+ - *"dense"*: Like "min", but the rank of the next highest element is
+ assigned the rank immediately after those assigned to the tied elements.
+ - *"ordinal"*: All values are given a distinct rank, corresponding to the
+ order that the values occur in the Series.
+
+ descending: Rank in descending order.
+
+ Returns:
+ A new expression with rank data.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"a": [3, 6, 1, 1, 6]})
+ >>> df = nw.from_native(df_native)
+ >>> result = df.with_columns(rank=nw.col("a").rank(method="dense"))
+ >>> result
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a rank |
+ | 0 3 2.0 |
+ | 1 6 3.0 |
+ | 2 1 1.0 |
+ | 3 1 1.0 |
+ | 4 6 3.0 |
+ └──────────────────┘
+ """
+ # Validate the method eagerly so an invalid value fails when the
+ # expression is built rather than when it is executed.
+ supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
+ if method not in supported_rank_methods:
+ msg = (
+ "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "
+ f"Found '{method}'"
+ )
+ raise ValueError(msg)
+
+ # Delegate to the compliant-level rank as a window operation.
+ return self._with_unorderable_window(
+ lambda plx: self._to_compliant_expr(plx).rank(
+ method=method, descending=descending
+ )
+ )
+
+ def log(self, base: float = math.e) -> Self:
+ r"""Compute the logarithm to a given base.
+
+ Arguments:
+ base: Given base, defaults to `e`
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"values": [1, 2, 4]})
+ >>> df = nw.from_native(df_native)
+ >>> result = df.with_columns(
+ ... log=nw.col("values").log(), log_2=nw.col("values").log(base=2)
+ ... )
+ >>> result
+ ┌────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------------------|
+ |pyarrow.Table |
+ |values: int64 |
+ |log: double |
+ |log_2: double |
+ |---- |
+ |values: [[1,2,4]] |
+ |log: [[0,0.6931471805599453,1.3862943611198906]]|
+ |log_2: [[0,1,2]] |
+ └────────────────────────────────────────────────┘
+ """
+ # Elementwise; `base` defaults to Euler's number (natural logarithm).
+ return self._with_elementwise_op(
+ lambda plx: self._to_compliant_expr(plx).log(base=base)
+ )
+
+ def exp(self) -> Self:
+ r"""Compute the exponent.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"values": [-1, 0, 1]})
+ >>> df = nw.from_native(df_native)
+ >>> result = df.with_columns(exp=nw.col("values").exp())
+ >>> result
+ ┌────────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------------------------|
+ |pyarrow.Table |
+ |values: int64 |
+ |exp: double |
+ |---- |
+ |values: [[-1,0,1]] |
+ |exp: [[0.36787944117144233,1,2.718281828459045]]|
+ └────────────────────────────────────────────────┘
+ """
+ # Elementwise exponential, delegated to the compliant expression.
+ return self._with_elementwise_op(lambda plx: self._to_compliant_expr(plx).exp())
+
+ @property
+ def str(self) -> ExprStringNamespace[Self]:
+ """Namespace of string-related expression methods (``Expr.str.*``)."""
+ return ExprStringNamespace(self)
+
+ @property
+ def dt(self) -> ExprDateTimeNamespace[Self]:
+ """Namespace of datetime-related expression methods (``Expr.dt.*``)."""
+ return ExprDateTimeNamespace(self)
+
+ @property
+ def cat(self) -> ExprCatNamespace[Self]:
+ """Namespace of categorical expression methods (``Expr.cat.*``)."""
+ return ExprCatNamespace(self)
+
+ @property
+ def name(self) -> ExprNameNamespace[Self]:
+ """Namespace of output-name manipulation methods (``Expr.name.*``)."""
+ return ExprNameNamespace(self)
+
+ @property
+ def list(self) -> ExprListNamespace[Self]:
+ """Namespace of list-related expression methods (``Expr.list.*``)."""
+ return ExprListNamespace(self)
+
+ @property
+ def struct(self) -> ExprStructNamespace[Self]:
+ """Namespace of struct-related expression methods (``Expr.struct.*``)."""
+ return ExprStructNamespace(self)
+
+
+__all__ = ["Expr"]
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr_cat.py b/venv/lib/python3.8/site-packages/narwhals/expr_cat.py
new file mode 100644
index 0000000..7a0edc2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr_cat.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from narwhals.expr import Expr
+
+ExprT = TypeVar("ExprT", bound="Expr")
+
+
+class ExprCatNamespace(Generic[ExprT]):
+ def __init__(self, expr: ExprT) -> None:
+ self._expr = expr
+
+ def get_categories(self) -> ExprT:
+ """Get unique categories from column.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"fruits": ["apple", "mango", "mango"]},
+ ... schema={"fruits": pl.Categorical},
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("fruits").cat.get_categories()).to_native()
+ shape: (2, 1)
+ ┌────────┐
+ │ fruits │
+ │ --- │
+ │ str │
+ ╞════════╡
+ │ apple │
+ │ mango │
+ └────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).cat.get_categories()
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr_dt.py b/venv/lib/python3.8/site-packages/narwhals/expr_dt.py
new file mode 100644
index 0000000..05d4bd1
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr_dt.py
@@ -0,0 +1,784 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from narwhals.expr import Expr
+ from narwhals.typing import TimeUnit
+
+ExprT = TypeVar("ExprT", bound="Expr")
+
+
+class ExprDateTimeNamespace(Generic[ExprT]):
+ def __init__(self, expr: ExprT) -> None:
+ # Store the parent expression; every accessor below delegates to it.
+ self._expr = expr
+
+ def date(self) -> ExprT:
+ """Extract the date from underlying DateTime representation.
+
+ Returns:
+ A new expression.
+
+ Raises:
+ NotImplementedError: If pandas default backend is being used.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"a": [datetime(2012, 1, 7, 10), datetime(2027, 12, 13)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").dt.date()).to_native()
+ shape: (2, 1)
+ ┌────────────┐
+ │ a │
+ │ --- │
+ │ date │
+ ╞════════════╡
+ │ 2012-01-07 │
+ │ 2027-12-13 │
+ └────────────┘
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.date()
+ )
+
+ def year(self) -> ExprT:
+ """Extract year from underlying DateTime representation.
+
+ Returns the year number in the calendar date.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": [datetime(1978, 6, 1), datetime(2065, 1, 1)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a").dt.year().alias("year"))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | a year|
+ |0 1978-06-01 1978|
+ |1 2065-01-01 2065|
+ └──────────────────┘
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.year()
+ )
+
+ def month(self) -> ExprT:
+ """Extract month from underlying DateTime representation.
+
+ Returns the month number starting from 1. The return value ranges from 1 to 12.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"a": [datetime(1978, 6, 1), datetime(2065, 1, 1)]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a").dt.month().alias("month")).to_native()
+ pyarrow.Table
+ a: timestamp[us]
+ month: int64
+ ----
+ a: [[1978-06-01 00:00:00.000000,2065-01-01 00:00:00.000000]]
+ month: [[6,1]]
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.month()
+ )
+
+ def day(self) -> ExprT:
+ """Extract day from underlying DateTime representation.
+
+ Returns the day of month starting from 1. The return value ranges from 1 to 31. (The last day of month differs by months.)
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"a": [datetime(1978, 6, 1), datetime(2065, 1, 1)]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a").dt.day().alias("day")).to_native()
+ pyarrow.Table
+ a: timestamp[us]
+ day: int64
+ ----
+ a: [[1978-06-01 00:00:00.000000,2065-01-01 00:00:00.000000]]
+ day: [[1,1]]
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.day()
+ )
+
+ def hour(self) -> ExprT:
+ """Extract hour from underlying DateTime representation.
+
+ Returns the hour number from 0 to 23.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"a": [datetime(1978, 1, 1, 1), datetime(2065, 1, 1, 10)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a").dt.hour().alias("hour"))
+ ┌──────────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------------|
+ |shape: (2, 2) |
+ |┌─────────────────────┬──────┐|
+ |│ a ┆ hour │|
+ |│ --- ┆ --- │|
+ |│ datetime[μs] ┆ i8 │|
+ |╞═════════════════════╪══════╡|
+ |│ 1978-01-01 01:00:00 ┆ 1 │|
+ |│ 2065-01-01 10:00:00 ┆ 10 │|
+ |└─────────────────────┴──────┘|
+ └──────────────────────────────┘
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.hour()
+ )
+
+ def minute(self) -> ExprT:
+ """Extract minutes from underlying DateTime representation.
+
+ Returns the minute number from 0 to 59.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": [datetime(1978, 1, 1, 1, 1), datetime(2065, 1, 1, 10, 20)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a").dt.minute().alias("minute")).to_native()
+ a minute
+ 0 1978-01-01 01:01:00 1
+ 1 2065-01-01 10:20:00 20
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.minute()
+ )
+
+ def second(self) -> ExprT:
+ """Extract seconds from underlying DateTime representation.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table(
+ ... {
+ ... "a": [
+ ... datetime(1978, 1, 1, 1, 1, 1),
+ ... datetime(2065, 1, 1, 10, 20, 30),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("a").dt.second().alias("second")).to_native()
+ pyarrow.Table
+ a: timestamp[us]
+ second: int64
+ ----
+ a: [[1978-01-01 01:01:01.000000,2065-01-01 10:20:30.000000]]
+ second: [[1,30]]
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.second()
+ )
+
+ def millisecond(self) -> ExprT:
+ """Extract milliseconds from underlying DateTime representation.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table(
+ ... {
+ ... "a": [
+ ... datetime(1978, 1, 1, 1, 1, 1, 0),
+ ... datetime(2065, 1, 1, 10, 20, 30, 67000),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").dt.millisecond().alias("millisecond")
+ ... ).to_native()
+ pyarrow.Table
+ a: timestamp[us]
+ millisecond: int64
+ ----
+ a: [[1978-01-01 01:01:01.000000,2065-01-01 10:20:30.067000]]
+ millisecond: [[0,67]]
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.millisecond()
+ )
+
+ def microsecond(self) -> ExprT:
+ """Extract microseconds from underlying DateTime representation.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table(
+ ... {
+ ... "a": [
+ ... datetime(1978, 1, 1, 1, 1, 1, 0),
+ ... datetime(2065, 1, 1, 10, 20, 30, 67000),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").dt.microsecond().alias("microsecond")
+ ... ).to_native()
+ pyarrow.Table
+ a: timestamp[us]
+ microsecond: int64
+ ----
+ a: [[1978-01-01 01:01:01.000000,2065-01-01 10:20:30.067000]]
+ microsecond: [[0,67000]]
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.microsecond()
+ )
+
+ def nanosecond(self) -> ExprT:
+ """Extract Nanoseconds from underlying DateTime representation.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table(
+ ... {
+ ... "a": [
+ ... datetime(1978, 1, 1, 1, 1, 1, 0),
+ ... datetime(2065, 1, 1, 10, 20, 30, 67000),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("a").dt.nanosecond().alias("nanosecond")
+ ... ).to_native()
+ pyarrow.Table
+ a: timestamp[us]
+ nanosecond: int64
+ ----
+ a: [[1978-01-01 01:01:01.000000,2065-01-01 10:20:30.067000]]
+ nanosecond: [[0,67000000]]
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.nanosecond()
+ )
+
+ def ordinal_day(self) -> ExprT:
+ """Get ordinal day.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_ordinal_day=nw.col("a").dt.ordinal_day())
+ ┌───────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------|
+ | a a_ordinal_day|
+ |0 2020-01-01 1|
+ |1 2020-08-03 216|
+ └───────────────────────────┘
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.ordinal_day()
+ )
+
+ def weekday(self) -> ExprT:
+ """Extract the week day from the underlying Date representation.
+
+ Returns:
+ Returns the ISO weekday number where monday = 1 and sunday = 7
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_week_day=nw.col("a").dt.weekday())
+ ┌────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------|
+ | a a_week_day|
+ |0 2020-01-01 3|
+ |1 2020-08-03 1|
+ └────────────────────────┘
+ """
+ # Elementwise: delegate to the backend's compliant `dt` namespace.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.weekday()
+ )
+
+ def total_minutes(self) -> ExprT:
+ """Get total minutes.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ The function outputs the total minutes in the int dtype by default,
+ however, pandas may change the dtype to float when there are missing values,
+ consider using `fill_null()` and `cast` in this case.
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_total_minutes=nw.col("a").dt.total_minutes()
+ ... ).to_native()
+ shape: (2, 2)
+ ┌──────────────┬─────────────────┐
+ │ a ┆ a_total_minutes │
+ │ --- ┆ --- │
+ │ duration[μs] ┆ i64 │
+ ╞══════════════╪═════════════════╡
+ │ 10m ┆ 10 │
+ │ 20m 40s ┆ 20 │
+ └──────────────┴─────────────────┘
+ """
+ # Elementwise duration conversion; see dtype note above for pandas.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.total_minutes()
+ )
+
+ def total_seconds(self) -> ExprT:
+ """Get total seconds.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ The function outputs the total seconds in the int dtype by default,
+ however, pandas may change the dtype to float when there are missing values,
+ consider using `fill_null()` and `cast` in this case.
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_total_seconds=nw.col("a").dt.total_seconds()
+ ... ).to_native()
+ shape: (2, 2)
+ ┌──────────────┬─────────────────┐
+ │ a ┆ a_total_seconds │
+ │ --- ┆ --- │
+ │ duration[μs] ┆ i64 │
+ ╞══════════════╪═════════════════╡
+ │ 10s ┆ 10 │
+ │ 20s 40ms ┆ 20 │
+ └──────────────┴─────────────────┘
+ """
+ # Elementwise duration conversion; see dtype note above for pandas.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.total_seconds()
+ )
+
+ def total_milliseconds(self) -> ExprT:
+ """Get total milliseconds.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ The function outputs the total milliseconds in the int dtype by default,
+ however, pandas may change the dtype to float when there are missing values,
+ consider using `fill_null()` and `cast` in this case.
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {
+ ... "a": [
+ ... timedelta(milliseconds=10),
+ ... timedelta(milliseconds=20, microseconds=40),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_total_milliseconds=nw.col("a").dt.total_milliseconds()
+ ... ).to_native()
+ shape: (2, 2)
+ ┌──────────────┬──────────────────────┐
+ │ a ┆ a_total_milliseconds │
+ │ --- ┆ --- │
+ │ duration[μs] ┆ i64 │
+ ╞══════════════╪══════════════════════╡
+ │ 10ms ┆ 10 │
+ │ 20040µs ┆ 20 │
+ └──────────────┴──────────────────────┘
+ """
+ # Elementwise duration conversion; see dtype note above for pandas.
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.total_milliseconds()
+ )
+
+ def total_microseconds(self) -> ExprT:
+ """Get total microseconds.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ The function outputs the total microseconds in the int dtype by default,
+ however, pandas may change the dtype to float when there are missing values,
+ consider using `fill_null()` and `cast` in this case.
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table(
+ ... {
+ ... "a": [
+ ... timedelta(microseconds=10),
+ ... timedelta(milliseconds=1, microseconds=200),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_total_microseconds=nw.col("a").dt.total_microseconds()
+ ... ).to_native()
+ pyarrow.Table
+ a: duration[us]
+ a_total_microseconds: int64
+ ----
+ a: [[10,1200]]
+ a_total_microseconds: [[10,1200]]
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.total_microseconds()
+ )
+
+ def total_nanoseconds(self) -> ExprT:
+ """Get total nanoseconds.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ The function outputs the total nanoseconds in the int dtype by default,
+ however, pandas may change the dtype to float when there are missing values,
+ consider using `fill_null()` and `cast` in this case.
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {
+ ... "a": pd.to_datetime(
+ ... [
+ ... "2024-01-01 00:00:00.000000001",
+ ... "2024-01-01 00:00:00.000000002",
+ ... ]
+ ... )
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds()
+ ... ).to_native()
+ a a_diff_total_nanoseconds
+ 0 2024-01-01 00:00:00.000000001 NaN
+ 1 2024-01-01 00:00:00.000000002 1.0
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.total_nanoseconds()
+ )
+
+ def to_string(self, format: str) -> ExprT:
+ """Convert a Date/Time/Datetime column into a String column with the given format.
+
+ Arguments:
+ format: Format to format temporal column with.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ Unfortunately, different libraries interpret format directives a bit
+ differently.
+
+ - Chrono, the library used by Polars, uses `"%.f"` for fractional seconds,
+ whereas pandas and Python stdlib use `".%f"`.
+ - PyArrow interprets `"%S"` as "seconds, including fractional seconds"
+ whereas most other tools interpret it as "just seconds, as 2 digits".
+ ---
+ Therefore, we make the following adjustments.
+
+ - for pandas-like libraries, we replace `"%S.%f"` with `"%S%.f"`.
+ - for PyArrow, we replace `"%S.%f"` with `"%S"`.
+ ---
+ Workarounds like these don't make us happy, and we try to avoid them as
+ much as possible, but here we feel like it's the best compromise.
+
+ If you just want to format a date/datetime Series as a local datetime
+ string, and have it work as consistently as possible across libraries,
+ we suggest using:
+
+ - `"%Y-%m-%dT%H:%M:%S%.f"` for datetimes
+ - `"%Y-%m-%d"` for dates
+ ---
+ Though note that, even then, different tools may return a different number
+ of trailing zeros. Nonetheless, this is probably consistent enough for
+ most applications.
+
+ If you have an application where this is not enough, please open an issue
+ and let us know.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"a": [datetime(2020, 3, 1), datetime(2020, 5, 1)]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ |shape: (2, 1) |
+ |┌─────────────────────┐|
+ |│ a │|
+ |│ --- │|
+ |│ str │|
+ |╞═════════════════════╡|
+ |│ 2020/03/01 00:00:00 │|
+ |│ 2020/05/01 00:00:00 │|
+ |└─────────────────────┘|
+ └───────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.to_string(format)
+ )
+
+ def replace_time_zone(self, time_zone: str | None) -> ExprT:
+ """Replace time zone.
+
+ Arguments:
+ time_zone: Target time zone.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime, timezone
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {
+ ... "a": [
+ ... datetime(2024, 1, 1, tzinfo=timezone.utc),
+ ... datetime(2024, 1, 2, tzinfo=timezone.utc),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").dt.replace_time_zone("Asia/Kathmandu")).to_native()
+ a
+ 0 2024-01-01 00:00:00+05:45
+ 1 2024-01-02 00:00:00+05:45
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.replace_time_zone(time_zone)
+ )
+
+ def convert_time_zone(self, time_zone: str) -> ExprT:
+ """Convert to a new time zone.
+
+ If converting from a time-zone-naive column, then conversion happens
+ as if converting from UTC.
+
+ Arguments:
+ time_zone: Target time zone.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import datetime, timezone
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame(
+ ... {
+ ... "a": [
+ ... datetime(2024, 1, 1, tzinfo=timezone.utc),
+ ... datetime(2024, 1, 2, tzinfo=timezone.utc),
+ ... ]
+ ... }
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").dt.convert_time_zone("Asia/Kathmandu")).to_native()
+ a
+ 0 2024-01-01 05:45:00+05:45
+ 1 2024-01-02 05:45:00+05:45
+ """
+ if time_zone is None:
+ msg = "Target `time_zone` cannot be `None` in `convert_time_zone`. Please use `replace_time_zone(None)` if you want to remove the time zone."
+ raise TypeError(msg)
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.convert_time_zone(time_zone)
+ )
+
+ def timestamp(self, time_unit: TimeUnit = "us") -> ExprT:
+ """Return a timestamp in the given time unit.
+
+ Arguments:
+ time_unit: One of
+ - 'ns': nanosecond.
+ - 'us': microsecond.
+ - 'ms': millisecond.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> from datetime import date
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"date": [date(2001, 1, 1), None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("date").dt.timestamp("ms").alias("timestamp_ms"))
+ ┌─────────────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------------|
+ |shape: (2, 2) |
+ |┌────────────┬──────────────┐|
+ |│ date ┆ timestamp_ms │|
+ |│ --- ┆ --- │|
+ |│ date ┆ i64 │|
+ |╞════════════╪══════════════╡|
+ |│ 2001-01-01 ┆ 978307200000 │|
+ |│ null ┆ null │|
+ |└────────────┴──────────────┘|
+ └─────────────────────────────┘
+ """
+ if time_unit not in {"ns", "us", "ms"}:
+ msg = (
+ "invalid `time_unit`"
+ f"\n\nExpected one of {{'ns', 'us', 'ms'}}, got {time_unit!r}."
+ )
+ raise ValueError(msg)
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.timestamp(time_unit)
+ )
+
+ def truncate(self, every: str) -> ExprT:
+ """Divide the date/datetime range into buckets.
+
+ Arguments:
+ every: Length of bucket. Must be of form `<multiple><unit>`,
+ where `multiple` is a positive integer and `unit` is one of
+
+ - 'ns': nanosecond.
+ - 'us': microsecond.
+ - 'ms': millisecond.
+ - 's': second.
+ - 'm': minute.
+ - 'h': hour.
+ - 'd': day.
+ - 'mo': month.
+ - 'q': quarter.
+ - 'y': year.
+
+ Returns:
+ Expression of data type `Date` or `Datetime`.
+
+ Examples:
+ >>> from datetime import datetime
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"datetime": [datetime(2021, 3, 1, 12, 34)]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... nw.col("datetime").dt.truncate("1h").alias("datetime_trunc")
+ ... )
+ ┌─────────────────────────────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------------------------------|
+ |shape: (1, 2) |
+ |┌─────────────────────┬─────────────────────┐|
+ |│ datetime ┆ datetime_trunc │|
+ |│ --- ┆ --- │|
+ |│ datetime[μs] ┆ datetime[μs] │|
+ |╞═════════════════════╪═════════════════════╡|
+ |│ 2021-03-01 12:34:00 ┆ 2021-03-01 12:00:00 │|
+ |└─────────────────────┴─────────────────────┘|
+ └─────────────────────────────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).dt.truncate(every)
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr_list.py b/venv/lib/python3.8/site-packages/narwhals/expr_list.py
new file mode 100644
index 0000000..f169801
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr_list.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from narwhals.expr import Expr
+
+ExprT = TypeVar("ExprT", bound="Expr")
+
+
+class ExprListNamespace(Generic[ExprT]):
+ def __init__(self, expr: ExprT) -> None:
+ self._expr = expr
+
+ def len(self) -> ExprT:
+ """Return the number of elements in each list.
+
+ Null values count towards the total.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": [[1, 2], [3, 4, None], None, []]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(a_len=nw.col("a").list.len())
+ ┌────────────────────────┐
+ | Narwhals DataFrame |
+ |------------------------|
+ |shape: (4, 2) |
+ |┌──────────────┬───────┐|
+ |│ a ┆ a_len │|
+ |│ --- ┆ --- │|
+ |│ list[i64] ┆ u32 │|
+ |╞══════════════╪═══════╡|
+ |│ [1, 2] ┆ 2 │|
+ |│ [3, 4, null] ┆ 3 │|
+ |│ null ┆ null │|
+ |│ [] ┆ 0 │|
+ |└──────────────┴───────┘|
+ └────────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).list.len()
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr_name.py b/venv/lib/python3.8/site-packages/narwhals/expr_name.py
new file mode 100644
index 0000000..4f77bec
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr_name.py
@@ -0,0 +1,161 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from narwhals.expr import Expr
+
+ExprT = TypeVar("ExprT", bound="Expr")
+
+
+class ExprNameNamespace(Generic[ExprT]):
+ def __init__(self, expr: ExprT) -> None:
+ self._expr = expr
+
+ def keep(self) -> ExprT:
+ r"""Keep the original root name of the expression.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ This will undo any previous renaming operations on the expression.
+ Due to implementation constraints, this method can only be called as the last
+ expression in a chain. Only one name operation per expression will work.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": [1, 2], "BAR": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo").alias("alias_for_foo").name.keep()).columns
+ ['foo']
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).name.keep()
+ )
+
+ def map(self, function: Callable[[str], str]) -> ExprT:
+ r"""Rename the output of an expression by mapping a function over the root name.
+
+ Arguments:
+ function: Function that maps a root name to a new name.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ This will undo any previous renaming operations on the expression.
+ Due to implementation constraints, this method can only be called as the last
+ expression in a chain. Only one name operation per expression will work.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": [1, 2], "BAR": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> renaming_func = lambda s: s[::-1] # reverse column name
+ >>> df.select(nw.col("foo", "BAR").name.map(renaming_func)).columns
+ ['oof', 'RAB']
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).name.map(function)
+ )
+
+ def prefix(self, prefix: str) -> ExprT:
+ r"""Add a prefix to the root column name of the expression.
+
+ Arguments:
+ prefix: Prefix to add to the root column name.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ This will undo any previous renaming operations on the expression.
+ Due to implementation constraints, this method can only be called as the last
+ expression in a chain. Only one name operation per expression will work.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"foo": [1, 2], "BAR": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo", "BAR").name.prefix("with_prefix")).columns
+ ['with_prefixfoo', 'with_prefixBAR']
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).name.prefix(prefix)
+ )
+
+ def suffix(self, suffix: str) -> ExprT:
+ r"""Add a suffix to the root column name of the expression.
+
+ Arguments:
+ suffix: Suffix to add to the root column name.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ This will undo any previous renaming operations on the expression.
+ Due to implementation constraints, this method can only be called as the last
+ expression in a chain. Only one name operation per expression will work.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"foo": [1, 2], "BAR": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo", "BAR").name.suffix("_with_suffix")).columns
+ ['foo_with_suffix', 'BAR_with_suffix']
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).name.suffix(suffix)
+ )
+
+ def to_lowercase(self) -> ExprT:
+ r"""Make the root column name lowercase.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ This will undo any previous renaming operations on the expression.
+ Due to implementation constraints, this method can only be called as the last
+ expression in a chain. Only one name operation per expression will work.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"foo": [1, 2], "BAR": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo", "BAR").name.to_lowercase()).columns
+ ['foo', 'bar']
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).name.to_lowercase()
+ )
+
+ def to_uppercase(self) -> ExprT:
+ r"""Make the root column name uppercase.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ This will undo any previous renaming operations on the expression.
+ Due to implementation constraints, this method can only be called as the last
+ expression in a chain. Only one name operation per expression will work.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"foo": [1, 2], "BAR": [4, 5]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("foo", "BAR").name.to_uppercase()).columns
+ ['FOO', 'BAR']
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).name.to_uppercase()
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr_str.py b/venv/lib/python3.8/site-packages/narwhals/expr_str.py
new file mode 100644
index 0000000..e598ff7
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr_str.py
@@ -0,0 +1,449 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from narwhals.expr import Expr
+
+ExprT = TypeVar("ExprT", bound="Expr")
+
+
+class ExprStringNamespace(Generic[ExprT]):
+ def __init__(self, expr: ExprT) -> None:
+ self._expr = expr
+
+ def len_chars(self) -> ExprT:
+ r"""Return the length of each string as the number of characters.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"words": ["foo", "345", None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(words_len=nw.col("words").str.len_chars())
+ ┌─────────────────────┐
+ | Narwhals DataFrame |
+ |---------------------|
+ |shape: (3, 2) |
+ |┌───────┬───────────┐|
+ |│ words ┆ words_len │|
+ |│ --- ┆ --- │|
+ |│ str ┆ u32 │|
+ |╞═══════╪═══════════╡|
+ |│ foo ┆ 3 │|
+ |│ 345 ┆ 3 │|
+ |│ null ┆ null │|
+ |└───────┴───────────┘|
+ └─────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.len_chars()
+ )
+
+ def replace(
+ self, pattern: str, value: str, *, literal: bool = False, n: int = 1
+ ) -> ExprT:
+ r"""Replace first matching regex/literal substring with a new string value.
+
+ Arguments:
+ pattern: A valid regular expression pattern.
+ value: String that will replace the matched substring.
+ literal: Treat `pattern` as a literal string.
+ n: Number of matches to replace.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": ["123abc", "abc abc123"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(replaced=nw.col("foo").str.replace("abc", ""))
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ | foo replaced|
+ |0 123abc 123|
+ |1 abc abc123 abc123|
+ └──────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.replace(
+ pattern, value, literal=literal, n=n
+ )
+ )
+
+ def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> ExprT:
+ r"""Replace all matching regex/literal substring with a new string value.
+
+ Arguments:
+ pattern: A valid regular expression pattern.
+ value: String that will replace the matched substring.
+ literal: Treat `pattern` as a literal string.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"foo": ["123abc", "abc abc123"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(replaced=nw.col("foo").str.replace_all("abc", ""))
+ ┌──────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------|
+ | foo replaced|
+ |0 123abc 123|
+ |1 abc abc123 123|
+ └──────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.replace_all(
+ pattern, value, literal=literal
+ )
+ )
+
+ def strip_chars(self, characters: str | None = None) -> ExprT:
+ r"""Remove leading and trailing characters.
+
+ Arguments:
+ characters: The set of characters to be removed. All combinations of this
+ set of characters will be stripped from the start and end of the string.
+ If set to None (default), all leading and trailing whitespace is removed
+ instead.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"fruits": ["apple", "\nmango"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(stripped=nw.col("fruits").str.strip_chars()).to_dict(
+ ... as_series=False
+ ... )
+ {'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.strip_chars(characters)
+ )
+
+ def starts_with(self, prefix: str) -> ExprT:
+ r"""Check if string values start with a substring.
+
+ Arguments:
+ prefix: prefix substring
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"fruits": ["apple", "mango", None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(has_prefix=nw.col("fruits").str.starts_with("app"))
+ ┌───────────────────┐
+ |Narwhals DataFrame |
+ |-------------------|
+ | fruits has_prefix|
+ |0 apple True|
+ |1 mango False|
+ |2 None None|
+ └───────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.starts_with(prefix)
+ )
+
+ def ends_with(self, suffix: str) -> ExprT:
+ r"""Check if string values end with a substring.
+
+ Arguments:
+ suffix: suffix substring
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"fruits": ["apple", "mango", None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(has_suffix=nw.col("fruits").str.ends_with("ngo"))
+ ┌───────────────────┐
+ |Narwhals DataFrame |
+ |-------------------|
+ | fruits has_suffix|
+ |0 apple False|
+ |1 mango True|
+ |2 None None|
+ └───────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.ends_with(suffix)
+ )
+
+ def contains(self, pattern: str, *, literal: bool = False) -> ExprT:
+ r"""Check if string contains a substring that matches a pattern.
+
+ Arguments:
+            pattern: A character sequence or valid regular expression pattern.
+ literal: If True, treats the pattern as a literal string.
+ If False, assumes the pattern is a regular expression.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"pets": ["cat", "dog", "rabbit and parrot"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(
+ ... default_match=nw.col("pets").str.contains("cat|parrot"),
+ ... case_insensitive_match=nw.col("pets").str.contains("cat|(?i)parrot"),
+ ... ).to_native()
+ pyarrow.Table
+ pets: string
+ default_match: bool
+ case_insensitive_match: bool
+ ----
+ pets: [["cat","dog","rabbit and parrot"]]
+ default_match: [[true,false,true]]
+ case_insensitive_match: [[true,false,true]]
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.contains(
+ pattern, literal=literal
+ )
+ )
+
+ def slice(self, offset: int, length: int | None = None) -> ExprT:
+ r"""Create subslices of the string values of an expression.
+
+ Arguments:
+ offset: Start index. Negative indexing is supported.
+ length: Length of the slice. If set to `None` (default), the slice is taken to the
+ end of the string.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"s": ["pear", None, "papaya"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(s_sliced=nw.col("s").str.slice(4, length=3))
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | s s_sliced|
+ |0 pear |
+ |1 None None|
+ |2 papaya ya|
+ └──────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.slice(
+ offset=offset, length=length
+ )
+ )
+
+ def split(self, by: str) -> ExprT:
+ r"""Split the string values of an expression by a substring.
+
+ Arguments:
+ by: Substring to split by.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"s": ["foo bar", "foo_bar"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(nw.col("s").str.split("_").alias("s_split"))
+ ┌────────────────────────────┐
+ | Narwhals DataFrame |
+ |----------------------------|
+ |shape: (2, 2) |
+ |┌─────────┬────────────────┐|
+ |│ s ┆ s_split │|
+ |│ --- ┆ --- │|
+ |│ str ┆ list[str] │|
+ |╞═════════╪════════════════╡|
+ |│ foo bar ┆ ["foo bar"] │|
+ |│ foo_bar ┆ ["foo", "bar"] │|
+ |└─────────┴────────────────┘|
+ └────────────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.split(by=by)
+ )
+
+ def head(self, n: int = 5) -> ExprT:
+ r"""Take the first n elements of each string.
+
+ Arguments:
+ n: Number of elements to take. Negative indexing is **not** supported.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ If the length of the string has fewer than `n` characters, the full string is returned.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"lyrics": ["taata", "taatatata", "zukkyun"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(lyrics_head=nw.col("lyrics").str.head()).to_native()
+ pyarrow.Table
+ lyrics: string
+ lyrics_head: string
+ ----
+ lyrics: [["taata","taatatata","zukkyun"]]
+ lyrics_head: [["taata","taata","zukky"]]
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.slice(0, n)
+ )
+
+ def tail(self, n: int = 5) -> ExprT:
+ r"""Take the last n elements of each string.
+
+ Arguments:
+ n: Number of elements to take. Negative indexing is **not** supported.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ If the length of the string has fewer than `n` characters, the full string is returned.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> df_native = pa.table({"lyrics": ["taata", "taatatata", "zukkyun"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(lyrics_tail=nw.col("lyrics").str.tail()).to_native()
+ pyarrow.Table
+ lyrics: string
+ lyrics_tail: string
+ ----
+ lyrics: [["taata","taatatata","zukkyun"]]
+ lyrics_tail: [["taata","atata","kkyun"]]
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.slice(
+ offset=-n, length=None
+ )
+ )
+
+ def to_datetime(self, format: str | None = None) -> ExprT:
+ """Convert to Datetime dtype.
+
+ Notes:
+ - pandas defaults to nanosecond time unit, Polars to microsecond.
+ Prior to pandas 2.0, nanoseconds were the only time unit supported
+ in pandas, with no ability to set any other one. The ability to
+ set the time unit in pandas, if the version permits, will arrive.
+ - timezone-aware strings are all converted to and parsed as UTC.
+
+ Warning:
+ As different backends auto-infer format in different ways, if `format=None`
+ there is no guarantee that the result will be equal.
+
+ Arguments:
+ format: Format to use for conversion. If set to None (default), the format is
+ inferred from the data.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame({"a": ["2020-01-01", "2020-01-02"]})
+ >>> df = nw.from_native(df_native)
+ >>> df.select(nw.col("a").str.to_datetime(format="%Y-%m-%d"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ |shape: (2, 1) |
+ |┌─────────────────────┐|
+ |│ a │|
+ |│ --- │|
+ |│ datetime[μs] │|
+ |╞═════════════════════╡|
+ |│ 2020-01-01 00:00:00 │|
+ |│ 2020-01-02 00:00:00 │|
+ |└─────────────────────┘|
+ └───────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.to_datetime(format=format)
+ )
+
+ def to_uppercase(self) -> ExprT:
+ r"""Transform string to uppercase variant.
+
+ Returns:
+ A new expression.
+
+ Notes:
+ The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'.
+ For more info see [the related issue](https://github.com/apache/arrow/issues/34599).
+ There may be other unicode-edge-case-related variations across implementations.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"fruits": ["apple", None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(upper_col=nw.col("fruits").str.to_uppercase())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | fruits upper_col|
+ |0 apple APPLE|
+ |1 None None|
+ └──────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.to_uppercase()
+ )
+
+ def to_lowercase(self) -> ExprT:
+ r"""Transform string to lowercase variant.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> df_native = pd.DataFrame({"fruits": ["APPLE", None]})
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(lower_col=nw.col("fruits").str.to_lowercase())
+ ┌──────────────────┐
+ |Narwhals DataFrame|
+ |------------------|
+ | fruits lower_col|
+ |0 APPLE apple|
+ |1 None None|
+ └──────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).str.to_lowercase()
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/expr_struct.py b/venv/lib/python3.8/site-packages/narwhals/expr_struct.py
new file mode 100644
index 0000000..f09425f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/expr_struct.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic, TypeVar
+
+if TYPE_CHECKING:
+ from narwhals.expr import Expr
+
+ExprT = TypeVar("ExprT", bound="Expr")
+
+
+class ExprStructNamespace(Generic[ExprT]):
+ def __init__(self, expr: ExprT) -> None:
+ self._expr = expr
+
+ def field(self, name: str) -> ExprT:
+ r"""Retrieve a Struct field as a new expression.
+
+ Arguments:
+ name: Name of the struct field to retrieve.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> df_native = pl.DataFrame(
+ ... {"user": [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]}
+ ... )
+ >>> df = nw.from_native(df_native)
+ >>> df.with_columns(name=nw.col("user").struct.field("name"))
+ ┌───────────────────────┐
+ | Narwhals DataFrame |
+ |-----------------------|
+ |shape: (2, 2) |
+ |┌──────────────┬──────┐|
+ |│ user ┆ name │|
+ |│ --- ┆ --- │|
+ |│ struct[2] ┆ str │|
+ |╞══════════════╪══════╡|
+ |│ {"0","john"} ┆ john │|
+ |│ {"1","jane"} ┆ jane │|
+ |└──────────────┴──────┘|
+ └───────────────────────┘
+ """
+ return self._expr._with_elementwise_op(
+ lambda plx: self._expr._to_compliant_expr(plx).struct.field(name)
+ )
diff --git a/venv/lib/python3.8/site-packages/narwhals/functions.py b/venv/lib/python3.8/site-packages/narwhals/functions.py
new file mode 100644
index 0000000..b483236
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/functions.py
@@ -0,0 +1,1793 @@
+from __future__ import annotations
+
+import platform
+import sys
+from importlib.metadata import version
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence, cast
+
+from narwhals._expression_parsing import (
+ ExprKind,
+ ExprMetadata,
+ apply_n_ary_operation,
+ combine_metadata,
+ extract_compliant,
+ is_scalar_like,
+)
+from narwhals._utils import (
+ Implementation,
+ Version,
+ deprecate_native_namespace,
+ flatten,
+ is_compliant_expr,
+ is_eager_allowed,
+ is_sequence_but_not_str,
+ parse_version,
+ supports_arrow_c_stream,
+ validate_laziness,
+)
+from narwhals.dependencies import (
+ is_narwhals_series,
+ is_numpy_array,
+ is_numpy_array_2d,
+ is_pyarrow_table,
+)
+from narwhals.exceptions import InvalidOperationError, ShapeError
+from narwhals.expr import Expr
+from narwhals.translate import from_native, to_native
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ from typing_extensions import TypeAlias, TypeIs
+
+ from narwhals._compliant import CompliantExpr, CompliantNamespace
+ from narwhals._translate import IntoArrowTable
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.dtypes import DType
+ from narwhals.schema import Schema
+ from narwhals.series import Series
+ from narwhals.typing import (
+ ConcatMethod,
+ FrameT,
+ IntoDType,
+ IntoExpr,
+ IntoSeriesT,
+ NativeFrame,
+ NativeLazyFrame,
+ NativeSeries,
+ NonNestedLiteral,
+ _1DArray,
+ _2DArray,
+ )
+
+ _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None"
+
+
def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT:
    """Concatenate multiple DataFrames, LazyFrames into a single entity.

    Arguments:
        items: DataFrames, LazyFrames to concatenate.
        how: concatenating strategy

          - vertical: Concatenate vertically. Column names must match.
          - horizontal: Concatenate horizontally. If lengths don't match, then
            missing rows are filled with null values. This is only supported
            when all inputs are (eager) DataFrames.
          - diagonal: Finds a union between the column schemas and fills missing column
            values with null.

    Returns:
        A new DataFrame or LazyFrame resulting from the concatenation.

    Raises:
        TypeError: The items to concatenate should either all be eager, or all lazy
        ValueError: If `items` is empty.

    Examples:
        Let's take an example of vertical concatenation:

        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw

        Let's look at one case for vertical concatenation (pandas backed):

        >>> df_pd_1 = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
        >>> df_pd_2 = nw.from_native(pd.DataFrame({"a": [5, 2], "b": [1, 4]}))
        >>> nw.concat([df_pd_1, df_pd_2], how="vertical")
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |      a  b        |
        |   0  1  4        |
        |   1  2  5        |
        |   2  3  6        |
        |   0  5  1        |
        |   1  2  4        |
        └──────────────────┘

        Let's look at one case for horizontal concatenation (polars backed):

        >>> df_pl_1 = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
        >>> df_pl_2 = nw.from_native(pl.DataFrame({"c": [5, 2], "d": [1, 4]}))
        >>> nw.concat([df_pl_1, df_pl_2], how="horizontal")
        ┌───────────────────────────┐
        |    Narwhals DataFrame     |
        |---------------------------|
        |shape: (3, 4)              |
        |┌─────┬─────┬──────┬──────┐|
        |│ a   ┆ b   ┆ c    ┆ d    │|
        |│ --- ┆ --- ┆ ---  ┆ ---  │|
        |│ i64 ┆ i64 ┆ i64  ┆ i64  │|
        |╞═════╪═════╪══════╪══════╡|
        |│ 1   ┆ 4   ┆ 5    ┆ 1    │|
        |│ 2   ┆ 5   ┆ 2    ┆ 4    │|
        |│ 3   ┆ 6   ┆ null ┆ null │|
        |└─────┴─────┴──────┴──────┘|
        └───────────────────────────┘

        Let's look at one case for diagonal concatenation (pyarrow backed):

        >>> df_pa_1 = nw.from_native(pa.table({"a": [1, 2], "b": [3.5, 4.5]}))
        >>> df_pa_2 = nw.from_native(pa.table({"a": [3, 4], "z": ["x", "y"]}))
        >>> nw.concat([df_pa_1, df_pa_2], how="diagonal")
        ┌──────────────────────────┐
        |    Narwhals DataFrame    |
        |--------------------------|
        |pyarrow.Table             |
        |a: int64                  |
        |b: double                 |
        |z: string                 |
        |----                      |
        |a: [[1,2],[3,4]]          |
        |b: [[3.5,4.5],[null,null]]|
        |z: [[null,null],["x","y"]]|
        └──────────────────────────┘
    """
    # `items` may be any iterable, including a generator, and a generator is
    # always truthy -- materialize it *before* the emptiness check, otherwise
    # an exhausted/empty iterator would slip past and crash on `items[0]`.
    items = list(items)
    if not items:
        msg = "No items to concatenate."
        raise ValueError(msg)

    from narwhals.dependencies import is_narwhals_lazyframe

    # All inputs must be uniformly eager or uniformly lazy.
    validate_laziness(items)
    if how not in {"horizontal", "vertical", "diagonal"}:  # pragma: no cover
        msg = "Only vertical, horizontal and diagonal concatenations are supported."
        raise NotImplementedError(msg)
    first_item = items[0]
    if is_narwhals_lazyframe(first_item) and how == "horizontal":
        msg = (
            "Horizontal concatenation is not supported for LazyFrames.\n\n"
            "Hint: you may want to use `join` instead."
        )
        raise InvalidOperationError(msg)
    # Dispatch to the compliant namespace of the first input.
    plx = first_item.__narwhals_namespace__()
    return first_item._with_compliant(
        plx.concat([df._compliant_frame for df in items], how=how)
    )
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def new_series(
+    name: str,
+    values: Any,
+    dtype: IntoDType | None = None,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,  # noqa: ARG001
+) -> Series[Any]:
+    """Instantiate Narwhals Series from iterable (e.g. list or array).
+
+    Arguments:
+        name: Name of resulting Series.
+        values: Values to make Series from.
+        dtype: (Narwhals) dtype. If not provided, the native library
+            may auto-infer it from `values`.
+        backend: specifies which eager backend to use.
+
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.31.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+
+    Returns:
+        A new Series
+
+    Examples:
+        >>> import pandas as pd
+        >>> import narwhals as nw
+        >>>
+        >>> values = [4, 1, 2, 3]
+        >>> nw.new_series(name="a", values=values, dtype=nw.Int32, backend=pd)
+        ┌─────────────────────┐
+        |   Narwhals Series   |
+        |---------------------|
+        |0    4               |
+        |1    1               |
+        |2    2               |
+        |3    3               |
+        |Name: a, dtype: int32|
+        └─────────────────────┘
+    """
+    # The `deprecate_native_namespace(required=True)` decorator guarantees
+    # `backend` is populated by this point, hence the narrowing cast.
+    backend = cast("ModuleType | Implementation | str", backend)
+    return _new_series_impl(name, values, dtype, backend=backend)
+
+
+def _new_series_impl(
+    name: str,
+    values: Any,
+    dtype: IntoDType | None = None,
+    *,
+    backend: ModuleType | Implementation | str,
+) -> Series[Any]:
+    """Dispatch `new_series` construction to the resolved backend.
+
+    Raises:
+        ValueError: If the resolved backend is lazy-only.
+        AttributeError: If an UNKNOWN backend does not expose `new_series`.
+    """
+    implementation = Implementation.from_backend(backend)
+    if is_eager_allowed(implementation):
+        # Eager backends: build through the compliant namespace's Series type.
+        ns = Version.MAIN.namespace.from_backend(implementation).compliant
+        series = ns._series.from_iterable(values, name=name, context=ns, dtype=dtype)
+        return series.to_narwhals()
+    elif implementation is Implementation.UNKNOWN:  # pragma: no cover
+        # Third-party Narwhals extensions are expected to provide a
+        # `new_series` constructor in their top-level native namespace.
+        _native_namespace = implementation.to_native_namespace()
+        try:
+            native_series: NativeSeries = _native_namespace.new_series(
+                name, values, dtype
+            )
+            return from_native(native_series, series_only=True).alias(name)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `new_series` constructor."
+            raise AttributeError(msg) from e
+    # Remaining case: a recognized but lazy-only backend.
+    msg = (
+        f"{implementation} support in Narwhals is lazy-only, but `new_series` is an eager-only function.\n\n"
+        "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+        f"    nw.new_series('a', [1,2,3], backend='pyarrow').to_frame().lazy('{implementation}')"
+    )
+    raise ValueError(msg)
+
+
+@deprecate_native_namespace(warn_version="1.26.0")
+def from_dict(
+    data: Mapping[str, Any],
+    schema: Mapping[str, DType] | Schema | None = None,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,  # noqa: ARG001
+) -> DataFrame[Any]:
+    """Instantiate DataFrame from dictionary.
+
+    Indexes (if present, for pandas-like backends) are aligned following
+    the [left-hand-rule](../concepts/pandas_index.md/).
+
+    Notes:
+        For pandas-like dataframes, conversion to schema is applied after dataframe
+        creation.
+
+    Arguments:
+        data: Dictionary to create DataFrame from.
+        schema: The DataFrame schema as Schema or dict of {name: type}. If not
+            specified, the schema will be inferred by the native library.
+        backend: specifies which eager backend to use. Only
+            necessary if inputs are not Narwhals Series.
+
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.26.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+
+    Returns:
+        A new DataFrame.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import narwhals as nw
+        >>> data = {"c": [5, 2], "d": [1, 4]}
+        >>> nw.from_dict(data, backend="pandas")
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |      c  d        |
+        |   0  5  1        |
+        |   1  2  4        |
+        └──────────────────┘
+    """
+    if not data:
+        msg = "from_dict cannot be called with empty dictionary"
+        raise ValueError(msg)
+    if backend is None:
+        # No backend given: infer it from any Narwhals Series among the values.
+        data, backend = _from_dict_no_backend(data)
+    implementation = Implementation.from_backend(backend)
+    if is_eager_allowed(implementation):
+        ns = Version.MAIN.namespace.from_backend(implementation).compliant
+        return ns._dataframe.from_dict(data, schema=schema, context=ns).to_narwhals()
+    elif implementation is Implementation.UNKNOWN:  # pragma: no cover
+        _native_namespace = implementation.to_native_namespace()
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement `from_dict` function in the top-level namespace.
+            native_frame: NativeFrame = _native_namespace.from_dict(data, schema=schema)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `from_dict` function."
+            raise AttributeError(msg) from e
+        return from_native(native_frame, eager_only=True)
+    # Remaining case: a recognized but lazy-only backend.
+    msg = (
+        f"{implementation} support in Narwhals is lazy-only, but `from_dict` is an eager-only function.\n\n"
+        "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+        f"    nw.from_dict({{'a': [1, 2]}}, backend='pyarrow').lazy('{implementation}')"
+    )
+    raise ValueError(msg)
+
+
def _from_dict_no_backend(
    data: Mapping[str, Series[Any] | Any], /
) -> tuple[dict[str, Series[Any] | Any], ModuleType]:
    """Infer the native namespace from the first Narwhals Series value.

    Also unwraps every Narwhals value in `data` to its native counterpart.
    Raises `TypeError` when no value is a Narwhals Series.
    """
    first_series = next(
        (value for value in data.values() if is_narwhals_series(value)), None
    )
    if first_series is None:
        msg = "Calling `from_dict` without `backend` is only supported if all input values are already Narwhals Series"
        raise TypeError(msg)
    native_namespace = first_series.__native_namespace__()
    unwrapped = {key: to_native(value, pass_through=True) for key, value in data.items()}
    return unwrapped, native_namespace
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def from_numpy(
+    data: _2DArray,
+    schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,  # noqa: ARG001
+) -> DataFrame[Any]:
+    """Construct a DataFrame from a NumPy ndarray.
+
+    Notes:
+        Only row orientation is currently supported.
+
+        For pandas-like dataframes, conversion to schema is applied after dataframe
+        creation.
+
+    Arguments:
+        data: Two-dimensional data represented as a NumPy ndarray.
+        schema: The DataFrame schema as Schema, dict of {name: type}, or a sequence of str.
+        backend: specifies which eager backend to use.
+
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.31.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+
+    Returns:
+        A new DataFrame.
+
+    Examples:
+        >>> import numpy as np
+        >>> import pyarrow as pa
+        >>> import narwhals as nw
+        >>>
+        >>> arr = np.array([[5, 2, 1], [1, 4, 3]])
+        >>> schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()}
+        >>> nw.from_numpy(arr, schema=schema, backend="pyarrow")
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |  pyarrow.Table   |
+        |  c: int16        |
+        |  d: float        |
+        |  e: int8         |
+        |  ----            |
+        |  c: [[5,1]]      |
+        |  d: [[2,4]]      |
+        |  e: [[1,3]]      |
+        └──────────────────┘
+    """
+    backend = cast("ModuleType | Implementation | str", backend)
+    # Validate inputs eagerly so users get precise errors before dispatch.
+    if not is_numpy_array_2d(data):
+        msg = "`from_numpy` only accepts 2D numpy arrays"
+        raise ValueError(msg)
+    if not _is_into_schema(schema):
+        msg = (
+            "`schema` is expected to be one of the following types: "
+            "Mapping[str, DType] | Schema | Sequence[str]. "
+            f"Got {type(schema)}."
+        )
+        raise TypeError(msg)
+    implementation = Implementation.from_backend(backend)
+    if is_eager_allowed(implementation):
+        ns = Version.MAIN.namespace.from_backend(implementation).compliant
+        return ns.from_numpy(data, schema).to_narwhals()
+    elif implementation is Implementation.UNKNOWN:  # pragma: no cover
+        _native_namespace = implementation.to_native_namespace()
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement `from_numpy` function in the top-level namespace.
+            native_frame: NativeFrame = _native_namespace.from_numpy(data, schema=schema)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `from_numpy` function."
+            raise AttributeError(msg) from e
+        return from_native(native_frame, eager_only=True)
+    # Remaining case: a recognized but lazy-only backend.
+    msg = (
+        f"{implementation} support in Narwhals is lazy-only, but `from_numpy` is an eager-only function.\n\n"
+        "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+        f"    nw.from_numpy(arr, backend='pyarrow').lazy('{implementation}')"
+    )
+    raise ValueError(msg)
+
+
def _is_into_schema(obj: Any) -> TypeIs[_IntoSchema]:
    """Return True when `obj` is a valid `schema` argument.

    Accepted: None, a mapping/Schema of {name: dtype}, or a (non-str)
    sequence of column names.
    """
    from narwhals.schema import Schema

    if obj is None:
        return True
    if isinstance(obj, (Mapping, Schema)):
        return True
    return is_sequence_but_not_str(obj)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def from_arrow(
+    native_frame: IntoArrowTable,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,  # noqa: ARG001
+) -> DataFrame[Any]:  # pragma: no cover
+    """Construct a DataFrame from an object which supports the PyCapsule Interface.
+
+    Arguments:
+        native_frame: Object which implements `__arrow_c_stream__`.
+        backend: specifies which eager backend to use.
+
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.31.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+
+    Returns:
+        A new DataFrame.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>>
+        >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4.2, 5.1]})
+        >>> nw.from_arrow(df_native, backend="polars")
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |  shape: (2, 2)   |
+        |  ┌─────┬─────┐   |
+        |  │ a   ┆ b   │   |
+        |  │ --- ┆ --- │   |
+        |  │ i64 ┆ f64 │   |
+        |  ╞═════╪═════╡   |
+        |  │ 1   ┆ 4.2 │   |
+        |  │ 2   ┆ 5.1 │   |
+        |  └─────┴─────┘   |
+        └──────────────────┘
+    """
+    backend = cast("ModuleType | Implementation | str", backend)
+    # Either PyCapsule support or a plain pyarrow Table is acceptable input.
+    if not (supports_arrow_c_stream(native_frame) or is_pyarrow_table(native_frame)):
+        msg = f"Given object of type {type(native_frame)} does not support PyCapsule interface"
+        raise TypeError(msg)
+    implementation = Implementation.from_backend(backend)
+    if is_eager_allowed(implementation):
+        ns = Version.MAIN.namespace.from_backend(implementation).compliant
+        return ns._dataframe.from_arrow(native_frame, context=ns).to_narwhals()
+    elif implementation is Implementation.UNKNOWN:  # pragma: no cover
+        _native_namespace = implementation.to_native_namespace()
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement PyCapsule support
+            native: NativeFrame = _native_namespace.DataFrame(native_frame)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `DataFrame` class which accepts object which supports PyCapsule Interface."
+            raise AttributeError(msg) from e
+        return from_native(native, eager_only=True)
+    # Remaining case: a recognized but lazy-only backend.
+    msg = (
+        f"{implementation} support in Narwhals is lazy-only, but `from_arrow` is an eager-only function.\n\n"
+        "Hint: you may want to use an eager backend and then call `.lazy`, e.g.:\n\n"
+        f"    nw.from_arrow(df, backend='pyarrow').lazy('{implementation}')"
+    )
+    raise ValueError(msg)
+
+
+def _get_sys_info() -> dict[str, str]:
+ """System information.
+
+ Returns system and Python version information
+
+ Copied from sklearn
+
+ Returns:
+ Dictionary with system info.
+ """
+ python = sys.version.replace("\n", " ")
+
+ blob = (
+ ("python", python),
+ ("executable", sys.executable),
+ ("machine", platform.platform()),
+ )
+
+ return dict(blob)
+
+
def _get_deps_info() -> dict[str, str]:
    """Overview of the installed version of main dependencies.

    This function does not import the modules to collect the version numbers
    but instead relies on standard Python package metadata.

    Returns version information on relevant Python libraries

    This function and show_versions were copied from sklearn and adapted

    Returns:
        Mapping from dependency to version.
    """
    from importlib.metadata import PackageNotFoundError, version

    from narwhals import __version__

    deps_info = {"narwhals": __version__}
    for modname in ("pandas", "polars", "cudf", "modin", "pyarrow", "numpy"):
        try:
            deps_info[modname] = version(modname)
        except PackageNotFoundError:  # noqa: PERF203
            # Missing packages are reported as an empty string, not an error.
            deps_info[modname] = ""
    return deps_info
+
+
def show_versions() -> None:
    """Print useful debugging information.

    Examples:
        >>> from narwhals import show_versions
        >>> show_versions()  # doctest: +SKIP
    """
    print("\nSystem:")  # noqa: T201
    for key, value in _get_sys_info().items():
        print(f"{key:>10}: {value}")  # noqa: T201

    print("\nPython dependencies:")  # noqa: T201
    for key, value in _get_deps_info().items():
        print(f"{key:>13}: {value}")  # noqa: T201
+
+
+def get_level(
+    obj: DataFrame[Any] | LazyFrame[Any] | Series[IntoSeriesT],
+) -> Literal["full", "lazy", "interchange"]:
+    """Level of support Narwhals has for current object.
+
+    Arguments:
+        obj: Dataframe or Series.
+
+    Returns:
+        This can be one of
+
+        - 'full': full Narwhals API support
+        - 'lazy': only lazy operations are supported. This excludes anything
+            which involves iterating over rows in Python.
+        - 'interchange': only metadata operations are supported (`df.schema`)
+    """
+    # `_level` is stored directly on the Narwhals wrapper object.
+    return obj._level
+
+
+@deprecate_native_namespace(warn_version="1.27.2", required=True)
+def read_csv(
+    source: str,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,
+    **kwargs: Any,
+) -> DataFrame[Any]:
+    """Read a CSV file into a DataFrame.
+
+    Arguments:
+        source: Path to a file.
+        backend: The eager backend for DataFrame creation.
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.27.2)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+        kwargs: Extra keyword arguments which are passed to the native CSV reader.
+            For example, you could use
+            `nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.
+
+    Returns:
+        DataFrame.
+
+    Examples:
+        >>> import narwhals as nw
+        >>> nw.read_csv("file.csv", backend="pandas")  # doctest:+SKIP
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |      a  b        |
+        |   0  1  4        |
+        |   1  2  5        |
+        └──────────────────┘
+    """
+    backend = cast("ModuleType | Implementation | str", backend)
+    eager_backend = Implementation.from_backend(backend)
+    native_namespace = eager_backend.to_native_namespace()
+    native_frame: NativeFrame
+    if eager_backend in {
+        Implementation.POLARS,
+        Implementation.PANDAS,
+        Implementation.MODIN,
+        Implementation.CUDF,
+    }:
+        # These namespaces all expose a top-level `read_csv`.
+        native_frame = native_namespace.read_csv(source, **kwargs)
+    elif eager_backend is Implementation.PYARROW:
+        # pyarrow's CSV reader lives in the `pyarrow.csv` submodule.
+        from pyarrow import csv  # ignore-banned-import
+
+        native_frame = csv.read_csv(source, **kwargs)
+    else:  # pragma: no cover
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement `read_csv` function in the top-level namespace.
+            native_frame = native_namespace.read_csv(source=source, **kwargs)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `read_csv` function."
+            raise AttributeError(msg) from e
+    return from_native(native_frame, eager_only=True)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def scan_csv(
+    source: str,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,
+    **kwargs: Any,
+) -> LazyFrame[Any]:
+    """Lazily read from a CSV file.
+
+    For the libraries that do not support lazy dataframes, the function reads
+    a csv file eagerly and then converts the resulting dataframe to a lazyframe.
+
+    Arguments:
+        source: Path to a file.
+        backend: The backend to use for LazyFrame creation.
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.31.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+        kwargs: Extra keyword arguments which are passed to the native CSV reader.
+            For example, you could use
+            `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
+
+    Returns:
+        LazyFrame.
+
+    Examples:
+        >>> import duckdb
+        >>> import narwhals as nw
+        >>>
+        >>> nw.scan_csv("file.csv", backend="duckdb").to_native()  # doctest:+SKIP
+        ┌─────────┬───────┐
+        │    a    │   b   │
+        │ varchar │ int32 │
+        ├─────────┼───────┤
+        │ x       │     1 │
+        │ y       │     2 │
+        │ z       │     3 │
+        └─────────┴───────┘
+    """
+    backend = cast("ModuleType | Implementation | str", backend)
+    implementation = Implementation.from_backend(backend)
+    native_namespace = implementation.to_native_namespace()
+    native_frame: NativeFrame | NativeLazyFrame
+    if implementation is Implementation.POLARS:
+        # polars is the only backend with a native lazy `scan_csv`.
+        native_frame = native_namespace.scan_csv(source, **kwargs)
+    elif implementation in {
+        Implementation.PANDAS,
+        Implementation.MODIN,
+        Implementation.CUDF,
+        Implementation.DASK,
+        Implementation.DUCKDB,
+        Implementation.IBIS,
+    }:
+        # These backends read eagerly; the result is made lazy on return below.
+        native_frame = native_namespace.read_csv(source, **kwargs)
+    elif implementation is Implementation.PYARROW:
+        from pyarrow import csv  # ignore-banned-import
+
+        native_frame = csv.read_csv(source, **kwargs)
+    elif implementation.is_spark_like():
+        if (session := kwargs.pop("session", None)) is None:
+            msg = "Spark like backends require a session object to be passed in `kwargs`."
+            raise ValueError(msg)
+
+        csv_reader = session.read.format("csv")
+        # NOTE(review): sqlframe < 3.27.0 is loaded without reader options --
+        # presumably `.options(**kwargs)` was unsupported there; confirm.
+        native_frame = (
+            csv_reader.load(source)
+            if (
+                implementation is Implementation.SQLFRAME
+                and parse_version(version("sqlframe")) < (3, 27, 0)
+            )
+            else csv_reader.options(**kwargs).load(source)
+        )
+    else:  # pragma: no cover
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement `scan_csv` function in the top-level namespace.
+            native_frame = native_namespace.scan_csv(source=source, **kwargs)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `scan_csv` function."
+            raise AttributeError(msg) from e
+    return from_native(native_frame).lazy()
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def read_parquet(
+    source: str,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,
+    **kwargs: Any,
+) -> DataFrame[Any]:
+    """Read into a DataFrame from a parquet file.
+
+    Arguments:
+        source: Path to a file.
+        backend: The eager backend for DataFrame creation.
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.31.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+        kwargs: Extra keyword arguments which are passed to the native parquet reader.
+            For example, you could use
+            `nw.read_parquet('file.parquet', backend=pd, engine='pyarrow')`.
+
+    Returns:
+        DataFrame.
+
+    Examples:
+        >>> import pyarrow as pa
+        >>> import narwhals as nw
+        >>>
+        >>> nw.read_parquet("file.parquet", backend="pyarrow")  # doctest:+SKIP
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |pyarrow.Table     |
+        |a: int64          |
+        |c: double         |
+        |----              |
+        |a: [[1,2]]        |
+        |c: [[0.2,0.1]]    |
+        └──────────────────┘
+    """
+    backend = cast("ModuleType | Implementation | str", backend)
+    implementation = Implementation.from_backend(backend)
+    native_namespace = implementation.to_native_namespace()
+    native_frame: NativeFrame
+    if implementation in {
+        Implementation.POLARS,
+        Implementation.PANDAS,
+        Implementation.MODIN,
+        Implementation.CUDF,
+        Implementation.DUCKDB,
+        Implementation.IBIS,
+    }:
+        # These namespaces all expose a top-level `read_parquet`.
+        native_frame = native_namespace.read_parquet(source, **kwargs)
+    elif implementation is Implementation.PYARROW:
+        # pyarrow's parquet reader lives in the `pyarrow.parquet` submodule.
+        import pyarrow.parquet as pq  # ignore-banned-import
+
+        native_frame = pq.read_table(source, **kwargs)
+    else:  # pragma: no cover
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement `read_parquet` function in the top-level namespace.
+            native_frame = native_namespace.read_parquet(source=source, **kwargs)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `read_parquet` function."
+            raise AttributeError(msg) from e
+    return from_native(native_frame, eager_only=True)
+
+
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
+def scan_parquet(
+    source: str,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,
+    **kwargs: Any,
+) -> LazyFrame[Any]:
+    """Lazily read from a parquet file.
+
+    For the libraries that do not support lazy dataframes, the function reads
+    a parquet file eagerly and then converts the resulting dataframe to a lazyframe.
+
+    Note:
+        Spark like backends require a session object to be passed in `kwargs`.
+
+        For instance:
+
+        ```py
+        import narwhals as nw
+        from sqlframe.duckdb import DuckDBSession
+
+        nw.scan_parquet(source, backend="sqlframe", session=DuckDBSession())
+        ```
+
+    Arguments:
+        source: Path to a file.
+        backend: The backend to use for LazyFrame creation.
+            `backend` can be specified in various ways
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN`, `CUDF`, `PYSPARK` or `SQLFRAME`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"`, `"cudf"`,
+                `"pyspark"` or `"sqlframe"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin`, `cudf`,
+                `pyspark.sql` or `sqlframe`.
+        native_namespace: The native library to use for DataFrame creation.
+
+            *Deprecated* (v1.31.0)
+
+            Please use `backend` instead. Note that `native_namespace` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+        kwargs: Extra keyword arguments which are passed to the native parquet reader.
+            For example, you could use
+            `nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`.
+
+    Returns:
+        LazyFrame.
+
+    Examples:
+        >>> import dask.dataframe as dd
+        >>> from sqlframe.duckdb import DuckDBSession
+        >>> import narwhals as nw
+        >>>
+        >>> nw.scan_parquet("file.parquet", backend="dask").collect()  # doctest:+SKIP
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |      a  b        |
+        |   0  1  4        |
+        |   1  2  5        |
+        └──────────────────┘
+        >>> nw.scan_parquet(
+        ...     "file.parquet", backend="sqlframe", session=DuckDBSession()
+        ... ).collect()  # doctest:+SKIP
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |  pyarrow.Table   |
+        |  a: int64        |
+        |  b: int64        |
+        |  ----            |
+        |  a: [[1,2]]      |
+        |  b: [[4,5]]      |
+        └──────────────────┘
+    """
+    backend = cast("ModuleType | Implementation | str", backend)
+    implementation = Implementation.from_backend(backend)
+    native_namespace = implementation.to_native_namespace()
+    native_frame: NativeFrame | NativeLazyFrame
+    if implementation is Implementation.POLARS:
+        # polars is the only backend with a native lazy `scan_parquet`.
+        native_frame = native_namespace.scan_parquet(source, **kwargs)
+    elif implementation in {
+        Implementation.PANDAS,
+        Implementation.MODIN,
+        Implementation.CUDF,
+        Implementation.DASK,
+        Implementation.DUCKDB,
+        Implementation.IBIS,
+    }:
+        # These backends read eagerly; the result is made lazy on return below.
+        native_frame = native_namespace.read_parquet(source, **kwargs)
+    elif implementation is Implementation.PYARROW:
+        import pyarrow.parquet as pq  # ignore-banned-import
+
+        native_frame = pq.read_table(source, **kwargs)
+    elif implementation.is_spark_like():
+        if (session := kwargs.pop("session", None)) is None:
+            msg = "Spark like backends require a session object to be passed in `kwargs`."
+            raise ValueError(msg)
+
+        pq_reader = session.read.format("parquet")
+        # NOTE(review): sqlframe < 3.27.0 is loaded without reader options --
+        # presumably `.options(**kwargs)` was unsupported there; confirm.
+        native_frame = (
+            pq_reader.load(source)
+            if (
+                implementation is Implementation.SQLFRAME
+                and parse_version(version("sqlframe")) < (3, 27, 0)
+            )
+            else pq_reader.options(**kwargs).load(source)
+        )
+
+    else:  # pragma: no cover
+        try:
+            # implementation is UNKNOWN, Narwhals extension using this feature should
+            # implement `scan_parquet` function in the top-level namespace.
+            native_frame = native_namespace.scan_parquet(source=source, **kwargs)
+        except AttributeError as e:
+            msg = "Unknown namespace is expected to implement `scan_parquet` function."
+            raise AttributeError(msg) from e
+    return from_native(native_frame).lazy()
+
+
def col(*names: str | Iterable[str]) -> Expr:
    """Create an expression that refers to one or more columns by name.

    Arguments:
        names: Name(s) of the columns to use.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
        >>> nw.from_native(df_native).select(nw.col("a", "b") * nw.col("b"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | shape: (2, 2) |
        | ┌─────┬─────┐ |
        | │ a ┆ b │ |
        | │ --- ┆ --- │ |
        | │ i64 ┆ i64 │ |
        | ╞═════╪═════╡ |
        | │ 3 ┆ 9 │ |
        | │ 8 ┆ 16 │ |
        | └─────┴─────┘ |
        └──────────────────┘
    """
    column_names = flatten(names)
    # A single name is a single-output selector; several names select multiple
    # named outputs.
    metadata = (
        ExprMetadata.selector_single()
        if len(column_names) == 1
        else ExprMetadata.selector_multi_named()
    )
    return Expr(lambda plx: plx.col(*column_names), metadata)
+
+
def exclude(*names: str | Iterable[str]) -> Expr:
    """Create an expression selecting every column except the given one(s).

    Arguments:
        names: Name(s) of the columns to exclude.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
        >>> nw.from_native(df_native).select(nw.exclude("c", "a"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | shape: (2, 1) |
        | ┌─────┐ |
        | │ b │ |
        | │ --- │ |
        | │ i64 │ |
        | ╞═════╡ |
        | │ 3 │ |
        | │ 4 │ |
        | └─────┘ |
        └──────────────────┘
    """
    # frozenset: order is irrelevant for exclusion and duplicates collapse.
    excluded = frozenset(flatten(names))
    return Expr(lambda plx: plx.exclude(excluded), ExprMetadata.selector_multi_unnamed())
+
+
def nth(*indices: int | Sequence[int]) -> Expr:
    """Create an expression that refers to one or more columns by position.

    Notes:
        `nth` is not supported for Polars version<1.0.0. Please use
        [`narwhals.col`][] instead.

    Arguments:
        indices: One or more indices representing the columns to retrieve.

    Returns:
        A new expression.

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> df_native = pa.table({"a": [1, 2], "b": [3, 4], "c": [0.123, 3.14]})
        >>> nw.from_native(df_native).select(nw.nth(0, 2) * 2)
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |pyarrow.Table |
        |a: int64 |
        |c: double |
        |---- |
        |a: [[2,4]] |
        |c: [[0.246,6.28]] |
        └──────────────────┘
    """
    positions = flatten(indices)
    # Positional selection can never carry names, hence the "unnamed" metadata
    # for the multi-output case.
    metadata = (
        ExprMetadata.selector_single()
        if len(positions) == 1
        else ExprMetadata.selector_multi_unnamed()
    )
    return Expr(lambda plx: plx.nth(*positions), metadata)
+
+
# Add underscore so it doesn't conflict with builtin `all`
def all_() -> Expr:
    """Create an expression representing every column.

    Returns:
        A new expression.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> df_native = pd.DataFrame({"a": [1, 2], "b": [3.14, 0.123]})
        >>> nw.from_native(df_native).select(nw.all() * 2)
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | a b |
        | 0 2 6.280 |
        | 1 4 0.246 |
        └──────────────────┘
    """

    def func(plx: Any) -> Any:
        return plx.all()

    return Expr(func, ExprMetadata.selector_multi_unnamed())
+
+
# Add underscore so it doesn't conflict with builtin `len`
def len_() -> Expr:
    """Create an expression returning the row count.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [1, 2], "b": [5, None]})
        >>> nw.from_native(df_native).select(nw.len())
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | shape: (1, 1) |
        | ┌─────┐ |
        | │ len │ |
        | │ --- │ |
        | │ u32 │ |
        | ╞═════╡ |
        | │ 2 │ |
        | └─────┘ |
        └──────────────────┘
    """
    # Counting rows collapses the frame to a single value: an aggregation.
    return Expr(lambda plx: plx.len(), ExprMetadata.aggregation())
+
+
def sum(*columns: str) -> Expr:
    """Sum all values of the given columns.

    Note:
        Shorthand for ``nw.col(columns).sum()``

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function

    Returns:
        A new expression.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> df_native = pd.DataFrame({"a": [1, 2], "b": [-1.4, 6.2]})
        >>> nw.from_native(df_native).select(nw.sum("a", "b"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | a b |
        | 0 3 4.8 |
        └──────────────────┘
    """
    selected = col(*columns)
    return selected.sum()
+
+
def mean(*columns: str) -> Expr:
    """Compute the mean of the given columns.

    Note:
        Shorthand for ``nw.col(columns).mean()``

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function

    Returns:
        A new expression.

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> df_native = pa.table({"a": [1, 8, 3], "b": [3.14, 6.28, 42.1]})
        >>> nw.from_native(df_native).select(nw.mean("a", "b"))
        ┌─────────────────────────┐
        | Narwhals DataFrame |
        |-------------------------|
        |pyarrow.Table |
        |a: double |
        |b: double |
        |---- |
        |a: [[4]] |
        |b: [[17.173333333333336]]|
        └─────────────────────────┘
    """
    selected = col(*columns)
    return selected.mean()
+
+
def median(*columns: str) -> Expr:
    """Compute the median of the given columns.

    Notes:
        - Shorthand for ``nw.col(columns).median()``
        - Results might slightly differ across backends due to differences in the
          underlying algorithms used to compute the median.

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [4, 5, 2]})
        >>> nw.from_native(df_native).select(nw.median("a"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | shape: (1, 1) |
        | ┌─────┐ |
        | │ a │ |
        | │ --- │ |
        | │ f64 │ |
        | ╞═════╡ |
        | │ 4.0 │ |
        | └─────┘ |
        └──────────────────┘
    """
    selected = col(*columns)
    return selected.median()
+
+
def min(*columns: str) -> Expr:
    """Compute the minimum of the given columns.

    Note:
        Shorthand for ``nw.col(columns).min()``.

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function.

    Returns:
        A new expression.

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> df_native = pa.table({"a": [1, 2], "b": [5, 10]})
        >>> nw.from_native(df_native).select(nw.min("a", "b"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | pyarrow.Table |
        | a: int64 |
        | b: int64 |
        | ---- |
        | a: [[1]] |
        | b: [[5]] |
        └──────────────────┘
    """
    selected = col(*columns)
    return selected.min()
+
+
def max(*columns: str) -> Expr:
    """Compute the maximum of the given columns.

    Note:
        Shorthand for ``nw.col(columns).max()``.

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function.

    Returns:
        A new expression.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> df_native = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
        >>> nw.from_native(df_native).select(nw.max("a", "b"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | a b |
        | 0 2 10 |
        └──────────────────┘
    """
    selected = col(*columns)
    return selected.max()
+
+
def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Sum values row-wise across the given columns.

    Warning:
        Unlike Polars, we support horizontal sum over numeric columns only.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, None]})
        >>> nw.from_native(df_native).with_columns(sum=nw.sum_horizontal("a", "b"))
        ┌────────────────────┐
        | Narwhals DataFrame |
        |--------------------|
        |shape: (3, 3) |
        |┌─────┬──────┬─────┐|
        |│ a ┆ b ┆ sum │|
        |│ --- ┆ --- ┆ --- │|
        |│ i64 ┆ i64 ┆ i64 │|
        |╞═════╪══════╪═════╡|
        |│ 1 ┆ 5 ┆ 6 │|
        |│ 2 ┆ 10 ┆ 12 │|
        |│ 3 ┆ null ┆ 3 │|
        |└─────┴──────┴─────┘|
        └────────────────────┘
    """
    if not exprs:
        msg = "At least one expression must be passed to `sum_horizontal`"
        raise ValueError(msg)
    flat_exprs = flatten(exprs)

    def func(plx: Any) -> Any:
        return apply_n_ary_operation(
            plx, plx.sum_horizontal, *flat_exprs, str_as_lit=False
        )

    return Expr(func, ExprMetadata.from_horizontal_op(*flat_exprs))
+
+
def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Take the row-wise minimum across the given columns.

    Notes:
        We support `min_horizontal` over numeric columns only.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> df_native = pa.table({"a": [1, 8, 3], "b": [4, 5, None]})
        >>> nw.from_native(df_native).with_columns(h_min=nw.min_horizontal("a", "b"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | pyarrow.Table |
        | a: int64 |
        | b: int64 |
        | h_min: int64 |
        | ---- |
        | a: [[1,8,3]] |
        | b: [[4,5,null]] |
        | h_min: [[1,5,3]] |
        └──────────────────┘
    """
    if not exprs:
        msg = "At least one expression must be passed to `min_horizontal`"
        raise ValueError(msg)
    flat_exprs = flatten(exprs)

    def func(plx: Any) -> Any:
        return apply_n_ary_operation(
            plx, plx.min_horizontal, *flat_exprs, str_as_lit=False
        )

    return Expr(func, ExprMetadata.from_horizontal_op(*flat_exprs))
+
+
def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Take the row-wise maximum across the given columns.

    Notes:
        We support `max_horizontal` over numeric columns only.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, None]})
        >>> nw.from_native(df_native).with_columns(h_max=nw.max_horizontal("a", "b"))
        ┌──────────────────────┐
        | Narwhals DataFrame |
        |----------------------|
        |shape: (3, 3) |
        |┌─────┬──────┬───────┐|
        |│ a ┆ b ┆ h_max │|
        |│ --- ┆ --- ┆ --- │|
        |│ i64 ┆ i64 ┆ i64 │|
        |╞═════╪══════╪═══════╡|
        |│ 1 ┆ 4 ┆ 4 │|
        |│ 8 ┆ 5 ┆ 8 │|
        |│ 3 ┆ null ┆ 3 │|
        |└─────┴──────┴───────┘|
        └──────────────────────┘
    """
    if not exprs:
        msg = "At least one expression must be passed to `max_horizontal`"
        raise ValueError(msg)
    flat_exprs = flatten(exprs)

    def func(plx: Any) -> Any:
        return apply_n_ary_operation(
            plx, plx.max_horizontal, *flat_exprs, str_as_lit=False
        )

    return Expr(func, ExprMetadata.from_horizontal_op(*flat_exprs))
+
+
class When:
    """Intermediate state of a `when-then-otherwise` expression.

    Created by `narwhals.when`; call `then` to obtain a `Then` expression.
    """

    def __init__(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> None:
        # Multiple predicates are implicitly combined with `&`, matching the
        # documented behavior of `nw.when`.
        self._predicate = all_horizontal(*flatten(predicates))

    def then(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Then:
        """Attach the value to produce where the predicate holds.

        Arguments:
            value: Expression, column name, non-nested literal, or 1-D array.

        Returns:
            A `Then` expression, on which `.otherwise` may be called.

        Raises:
            ShapeError: If the predicate is scalar-like but `value` is not.
        """
        kind = ExprKind.from_into_expr(value, str_as_lit=False)
        # A scalar-like predicate yields a single row, so a length-preserving
        # `then` value would be shape-incompatible.
        if self._predicate._metadata.is_scalar_like and not kind.is_scalar_like:
            msg = (
                "If you pass a scalar-like predicate to `nw.when`, then "
                "the `then` value must also be scalar-like."
            )
            raise ShapeError(msg)

        return Then(
            # args[0] is the combined predicate, args[1] the `then` value,
            # both lowered to the backend's compliant expressions.
            lambda plx: apply_n_ary_operation(
                plx,
                lambda *args: plx.when(args[0]).then(args[1]),
                self._predicate,
                value,
                str_as_lit=False,
            ),
            combine_metadata(
                self._predicate,
                value,
                str_as_lit=False,
                allow_multi_output=False,
                to_single_output=False,
            ),
        )
+
+
class Then(Expr):
    """Expression produced by `When.then`; adds the `otherwise` fallback."""

    def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr:
        """Attach the value to produce where the predicate does not hold.

        Arguments:
            value: Expression, column name, non-nested literal, or 1-D array.

        Returns:
            A new expression.

        Raises:
            ShapeError: If this expression is scalar-like but `value` is not.
        """
        kind = ExprKind.from_into_expr(value, str_as_lit=False)
        if self._metadata.is_scalar_like and not is_scalar_like(kind):
            msg = (
                "If you pass a scalar-like predicate to `nw.when`, then "
                "the `otherwise` value must also be scalar-like."
            )
            raise ShapeError(msg)

        def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]:
            compliant_expr = self._to_compliant_expr(plx)
            compliant_value = extract_compliant(plx, value, str_as_lit=False)
            # A scalar-like fallback paired with a length-preserving
            # when/then must be broadcast to the frame's length first.
            if (
                not self._metadata.is_scalar_like
                and is_scalar_like(kind)
                and is_compliant_expr(compliant_value)
            ):
                compliant_value = compliant_value.broadcast(kind)
            return compliant_expr.otherwise(compliant_value)  # type: ignore[attr-defined, no-any-return]

        return Expr(
            func,
            combine_metadata(
                self,
                value,
                str_as_lit=False,
                allow_multi_output=False,
                to_single_output=False,
            ),
        )
+
+
def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When:
    """Begin a `when-then-otherwise` expression.

    Expression similar to an `if-else` statement in Python. Always initiated by a
    `pl.when(<condition>).then(<value if condition>)`, and optionally followed by a
    `.otherwise(<value if condition is false>)` can be appended at the end. If not
    appended, and the condition is not `True`, `None` will be returned.

    Info:
        Chaining multiple `.when(<condition>).then(<value>)` statements is currently
        not supported.
        See [Narwhals#668](https://github.com/narwhals-dev/narwhals/issues/668).

    Arguments:
        predicates: Condition(s) that must be met in order to apply the subsequent
            statement. Accepts one or more boolean expressions, which are implicitly
            combined with `&`. String input is parsed as a column name.

    Returns:
        A "when" object, which `.then` can be called on.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]}
        >>> df_native = pd.DataFrame(data)
        >>> nw.from_native(df_native).with_columns(
        ...     nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")
        ... )
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | a b a_when |
        | 0 1 5 5 |
        | 1 2 10 5 |
        | 2 3 15 6 |
        └──────────────────┘
    """
    # All predicate handling (flattening, AND-combining) lives in `When`.
    return When(*predicates)
+
+
def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    r"""Compute the bitwise AND row-wise across the given columns.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [False, False, True, True, False, None],
        ...     "b": [False, True, True, None, None, None],
        ... }
        >>> df_native = pa.table(data)
        >>> nw.from_native(df_native).select("a", "b", all=nw.all_horizontal("a", "b"))
        ┌─────────────────────────────────────────┐
        | Narwhals DataFrame |
        |-----------------------------------------|
        |pyarrow.Table |
        |a: bool |
        |b: bool |
        |all: bool |
        |---- |
        |a: [[false,false,true,true,false,null]] |
        |b: [[false,true,true,null,null,null]] |
        |all: [[false,false,true,null,false,null]]|
        └─────────────────────────────────────────┘

    """
    if not exprs:
        msg = "At least one expression must be passed to `all_horizontal`"
        raise ValueError(msg)
    flat_exprs = flatten(exprs)

    def func(plx: Any) -> Any:
        return apply_n_ary_operation(
            plx, plx.all_horizontal, *flat_exprs, str_as_lit=False
        )

    return Expr(func, ExprMetadata.from_horizontal_op(*flat_exprs))
+
+
def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr:
    """Create an expression representing a literal value.

    Arguments:
        value: The value to use as literal.
        dtype: The data type of the literal value. If not provided, the data type will
            be inferred by the native library.

    Returns:
        A new expression.

    Raises:
        ValueError: If `value` is a numpy array.
        NotImplementedError: If `value` is a list or tuple.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> df_native = pd.DataFrame({"a": [1, 2]})
        >>> nw.from_native(df_native).with_columns(nw.lit(3))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | a literal |
        | 0 1 3 |
        | 1 2 3 |
        └──────────────────┘
    """
    # Reject inputs we cannot represent as a scalar literal: arrays belong in
    # `with_columns`, and nested values are not supported yet.
    if is_numpy_array(value):
        msg = (
            "numpy arrays are not supported as literal values. "
            "Consider using `with_columns` to create a new column from the array."
        )
        raise ValueError(msg)
    if isinstance(value, (list, tuple)):
        msg = f"Nested datatypes are not supported yet. Got {value}"
        raise NotImplementedError(msg)

    def func(plx: Any) -> Any:
        return plx.lit(value, dtype)

    return Expr(func, ExprMetadata.literal())
+
+
def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    r"""Compute the bitwise OR row-wise across the given columns.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [False, False, True, True, False, None],
        ...     "b": [False, True, True, None, None, None],
        ... }
        >>> df_native = pl.DataFrame(data)
        >>> nw.from_native(df_native).select("a", "b", any=nw.any_horizontal("a", "b"))
        ┌─────────────────────────┐
        | Narwhals DataFrame |
        |-------------------------|
        |shape: (6, 3) |
        |┌───────┬───────┬───────┐|
        |│ a ┆ b ┆ any │|
        |│ --- ┆ --- ┆ --- │|
        |│ bool ┆ bool ┆ bool │|
        |╞═══════╪═══════╪═══════╡|
        |│ false ┆ false ┆ false │|
        |│ false ┆ true ┆ true │|
        |│ true ┆ true ┆ true │|
        |│ true ┆ null ┆ true │|
        |│ false ┆ null ┆ null │|
        |│ null ┆ null ┆ null │|
        |└───────┴───────┴───────┘|
        └─────────────────────────┘
    """
    if not exprs:
        msg = "At least one expression must be passed to `any_horizontal`"
        raise ValueError(msg)
    flat_exprs = flatten(exprs)

    def func(plx: Any) -> Any:
        return apply_n_ary_operation(
            plx, plx.any_horizontal, *flat_exprs, str_as_lit=False
        )

    return Expr(func, ExprMetadata.from_horizontal_op(*flat_exprs))
+
+
def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Compute the row-wise mean across the given columns.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>>
        >>> data = {"a": [1, 8, 3], "b": [4, 5, None], "c": ["x", "y", "z"]}
        >>> df_native = pa.table(data)

        We define a dataframe-agnostic function that computes the horizontal mean of "a"
        and "b" columns:

        >>> nw.from_native(df_native).select(nw.mean_horizontal("a", "b"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | pyarrow.Table |
        | a: double |
        | ---- |
        | a: [[2.5,6.5,3]] |
        └──────────────────┘
    """
    if not exprs:
        msg = "At least one expression must be passed to `mean_horizontal`"
        raise ValueError(msg)
    flat_exprs = flatten(exprs)

    def func(plx: Any) -> Any:
        return apply_n_ary_operation(
            plx, plx.mean_horizontal, *flat_exprs, str_as_lit=False
        )

    return Expr(func, ExprMetadata.from_horizontal_op(*flat_exprs))
+
+
def concat_str(
    exprs: IntoExpr | Iterable[IntoExpr],
    *more_exprs: IntoExpr,
    separator: str = "",
    ignore_nulls: bool = False,
) -> Expr:
    r"""Concatenate columns row-wise into a single string column.

    Arguments:
        exprs: Columns to concatenate into a single string column. Accepts expression
            input. Strings are parsed as column names, other non-expression inputs are
            parsed as literals. Non-`String` columns are cast to `String`.
        *more_exprs: Additional columns to concatenate into a single string column,
            specified as positional arguments.
        separator: String that will be used to separate the values of each column.
        ignore_nulls: Ignore null values (default is `False`).
            If set to `False`, null values will be propagated and if the row contains any
            null values, the output is null.

    Returns:
        A new expression.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [1, 2, 3],
        ...     "b": ["dogs", "cats", None],
        ...     "c": ["play", "swim", "walk"],
        ... }
        >>> df_native = pd.DataFrame(data)
        >>> (
        ...     nw.from_native(df_native).select(
        ...         nw.concat_str(
        ...             [nw.col("a") * 2, nw.col("b"), nw.col("c")], separator=" "
        ...         ).alias("full_sentence")
        ...     )
        ... )
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        | full_sentence |
        | 0 2 dogs play |
        | 1 4 cats swim |
        | 2 None |
        └──────────────────┘
    """
    # The first argument may itself be an iterable of expressions; flatten it
    # together with any positionally-supplied extras.
    flat_exprs = flatten([*flatten([exprs]), *more_exprs])

    def func(plx: Any) -> Any:
        def _concat(*args: Any) -> Any:
            return plx.concat_str(*args, separator=separator, ignore_nulls=ignore_nulls)

        return apply_n_ary_operation(plx, _concat, *flat_exprs, str_as_lit=False)

    return Expr(
        func,
        combine_metadata(
            *flat_exprs, str_as_lit=False, allow_multi_output=True, to_single_output=True
        ),
    )
diff --git a/venv/lib/python3.8/site-packages/narwhals/group_by.py b/venv/lib/python3.8/site-packages/narwhals/group_by.py
new file mode 100644
index 0000000..6a06a17
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/group_by.py
@@ -0,0 +1,190 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Generic, Iterable, Iterator, Sequence, TypeVar
+
+from narwhals._expression_parsing import all_exprs_are_scalar_like
+from narwhals._utils import flatten, tupleify
+from narwhals.exceptions import InvalidOperationError
+from narwhals.typing import DataFrameT
+
+if TYPE_CHECKING:
+ from narwhals._compliant.typing import CompliantExprAny
+ from narwhals.dataframe import LazyFrame
+ from narwhals.expr import Expr
+
+LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")
+
+
class GroupBy(Generic[DataFrameT]):
    """Eager group-by object wrapping a compliant backend group-by."""

    def __init__(
        self,
        df: DataFrameT,
        keys: Sequence[str] | Sequence[CompliantExprAny],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        # Keep a handle on the narwhals-level frame; the actual grouping is
        # delegated to the backend's compliant frame.
        self._df: DataFrameT = df
        self._keys = keys
        self._grouped = self._df._compliant_frame.group_by(
            self._keys, drop_null_keys=drop_null_keys
        )

    def agg(self, *aggs: Expr | Iterable[Expr], **named_aggs: Expr) -> DataFrameT:
        """Compute aggregations for each group of a group by operation.

        Arguments:
            aggs: Aggregations to compute for each group of the group by operation,
                specified as positional arguments.
            named_aggs: Additional aggregations, specified as keyword arguments.

        Returns:
            A new Dataframe.

        Raises:
            InvalidOperationError: If any expression does not aggregate.

        Examples:
            Group by one column or by multiple columns and call `agg` to compute
            the grouped sum of another column.

            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame(
            ...     {
            ...         "a": ["a", "b", "a", "b", "c"],
            ...         "b": [1, 2, 1, 3, 3],
            ...         "c": [5, 4, 3, 2, 1],
            ...     }
            ... )
            >>> df = nw.from_native(df_native)
            >>>
            >>> df.group_by("a").agg(nw.col("b").sum()).sort("a")
            ┌──────────────────┐
            |Narwhals DataFrame|
            |------------------|
            | a b |
            | 0 a 2 |
            | 1 b 5 |
            | 2 c 3 |
            └──────────────────┘
            >>>
            >>> df.group_by("a", "b").agg(nw.col("c").sum()).sort("a", "b").to_native()
            a b c
            0 a 1 8
            1 b 2 4
            2 b 3 2
            3 c 3 1
        """
        flat_aggs = tuple(flatten(aggs))
        # Reject length-preserving expressions up front: each output must be a
        # single value per group.
        if not all_exprs_are_scalar_like(*flat_aggs, **named_aggs):
            msg = (
                "Found expression which does not aggregate.\n\n"
                "All expressions passed to GroupBy.agg must aggregate.\n"
                "For example, `df.group_by('a').agg(nw.col('b').sum())` is valid,\n"
                "but `df.group_by('a').agg(nw.col('b'))` is not."
            )
            raise InvalidOperationError(msg)
        plx = self._df.__narwhals_namespace__()
        # Lower each narwhals expression to the backend's compliant expression;
        # keyword aggregations are aliased to their keyword name.
        compliant_aggs = (
            *(x._to_compliant_expr(plx) for x in flat_aggs),
            *(
                value.alias(key)._to_compliant_expr(plx)
                for key, value in named_aggs.items()
            ),
        )
        return self._df._with_compliant(self._grouped.agg(*compliant_aggs))

    def __iter__(self) -> Iterator[tuple[Any, DataFrameT]]:
        # Yield (key, sub-frame) pairs; `tupleify` normalizes the group key —
        # presumably so single keys come out as 1-tuples, TODO confirm.
        yield from (
            (tupleify(key), self._df._with_compliant(df))
            for (key, df) in self._grouped.__iter__()
        )
+
+
class LazyGroupBy(Generic[LazyFrameT]):
    """Lazy group-by object wrapping a compliant backend group-by.

    Mirrors `GroupBy`, but operates on a `LazyFrame` and is not iterable.
    """

    def __init__(
        self,
        df: LazyFrameT,
        keys: Sequence[str] | Sequence[CompliantExprAny],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        # Grouping itself is delegated to the backend's compliant lazy frame.
        self._df: LazyFrameT = df
        self._keys = keys
        self._grouped = self._df._compliant_frame.group_by(
            self._keys, drop_null_keys=drop_null_keys
        )

    def agg(self, *aggs: Expr | Iterable[Expr], **named_aggs: Expr) -> LazyFrameT:
        """Compute aggregations for each group of a group by operation.

        Arguments:
            aggs: Aggregations to compute for each group of the group by operation,
                specified as positional arguments.
            named_aggs: Additional aggregations, specified as keyword arguments.

        Returns:
            A new LazyFrame.

        Raises:
            InvalidOperationError: If any expression does not aggregate.

        Examples:
            Group by one column or by multiple columns and call `agg` to compute
            the grouped sum of another column.

            >>> import polars as pl
            >>> import narwhals as nw
            >>> from narwhals.typing import IntoFrameT
            >>> lf_native = pl.LazyFrame(
            ...     {
            ...         "a": ["a", "b", "a", "b", "c"],
            ...         "b": [1, 2, 1, 3, 3],
            ...         "c": [5, 4, 3, 2, 1],
            ...     }
            ... )
            >>> lf = nw.from_native(lf_native)
            >>>
            >>> nw.to_native(lf.group_by("a").agg(nw.col("b").sum()).sort("a")).collect()
            shape: (3, 2)
            ┌─────┬─────┐
            │ a ┆ b │
            │ --- ┆ --- │
            │ str ┆ i64 │
            ╞═════╪═════╡
            │ a ┆ 2 │
            │ b ┆ 5 │
            │ c ┆ 3 │
            └─────┴─────┘
            >>>
            >>> lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b").collect()
            ┌───────────────────┐
            |Narwhals DataFrame |
            |-------------------|
            |shape: (4, 3) |
            |┌─────┬─────┬─────┐|
            |│ a ┆ b ┆ c │|
            |│ --- ┆ --- ┆ --- │|
            |│ str ┆ i64 ┆ i64 │|
            |╞═════╪═════╪═════╡|
            |│ a ┆ 1 ┆ 8 │|
            |│ b ┆ 2 ┆ 4 │|
            |│ b ┆ 3 ┆ 2 │|
            |│ c ┆ 3 ┆ 1 │|
            |└─────┴─────┴─────┘|
            └───────────────────┘
        """
        flat_aggs = tuple(flatten(aggs))
        # Same validation as GroupBy.agg: every expression must reduce each
        # group to a single value.
        if not all_exprs_are_scalar_like(*flat_aggs, **named_aggs):
            msg = (
                "Found expression which does not aggregate.\n\n"
                "All expressions passed to GroupBy.agg must aggregate.\n"
                "For example, `df.group_by('a').agg(nw.col('b').sum())` is valid,\n"
                "but `df.group_by('a').agg(nw.col('b'))` is not."
            )
            raise InvalidOperationError(msg)
        plx = self._df.__narwhals_namespace__()
        # Lower positional expressions as-is; keyword aggregations are aliased
        # to their keyword name before lowering.
        compliant_aggs = (
            *(x._to_compliant_expr(plx) for x in flat_aggs),
            *(
                value.alias(key)._to_compliant_expr(plx)
                for key, value in named_aggs.items()
            ),
        )
        return self._df._with_compliant(self._grouped.agg(*compliant_aggs))
diff --git a/venv/lib/python3.8/site-packages/narwhals/py.typed b/venv/lib/python3.8/site-packages/narwhals/py.typed
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/py.typed
diff --git a/venv/lib/python3.8/site-packages/narwhals/schema.py b/venv/lib/python3.8/site-packages/narwhals/schema.py
new file mode 100644
index 0000000..88b2bde
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/schema.py
@@ -0,0 +1,208 @@
+"""Schema.
+
+Adapted from Polars implementation at:
+https://github.com/pola-rs/polars/blob/main/py-polars/polars/schema.py.
+"""
+
+from __future__ import annotations
+
+from collections import OrderedDict
+from functools import partial
+from typing import TYPE_CHECKING, Iterable, Mapping, cast
+
+from narwhals._utils import Implementation, Version, parse_version
+
+if TYPE_CHECKING:
+ from typing import Any, ClassVar
+
+ import polars as pl
+ import pyarrow as pa
+
+ from narwhals.dtypes import DType
+ from narwhals.typing import DTypeBackend
+
+ BaseSchema = OrderedDict[str, DType]
+else:
+ # Python 3.8 does not support generic OrderedDict at runtime
+ BaseSchema = OrderedDict
+
+__all__ = ["Schema"]
+
+
class Schema(BaseSchema):
    """Ordered mapping of column names to their data type.

    Arguments:
        schema: The schema definition given by column names and their associated
            *instantiated* Narwhals data type. Accepts a mapping or an iterable of tuples.

    Examples:
        Define a schema by passing *instantiated* data types.

        >>> import narwhals as nw
        >>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()})
        >>> schema
        Schema({'foo': Int8, 'bar': String})

        Access the data type associated with a specific column name.

        >>> schema["foo"]
        Int8

        Access various schema properties using the `names`, `dtypes`, and `len` methods.

        >>> schema.names()
        ['foo', 'bar']
        >>> schema.dtypes()
        [Int8, String]
        >>> schema.len()
        2
    """

    # Narwhals API version used when converting dtypes to native ones.
    _version: ClassVar[Version] = Version.MAIN

    def __init__(
        self, schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None = None
    ) -> None:
        # `None` is normalized to an empty schema.
        schema = schema or {}
        super().__init__(schema)

    def names(self) -> list[str]:
        """Get the column names of the schema.

        Returns:
            Column names.
        """
        return list(self.keys())

    def dtypes(self) -> list[DType]:
        """Get the data types of the schema.

        Returns:
            Data types of schema.
        """
        return list(self.values())

    def len(self) -> int:
        """Get the number of columns in the schema.

        Returns:
            Number of columns.
        """
        return len(self)

    def to_arrow(self) -> pa.Schema:
        """Convert Schema to a pyarrow Schema.

        Returns:
            A pyarrow Schema.

        Examples:
            >>> import narwhals as nw
            >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")})
            >>> schema.to_arrow()
            a: int64
            b: timestamp[ns]
        """
        import pyarrow as pa  # ignore-banned-import

        from narwhals._arrow.utils import narwhals_to_native_dtype

        return pa.schema(
            (name, narwhals_to_native_dtype(dtype, self._version))
            for name, dtype in self.items()
        )

    def to_pandas(
        self, dtype_backend: DTypeBackend | Iterable[DTypeBackend] = None
    ) -> dict[str, Any]:
        """Convert Schema to an ordered mapping of column names to their pandas data type.

        Arguments:
            dtype_backend: Backend(s) used for the native types. When providing more than
                one, the length of the iterable must be equal to the length of the schema.

        Returns:
            An ordered mapping of column names to their pandas data type.

        Raises:
            ValueError: If an iterable of backends is given whose length does not
                match the number of schema fields.

        Examples:
            >>> import narwhals as nw
            >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")})
            >>> schema.to_pandas()
            {'a': 'int64', 'b': 'datetime64[ns]'}

            >>> schema.to_pandas("pyarrow")
            {'a': 'Int64[pyarrow]', 'b': 'timestamp[ns][pyarrow]'}
        """
        import pandas as pd  # ignore-banned-import

        from narwhals._pandas_like.utils import narwhals_to_native_dtype

        to_native_dtype = partial(
            narwhals_to_native_dtype,
            implementation=Implementation.PANDAS,
            backend_version=parse_version(pd),
            version=self._version,
        )
        # A single backend (or None) applies uniformly to every field.
        if dtype_backend is None or isinstance(dtype_backend, str):
            return {
                name: to_native_dtype(dtype=dtype, dtype_backend=dtype_backend)
                for name, dtype in self.items()
            }
        else:
            backends = tuple(dtype_backend)
            if len(backends) != len(self):
                from itertools import chain, islice, repeat

                # Build a suggestion by cycling the user's backends to the
                # schema length, so the error message shows a valid call.
                n_user, n_actual = len(backends), len(self)
                suggestion = tuple(
                    islice(
                        chain.from_iterable(islice(repeat(backends), n_actual)), n_actual
                    )
                )
                msg = (
                    f"Provided {n_user!r} `dtype_backend`(s), but schema contains {n_actual!r} field(s).\n"
                    "Hint: instead of\n"
                    f" schema.to_pandas({backends})\n"
                    "you may want to use\n"
                    f" schema.to_pandas({backends[0]})\n"
                    f"or\n"
                    f" schema.to_pandas({suggestion})"
                )
                raise ValueError(msg)
            return {
                name: to_native_dtype(dtype=dtype, dtype_backend=backend)
                for name, dtype, backend in zip(self.keys(), self.values(), backends)
            }

    def to_polars(self) -> pl.Schema:
        """Convert Schema to a polars Schema.

        Returns:
            A polars Schema or plain dict (prior to polars 1.0).

        Examples:
            >>> import narwhals as nw
            >>> schema = nw.Schema({"a": nw.Int64(), "b": nw.Datetime("ns")})
            >>> schema.to_polars()
            Schema({'a': Int64, 'b': Datetime(time_unit='ns', time_zone=None)})
        """
        import polars as pl  # ignore-banned-import

        from narwhals._polars.utils import narwhals_to_native_dtype

        pl_version = parse_version(pl)
        schema = (
            (
                name,
                narwhals_to_native_dtype(
                    dtype, self._version, backend_version=pl_version
                ),
            )
            for name, dtype in self.items()
        )
        # `pl.Schema` only exists from polars 1.0; earlier versions get a dict.
        return (
            pl.Schema(schema)
            if pl_version >= (1, 0, 0)
            else cast("pl.Schema", dict(schema))
        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/selectors.py b/venv/lib/python3.8/site-packages/narwhals/selectors.py
new file mode 100644
index 0000000..338895f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/selectors.py
@@ -0,0 +1,352 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Iterable, NoReturn
+
+from narwhals._expression_parsing import ExprMetadata, combine_metadata
+from narwhals._utils import flatten
+from narwhals.expr import Expr
+
+if TYPE_CHECKING:
+ from datetime import timezone
+
+ from narwhals.dtypes import DType
+ from narwhals.typing import TimeUnit
+
+
+class Selector(Expr):
+    """Expression subclass representing a column selector.
+
+    Two selectors combine set-wise with `|` (union) and `&` (intersection) and
+    remain selectors; mixing a selector with a non-selector, or using `+`,
+    first demotes it to a plain `Expr` via `_to_expr`.
+    """
+
+    def _to_expr(self) -> Expr:
+        # Re-wrap as a plain Expr, dropping selector semantics while keeping
+        # the same compliant-expression callable and metadata.
+        return Expr(self._to_compliant_expr, self._metadata)
+
+    def __add__(self, other: Any) -> Expr:  # type: ignore[override]
+        # `Selector + Selector` is ambiguous (set union vs arithmetic), so it
+        # is rejected outright; any other operand falls back to Expr addition.
+        if isinstance(other, Selector):
+            msg = "unsupported operand type(s) for op: ('Selector' + 'Selector')"
+            raise TypeError(msg)
+        return self._to_expr() + other  # type: ignore[no-any-return]
+
+    def __or__(self, other: Any) -> Expr:  # type: ignore[override]
+        # Selector | Selector stays a Selector: the union of both column sets,
+        # with metadata combined from the two operands.
+        if isinstance(other, Selector):
+            return self.__class__(
+                lambda plx: self._to_compliant_expr(plx) | other._to_compliant_expr(plx),
+                combine_metadata(
+                    self,
+                    other,
+                    str_as_lit=False,
+                    allow_multi_output=True,
+                    to_single_output=False,
+                ),
+            )
+        return self._to_expr() | other  # type: ignore[no-any-return]
+
+    def __and__(self, other: Any) -> Expr:  # type: ignore[override]
+        # Selector & Selector stays a Selector: the intersection of both column
+        # sets, with metadata combined from the two operands.
+        if isinstance(other, Selector):
+            return self.__class__(
+                lambda plx: self._to_compliant_expr(plx) & other._to_compliant_expr(plx),
+                combine_metadata(
+                    self,
+                    other,
+                    str_as_lit=False,
+                    allow_multi_output=True,
+                    to_single_output=False,
+                ),
+            )
+        return self._to_expr() & other  # type: ignore[no-any-return]
+
+    # Reflected operators with a selector on the right-hand side are
+    # deliberately unsupported.
+    def __rsub__(self, other: Any) -> NoReturn:
+        raise NotImplementedError
+
+    def __rand__(self, other: Any) -> NoReturn:
+        raise NotImplementedError
+
+    def __ror__(self, other: Any) -> NoReturn:
+        raise NotImplementedError
+
+
+def by_dtype(*dtypes: DType | type[DType] | Iterable[DType | type[DType]]) -> Selector:
+    """Select columns based on their dtype.
+
+    Arguments:
+        dtypes: one or more data types to select.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> import pyarrow as pa
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>> df_native = pa.table({"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]})
+        >>> df = nw.from_native(df_native)
+
+        Let's select int64 and float64 dtypes and multiply each value by 2:
+
+        >>> df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2).to_native()
+        pyarrow.Table
+        a: int64
+        c: double
+        ----
+        a: [[2,4]]
+        c: [[8.2,4.6]]
+    """
+    # Flatten eagerly so the closure below captures a plain list, rather than
+    # re-flattening (or exhausting an iterator) on every backend resolution.
+    flattened = flatten(dtypes)
+    return Selector(
+        lambda plx: plx.selectors.by_dtype(flattened),
+        ExprMetadata.selector_multi_unnamed(),
+    )
+
+
+def matches(pattern: str) -> Selector:
+ """Select all columns that match the given regex pattern.
+
+ Arguments:
+ pattern: A valid regular expression pattern.
+
+ Returns:
+ A new expression.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>> import narwhals.selectors as ncs
+ >>> df_native = pd.DataFrame(
+ ... {"bar": [123, 456], "baz": [2.0, 5.5], "zap": [0, 1]}
+ ... )
+ >>> df = nw.from_native(df_native)
+
+ Let's select column names containing an 'a', preceded by a character that is not 'z':
+
+ >>> df.select(ncs.matches("[^z]a")).to_native()
+ bar baz
+ 0 123 2.0
+ 1 456 5.5
+ """
+ return Selector(
+ lambda plx: plx.selectors.matches(pattern), ExprMetadata.selector_multi_unnamed()
+ )
+
+
+def numeric() -> Selector:
+    """Select numeric columns.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>> df_native = pl.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]})
+        >>> df = nw.from_native(df_native)
+
+        Let's select numeric dtypes and multiply each value by 2:
+
+        >>> df.select(ncs.numeric() * 2).to_native()
+        shape: (2, 2)
+        ┌─────┬─────┐
+        │ a   ┆ c   │
+        │ --- ┆ --- │
+        │ i64 ┆ f64 │
+        ╞═════╪═════╡
+        │ 2   ┆ 8.2 │
+        │ 4   ┆ 4.6 │
+        └─────┴─────┘
+    """
+    # Delegates to the backend-specific selector namespace; metadata marks a
+    # multi-output, unnamed selector.
+    return Selector(
+        lambda plx: plx.selectors.numeric(), ExprMetadata.selector_multi_unnamed()
+    )
+
+
+def boolean() -> Selector:
+    """Select boolean columns.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>> df_native = pl.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [False, True]})
+        >>> df = nw.from_native(df_native)
+
+        Let's select boolean dtypes:
+
+        >>> df.select(ncs.boolean())
+        ┌──────────────────┐
+        |Narwhals DataFrame|
+        |------------------|
+        |  shape: (2, 1)   |
+        |    ┌───────┐     |
+        |    │ c     │     |
+        |    │ ---   │     |
+        |    │ bool  │     |
+        |    ╞═══════╡     |
+        |    │ false │     |
+        |    │ true  │     |
+        |    └───────┘     |
+        └──────────────────┘
+    """
+    # Delegates to the backend-specific selector namespace.
+    return Selector(
+        lambda plx: plx.selectors.boolean(), ExprMetadata.selector_multi_unnamed()
+    )
+
+
+def string() -> Selector:
+    """Select string columns.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>> df_native = pl.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [False, True]})
+        >>> df = nw.from_native(df_native)
+
+        Let's select string dtypes:
+
+        >>> df.select(ncs.string()).to_native()
+        shape: (2, 1)
+        ┌─────┐
+        │ b   │
+        │ --- │
+        │ str │
+        ╞═════╡
+        │ x   │
+        │ y   │
+        └─────┘
+    """
+    # Delegates to the backend-specific selector namespace.
+    return Selector(
+        lambda plx: plx.selectors.string(), ExprMetadata.selector_multi_unnamed()
+    )
+
+
+def categorical() -> Selector:
+    """Select categorical columns.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>> df_native = pl.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [False, True]})
+
+        Let's convert column "b" to categorical, and then select categorical dtypes:
+
+        >>> df = nw.from_native(df_native).with_columns(
+        ...     b=nw.col("b").cast(nw.Categorical())
+        ... )
+        >>> df.select(ncs.categorical()).to_native()
+        shape: (2, 1)
+        ┌─────┐
+        │ b   │
+        │ --- │
+        │ cat │
+        ╞═════╡
+        │ x   │
+        │ y   │
+        └─────┘
+    """
+    # Delegates to the backend-specific selector namespace.
+    return Selector(
+        lambda plx: plx.selectors.categorical(), ExprMetadata.selector_multi_unnamed()
+    )
+
+
+def all() -> Selector:
+    """Select all columns.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>> df_native = pd.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [False, True]})
+        >>> df = nw.from_native(df_native)
+
+        Let's select all dtypes:
+
+        >>> df.select(ncs.all()).to_native()
+           a  b      c
+        0  1  x  False
+        1  2  y   True
+    """
+    # NOTE: intentionally shadows the builtin `all` within this module, matching
+    # the selector-namespace convention (see `__all__`).
+    return Selector(
+        lambda plx: plx.selectors.all(), ExprMetadata.selector_multi_unnamed()
+    )
+
+
+def datetime(
+    time_unit: TimeUnit | Iterable[TimeUnit] | None = None,
+    # Default `("*", None)` means: match Datetime columns with *any* timezone
+    # ("*") as well as timezone-naive ones (None) — i.e. no timezone filter.
+    time_zone: str | timezone | Iterable[str | timezone | None] | None = ("*", None),
+) -> Selector:
+    """Select all datetime columns, optionally filtering by time unit/zone.
+
+    Arguments:
+        time_unit: One (or more) of the allowed timeunit precision strings, "ms", "us",
+            "ns" and "s". Omit to select columns with any valid timeunit.
+        time_zone: Specify which timezone(s) to select
+
+            * One or more timezone strings, as defined in zoneinfo (to see valid options
+                run `import zoneinfo; zoneinfo.available_timezones()` for a full list).
+            * Set `None` to select Datetime columns that do not have a timezone.
+            * Set `"*"` to select Datetime columns that have *any* timezone.
+
+    Returns:
+        A new expression.
+
+    Examples:
+        >>> from datetime import datetime, timezone
+        >>> import pyarrow as pa
+        >>> import narwhals as nw
+        >>> import narwhals.selectors as ncs
+        >>>
+        >>> utc_tz = timezone.utc
+        >>> data = {
+        ...     "tstamp_utc": [
+        ...         datetime(2023, 4, 10, 12, 14, 16, 999000, tzinfo=utc_tz),
+        ...         datetime(2025, 8, 25, 14, 18, 22, 666000, tzinfo=utc_tz),
+        ...     ],
+        ...     "tstamp": [
+        ...         datetime(2000, 11, 20, 18, 12, 16, 600000),
+        ...         datetime(2020, 10, 30, 10, 20, 25, 123000),
+        ...     ],
+        ...     "numeric": [3.14, 6.28],
+        ... }
+        >>> df_native = pa.table(data)
+        >>> df_nw = nw.from_native(df_native)
+        >>> df_nw.select(ncs.datetime()).to_native()
+        pyarrow.Table
+        tstamp_utc: timestamp[us, tz=UTC]
+        tstamp: timestamp[us]
+        ----
+        tstamp_utc: [[2023-04-10 12:14:16.999000Z,2025-08-25 14:18:22.666000Z]]
+        tstamp: [[2000-11-20 18:12:16.600000,2020-10-30 10:20:25.123000]]
+
+        Select only datetime columns that have any time_zone specification:
+
+        >>> df_nw.select(ncs.datetime(time_zone="*")).to_native()
+        pyarrow.Table
+        tstamp_utc: timestamp[us, tz=UTC]
+        ----
+        tstamp_utc: [[2023-04-10 12:14:16.999000Z,2025-08-25 14:18:22.666000Z]]
+    """
+    return Selector(
+        lambda plx: plx.selectors.datetime(time_unit=time_unit, time_zone=time_zone),
+        ExprMetadata.selector_multi_unnamed(),
+    )
+
+
+__all__ = [
+ "all",
+ "boolean",
+ "by_dtype",
+ "categorical",
+ "datetime",
+ "matches",
+ "numeric",
+ "string",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/series.py b/venv/lib/python3.8/site-packages/narwhals/series.py
new file mode 100644
index 0000000..92b784b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/series.py
@@ -0,0 +1,2665 @@
+from __future__ import annotations
+
+import math
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Generic,
+ Iterator,
+ Literal,
+ Mapping,
+ Sequence,
+ overload,
+)
+
+from narwhals._utils import (
+ _validate_rolling_arguments,
+ ensure_type,
+ generate_repr,
+ is_compliant_series,
+ is_index_selector,
+ parse_version,
+ supports_arrow_c_stream,
+)
+from narwhals.dependencies import is_numpy_scalar
+from narwhals.dtypes import _validate_dtype
+from narwhals.exceptions import ComputeError
+from narwhals.series_cat import SeriesCatNamespace
+from narwhals.series_dt import SeriesDateTimeNamespace
+from narwhals.series_list import SeriesListNamespace
+from narwhals.series_str import SeriesStringNamespace
+from narwhals.series_struct import SeriesStructNamespace
+from narwhals.translate import to_native
+from narwhals.typing import IntoSeriesT
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from typing_extensions import Self
+
+ from narwhals._compliant import CompliantSeries
+ from narwhals._utils import Implementation
+ from narwhals.dataframe import DataFrame, MultiIndexSelector
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ ClosedInterval,
+ FillNullStrategy,
+ IntoDType,
+ NonNestedLiteral,
+ NumericLiteral,
+ RankMethod,
+ RollingInterpolationMethod,
+ SingleIndexSelector,
+ TemporalLiteral,
+ _1DArray,
+ )
+
+
+class Series(Generic[IntoSeriesT]):
+ """Narwhals Series, backed by a native series.
+
+ Warning:
+ This class is not meant to be instantiated directly - instead:
+
+ - If the native object is a series from one of the supported backend (e.g.
+ pandas.Series, polars.Series, pyarrow.ChunkedArray), you can use
+ [`narwhals.from_native`][]:
+ ```py
+ narwhals.from_native(native_series, allow_series=True)
+ narwhals.from_native(native_series, series_only=True)
+ ```
+
+ - If the object is a generic sequence (e.g. a list or a tuple of values), you can
+ create a series via [`narwhals.new_series`][], e.g.:
+ ```py
+ narwhals.new_series(name="price", values=[10.5, 9.4, 1.2], backend="pandas")
+ ```
+ """
+
+ @property
+ def _dataframe(self) -> type[DataFrame[Any]]:
+ from narwhals.dataframe import DataFrame
+
+ return DataFrame
+
+ def __init__(
+ self, series: Any, *, level: Literal["full", "lazy", "interchange"]
+ ) -> None:
+ self._level: Literal["full", "lazy", "interchange"] = level
+ if is_compliant_series(series):
+ self._compliant_series: CompliantSeries[IntoSeriesT] = (
+ series.__narwhals_series__()
+ )
+ else: # pragma: no cover
+ msg = f"Expected Polars Series or an object which implements `__narwhals_series__`, got: {type(series)}."
+ raise AssertionError(msg)
+
+ @property
+ def implementation(self) -> Implementation:
+ """Return implementation of native Series.
+
+ This can be useful when you need to use special-casing for features outside of
+ Narwhals' scope - for example, when dealing with pandas' Period Dtype.
+
+ Returns:
+ Implementation.
+
+ Examples:
+ >>> import narwhals as nw
+ >>> import pandas as pd
+
+ >>> s_native = pd.Series([1, 2, 3])
+ >>> s = nw.from_native(s_native, series_only=True)
+
+ >>> s.implementation
+ <Implementation.PANDAS: 'pandas'>
+
+ >>> s.implementation.is_pandas()
+ True
+
+ >>> s.implementation.is_pandas_like()
+ True
+
+ >>> s.implementation.is_polars()
+ False
+ """
+ return self._compliant_series._implementation
+
+ def __array__(self, dtype: Any = None, copy: bool | None = None) -> _1DArray: # noqa: FBT001
+ return self._compliant_series.__array__(dtype=dtype, copy=copy)
+
+    @overload
+    def __getitem__(self, idx: SingleIndexSelector) -> Any: ...
+
+    @overload
+    def __getitem__(self, idx: MultiIndexSelector) -> Self: ...
+
+    def __getitem__(self, idx: SingleIndexSelector | MultiIndexSelector) -> Any | Self:
+        """Retrieve elements from the object using integer indexing or slicing.
+
+        Arguments:
+            idx: The index, slice, or sequence of indices to retrieve.
+
+                - If `idx` is an integer, a single element is returned.
+                - If `idx` is a slice, a sequence of integers, or another Series
+                    (with integer values) a subset of the Series is returned.
+
+        Returns:
+            A single element if `idx` is an integer, else a subset of the Series.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>>
+            >>> s_native = pa.chunked_array([[1, 2, 3]])
+            >>> nw.from_native(s_native, series_only=True)[0]
+            1
+
+            >>> nw.from_native(s_native, series_only=True)[
+            ...     :2
+            ... ].to_native()  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                1,
+                2
+              ]
+            ]
+        """
+        # Single-item path: accept plain ints and numpy integer scalars
+        # (signed "i" / unsigned "u" dtype kinds), normalised to builtin int.
+        if isinstance(idx, int) or (
+            is_numpy_scalar(idx) and idx.dtype.kind in {"i", "u"}
+        ):
+            idx = int(idx) if not isinstance(idx, int) else idx
+            return self._compliant_series.item(idx)
+
+        # A *native* series of indices (same class as our backing series) is
+        # wrapped into a narwhals Series so the Series branch below handles it.
+        if isinstance(idx, self.to_native().__class__):
+            idx = self._with_compliant(self._compliant_series._with_native(idx))
+
+        # Anything that is not a recognised positional selector gets a
+        # descriptive TypeError rather than an obscure backend failure.
+        if not is_index_selector(idx):
+            msg = (
+                f"Unexpected type for `Series.__getitem__`: {type(idx)}.\n\n"
+                "Hints:\n"
+                "- use `s.item` to select a single item.\n"
+                "- Use `s[indices]` to select rows positionally.\n"
+                "- Use `s.filter(mask)` to filter rows based on a boolean mask."
+            )
+            raise TypeError(msg)
+        if isinstance(idx, Series):
+            return self._with_compliant(self._compliant_series[idx._compliant_series])
+        # At this point idx is a slice/sequence selector, never an int.
+        assert not isinstance(idx, int)  # noqa: S101 # help mypy
+        return self._with_compliant(self._compliant_series[idx])
+
+ def __native_namespace__(self) -> ModuleType:
+ return self._compliant_series.__native_namespace__()
+
+ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
+ """Export a Series via the Arrow PyCapsule Interface.
+
+ Narwhals doesn't implement anything itself here:
+
+ - if the underlying series implements the interface, it'll return that
+ - else, it'll call `to_arrow` and then defer to PyArrow's implementation
+
+ See [PyCapsule Interface](https://arrow.apache.org/docs/dev/format/CDataInterface/PyCapsuleInterface.html)
+ for more.
+ """
+ native_series = self._compliant_series.native
+ if supports_arrow_c_stream(native_series):
+ return native_series.__arrow_c_stream__(requested_schema=requested_schema)
+ try:
+ import pyarrow as pa # ignore-banned-import
+ except ModuleNotFoundError as exc: # pragma: no cover
+ msg = f"'pyarrow>=16.0.0' is required for `Series.__arrow_c_stream__` for object of type {type(native_series)}"
+ raise ModuleNotFoundError(msg) from exc
+ if parse_version(pa) < (16, 0): # pragma: no cover
+ msg = f"'pyarrow>=16.0.0' is required for `Series.__arrow_c_stream__` for object of type {type(native_series)}"
+ raise ModuleNotFoundError(msg)
+ from narwhals._arrow.utils import chunked_array
+
+ ca = chunked_array(self.to_arrow())
+ return ca.__arrow_c_stream__(requested_schema=requested_schema)
+
+ def to_native(self) -> IntoSeriesT:
+ """Convert Narwhals series to native series.
+
+ Returns:
+ Series of class that user started with.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 2])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.to_native() # doctest: +NORMALIZE_WHITESPACE
+ shape: (2,)
+ Series: '' [i64]
+ [
+ 1
+ 2
+ ]
+ """
+ return self._compliant_series.native
+
+ def scatter(self, indices: int | Sequence[int], values: Any) -> Self:
+ """Set value(s) at given position(s).
+
+ Arguments:
+ indices: Position(s) to set items at.
+ values: Values to set.
+
+ Returns:
+ A new Series with values set at given positions.
+
+ Note:
+ This method always returns a new Series, without modifying the original one.
+ Using this function in a for-loop is an anti-pattern, we recommend building
+ up your positions and values beforehand and doing an update in one go.
+
+ For example, instead of
+
+ ```python
+ for i in [1, 3, 2]:
+ value = some_function(i)
+ s = s.scatter(i, value)
+ ```
+
+ prefer
+
+ ```python
+ positions = [1, 3, 2]
+ values = [some_function(x) for x in positions]
+ s = s.scatter(positions, values)
+ ```
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> df_native = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})
+ >>> df_nw = nw.from_native(df_native)
+ >>> df_nw.with_columns(df_nw["a"].scatter([0, 1], [999, 888])).to_native()
+ pyarrow.Table
+ a: int64
+ b: int64
+ ----
+ a: [[999,888,3]]
+ b: [[4,5,6]]
+ """
+ return self._with_compliant(
+ self._compliant_series.scatter(indices, self._extract_native(values))
+ )
+
+ @property
+ def shape(self) -> tuple[int]:
+ """Get the shape of the Series.
+
+ Returns:
+ A tuple containing the length of the Series.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).shape
+ (3,)
+ """
+ return (self._compliant_series.len(),)
+
+ def _extract_native(self, arg: Any) -> Any:
+ from narwhals.series import Series
+
+ if isinstance(arg, Series):
+ return arg._compliant_series
+ return arg
+
+ def _with_compliant(self, series: Any) -> Self:
+ return self.__class__(series, level=self._level)
+
+ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self:
+ """Pipe function call.
+
+ Returns:
+ A new Series with the results of the piped function applied.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>> s_native = pl.Series([1, 2, 3])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.pipe(lambda x: x + 2).to_native() # doctest: +NORMALIZE_WHITESPACE
+ shape: (3,)
+ Series: '' [i64]
+ [
+ 3
+ 4
+ 5
+ ]
+ """
+ return function(self, *args, **kwargs)
+
+ def __repr__(self) -> str: # pragma: no cover
+ return generate_repr("Narwhals Series", self.to_native().__repr__())
+
+ def __len__(self) -> int:
+ return len(self._compliant_series)
+
+ def len(self) -> int:
+ r"""Return the number of elements in the Series.
+
+ Null values count towards the total.
+
+ Returns:
+ The number of elements in the Series.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, None]])
+ >>> nw.from_native(s_native, series_only=True).len()
+ 3
+ """
+ return len(self._compliant_series)
+
+ @property
+ def dtype(self) -> DType:
+ """Get the data type of the Series.
+
+ Returns:
+ The data type of the Series.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).dtype
+ Int64
+ """
+ return self._compliant_series.dtype
+
+ @property
+ def name(self) -> str:
+ """Get the name of the Series.
+
+ Returns:
+ The name of the Series.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series("foo", [1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).name
+ 'foo'
+ """
+ return self._compliant_series.name
+
+ def ewm_mean(
+ self,
+ *,
+ com: float | None = None,
+ span: float | None = None,
+ half_life: float | None = None,
+ alpha: float | None = None,
+ adjust: bool = True,
+ min_samples: int = 1,
+ ignore_nulls: bool = False,
+ ) -> Self:
+ r"""Compute exponentially-weighted moving average.
+
+ Arguments:
+ com: Specify decay in terms of center of mass, $\gamma$, with <br> $\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$
+ span: Specify decay in terms of span, $\theta$, with <br> $\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$
+ half_life: Specify decay in terms of half-life, $\tau$, with <br> $\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$
+ alpha: Specify smoothing factor alpha directly, $0 < \alpha \leq 1$.
+ adjust: Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
+
+ - When `adjust=True` (the default) the EW function is calculated
+ using weights $w_i = (1 - \alpha)^i$
+ - When `adjust=False` the EW function is calculated recursively by
+ $$
+ y_0=x_0
+ $$
+ $$
+ y_t = (1 - \alpha)y_{t - 1} + \alpha x_t
+ $$
+ min_samples: Minimum number of observations in window required to have a value (otherwise result is null).
+ ignore_nulls: Ignore missing values when calculating weights.
+
+ - When `ignore_nulls=False` (default), weights are based on absolute
+ positions.
+ For example, the weights of $x_0$ and $x_2$ used in
+ calculating the final weighted average of $[x_0, None, x_2]$ are
+ $(1-\alpha)^2$ and $1$ if `adjust=True`, and
+ $(1-\alpha)^2$ and $\alpha$ if `adjust=False`.
+ - When `ignore_nulls=True`, weights are based
+ on relative positions. For example, the weights of
+ $x_0$ and $x_2$ used in calculating the final weighted
+ average of $[x_0, None, x_2]$ are
+ $1-\alpha$ and $1$ if `adjust=True`,
+ and $1-\alpha$ and $\alpha$ if `adjust=False`.
+
+ Returns:
+ Series
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series(name="a", data=[1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).ewm_mean(
+ ... com=1, ignore_nulls=False
+ ... ).to_native()
+ 0 1.000000
+ 1 1.666667
+ 2 2.428571
+ Name: a, dtype: float64
+ """
+ return self._with_compliant(
+ self._compliant_series.ewm_mean(
+ com=com,
+ span=span,
+ half_life=half_life,
+ alpha=alpha,
+ adjust=adjust,
+ min_samples=min_samples,
+ ignore_nulls=ignore_nulls,
+ )
+ )
+
+ def cast(self, dtype: IntoDType) -> Self:
+ """Cast between data types.
+
+ Arguments:
+ dtype: Data type that the object will be cast into.
+
+ Returns:
+ A new Series with the specified data type.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[True, False, True]])
+ >>> nw.from_native(s_native, series_only=True).cast(nw.Int64).to_native()
+ <pyarrow.lib.ChunkedArray object at ...>
+ [
+ [
+ 1,
+ 0,
+ 1
+ ]
+ ]
+ """
+ _validate_dtype(dtype)
+ return self._with_compliant(self._compliant_series.cast(dtype))
+
+ def to_frame(self) -> DataFrame[Any]:
+ """Convert to dataframe.
+
+ Returns:
+ A DataFrame containing this Series as a single column.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series("a", [1, 2])
+ >>> nw.from_native(s_native, series_only=True).to_frame().to_native()
+ shape: (2, 1)
+ ┌─────┐
+ │ a │
+ │ --- │
+ │ i64 │
+ ╞═════╡
+ │ 1 │
+ │ 2 │
+ └─────┘
+ """
+ return self._dataframe(self._compliant_series.to_frame(), level=self._level)
+
+ def to_list(self) -> list[Any]:
+ """Convert to list.
+
+ Notes:
+ This function converts to Python scalars. It's typically
+ more efficient to keep your data in the format native to
+ your original dataframe, so we recommend only calling this
+ when you absolutely need to.
+
+ Returns:
+ A list of Python objects.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, 3]])
+ >>> nw.from_native(s_native, series_only=True).to_list()
+ [1, 2, 3]
+ """
+ return self._compliant_series.to_list()
+
+ def mean(self) -> float:
+ """Reduce this Series to the mean value.
+
+ Returns:
+ The average of all elements in the Series.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1.2, 4.2])
+ >>> nw.from_native(s_native, series_only=True).mean()
+ np.float64(2.7)
+ """
+ return self._compliant_series.mean()
+
+ def median(self) -> float:
+ """Reduce this Series to the median value.
+
+ Notes:
+ Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+
+ Returns:
+ The median value of all elements in the Series.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[5, 3, 8]])
+ >>> nw.from_native(s_native, series_only=True).median()
+ 5.0
+ """
+ return self._compliant_series.median()
+
+ def skew(self) -> float | None:
+ """Calculate the sample skewness of the Series.
+
+ Returns:
+ The sample skewness of the Series.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 1, 2, 10, 100])
+ >>> nw.from_native(s_native, series_only=True).skew()
+ 1.4724267269058975
+
+ Notes:
+ The skewness is a measure of the asymmetry of the probability distribution.
+ A perfectly symmetric distribution has a skewness of 0.
+ """
+ return self._compliant_series.skew()
+
+ def count(self) -> int:
+ """Returns the number of non-null elements in the Series.
+
+ Returns:
+ The number of non-null elements in the Series.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, None]])
+ >>> nw.from_native(s_native, series_only=True).count()
+ 2
+ """
+ return self._compliant_series.count()
+
+ def any(self) -> bool:
+ """Return whether any of the values in the Series are True.
+
+ If there are no non-null elements, the result is `False`.
+
+ Notes:
+ Only works on Series of data type Boolean.
+
+ Returns:
+ A boolean indicating if any values in the Series are True.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([False, True, False])
+ >>> nw.from_native(s_native, series_only=True).any()
+ np.True_
+ """
+ return self._compliant_series.any()
+
+ def all(self) -> bool:
+ """Return whether all values in the Series are True.
+
+ If there are no non-null elements, the result is `True`.
+
+ Returns:
+ A boolean indicating if all values in the Series are True.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[False, True, False]])
+ >>> nw.from_native(s_native, series_only=True).all()
+ False
+ """
+ return self._compliant_series.all()
+
+ def min(self) -> Any:
+ """Get the minimal value in this Series.
+
+ Returns:
+ The minimum value in the Series.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).min()
+ 1
+ """
+ return self._compliant_series.min()
+
+ def max(self) -> Any:
+ """Get the maximum value in this Series.
+
+ Returns:
+ The maximum value in the Series.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).max()
+ np.int64(3)
+ """
+ return self._compliant_series.max()
+
+ def arg_min(self) -> int:
+ """Returns the index of the minimum value.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, 3]])
+ >>> nw.from_native(s_native, series_only=True).arg_min()
+ 0
+ """
+ return self._compliant_series.arg_min()
+
+ def arg_max(self) -> int:
+ """Returns the index of the maximum value.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).arg_max()
+ 2
+ """
+ return self._compliant_series.arg_max()
+
+ def sum(self) -> float:
+ """Reduce this Series to the sum value.
+
+ If there are no non-null elements, the result is zero.
+
+ Returns:
+ The sum of all elements in the Series.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, 3]])
+ >>> nw.from_native(s_native, series_only=True).sum()
+ 6
+ """
+ return self._compliant_series.sum()
+
+ def std(self, *, ddof: int = 1) -> float:
+ """Get the standard deviation of this Series.
+
+ Arguments:
+ ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
+ where N represents the number of elements.
+
+ Returns:
+ The standard deviation of all elements in the Series.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 2, 3])
+ >>> nw.from_native(s_native, series_only=True).std()
+ 1.0
+ """
+ return self._compliant_series.std(ddof=ddof)
+
+ def var(self, *, ddof: int = 1) -> float:
+ """Get the variance of this Series.
+
+ Arguments:
+ ddof: "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
+ where N represents the number of elements.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, 3]])
+ >>> nw.from_native(s_native, series_only=True).var()
+ 1.0
+ """
+ return self._compliant_series.var(ddof=ddof)
+
+ def clip(
+ self,
+ lower_bound: Self | NumericLiteral | TemporalLiteral | None = None,
+ upper_bound: Self | NumericLiteral | TemporalLiteral | None = None,
+ ) -> Self:
+ r"""Clip values in the Series.
+
+ Arguments:
+ lower_bound: Lower bound value.
+ upper_bound: Upper bound value.
+
+ Returns:
+ A new Series with values clipped to the specified bounds.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([-1, 1, -3, 3, -5, 5])
+ >>> nw.from_native(s_native, series_only=True).clip(-1, 3).to_native()
+ 0 -1
+ 1 1
+ 2 -1
+ 3 3
+ 4 -1
+ 5 3
+ dtype: int64
+ """
+ return self._with_compliant(
+ self._compliant_series.clip(
+ lower_bound=self._extract_native(lower_bound),
+ upper_bound=self._extract_native(upper_bound),
+ )
+ )
+
+ def is_in(self, other: Any) -> Self:
+ """Check if the elements of this Series are in the other sequence.
+
+ Arguments:
+ other: Sequence of primitive type.
+
+ Returns:
+ A new Series with boolean values indicating if the elements are in the other sequence.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, 3]])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.is_in([3, 2, 8]).to_native() # doctest: +ELLIPSIS
+ <pyarrow.lib.ChunkedArray object at ...>
+ [
+ [
+ false,
+ true,
+ true
+ ]
+ ]
+ """
+ return self._with_compliant(
+ self._compliant_series.is_in(to_native(other, pass_through=True))
+ )
+
+ def arg_true(self) -> Self:
+ """Find elements where boolean Series is True.
+
+ Returns:
+ A new Series with the indices of elements that are True.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, None, None, 2])
+ >>> nw.from_native(
+ ... s_native, series_only=True
+ ... ).is_null().arg_true().to_native() # doctest: +NORMALIZE_WHITESPACE
+ shape: (2,)
+ Series: '' [u32]
+ [
+ 1
+ 2
+ ]
+ """
+ return self._with_compliant(self._compliant_series.arg_true())
+
+ def drop_nulls(self) -> Self:
+ """Drop null values.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Returns:
+ A new Series with null values removed.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([2, 4, None, 3, 5])
+ >>> nw.from_native(s_native, series_only=True).drop_nulls().to_native()
+ 0 2.0
+ 1 4.0
+ 3 3.0
+ 4 5.0
+ dtype: float64
+ """
+ return self._with_compliant(self._compliant_series.drop_nulls())
+
    def abs(self) -> Self:
        """Calculate the absolute value of each element.

        Returns:
            A new Series with the absolute values of the original elements.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[2, -4, 3]])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).abs().to_native()  # doctest: +ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                2,
                4,
                3
              ]
            ]
        """
        # Pure delegation to the backend implementation.
        return self._with_compliant(self._compliant_series.abs())
+
    def cum_sum(self, *, reverse: bool = False) -> Self:
        """Calculate the cumulative sum.

        Arguments:
            reverse: reverse the operation

        Returns:
            A new Series with the cumulative sum of non-null values.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series([2, 4, 3])
            >>> nw.from_native(s_native, series_only=True).cum_sum().to_native()
            0    2
            1    6
            2    9
            dtype: int64
        """
        # `reverse` is forwarded keyword-only to the backend.
        return self._with_compliant(self._compliant_series.cum_sum(reverse=reverse))
+
+ def unique(self, *, maintain_order: bool = False) -> Self:
+ """Returns unique values of the series.
+
+ Arguments:
+ maintain_order: Keep the same order as the original series. This may be more
+ expensive to compute. Settings this to `True` blocks the possibility
+ to run on the streaming engine for Polars.
+
+ Returns:
+ A new Series with duplicate values removed.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([2, 4, 4, 6])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.unique(
+ ... maintain_order=True
+ ... ).to_native() # doctest: +NORMALIZE_WHITESPACE
+ shape: (3,)
+ Series: '' [i64]
+ [
+ 2
+ 4
+ 6
+ ]
+ """
+ return self._with_compliant(
+ self._compliant_series.unique(maintain_order=maintain_order)
+ )
+
    def diff(self) -> Self:
        """Calculate the difference with the previous element, for each element.

        Notes:
            pandas may change the dtype here, for example when introducing missing
            values in an integer column. To ensure, that the dtype doesn't change,
            you may want to use `fill_null` and `cast`. For example, to calculate
            the diff and fill missing values with `0` in a Int64 column, you could
            do:

                s.diff().fill_null(0).cast(nw.Int64)

        Returns:
            A new Series with the difference between each element and its predecessor.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[2, 4, 3]])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).diff().to_native()  # doctest: +ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                null,
                2,
                -1
              ]
            ]
        """
        # Pure delegation; the first element becomes null (see doctest).
        return self._with_compliant(self._compliant_series.diff())
+
+ def shift(self, n: int) -> Self:
+ """Shift values by `n` positions.
+
+ Arguments:
+ n: Number of indices to shift forward. If a negative value is passed,
+ values are shifted in the opposite direction instead.
+
+ Returns:
+ A new Series with values shifted by n positions.
+
+ Notes:
+ pandas may change the dtype here, for example when introducing missing
+ values in an integer column. To ensure, that the dtype doesn't change,
+ you may want to use `fill_null` and `cast`. For example, to shift
+ and fill missing values with `0` in a Int64 column, you could
+ do:
+
+ s.shift(1).fill_null(0).cast(nw.Int64)
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([2, 4, 3])
+ >>> nw.from_native(s_native, series_only=True).shift(1).to_native()
+ 0 NaN
+ 1 2.0
+ 2 4.0
+ dtype: float64
+ """
+ ensure_type(n, int, param_name="n")
+
+ return self._with_compliant(self._compliant_series.shift(n))
+
+ def sample(
+ self,
+ n: int | None = None,
+ *,
+ fraction: float | None = None,
+ with_replacement: bool = False,
+ seed: int | None = None,
+ ) -> Self:
+ """Sample randomly from this Series.
+
+ Arguments:
+ n: Number of items to return. Cannot be used with fraction.
+ fraction: Fraction of items to return. Cannot be used with n.
+ with_replacement: Allow values to be sampled more than once.
+ seed: Seed for the random number generator. If set to None (default), a random
+ seed is generated for each sample operation.
+
+ Returns:
+ A new Series containing randomly sampled values from the original Series.
+
+ Notes:
+ The `sample` method returns a Series with a specified number of
+ randomly selected items chosen from this Series.
+ The results are not consistent across libraries.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 2, 3, 4])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.sample(
+ ... fraction=1.0, with_replacement=True
+ ... ).to_native() # doctest: +SKIP
+ shape: (4,)
+ Series: '' [i64]
+ [
+ 1
+ 4
+ 3
+ 4
+ ]
+ """
+ return self._with_compliant(
+ self._compliant_series.sample(
+ n=n, fraction=fraction, with_replacement=with_replacement, seed=seed
+ )
+ )
+
    def alias(self, name: str) -> Self:
        """Rename the Series.

        Notes:
            This method is very cheap, but does not guarantee that data
            will be copied. For example:

            ```python
            s1: nw.Series
            s2 = s1.alias("foo")
            arr = s2.to_numpy()
            arr[0] = 999
            ```

            may (depending on the backend, and on the version) result in
            `s1`'s data being modified. We recommend:

            - if you need to alias an object and don't need the original
              one around any more, just use `alias` without worrying about it.
            - if you were expecting `alias` to copy data, then explicitly call
              `.clone` before calling `alias`.

        Arguments:
            name: The new name.

        Returns:
            A new Series with the updated name.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series([1, 2, 3], name="foo")
            >>> nw.from_native(s_native, series_only=True).alias("bar").to_native()
            0    1
            1    2
            2    3
            Name: bar, dtype: int64
        """
        # Pure delegation; `rename` below is an alias for this method.
        return self._with_compliant(self._compliant_series.alias(name=name))
+
+ def rename(self, name: str) -> Self:
+ """Rename the Series.
+
+ Alias for `Series.alias()`.
+
+ Notes:
+ This method is very cheap, but does not guarantee that data
+ will be copied. For example:
+
+ ```python
+ s1: nw.Series
+ s2 = s1.rename("foo")
+ arr = s2.to_numpy()
+ arr[0] = 999
+ ```
+
+ may (depending on the backend, and on the version) result in
+ `s1`'s data being modified. We recommend:
+
+ - if you need to rename an object and don't need the original
+ one around any more, just use `rename` without worrying about it.
+ - if you were expecting `rename` to copy data, then explicitly call
+ `.clone` before calling `rename`.
+
+ Arguments:
+ name: The new name.
+
+ Returns:
+ A new Series with the updated name.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series("foo", [1, 2, 3])
+ >>> s = nw.from_native(s_native, series_only=True)
+ >>> s.rename("bar").to_native() # doctest: +NORMALIZE_WHITESPACE
+ shape: (3,)
+ Series: 'bar' [i64]
+ [
+ 1
+ 2
+ 3
+ ]
+ """
+ return self.alias(name=name)
+
+ def replace_strict(
+ self,
+ old: Sequence[Any] | Mapping[Any, Any],
+ new: Sequence[Any] | None = None,
+ *,
+ return_dtype: IntoDType | None = None,
+ ) -> Self:
+ """Replace all values by different values.
+
+ This function must replace all non-null input values (else it raises an error).
+
+ Arguments:
+ old: Sequence of values to replace. It also accepts a mapping of values to
+ their replacement as syntactic sugar for
+ `replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.
+ new: Sequence of values to replace by. Length must match the length of `old`.
+ return_dtype: The data type of the resulting expression. If set to `None`
+ (default), the data type is determined automatically based on the other
+ inputs.
+
+ Returns:
+ A new Series with values replaced according to the mapping.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([3, 0, 1, 2], name="a")
+ >>> nw.from_native(s_native, series_only=True).replace_strict(
+ ... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String
+ ... ).to_native()
+ 0 three
+ 1 zero
+ 2 one
+ 3 two
+ Name: a, dtype: object
+ """
+ if new is None:
+ if not isinstance(old, Mapping):
+ msg = "`new` argument is required if `old` argument is not a Mapping type"
+ raise TypeError(msg)
+
+ new = list(old.values())
+ old = list(old.keys())
+
+ return self._with_compliant(
+ self._compliant_series.replace_strict(old, new, return_dtype=return_dtype)
+ )
+
    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self:
        """Sort this Series. Place null values first.

        Arguments:
            descending: Sort in descending order.
            nulls_last: Place null values last instead of first.

        Returns:
            A new sorted Series.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series([5, None, 1, 2])
            >>> s = nw.from_native(s_native, series_only=True)
            >>> s.sort(descending=True).to_native()  # doctest: +NORMALIZE_WHITESPACE
            shape: (4,)
            Series: '' [i64]
            [
               null
               5
               2
               1
            ]
        """
        # Both flags are keyword-only and forwarded verbatim to the backend.
        return self._with_compliant(
            self._compliant_series.sort(descending=descending, nulls_last=nulls_last)
        )
+
    def is_null(self) -> Self:
        """Returns a boolean Series indicating which values are null.

        Notes:
            pandas handles null values differently from Polars and PyArrow.
            See [null_handling](../concepts/null_handling.md/)
            for reference.

        Returns:
            A boolean Series indicating which values are null.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[1, 2, None]])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).is_null().to_native()  # doctest:+ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                false,
                false,
                true
              ]
            ]
        """
        # Pure delegation to the backend implementation.
        return self._with_compliant(self._compliant_series.is_null())
+
    def is_nan(self) -> Self:
        """Returns a boolean Series indicating which values are NaN.

        Returns:
            A boolean Series indicating which values are NaN.

        Notes:
            pandas handles null values differently from Polars and PyArrow.
            See [null_handling](../concepts/null_handling.md/)
            for reference.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series([0.0, None, 2.0], dtype="Float64")
            >>> nw.from_native(s_native, series_only=True).is_nan().to_native()
            0    False
            1     <NA>
            2    False
            dtype: boolean
        """
        # NaN is distinct from null in nullable backends (see doctest: null
        # stays <NA> rather than becoming True/False).
        return self._with_compliant(self._compliant_series.is_nan())
+
+ def fill_null(
+ self,
+ value: Self | NonNestedLiteral = None,
+ strategy: FillNullStrategy | None = None,
+ limit: int | None = None,
+ ) -> Self:
+ """Fill null values using the specified value.
+
+ Arguments:
+ value: Value used to fill null values.
+ strategy: Strategy used to fill null values.
+ limit: Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.
+
+ Notes:
+ pandas handles null values differently from Polars and PyArrow.
+ See [null_handling](../concepts/null_handling.md/)
+ for reference.
+
+ Returns:
+ A new Series with null values filled according to the specified value or strategy.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1, 2, None])
+ >>>
+ >>> nw.from_native(s_native, series_only=True).fill_null(5).to_native()
+ 0 1.0
+ 1 2.0
+ 2 5.0
+ dtype: float64
+
+ Or using a strategy:
+
+ >>> nw.from_native(s_native, series_only=True).fill_null(
+ ... strategy="forward", limit=1
+ ... ).to_native()
+ 0 1.0
+ 1 2.0
+ 2 2.0
+ dtype: float64
+ """
+ if value is not None and strategy is not None:
+ msg = "cannot specify both `value` and `strategy`"
+ raise ValueError(msg)
+ if value is None and strategy is None:
+ msg = "must specify either a fill `value` or `strategy`"
+ raise ValueError(msg)
+ if strategy is not None and strategy not in {"forward", "backward"}:
+ msg = f"strategy not supported: {strategy}"
+ raise ValueError(msg)
+ return self._with_compliant(
+ self._compliant_series.fill_null(
+ value=self._extract_native(value), strategy=strategy, limit=limit
+ )
+ )
+
    def is_between(
        self,
        lower_bound: Any | Self,
        upper_bound: Any | Self,
        closed: ClosedInterval = "both",
    ) -> Self:
        """Get a boolean mask of the values that are between the given lower/upper bounds.

        Arguments:
            lower_bound: Lower bound value.
            upper_bound: Upper bound value.
            closed: Define which sides of the interval are closed (inclusive).

        Notes:
            If the value of the `lower_bound` is greater than that of the `upper_bound`,
            then the values will be False, as no value can satisfy the condition.

        Returns:
            A boolean Series indicating which values are between the given bounds.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[1, 2, 3, 4, 5]])
            >>> s = nw.from_native(s_native, series_only=True)
            >>> s.is_between(2, 4, "right").to_native()  # doctest: +ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                false,
                false,
                true,
                true,
                false
              ]
            ]
        """
        # Bounds may be scalars or Series; narwhals Series are unwrapped to
        # their native counterparts before delegation.
        return self._with_compliant(
            self._compliant_series.is_between(
                self._extract_native(lower_bound),
                self._extract_native(upper_bound),
                closed=closed,
            )
        )
+
    def n_unique(self) -> int:
        """Count the number of unique values.

        Returns:
            Number of unique values in the Series.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series([1, 2, 2, 3])
            >>> nw.from_native(s_native, series_only=True).n_unique()
            3
        """
        # Returns a plain Python int, not a Series.
        return self._compliant_series.n_unique()
+
    def to_numpy(self) -> _1DArray:
        """Convert to numpy.

        Returns:
            NumPy ndarray representation of the Series.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series([1, 2, 3], name="a")
            >>> nw.from_native(s_native, series_only=True).to_numpy()
            array([1, 2, 3]...)
        """
        # dtype=None and copy=None let the backend pick its defaults.
        return self._compliant_series.to_numpy(None, copy=None)
+
    def to_pandas(self) -> pd.Series[Any]:
        """Convert to pandas Series.

        Returns:
            A pandas Series containing the data from this Series.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series("a", [1, 2, 3])
            >>> nw.from_native(s_native, series_only=True).to_pandas()
            0    1
            1    2
            2    3
            Name: a, dtype: int64
        """
        # Pure delegation; returns the native pandas object, not a narwhals Series.
        return self._compliant_series.to_pandas()
+
    def to_polars(self) -> pl.Series:
        """Convert to polars Series.

        Returns:
            A polars Series containing the data from this Series.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[1, 2, 3]])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).to_polars()  # doctest: +NORMALIZE_WHITESPACE
            shape: (3,)
            Series: '' [i64]
            [
               1
               2
               3
            ]
        """
        # Pure delegation; returns the native polars object, not a narwhals Series.
        return self._compliant_series.to_polars()
+
    def __add__(self, other: object) -> Self:
        """Element-wise `self + other`; narwhals objects in `other` are unwrapped first."""
        return self._with_compliant(
            self._compliant_series.__add__(self._extract_native(other))
        )
+
    def __radd__(self, other: object) -> Self:
        """Reflected addition `other + self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__radd__(self._extract_native(other))
        )
+
    def __sub__(self, other: object) -> Self:
        """Element-wise `self - other`; narwhals objects in `other` are unwrapped first."""
        return self._with_compliant(
            self._compliant_series.__sub__(self._extract_native(other))
        )
+
    def __rsub__(self, other: object) -> Self:
        """Reflected subtraction `other - self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rsub__(self._extract_native(other))
        )
+
    def __mul__(self, other: object) -> Self:
        """Element-wise `self * other`; narwhals objects in `other` are unwrapped first."""
        return self._with_compliant(
            self._compliant_series.__mul__(self._extract_native(other))
        )
+
    def __rmul__(self, other: object) -> Self:
        """Reflected multiplication `other * self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rmul__(self._extract_native(other))
        )
+
    def __truediv__(self, other: object) -> Self:
        """Element-wise true division `self / other`."""
        return self._with_compliant(
            self._compliant_series.__truediv__(self._extract_native(other))
        )
+
    def __rtruediv__(self, other: object) -> Self:
        """Reflected true division `other / self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rtruediv__(self._extract_native(other))
        )
+
    def __floordiv__(self, other: object) -> Self:
        """Element-wise floor division `self // other`."""
        return self._with_compliant(
            self._compliant_series.__floordiv__(self._extract_native(other))
        )
+
    def __rfloordiv__(self, other: object) -> Self:
        """Reflected floor division `other // self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rfloordiv__(self._extract_native(other))
        )
+
    def __pow__(self, other: object) -> Self:
        """Element-wise exponentiation `self ** other`."""
        return self._with_compliant(
            self._compliant_series.__pow__(self._extract_native(other))
        )
+
    def __rpow__(self, other: object) -> Self:
        """Reflected exponentiation `other ** self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rpow__(self._extract_native(other))
        )
+
    def __mod__(self, other: object) -> Self:
        """Element-wise modulo `self % other`."""
        return self._with_compliant(
            self._compliant_series.__mod__(self._extract_native(other))
        )
+
    def __rmod__(self, other: object) -> Self:
        """Reflected modulo `other % self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rmod__(self._extract_native(other))
        )
+
    def __eq__(self, other: object) -> Self:  # type: ignore[override]
        """Element-wise equality `self == other`; returns a boolean Series, not a bool."""
        return self._with_compliant(
            self._compliant_series.__eq__(self._extract_native(other))
        )
+
    def __ne__(self, other: object) -> Self:  # type: ignore[override]
        """Element-wise inequality `self != other`; returns a boolean Series, not a bool."""
        return self._with_compliant(
            self._compliant_series.__ne__(self._extract_native(other))
        )
+
    def __gt__(self, other: Any) -> Self:
        """Element-wise comparison `self > other`, returning a boolean Series."""
        return self._with_compliant(
            self._compliant_series.__gt__(self._extract_native(other))
        )
+
    def __ge__(self, other: Any) -> Self:
        """Element-wise comparison `self >= other`, returning a boolean Series."""
        return self._with_compliant(
            self._compliant_series.__ge__(self._extract_native(other))
        )
+
    def __lt__(self, other: Any) -> Self:
        """Element-wise comparison `self < other`, returning a boolean Series."""
        return self._with_compliant(
            self._compliant_series.__lt__(self._extract_native(other))
        )
+
    def __le__(self, other: Any) -> Self:
        """Element-wise comparison `self <= other`, returning a boolean Series."""
        return self._with_compliant(
            self._compliant_series.__le__(self._extract_native(other))
        )
+
    def __and__(self, other: Any) -> Self:
        """Element-wise conjunction `self & other`."""
        return self._with_compliant(
            self._compliant_series.__and__(self._extract_native(other))
        )
+
    def __rand__(self, other: Any) -> Self:
        """Reflected conjunction `other & self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__rand__(self._extract_native(other))
        )
+
    def __or__(self, other: Any) -> Self:
        """Element-wise disjunction `self | other`."""
        return self._with_compliant(
            self._compliant_series.__or__(self._extract_native(other))
        )
+
    def __ror__(self, other: Any) -> Self:
        """Reflected disjunction `other | self`, delegated to the backend series."""
        return self._with_compliant(
            self._compliant_series.__ror__(self._extract_native(other))
        )
+
    # unary
    def __invert__(self) -> Self:
        """Element-wise negation `~self` of a boolean Series."""
        return self._with_compliant(self._compliant_series.__invert__())
+
+ def filter(self, predicate: Any) -> Self:
+ """Filter elements in the Series based on a condition.
+
+ Returns:
+ A new Series with elements that satisfy the condition.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([4, 10, 15, 34, 50])
+ >>> s_nw = nw.from_native(s_native, series_only=True)
+ >>> s_nw.filter(s_nw > 10).to_native()
+ 2 15
+ 3 34
+ 4 50
+ dtype: int64
+ """
+ return self._with_compliant(
+ self._compliant_series.filter(self._extract_native(predicate))
+ )
+
+ # --- descriptive ---
+ def is_duplicated(self) -> Self:
+ r"""Get a mask of all duplicated rows in the Series.
+
+ Returns:
+ A new Series with boolean values indicating duplicated rows.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pa.chunked_array([[1, 2, 3, 1]])
+ >>> nw.from_native(
+ ... s_native, series_only=True
+ ... ).is_duplicated().to_native() # doctest: +ELLIPSIS
+ <pyarrow.lib.ChunkedArray object at ...>
+ [
+ [
+ true,
+ false,
+ false,
+ true
+ ]
+ ]
+ """
+ return ~self.is_unique()
+
+ def is_empty(self) -> bool:
+ r"""Check if the series is empty.
+
+ Returns:
+ A boolean indicating if the series is empty.
+
+ Examples:
+ >>> import polars as pl
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pl.Series([1, 2, 3])
+ >>> s_nw = nw.from_native(s_native, series_only=True)
+
+ >>> s_nw.is_empty()
+ False
+ >>> s_nw.filter(s_nw > 10).is_empty()
+ True
+ """
+ return self._compliant_series.len() == 0
+
    def is_unique(self) -> Self:
        r"""Get a mask of all unique rows in the Series.

        Returns:
            A new Series with boolean values indicating unique rows.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series([1, 2, 3, 1])
            >>> nw.from_native(s_native, series_only=True).is_unique().to_native()
            0    False
            1     True
            2     True
            3    False
            dtype: bool
        """
        # Pure delegation; `is_duplicated` is defined as its inverse.
        return self._with_compliant(self._compliant_series.is_unique())
+
    def null_count(self) -> int:
        r"""Count the number of null values.

        Notes:
            pandas handles null values differently from Polars and PyArrow.
            See [null_handling](../concepts/null_handling.md/)
            for reference.

        Returns:
            The number of null values in the Series.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[1, None, None]])
            >>> nw.from_native(s_native, series_only=True).null_count()
            2
        """
        # Returns a plain Python int, not a Series.
        return self._compliant_series.null_count()
+
    def is_first_distinct(self) -> Self:
        r"""Return a boolean mask indicating the first occurrence of each distinct value.

        Returns:
            A new Series with boolean values indicating the first occurrence of each distinct value.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series([1, 1, 2, 3, 2])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).is_first_distinct().to_native()  # doctest: +NORMALIZE_WHITESPACE
            shape: (5,)
            Series: '' [bool]
            [
                true
                false
                true
                true
                false
            ]
        """
        # Pure delegation to the backend implementation.
        return self._with_compliant(self._compliant_series.is_first_distinct())
+
    def is_last_distinct(self) -> Self:
        r"""Return a boolean mask indicating the last occurrence of each distinct value.

        Returns:
            A new Series with boolean values indicating the last occurrence of each distinct value.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series([1, 1, 2, 3, 2])
            >>> nw.from_native(s_native, series_only=True).is_last_distinct().to_native()
            0    False
            1     True
            2    False
            3     True
            4     True
            dtype: bool
        """
        # Pure delegation to the backend implementation.
        return self._with_compliant(self._compliant_series.is_last_distinct())
+
    def is_sorted(self, *, descending: bool = False) -> bool:
        r"""Check if the Series is sorted.

        Arguments:
            descending: Check if the Series is sorted in descending order.

        Returns:
            A boolean indicating if the Series is sorted.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[3, 2, 1]])
            >>> s_nw = nw.from_native(s_native, series_only=True)

            >>> s_nw.is_sorted(descending=False)
            False

            >>> s_nw.is_sorted(descending=True)
            True
        """
        # Returns a plain Python bool, not a Series.
        return self._compliant_series.is_sorted(descending=descending)
+
+ def value_counts(
+ self,
+ *,
+ sort: bool = False,
+ parallel: bool = False,
+ name: str | None = None,
+ normalize: bool = False,
+ ) -> DataFrame[Any]:
+ r"""Count the occurrences of unique values.
+
+ Arguments:
+ sort: Sort the output by count in descending order. If set to False (default),
+ the order of the output is random.
+ parallel: Execute the computation in parallel. Used for Polars only.
+ name: Give the resulting count column a specific name; if `normalize` is True
+ defaults to "proportion", otherwise defaults to "count".
+ normalize: If true gives relative frequencies of the unique values
+
+ Returns:
+ A DataFrame with two columns
+
+ - The original values as first column
+ - Either count or proportion as second column, depending on normalize parameter.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1, 1, 2, 3, 2], name="s")
+ >>> nw.from_native(s_native, series_only=True).value_counts(
+ ... sort=True
+ ... ).to_native()
+ s count
+ 0 1 2
+ 1 2 2
+ 2 3 1
+ """
+ return self._dataframe(
+ self._compliant_series.value_counts(
+ sort=sort, parallel=parallel, name=name, normalize=normalize
+ ),
+ level=self._level,
+ )
+
    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> float:
        """Get quantile value of the series.

        Note:
            pandas and Polars may have implementation differences for a given interpolation method.

        Arguments:
            quantile: Quantile between 0.0 and 1.0.
            interpolation: Interpolation method.

        Returns:
            The quantile value.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series(list(range(50)))
            >>> s_nw = nw.from_native(s_native, series_only=True)
            >>> [
            ...     s_nw.quantile(quantile=q, interpolation="nearest")
            ...     for q in (0.1, 0.25, 0.5, 0.75, 0.9)
            ... ]
            [5.0, 12.0, 25.0, 37.0, 44.0]
        """
        # Returns a scalar, not a Series.
        return self._compliant_series.quantile(
            quantile=quantile, interpolation=interpolation
        )
+
+ def zip_with(self, mask: Self, other: Self) -> Self:
+ """Take values from self or other based on the given mask.
+
+ Where mask evaluates true, take values from self. Where mask evaluates false,
+ take values from other.
+
+ Arguments:
+ mask: Boolean Series
+ other: Series of same type.
+
+ Returns:
+ A new Series with values selected from self or other based on the mask.
+
+ Examples:
+ >>> import pyarrow as pa
+ >>> import narwhals as nw
+ >>> data_native = pa.chunked_array([[1, 2, 3, 4, 5]])
+ >>> other_native = pa.chunked_array([[5, 4, 3, 2, 1]])
+ >>> mask_native = pa.chunked_array([[True, False, True, False, True]])
+ >>>
+ >>> data_nw = nw.from_native(data_native, series_only=True)
+ >>> other_nw = nw.from_native(other_native, series_only=True)
+ >>> mask_nw = nw.from_native(mask_native, series_only=True)
+ >>>
+ >>> data_nw.zip_with(mask_nw, other_nw).to_native() # doctest: +ELLIPSIS
+ <pyarrow.lib.ChunkedArray object at ...>
+ [
+ [
+ 1,
+ 4,
+ 3,
+ 2,
+ 5
+ ]
+ ]
+ """
+ return self._with_compliant(
+ self._compliant_series.zip_with(
+ self._extract_native(mask), self._extract_native(other)
+ )
+ )
+
    def item(self, index: int | None = None) -> Any:
        r"""Return the Series as a scalar, or return the element at the given index.

        If no index is provided, this is equivalent to `s[0]`, with a check
        that the shape is (1,). With an index, this is equivalent to `s[index]`.

        Returns:
            The scalar value of the Series or the element at the given index.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> nw.from_native(pl.Series("a", [1]), series_only=True).item()
            1

            >>> nw.from_native(pl.Series("a", [9, 8, 7]), series_only=True).item(-1)
            7
        """
        # Returns a scalar, not a Series; negative indices count from the end
        # (see doctest).
        return self._compliant_series.item(index=index)
+
    def head(self, n: int = 10) -> Self:
        r"""Get the first `n` rows.

        Arguments:
            n: Number of rows to return.

        Returns:
            A new Series containing the first n rows.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>>
            >>> s_native = pd.Series(list(range(10)))
            >>> nw.from_native(s_native, series_only=True).head(3).to_native()
            0    0
            1    1
            2    2
            dtype: int64
        """
        # Pure delegation; default matches `tail`'s default of 10.
        return self._with_compliant(self._compliant_series.head(n))
+
    def tail(self, n: int = 10) -> Self:
        r"""Get the last `n` rows.

        Arguments:
            n: Number of rows to return.

        Returns:
            A new Series with the last n rows.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([list(range(10))])
            >>> s = nw.from_native(s_native, series_only=True)
            >>> s.tail(3).to_native()  # doctest: +ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                7,
                8,
                9
              ]
            ]
        """
        # Pure delegation; default matches `head`'s default of 10.
        return self._with_compliant(self._compliant_series.tail(n))
+
    def round(self, decimals: int = 0) -> Self:
        r"""Round underlying floating point data by `decimals` digits.

        Arguments:
            decimals: Number of decimals to round by.

        Returns:
            A new Series with rounded values.

        Notes:
            For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.

            pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and
            4.5 to 4.0, etc..).

            Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..).

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series([1.12345, 2.56789, 3.901234])
            >>> s = nw.from_native(s_native, series_only=True)
            >>> s.round(1).to_native()  # doctest: +NORMALIZE_WHITESPACE
            shape: (3,)
            Series: '' [f64]
            [
               1.1
               2.6
               3.9
            ]
        """
        # Tie-breaking differs per backend; see Notes above.
        return self._with_compliant(self._compliant_series.round(decimals))
+
+ def to_dummies(
+ self, *, separator: str = "_", drop_first: bool = False
+ ) -> DataFrame[Any]:
+ r"""Get dummy/indicator variables.
+
+ Arguments:
+ separator: Separator/delimiter used when generating column names.
+ drop_first: Remove the first category from the variable being encoded.
+
+ Returns:
+ A new DataFrame containing the dummy/indicator variables.
+
+ Notes:
+ pandas and Polars handle null values differently. Polars distinguishes
+ between NaN and Null, whereas pandas doesn't.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import narwhals as nw
+ >>>
+ >>> s_native = pd.Series([1, 2, 3], name="a")
+ >>> s_nw = nw.from_native(s_native, series_only=True)
+
+ >>> s_nw.to_dummies(drop_first=False).to_native()
+ a_1 a_2 a_3
+ 0 1 0 0
+ 1 0 1 0
+ 2 0 0 1
+
+ >>> s_nw.to_dummies(drop_first=True).to_native()
+ a_2 a_3
+ 0 0 0
+ 1 1 0
+ 2 0 1
+ """
+ return self._dataframe(
+ self._compliant_series.to_dummies(separator=separator, drop_first=drop_first),
+ level=self._level,
+ )
+
    def gather_every(self, n: int, offset: int = 0) -> Self:
        r"""Take every nth value in the Series and return as new Series.

        Arguments:
            n: Gather every *n*-th row.
            offset: Starting index.

        Returns:
            A new Series with every nth value starting from the offset.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[1, 2, 3, 4]])
            >>> nw.from_native(s_native, series_only=True).gather_every(
            ...     n=2, offset=1
            ... ).to_native()  # doctest:+ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                2,
                4
              ]
            ]
        """
        # Pure delegation to the backend implementation.
        return self._with_compliant(
            self._compliant_series.gather_every(n=n, offset=offset)
        )
+
    def to_arrow(self) -> pa.Array[Any]:
        r"""Convert to arrow.

        Returns:
            A PyArrow Array containing the data from the Series.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series([1, 2, 3, 4])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).to_arrow()  # doctest:+NORMALIZE_WHITESPACE
            <pyarrow.lib.Int64Array object at ...>
            [
                1,
                2,
                3,
                4
            ]
        """
        # Pure delegation; returns the native pyarrow object, not a narwhals Series.
        return self._compliant_series.to_arrow()
+
    def mode(self) -> Self:
        r"""Compute the most occurring value(s).

        Can return multiple values.

        Returns:
            A new Series containing the mode(s) (values that appear most frequently).

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> s_native = pd.Series([1, 1, 2, 2, 3])
            >>> nw.from_native(s_native, series_only=True).mode().sort().to_native()
            0    1
            1    2
            dtype: int64
        """
        # Result order is backend-defined; the doctest sorts for determinism.
        return self._with_compliant(self._compliant_series.mode())
+
    def is_finite(self) -> Self:
        """Returns a boolean Series indicating which values are finite.

        Warning:
            Different backend handle null values differently. `is_finite` will return
            False for NaN and Null's in the Dask and pandas non-nullable backend, while
            for Polars, PyArrow and pandas nullable backends null values are kept as such.

        Returns:
            Expression of `Boolean` data type.

        Examples:
            >>> import pyarrow as pa
            >>> import narwhals as nw
            >>>
            >>> s_native = pa.chunked_array([[float("nan"), float("inf"), 2.0, None]])
            >>> nw.from_native(
            ...     s_native, series_only=True
            ... ).is_finite().to_native()  # doctest: +ELLIPSIS
            <pyarrow.lib.ChunkedArray object at ...>
            [
              [
                false,
                false,
                true,
                null
              ]
            ]
        """
        # Null handling is backend-dependent; see Warning above.
        return self._with_compliant(self._compliant_series.is_finite())
+
    def cum_count(self, *, reverse: bool = False) -> Self:
        r"""Return the cumulative count of the non-null values in the series.

        Arguments:
            reverse: reverse the operation

        Returns:
            A new Series with the cumulative count of non-null values.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>>
            >>> s_native = pl.Series(["x", "k", None, "d"])
            >>> nw.from_native(s_native, series_only=True).cum_count(
            ...     reverse=True
            ... ).to_native()  # doctest:+NORMALIZE_WHITESPACE
            shape: (4,)
            Series: '' [u32]
            [
               3
               2
               1
               1
            ]
        """
        # `reverse` is forwarded keyword-only to the backend.
        return self._with_compliant(self._compliant_series.cum_count(reverse=reverse))
+
def cum_min(self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative minimum of non-null values.

    Arguments:
        reverse: Accumulate from the end instead of the start.

    Returns:
        A new Series with the running minimum.
    """
    compliant = self._compliant_series.cum_min(reverse=reverse)
    return self._with_compliant(compliant)
+
def cum_max(self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative maximum of non-null values.

    Arguments:
        reverse: Accumulate from the end instead of the start.

    Returns:
        A new Series with the running maximum.
    """
    compliant = self._compliant_series.cum_max(reverse=reverse)
    return self._with_compliant(compliant)
+
def cum_prod(self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative product of non-null values.

    Arguments:
        reverse: Accumulate from the end instead of the start.

    Returns:
        A new Series with the running product.
    """
    compliant = self._compliant_series.cum_prod(reverse=reverse)
    return self._with_compliant(compliant)
+
def rolling_sum(
    self, window_size: int, *, min_samples: int | None = None, center: bool = False
) -> Self:
    """Apply a rolling (moving) sum over the values.

    The window at each row covers the row itself plus the `window_size - 1`
    elements before it.

    Arguments:
        window_size: Strictly positive window length, in elements.
        min_samples: Minimum number of non-null values a window needs before
            producing a result. `None` (default) means `window_size`; if given
            it must be a strictly positive integer not exceeding `window_size`.
        center: Set the labels at the center of the window.

    Returns:
        A new series.
    """
    size, samples = _validate_rolling_arguments(
        window_size=window_size, min_samples=min_samples
    )

    # Nothing to aggregate over an empty series.
    if not len(self):  # pragma: no cover
        return self

    compliant = self._compliant_series.rolling_sum(
        window_size=size, min_samples=samples, center=center
    )
    return self._with_compliant(compliant)
+
def rolling_mean(
    self, window_size: int, *, min_samples: int | None = None, center: bool = False
) -> Self:
    """Apply a rolling (moving) mean over the values.

    The window at each row covers the row itself plus the `window_size - 1`
    elements before it.

    Arguments:
        window_size: Strictly positive window length, in elements.
        min_samples: Minimum number of non-null values a window needs before
            producing a result. `None` (default) means `window_size`; if given
            it must be a strictly positive integer not exceeding `window_size`.
        center: Set the labels at the center of the window.

    Returns:
        A new series.
    """
    size, samples = _validate_rolling_arguments(
        window_size=window_size, min_samples=min_samples
    )

    # Nothing to aggregate over an empty series.
    if not len(self):  # pragma: no cover
        return self

    compliant = self._compliant_series.rolling_mean(
        window_size=size, min_samples=samples, center=center
    )
    return self._with_compliant(compliant)
+
def rolling_var(
    self,
    window_size: int,
    *,
    min_samples: int | None = None,
    center: bool = False,
    ddof: int = 1,
) -> Self:
    """Apply a rolling (moving) variance over the values.

    The window at each row covers the row itself plus the `window_size - 1`
    elements before it.

    Arguments:
        window_size: Strictly positive window length, in elements.
        min_samples: Minimum number of non-null values a window needs before
            producing a result. `None` (default) means `window_size`; if given
            it must be a strictly positive integer not exceeding `window_size`.
        center: Set the labels at the center of the window.
        ddof: Delta Degrees of Freedom; a length-N window divides by N - ddof.

    Returns:
        A new series.
    """
    size, samples = _validate_rolling_arguments(
        window_size=window_size, min_samples=min_samples
    )

    # Nothing to aggregate over an empty series.
    if not len(self):  # pragma: no cover
        return self

    compliant = self._compliant_series.rolling_var(
        window_size=size, min_samples=samples, center=center, ddof=ddof
    )
    return self._with_compliant(compliant)
+
def rolling_std(
    self,
    window_size: int,
    *,
    min_samples: int | None = None,
    center: bool = False,
    ddof: int = 1,
) -> Self:
    """Apply a rolling (moving) standard deviation over the values.

    The window at each row covers the row itself plus the `window_size - 1`
    elements before it.

    Arguments:
        window_size: Strictly positive window length, in elements.
        min_samples: Minimum number of non-null values a window needs before
            producing a result. `None` (default) means `window_size`; if given
            it must be a strictly positive integer not exceeding `window_size`.
        center: Set the labels at the center of the window.
        ddof: Delta Degrees of Freedom; a length-N window divides by N - ddof.

    Returns:
        A new series.
    """
    size, samples = _validate_rolling_arguments(
        window_size=window_size, min_samples=min_samples
    )

    # Nothing to aggregate over an empty series.
    if not len(self):  # pragma: no cover
        return self

    compliant = self._compliant_series.rolling_std(
        window_size=size, min_samples=samples, center=center, ddof=ddof
    )
    return self._with_compliant(compliant)
+
def __iter__(self) -> Iterator[Any]:
    # Iterating the compliant series directly is equivalent to calling
    # its __iter__ explicitly.
    yield from self._compliant_series
+
def __contains__(self, other: Any) -> bool:
    # `in` on the compliant series dispatches to its __contains__.
    return other in self._compliant_series
+
def rank(self, method: RankMethod = "average", *, descending: bool = False) -> Self:
    """Assign ranks to data, dealing with ties appropriately.

    Notes:
        The resulting dtype may differ between backends.

    Arguments:
        method: How tied elements are ranked (default 'average'):

            - *"average"*: each tied value gets the mean of the ranks the
              group would have received.
            - *"min"*: each tied value gets the smallest rank of the group
              (also known as "competition" ranking).
            - *"max"*: each tied value gets the largest rank of the group.
            - *"dense"*: like "min", but the next distinct value's rank
              follows immediately after the tied group's rank.
            - *"ordinal"*: every value gets a distinct rank, in order of
              appearance in the Series.

        descending: Rank in descending order.

    Returns:
        A new series with rank data as values.

    Raises:
        ValueError: If `method` is not a supported ranking method.
    """
    # Validate eagerly so an unsupported method fails with a clear message
    # instead of surfacing as a backend-specific error.
    if method not in {"average", "min", "max", "dense", "ordinal"}:
        msg = (
            "Ranking method must be one of {'average', 'min', 'max', 'dense', 'ordinal'}. "
            f"Found '{method}'"
        )
        raise ValueError(msg)

    compliant = self._compliant_series.rank(method=method, descending=descending)
    return self._with_compliant(compliant)
+
def hist(
    self,
    bins: list[float | int] | None = None,
    *,
    bin_count: int | None = None,
    include_breakpoint: bool = True,
) -> DataFrame[Any]:
    """Bin values into buckets and count their occurrences.

    Warning:
        This functionality is considered **unstable**. It may be changed at any point
        without it being considered a breaking change.

    Arguments:
        bins: A monotonically increasing sequence of bin edges.
        bin_count: Number of bins to derive when `bins` is not given.
        include_breakpoint: Include a column that shows the intervals as categories.

    Returns:
        A new DataFrame with the per-bin counts.

    Raises:
        ComputeError: If both `bins` and `bin_count` are given, or `bins`
            is not strictly increasing.
    """
    if bins is not None:
        if bin_count is not None:
            msg = "can only provide one of `bin_count` or `bins`"
            raise ComputeError(msg)
        # Strictly increasing check over consecutive pairs.
        if any(lo >= hi for lo, hi in zip(bins, bins[1:])):
            msg = "bins must increase monotonically"
            raise ComputeError(msg)
    elif bin_count is None:
        bin_count = 10  # polars (v1.20) sets bin=10 if neither are provided.

    compliant = self._compliant_series.hist(
        bins=bins, bin_count=bin_count, include_breakpoint=include_breakpoint
    )
    return self._dataframe(compliant, level=self._level)
+
def log(self, base: float = math.e) -> Self:
    r"""Compute the logarithm of each value to a given base.

    Arguments:
        base: Logarithm base; defaults to `e` (natural log).

    Returns:
        A new series.
    """
    compliant = self._compliant_series.log(base=base)
    return self._with_compliant(compliant)
+
def exp(self) -> Self:
    r"""Compute the exponential (e raised to each value).

    Returns:
        A new series.
    """
    compliant = self._compliant_series.exp()
    return self._with_compliant(compliant)
+
@property
def str(self) -> SeriesStringNamespace[Self]:
    """Accessor for string operations on this Series."""
    return SeriesStringNamespace(self)
+
@property
def dt(self) -> SeriesDateTimeNamespace[Self]:
    """Accessor for date/time operations on this Series."""
    return SeriesDateTimeNamespace(self)
+
@property
def cat(self) -> SeriesCatNamespace[Self]:
    """Accessor for categorical operations on this Series."""
    return SeriesCatNamespace(self)
+
@property
def list(self) -> SeriesListNamespace[Self]:
    """Accessor for list operations on this Series."""
    return SeriesListNamespace(self)
+
@property
def struct(self) -> SeriesStructNamespace[Self]:
    """Accessor for struct operations on this Series."""
    return SeriesStructNamespace(self)
diff --git a/venv/lib/python3.8/site-packages/narwhals/series_cat.py b/venv/lib/python3.8/site-packages/narwhals/series_cat.py
new file mode 100644
index 0000000..cb976d4
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/series_cat.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Generic
+
+from narwhals.typing import SeriesT
+
+
class SeriesCatNamespace(Generic[SeriesT]):
    """Categorical helper namespace, reached through `Series.cat`."""

    def __init__(self, series: SeriesT) -> None:
        # Keep a handle on the parent Series so results can be re-wrapped.
        self._narwhals_series = series

    def get_categories(self) -> SeriesT:
        """Return the unique categories of a categorical Series.

        Returns:
            A new Series containing one row per category.
        """
        parent = self._narwhals_series
        compliant = parent._compliant_series.cat.get_categories()
        return parent._with_compliant(compliant)
diff --git a/venv/lib/python3.8/site-packages/narwhals/series_dt.py b/venv/lib/python3.8/site-packages/narwhals/series_dt.py
new file mode 100644
index 0000000..23f86f7
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/series_dt.py
@@ -0,0 +1,683 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generic
+
+from narwhals.typing import SeriesT
+
+if TYPE_CHECKING:
+ from narwhals.typing import TimeUnit
+
+
class SeriesDateTimeNamespace(Generic[SeriesT]):
    """Datetime helper namespace, reached through `Series.dt`.

    Every method forwards to the `dt` namespace of the backing compliant
    series and re-wraps the result as a narwhals Series.
    """

    def __init__(self, series: SeriesT) -> None:
        # Keep a handle on the parent Series so results can be re-wrapped.
        self._narwhals_series = series

    def _wrap(self, compliant) -> SeriesT:
        # Re-wrap a compliant-level result as a narwhals Series.
        return self._narwhals_series._with_compliant(compliant)

    @property
    def _dt(self):
        # Shorthand for the compliant series' own datetime namespace.
        return self._narwhals_series._compliant_series.dt

    def date(self) -> SeriesT:
        """Extract the date portion of each datetime value.

        Returns:
            A new Series with the date component only.

        Raises:
            NotImplementedError: If the pandas default backend is being used.
        """
        return self._wrap(self._dt.date())

    def year(self) -> SeriesT:
        """Extract the year component of each datetime value."""
        return self._wrap(self._dt.year())

    def month(self) -> SeriesT:
        """Extract the month component of each datetime value."""
        return self._wrap(self._dt.month())

    def day(self) -> SeriesT:
        """Extract the day-of-month component of each datetime value."""
        return self._wrap(self._dt.day())

    def hour(self) -> SeriesT:
        """Extract the hour component of each datetime value."""
        return self._wrap(self._dt.hour())

    def minute(self) -> SeriesT:
        """Extract the minute component of each datetime value."""
        return self._wrap(self._dt.minute())

    def second(self) -> SeriesT:
        """Extract the second component of each datetime value."""
        return self._wrap(self._dt.second())

    def millisecond(self) -> SeriesT:
        """Extract the millisecond component of each datetime value."""
        return self._wrap(self._dt.millisecond())

    def microsecond(self) -> SeriesT:
        """Extract the microsecond component of each datetime value."""
        return self._wrap(self._dt.microsecond())

    def nanosecond(self) -> SeriesT:
        """Extract the nanosecond component of each datetime value."""
        return self._wrap(self._dt.nanosecond())

    def ordinal_day(self) -> SeriesT:
        """Return the ordinal day (day of year) of each datetime value."""
        return self._wrap(self._dt.ordinal_day())

    def weekday(self) -> SeriesT:
        """Return the ISO weekday of each value (Monday = 1, Sunday = 7)."""
        return self._wrap(self._dt.weekday())

    def total_minutes(self) -> SeriesT:
        """Return the total minutes of each timedelta value.

        Notes:
            Integer dtype by default, but pandas may switch to float when
            values are missing; consider `fill_null()` in that case.
        """
        return self._wrap(self._dt.total_minutes())

    def total_seconds(self) -> SeriesT:
        """Return the total seconds of each timedelta value.

        Notes:
            Integer dtype by default, but pandas may switch to float when
            values are missing; consider `fill_null()` in that case.
        """
        return self._wrap(self._dt.total_seconds())

    def total_milliseconds(self) -> SeriesT:
        """Return the total milliseconds of each timedelta value.

        Notes:
            Integer dtype by default, but pandas may switch to float when
            values are missing; consider `fill_null()` in that case.
        """
        return self._wrap(self._dt.total_milliseconds())

    def total_microseconds(self) -> SeriesT:
        """Return the total microseconds of each timedelta value.

        Notes:
            Integer dtype by default, but pandas may switch to float when
            values are missing; consider `fill_null()` in that case.
        """
        return self._wrap(self._dt.total_microseconds())

    def total_nanoseconds(self) -> SeriesT:
        """Return the total nanoseconds of each timedelta value.

        Notes:
            Integer dtype by default, but pandas may switch to float when
            values are missing; consider `fill_null()` in that case.
        """
        return self._wrap(self._dt.total_nanoseconds())

    def to_string(self, format: str) -> SeriesT:
        """Format a Date/Time/Datetime Series as a String Series.

        Arguments:
            format: Format string for converting the datetime to string.

        Returns:
            A new Series of formatted strings.

        Notes:
            Libraries interpret format directives differently: Chrono
            (used by Polars) writes fractional seconds as `"%.f"` where
            pandas and the Python stdlib use `".%f"`, and PyArrow reads
            `"%S"` as seconds *including* fractions. To compensate,
            `"%S.%f"` is replaced with `"%S%.f"` for pandas-like backends
            and with `"%S"` for PyArrow. For the most portable output use
            `"%Y-%m-%dT%H:%M:%S%.f"` for datetimes and `"%Y-%m-%d"` for
            dates, though trailing-zero counts may still vary per backend.
        """
        return self._wrap(self._dt.to_string(format))

    def replace_time_zone(self, time_zone: str | None) -> SeriesT:
        """Replace the time zone.

        Arguments:
            time_zone: Target time zone.

        Returns:
            A new Series with the specified time zone.
        """
        return self._wrap(self._dt.replace_time_zone(time_zone))

    def convert_time_zone(self, time_zone: str) -> SeriesT:
        """Convert to another time zone.

        A time-zone-naive input is converted as if it were UTC.

        Arguments:
            time_zone: Target time zone; must not be `None`.

        Returns:
            A new Series with the specified time zone.

        Raises:
            TypeError: If `time_zone` is `None`.
        """
        if time_zone is None:
            msg = "Target `time_zone` cannot be `None` in `convert_time_zone`. Please use `replace_time_zone(None)` if you want to remove the time zone."
            raise TypeError(msg)
        return self._wrap(self._dt.convert_time_zone(time_zone))

    def timestamp(self, time_unit: TimeUnit) -> SeriesT:
        """Return a timestamp in the given time unit.

        Arguments:
            time_unit: One of 'ns' (nanosecond), 'us' (microsecond) or
                'ms' (millisecond).

        Returns:
            A new Series with timestamps in the specified time unit.

        Raises:
            ValueError: If `time_unit` is not one of the supported units.
        """
        if time_unit in {"ns", "us", "ms"}:
            return self._wrap(self._dt.timestamp(time_unit))
        msg = (
            "invalid `time_unit`"
            f"\n\nExpected one of {{'ns', 'us', 'ms'}}, got {time_unit!r}."
        )
        raise ValueError(msg)

    def truncate(self, every: str) -> SeriesT:
        """Divide the date/datetime range into buckets.

        Arguments:
            every: Bucket length of the form `<multiple><unit>`, where
                `multiple` is a positive integer and `unit` is one of
                'ns', 'us', 'ms', 's', 'm', 'h', 'd', 'mo', 'q' or 'y'.

        Returns:
            Series of data type `Date` or `Datetime`.
        """
        return self._wrap(self._dt.truncate(every))
diff --git a/venv/lib/python3.8/site-packages/narwhals/series_list.py b/venv/lib/python3.8/site-packages/narwhals/series_list.py
new file mode 100644
index 0000000..258535f
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/series_list.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from typing import Generic
+
+from narwhals.typing import SeriesT
+
+
+class SeriesListNamespace(Generic[SeriesT]):
+    """List-dtype operations for a narwhals Series (reached as ``s.list`` in the examples).
+
+    Each method delegates to the wrapped compliant (backend-specific) series
+    and re-wraps the result as a narwhals Series.
+    """
+
+    def __init__(self, series: SeriesT) -> None:
+        # The narwhals-level Series this namespace operates on.
+        self._narwhals_series = series
+
+    def len(self) -> SeriesT:
+        """Return the number of elements in each list.
+
+        Null values count towards the total.
+
+        Returns:
+            A new series.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> s_native = pa.chunked_array([[[1, 2], [3, 4, None], None, []]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.list.len().to_native()  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                2,
+                3,
+                null,
+                0
+              ]
+            ]
+        """
+        # Delegate to the compliant (backend) series and re-wrap the result.
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.list.len()
+        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/series_str.py b/venv/lib/python3.8/site-packages/narwhals/series_str.py
new file mode 100644
index 0000000..82d50f5
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/series_str.py
@@ -0,0 +1,400 @@
+from __future__ import annotations
+
+from typing import Generic
+
+from narwhals.typing import SeriesT
+
+
+class SeriesStringNamespace(Generic[SeriesT]):
+    """String operations for a narwhals Series (reached as ``s.str`` in the examples).
+
+    Each method delegates to the wrapped compliant (backend-specific) series
+    and re-wraps the result, so exact behaviour follows the underlying backend.
+    """
+
+    def __init__(self, series: SeriesT) -> None:
+        # The narwhals-level Series this namespace operates on.
+        self._narwhals_series = series
+
+    def len_chars(self) -> SeriesT:
+        r"""Return the length of each string as the number of characters.
+
+        Returns:
+            A new Series containing the length of each string in characters.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series(["foo", "345", None])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.len_chars().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (3,)
+            Series: '' [u32]
+            [
+               3
+               3
+               null
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.len_chars()
+        )
+
+    def replace(
+        self, pattern: str, value: str, *, literal: bool = False, n: int = 1
+    ) -> SeriesT:
+        r"""Replace first matching regex/literal substring with a new string value.
+
+        Arguments:
+            pattern: A valid regular expression pattern.
+            value: String that will replace the matched substring.
+            literal: Treat `pattern` as a literal string.
+            n: Number of matches to replace.
+
+        Returns:
+            A new Series with the regex/literal pattern replaced with the specified value.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["123abc", "abc abc123"])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.replace("abc", "").to_native()
+            0           123
+            1        abc123
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.replace(
+                pattern, value, literal=literal, n=n
+            )
+        )
+
+    def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> SeriesT:
+        r"""Replace all matching regex/literal substring with a new string value.
+
+        Arguments:
+            pattern: A valid regular expression pattern.
+            value: String that will replace the matched substring.
+            literal: Treat `pattern` as a literal string.
+
+        Returns:
+            A new Series with all occurrences of pattern replaced with the specified value.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["123abc", "abc abc123"])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.replace_all("abc", "").to_native()
+            0     123
+            1     123
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.replace_all(
+                pattern, value, literal=literal
+            )
+        )
+
+    def strip_chars(self, characters: str | None = None) -> SeriesT:
+        r"""Remove leading and trailing characters.
+
+        Arguments:
+            characters: The set of characters to be removed. All combinations of this set of characters will be stripped from the start and end of the string. If set to None (default), all leading and trailing whitespace is removed instead.
+
+        Returns:
+            A new Series with leading and trailing characters removed.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series(["apple", "\nmango"])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.strip_chars().to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [str]
+            [
+               "apple"
+               "mango"
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.strip_chars(characters)
+        )
+
+    def starts_with(self, prefix: str) -> SeriesT:
+        r"""Check if string values start with a substring.
+
+        Arguments:
+            prefix: prefix substring
+
+        Returns:
+            A new Series with boolean values indicating if each string starts with the prefix.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["apple", "mango", None])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.starts_with("app").to_native()
+            0     True
+            1    False
+            2     None
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.starts_with(prefix)
+        )
+
+    def ends_with(self, suffix: str) -> SeriesT:
+        r"""Check if string values end with a substring.
+
+        Arguments:
+            suffix: suffix substring
+
+        Returns:
+            A new Series with boolean values indicating if each string ends with the suffix.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["apple", "mango", None])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.ends_with("ngo").to_native()
+            0    False
+            1     True
+            2     None
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.ends_with(suffix)
+        )
+
+    def contains(self, pattern: str, *, literal: bool = False) -> SeriesT:
+        r"""Check if string contains a substring that matches a pattern.
+
+        Arguments:
+            pattern: A character sequence or valid regular expression pattern.
+            literal: If True, treats the pattern as a literal string.
+                If False, assumes the pattern is a regular expression.
+
+        Returns:
+            A new Series with boolean values indicating if each string contains the pattern.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> s_native = pa.chunked_array([["cat", "dog", "rabbit and parrot"]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.contains("cat|parrot").to_native()  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                true,
+                false,
+                true
+              ]
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.contains(pattern, literal=literal)
+        )
+
+    def slice(self, offset: int, length: int | None = None) -> SeriesT:
+        r"""Create subslices of the string values of a Series.
+
+        Arguments:
+            offset: Start index. Negative indexing is supported.
+            length: Length of the slice. If set to `None` (default), the slice is taken to the
+                end of the string.
+
+        Returns:
+            A new Series containing subslices of each string.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["pear", None, "papaya"])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.slice(4, 3).to_native()  # doctest: +NORMALIZE_WHITESPACE
+            0
+            1    None
+            2      ya
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.slice(
+                offset=offset, length=length
+            )
+        )
+
+    def split(self, by: str) -> SeriesT:
+        r"""Split the string values of a Series by a substring.
+
+        Arguments:
+            by: Substring to split by.
+
+        Returns:
+            A new Series containing lists of strings.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series(["foo bar", "foo_bar"])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.split("_").to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [list[str]]
+            [
+               ["foo bar"]
+               ["foo", "bar"]
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.split(by=by)
+        )
+
+    def head(self, n: int = 5) -> SeriesT:
+        r"""Take the first n elements of each string.
+
+        Arguments:
+            n: Number of elements to take. Negative indexing is supported (see note (1.))
+
+        Returns:
+            A new Series containing the first n characters of each string.
+
+        Notes:
+            1. When the `n` input is negative, `head` returns characters up to the n-th from the end of the string.
+                For example, if `n = -3`, then all characters except the last three are returned.
+            2. If the length of the string has fewer than `n` characters, the full string is returned.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> s_native = pa.chunked_array([["taata", "taatatata", "zukkyun"]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.head().to_native()  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                "taata",
+                "taata",
+                "zukky"
+              ]
+            ]
+        """
+        # Implemented in terms of `slice`: the first `n` characters.
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.slice(offset=0, length=n)
+        )
+
+    def tail(self, n: int = 5) -> SeriesT:
+        r"""Take the last n elements of each string.
+
+        Arguments:
+            n: Number of elements to take. Negative indexing is supported (see note (1.))
+
+        Returns:
+            A new Series containing the last n characters of each string.
+
+        Notes:
+            1. When the `n` input is negative, `tail` returns characters starting from the n-th from the beginning of
+                the string. For example, if `n = -3`, then all characters except the first three are returned.
+            2. If the length of the string has fewer than `n` characters, the full string is returned.
+
+        Examples:
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> s_native = pa.chunked_array([["taata", "taatatata", "zukkyun"]])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.tail().to_native()  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                "taata",
+                "atata",
+                "kkyun"
+              ]
+            ]
+        """
+        # Implemented in terms of `slice`: offset -n takes the last `n` characters.
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.slice(offset=-n, length=None)
+        )
+
+    def to_uppercase(self) -> SeriesT:
+        r"""Transform string to uppercase variant.
+
+        Returns:
+            A new Series with values converted to uppercase.
+
+        Notes:
+            The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'.
+            For more info see: https://github.com/apache/arrow/issues/34599
+            There may be other unicode-edge-case-related variations across implementations.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["apple", None])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.to_uppercase().to_native()
+            0    APPLE
+            1     None
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.to_uppercase()
+        )
+
+    def to_lowercase(self) -> SeriesT:
+        r"""Transform string to lowercase variant.
+
+        Returns:
+            A new Series with values converted to lowercase.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import narwhals as nw
+            >>> s_native = pd.Series(["APPLE", None])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.to_lowercase().to_native()
+            0    apple
+            1     None
+            dtype: object
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.to_lowercase()
+        )
+
+    def to_datetime(self, format: str | None = None) -> SeriesT:
+        """Parse Series with strings to a Series with Datetime dtype.
+
+        Notes:
+            - pandas defaults to nanosecond time unit, Polars to microsecond.
+                Prior to pandas 2.0, nanoseconds were the only time unit supported
+                in pandas, with no ability to set any other one. The ability to
+                set the time unit in pandas, if the version permits, will arrive.
+            - timezone-aware strings are all converted to and parsed as UTC.
+
+        Warning:
+            As different backends auto-infer format in different ways, if `format=None`
+            there is no guarantee that the result will be equal.
+
+        Arguments:
+            format: Format to use for conversion. If set to None (default), the format is
+                inferred from the data.
+
+        Returns:
+            A new Series with datetime dtype.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series(["2020-01-01", "2020-01-02"])
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.str.to_datetime(
+            ...     format="%Y-%m-%d"
+            ... ).to_native()  # doctest: +NORMALIZE_WHITESPACE
+            shape: (2,)
+            Series: '' [datetime[μs]]
+            [
+               2020-01-01 00:00:00
+               2020-01-02 00:00:00
+            ]
+        """
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.str.to_datetime(format=format)
+        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/series_struct.py b/venv/lib/python3.8/site-packages/narwhals/series_struct.py
new file mode 100644
index 0000000..6f92e7b
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/series_struct.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import Generic
+
+from narwhals.typing import SeriesT
+
+
+class SeriesStructNamespace(Generic[SeriesT]):
+    """Struct-dtype operations for a narwhals Series (reached as ``s.struct`` in the examples).
+
+    Each method delegates to the wrapped compliant (backend-specific) series
+    and re-wraps the result as a narwhals Series.
+    """
+
+    def __init__(self, series: SeriesT) -> None:
+        # The narwhals-level Series this namespace operates on.
+        self._narwhals_series = series
+
+    def field(self, name: str) -> SeriesT:
+        r"""Retrieve a Struct field as a new expression.
+
+        Arguments:
+            name: Name of the struct field to retrieve.
+
+        Returns:
+            A new Series.
+
+        Examples:
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> s_native = pl.Series(
+            ...     [{"id": "0", "name": "john"}, {"id": "1", "name": "jane"}]
+            ... )
+            >>> s = nw.from_native(s_native, series_only=True)
+            >>> s.struct.field("name").to_list()
+            ['john', 'jane']
+        """
+        # Delegate to the compliant (backend) series and re-wrap the result.
+        return self._narwhals_series._with_compliant(
+            self._narwhals_series._compliant_series.struct.field(name)
+        )
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/__init__.py b/venv/lib/python3.8/site-packages/narwhals/stable/__init__.py
new file mode 100644
index 0000000..60bc872
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/__init__.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+from narwhals.stable import v1
+
+__all__ = ["v1"]
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py
new file mode 100644
index 0000000..3259be1
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py
@@ -0,0 +1,1875 @@
+from __future__ import annotations
+
+from functools import wraps
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Iterable,
+ Literal,
+ Mapping,
+ Sequence,
+ cast,
+ overload,
+)
+from warnings import warn
+
+import narwhals as nw
+from narwhals import dependencies, exceptions, functions as nw_f, selectors
+from narwhals._typing_compat import TypeVar
+from narwhals._utils import (
+ Implementation,
+ Version,
+ deprecate_native_namespace,
+ find_stacklevel,
+ generate_temporary_column_name,
+ inherit_doc,
+ is_ordered_categorical,
+ maybe_align_index,
+ maybe_convert_dtypes,
+ maybe_get_index,
+ maybe_reset_index,
+ maybe_set_index,
+ validate_strict_and_pass_though,
+)
+from narwhals.dataframe import DataFrame as NwDataFrame, LazyFrame as NwLazyFrame
+from narwhals.dependencies import get_polars
+from narwhals.exceptions import InvalidIntoExprError
+from narwhals.expr import Expr as NwExpr
+from narwhals.functions import _new_series_impl, concat, get_level, show_versions
+from narwhals.schema import Schema as NwSchema
+from narwhals.series import Series as NwSeries
+from narwhals.stable.v1 import dtypes
+from narwhals.stable.v1.dtypes import (
+ Array,
+ Binary,
+ Boolean,
+ Categorical,
+ Date,
+ Datetime,
+ Decimal,
+ Duration,
+ Enum,
+ Field,
+ Float32,
+ Float64,
+ Int8,
+ Int16,
+ Int32,
+ Int64,
+ Int128,
+ List,
+ Object,
+ String,
+ Struct,
+ Time,
+ UInt8,
+ UInt16,
+ UInt32,
+ UInt64,
+ UInt128,
+ Unknown,
+)
+from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar
+from narwhals.typing import IntoDataFrameT, IntoFrameT
+
+if TYPE_CHECKING:
+ from types import ModuleType
+
+ from typing_extensions import ParamSpec, Self
+
+ from narwhals._translate import IntoArrowTable
+ from narwhals.dataframe import MultiColSelector, MultiIndexSelector
+ from narwhals.dtypes import DType
+ from narwhals.typing import (
+ IntoDType,
+ IntoExpr,
+ IntoFrame,
+ IntoLazyFrameT,
+ IntoSeries,
+ NonNestedLiteral,
+ SingleColSelector,
+ SingleIndexSelector,
+ _1DArray,
+ _2DArray,
+ )
+
+ DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
+ LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")
+ SeriesT = TypeVar("SeriesT", bound="Series[Any]")
+ T = TypeVar("T", default=Any)
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any)
+
+
+class DataFrame(NwDataFrame[IntoDataFrameT]):
+    """``narwhals.stable.v1`` DataFrame, a thin subclass of ``narwhals.DataFrame``.
+
+    Only methods whose results are not ``Self`` are overridden, so their
+    return values are re-wrapped in the stable-v1 classes.
+    """
+
+    @inherit_doc(NwDataFrame)
+    def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
+        # Stable-v1 wrappers may only wrap V1-versioned compliant frames.
+        assert df._version is Version.V1  # noqa: S101
+        super().__init__(df, level=level)
+
+    # We need to override any method which don't return Self so that type
+    # annotations are correct.
+
+    @property
+    def _series(self) -> type[Series[Any]]:
+        # Class hook used by the base class when constructing Series results.
+        return cast("type[Series[Any]]", Series)
+
+    @property
+    def _lazyframe(self) -> type[LazyFrame[Any]]:
+        # Class hook used by the base class when constructing LazyFrame results.
+        return cast("type[LazyFrame[Any]]", LazyFrame)
+
+    @overload
+    def __getitem__(self, item: tuple[SingleIndexSelector, SingleColSelector]) -> Any: ...
+
+    @overload
+    def __getitem__(  # type: ignore[overload-overlap]
+        self, item: str | tuple[MultiIndexSelector, SingleColSelector]
+    ) -> Series[Any]: ...
+
+    @overload
+    def __getitem__(
+        self,
+        item: (
+            SingleIndexSelector
+            | MultiIndexSelector
+            | MultiColSelector
+            | tuple[SingleIndexSelector, MultiColSelector]
+            | tuple[MultiIndexSelector, MultiColSelector]
+        ),
+    ) -> Self: ...
+    def __getitem__(
+        self,
+        item: (
+            SingleIndexSelector
+            | SingleColSelector
+            | MultiColSelector
+            | MultiIndexSelector
+            | tuple[SingleIndexSelector, SingleColSelector]
+            | tuple[SingleIndexSelector, MultiColSelector]
+            | tuple[MultiIndexSelector, SingleColSelector]
+            | tuple[MultiIndexSelector, MultiColSelector]
+        ),
+    ) -> Series[Any] | Self | Any:
+        # Overloads above only refine typing; behaviour is inherited unchanged.
+        return super().__getitem__(item)
+
+    def lazy(
+        self, backend: ModuleType | Implementation | str | None = None
+    ) -> LazyFrame[Any]:
+        # Re-wrap so callers get a stable-v1 LazyFrame.
+        return _stableify(super().lazy(backend=backend))
+
+    @overload  # type: ignore[override]
+    def to_dict(self, *, as_series: Literal[True] = ...) -> dict[str, Series[Any]]: ...
+    @overload
+    def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
+    @overload
+    def to_dict(
+        self, *, as_series: bool
+    ) -> dict[str, Series[Any]] | dict[str, list[Any]]: ...
+    def to_dict(
+        self, *, as_series: bool = True
+    ) -> dict[str, Series[Any]] | dict[str, list[Any]]:
+        # Type checkers complain that `nw.Series` is not assignable to `nw.v1.stable.Series`.
+        # However the return type actually is `nw.v1.stable.Series`, check `tests/v1_test.py::test_to_dict_as_series`.
+        return super().to_dict(as_series=as_series)  # type: ignore[return-value]
+
+    def is_duplicated(self) -> Series[Any]:
+        # Re-wrap so callers get a stable-v1 Series.
+        return _stableify(super().is_duplicated())
+
+    def is_unique(self) -> Series[Any]:
+        # Re-wrap so callers get a stable-v1 Series.
+        return _stableify(super().is_unique())
+
+    def _l1_norm(self) -> Self:
+        """Private, just used to test the stable API.
+
+        Returns:
+            A new DataFrame.
+        """
+        return self.select(all()._l1_norm())
+
+
+class LazyFrame(NwLazyFrame[IntoFrameT]):
+    """``narwhals.stable.v1`` LazyFrame, a thin subclass of ``narwhals.LazyFrame``.
+
+    Overrides re-wrap results in stable-v1 classes and keep some methods
+    (e.g. ``tail``, ``gather_every``) that are restricted after v1.
+    """
+
+    @inherit_doc(NwLazyFrame)
+    def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
+        # Stable-v1 wrappers may only wrap V1-versioned compliant frames.
+        assert df._version is Version.V1  # noqa: S101
+        super().__init__(df, level=level)
+
+    @property
+    def _dataframe(self) -> type[DataFrame[Any]]:
+        # Class hook used by the base class when constructing DataFrame results.
+        return DataFrame
+
+    def _extract_compliant(self, arg: Any) -> Any:
+        # After v1, we raise when passing order-dependent or length-changing
+        # expressions to LazyFrame
+        from narwhals.dataframe import BaseFrame
+        from narwhals.expr import Expr
+        from narwhals.series import Series
+
+        if isinstance(arg, BaseFrame):
+            return arg._compliant_frame
+        if isinstance(arg, Series):  # pragma: no cover
+            msg = "Mixing Series with LazyFrame is not supported."
+            raise TypeError(msg)
+        if isinstance(arg, Expr):
+            # After stable.v1, we raise for order-dependent exprs or filtrations
+            # NOTE(review): no such check is actually performed here in v1 -
+            # the expression is compiled as-is.
+            return arg._to_compliant_expr(self.__narwhals_namespace__())
+        if isinstance(arg, str):
+            # Bare strings are shorthand for column references.
+            plx = self.__narwhals_namespace__()
+            return plx.col(arg)
+        if get_polars() is not None and "polars" in str(type(arg)):  # pragma: no cover
+            msg = (
+                f"Expected Narwhals object, got: {type(arg)}.\n\n"
+                "Perhaps you:\n"
+                "- Forgot a `nw.from_native` somewhere?\n"
+                "- Used `pl.col` instead of `nw.col`?"
+            )
+            raise TypeError(msg)
+        raise InvalidIntoExprError.from_invalid_type(type(arg))
+
+    def collect(
+        self, backend: ModuleType | Implementation | str | None = None, **kwargs: Any
+    ) -> DataFrame[Any]:
+        # Re-wrap so callers get a stable-v1 DataFrame.
+        return _stableify(super().collect(backend=backend, **kwargs))
+
+    def _l1_norm(self) -> Self:
+        """Private, just used to test the stable API.
+
+        Returns:
+            A new lazyframe.
+        """
+        return self.select(all()._l1_norm())
+
+    def tail(self, n: int = 5) -> Self:
+        r"""Get the last `n` rows.
+
+        Arguments:
+            n: Number of rows to return.
+
+        Returns:
+            A subset of the LazyFrame of shape (n, n_columns).
+        """
+        return super().tail(n)
+
+    def gather_every(self, n: int, offset: int = 0) -> Self:
+        r"""Take every nth row in the DataFrame and return as a new DataFrame.
+
+        Arguments:
+            n: Gather every *n*-th row.
+            offset: Starting index.
+
+        Returns:
+            The LazyFrame containing only the selected rows.
+        """
+        return self._with_compliant(
+            self._compliant_frame.gather_every(n=n, offset=offset)
+        )
+
+
+class Series(NwSeries[IntoSeriesT]):
+    """``narwhals.stable.v1`` Series, a thin subclass of ``narwhals.Series``.
+
+    Only methods whose results are not ``Self`` are overridden, so their
+    return values are re-wrapped in the stable-v1 classes.
+    """
+
+    @inherit_doc(NwSeries)
+    def __init__(
+        self, series: Any, *, level: Literal["full", "lazy", "interchange"]
+    ) -> None:
+        # Stable-v1 wrappers may only wrap V1-versioned compliant series.
+        assert series._version is Version.V1  # noqa: S101
+        super().__init__(series, level=level)
+
+    # We need to override any method which don't return Self so that type
+    # annotations are correct.
+
+    @property
+    def _dataframe(self) -> type[DataFrame[Any]]:
+        # Class hook used by the base class when constructing DataFrame results.
+        return DataFrame
+
+    def to_frame(self) -> DataFrame[Any]:
+        # Re-wrap so callers get a stable-v1 DataFrame.
+        return _stableify(super().to_frame())
+
+    def value_counts(
+        self,
+        *,
+        sort: bool = False,
+        parallel: bool = False,
+        name: str | None = None,
+        normalize: bool = False,
+    ) -> DataFrame[Any]:
+        return _stableify(
+            super().value_counts(
+                sort=sort, parallel=parallel, name=name, normalize=normalize
+            )
+        )
+
+    def hist(
+        self,
+        bins: list[float | int] | None = None,
+        *,
+        bin_count: int | None = None,
+        include_breakpoint: bool = True,
+    ) -> DataFrame[Any]:
+        # `hist` is considered unstable: warn callers who reach it through the
+        # stable API before delegating to the parent implementation.
+        from narwhals._utils import find_stacklevel
+        from narwhals.exceptions import NarwhalsUnstableWarning
+
+        msg = (
+            "`Series.hist` is being called from the stable API although considered "
+            "an unstable feature."
+        )
+        warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel())
+        return _stableify(
+            super().hist(
+                bins=bins, bin_count=bin_count, include_breakpoint=include_breakpoint
+            )
+        )
+
+
+class Expr(NwExpr):
+    """``narwhals.stable.v1`` Expr.
+
+    Keeps expression methods (``head``, ``tail``, ``gather_every``, ``sort``,
+    ``arg_true``, ``sample``, ...) that are order-dependent or length-changing
+    and therefore restricted after v1.
+    """
+
+    def _l1_norm(self) -> Self:
+        # Backwards-compatible private alias for `_taxicab_norm`.
+        return super()._taxicab_norm()
+
+    def head(self, n: int = 10) -> Self:
+        r"""Get the first `n` rows.
+
+        Arguments:
+            n: Number of rows to return.
+
+        Returns:
+            A new expression.
+        """
+        return self._with_orderable_filtration(
+            lambda plx: self._to_compliant_expr(plx).head(n)
+        )
+
+    def tail(self, n: int = 10) -> Self:
+        r"""Get the last `n` rows.
+
+        Arguments:
+            n: Number of rows to return.
+
+        Returns:
+            A new expression.
+        """
+        return self._with_orderable_filtration(
+            lambda plx: self._to_compliant_expr(plx).tail(n)
+        )
+
+    def gather_every(self, n: int, offset: int = 0) -> Self:
+        r"""Take every nth value in the Series and return as new Series.
+
+        Arguments:
+            n: Gather every *n*-th row.
+            offset: Starting index.
+
+        Returns:
+            A new expression.
+        """
+        return self._with_orderable_filtration(
+            lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset)
+        )
+
+    def unique(self, *, maintain_order: bool | None = None) -> Self:
+        """Return unique values of this expression.
+
+        Arguments:
+            maintain_order: Keep the same order as the original expression.
+                This is deprecated and will be removed in a future version,
+                but will still be kept around in `narwhals.stable.v1`.
+
+        Returns:
+            A new expression.
+        """
+        # `maintain_order` is accepted only for backwards compatibility; it is
+        # ignored, and passing it emits a UserWarning.
+        if maintain_order is not None:
+            msg = (
+                "`maintain_order` has no effect and is only kept around for backwards-compatibility. "
+                "You can safely remove this argument."
+            )
+            warn(message=msg, category=UserWarning, stacklevel=find_stacklevel())
+        return self._with_filtration(lambda plx: self._to_compliant_expr(plx).unique())
+
+    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self:
+        """Sort this column. Place null values first.
+
+        Arguments:
+            descending: Sort in descending order.
+            nulls_last: Place null values last instead of first.
+
+        Returns:
+            A new expression.
+        """
+        return self._with_unorderable_window(
+            lambda plx: self._to_compliant_expr(plx).sort(
+                descending=descending, nulls_last=nulls_last
+            )
+        )
+
+    def arg_true(self) -> Self:
+        """Find elements where boolean expression is True.
+
+        Returns:
+            A new expression.
+        """
+        return self._with_orderable_filtration(
+            lambda plx: self._to_compliant_expr(plx).arg_true()
+        )
+
+    def sample(
+        self,
+        n: int | None = None,
+        *,
+        fraction: float | None = None,
+        with_replacement: bool = False,
+        seed: int | None = None,
+    ) -> Self:
+        """Sample randomly from this expression.
+
+        Arguments:
+            n: Number of items to return. Cannot be used with fraction.
+            fraction: Fraction of items to return. Cannot be used with n.
+            with_replacement: Allow values to be sampled more than once.
+            seed: Seed for the random number generator. If set to None (default), a random
+                seed is generated for each sample operation.
+
+        Returns:
+            A new expression.
+        """
+        return self._with_filtration(
+            lambda plx: self._to_compliant_expr(plx).sample(
+                n, fraction=fraction, with_replacement=with_replacement, seed=seed
+            )
+        )
+
+
+class Schema(NwSchema):
+    """``narwhals.stable.v1`` Schema: identical to ``narwhals.Schema`` apart
+    from carrying the ``Version.V1`` tag.
+    """
+
+    # Version tag consulted by narwhals internals to select v1 behaviour.
+    _version = Version.V1
+
+    @inherit_doc(NwSchema)
+    def __init__(
+        self, schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None = None
+    ) -> None:
+        super().__init__(schema)
+
+
+# Overloads preserve the concrete wrapper type for each narwhals class.
+@overload
+def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ...
+@overload
+def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ...
+@overload
+def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ...
+@overload
+def _stableify(obj: NwExpr) -> Expr: ...
+
+
+def _stableify(
+    obj: NwDataFrame[IntoFrameT]
+    | NwLazyFrame[IntoFrameT]
+    | NwSeries[IntoSeriesT]
+    | NwExpr,
+) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr:
+    """Re-wrap a main-namespace narwhals object as its stable-v1 counterpart.
+
+    Frames and series have their compliant object's version tag switched to
+    ``Version.V1``; expressions are rebuilt from their factory and metadata.
+    """
+    if isinstance(obj, NwDataFrame):
+        return DataFrame(obj._compliant_frame._with_version(Version.V1), level=obj._level)
+    if isinstance(obj, NwLazyFrame):
+        return LazyFrame(obj._compliant_frame._with_version(Version.V1), level=obj._level)
+    if isinstance(obj, NwSeries):
+        return Series(obj._compliant_series._with_version(Version.V1), level=obj._level)
+    if isinstance(obj, NwExpr):
+        # Expressions carry no compliant object to re-version; reuse factory + metadata.
+        return Expr(obj._to_compliant_expr, obj._metadata)
+    msg = f"Expected DataFrame, LazyFrame, Series, or Expr, got: {type(obj)}"  # pragma: no cover
+    raise AssertionError(msg)
+
+
+@overload
+def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ...
+
+
+@overload
+def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ...
+
+
+@overload
+def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ...
+
+
+@overload
+def from_native(
+ native_object: DataFrameT | LazyFrameT, **kwds: Any
+) -> DataFrameT | LazyFrameT: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT | IntoSeriesT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: Literal[True],
+) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT | IntoSeriesT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[True],
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: Literal[True],
+) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoDataFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: T,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> T: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[True],
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoDataFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: T,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[True],
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> T: ...
+
+
+@overload
+def from_native(
+ native_object: IntoFrameT | IntoSeriesT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: Literal[True],
+) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoSeriesT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[True],
+ allow_series: None = ...,
+) -> Series[IntoSeriesT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoFrameT,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: T,
+ *,
+ strict: Literal[False],
+ eager_only: Literal[False] = ...,
+ eager_or_interchange_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> T: ...
+
+
# --- `from_native` overloads: `strict=True` (the default) — conversion
# failure raises, so the return type never includes the input type `T`. ---
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoFrame | IntoSeries,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[Any] | LazyFrame[Any] | Series[Any]: ...


@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoLazyFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> LazyFrame[IntoLazyFrameT]: ...


# NOTE: `pl.LazyFrame` originally matched here
@overload
def from_native(
    native_object: IntoFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...
+
+
# --- `from_native` overloads: `pass_through=True` — unconvertible objects
# are returned unchanged, hence the `T`-returning variants below. ---
@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeries,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: T,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: T,
    *,
    pass_through: Literal[True],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


@overload
def from_native(
    native_object: IntoFrameT | IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoFrameT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


@overload
def from_native(
    native_object: T,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...
+
+
# --- `from_native` overloads: `pass_through=False` (the default) — conversion
# failure raises, mirroring the `strict=True` group above. ---
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoFrame | IntoSeries,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[Any] | LazyFrame[Any] | Series[Any]: ...


@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoFrameT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


# All params passed in as variables
@overload
def from_native(
    native_object: Any,
    *,
    pass_through: bool,
    eager_only: bool,
    eager_or_interchange_only: bool = False,
    series_only: bool,
    allow_series: bool | None,
) -> Any: ...
+
+
def from_native(  # noqa: D417
    native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T,
    *,
    strict: bool | None = None,
    pass_through: bool | None = None,
    eager_only: bool = False,
    eager_or_interchange_only: bool = False,
    series_only: bool = False,
    allow_series: bool | None = None,
    **kwds: Any,
) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T:
    """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series.

    Arguments:
        native_object: Raw object from user.
            Depending on the other arguments, input object can be

            - a Dataframe / Lazyframe / Series supported by Narwhals (pandas, Polars, PyArrow, ...)
            - an object which implements `__narwhals_dataframe__`, `__narwhals_lazyframe__`,
                or `__narwhals_series__`
        strict: Determine what happens if the object can't be converted to Narwhals

            - `True` or `None` (default): raise an error
            - `False`: pass object through as-is

            *Deprecated* (v1.13.0)

            Please use `pass_through` instead. Note that `strict` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).
        pass_through: Determine what happens if the object can't be converted to Narwhals

            - `False` or `None` (default): raise an error
            - `True`: pass object through as-is
        eager_only: Whether to only allow eager objects

            - `False` (default): don't require `native_object` to be eager
            - `True`: only convert to Narwhals if `native_object` is eager
        eager_or_interchange_only: Whether to only allow eager objects or objects which
            have interchange-level support in Narwhals

            - `False` (default): don't require `native_object` to either be eager or to
                have interchange-level support in Narwhals
            - `True`: only convert to Narwhals if `native_object` is eager or has
                interchange-level support in Narwhals

            See [interchange-only support](../extending.md/#interchange-only-support)
            for more details.
        series_only: Whether to only allow Series

            - `False` (default): don't require `native_object` to be a Series
            - `True`: only convert to Narwhals if `native_object` is a Series
        allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)

            - `False` or `None` (default): don't convert to Narwhals if `native_object` is a Series
            - `True`: allow `native_object` to be a Series

    Returns:
        DataFrame, LazyFrame, Series, or original object, depending
        on which combination of parameters was passed.
    """
    # Early returns
    # NOTE: already-wrapped objects are returned *before* the strict/pass_through
    # validation and the unexpected-kwargs check below, so e.g. an already-wrapped
    # DataFrame short-circuits regardless of the remaining arguments.
    if isinstance(native_object, (DataFrame, LazyFrame)) and not series_only:
        return native_object
    if isinstance(native_object, Series) and (series_only or allow_series):
        return native_object

    # Reconcile the deprecated `strict` flag with `pass_through` (raising if the
    # caller supplied contradictory values); no deprecation warning in stable.v1.
    pass_through = validate_strict_and_pass_though(
        strict, pass_through, pass_through_default=False, emit_deprecation_warning=False
    )
    if kwds:
        msg = f"from_native() got an unexpected keyword argument {next(iter(kwds))!r}"
        raise TypeError(msg)

    return _from_native_impl(  # type: ignore[no-any-return]
        native_object,
        pass_through=pass_through,
        eager_only=eager_only,
        eager_or_interchange_only=eager_or_interchange_only,
        series_only=series_only,
        allow_series=allow_series,
        version=Version.V1,  # pin stable.v1 semantics for the wrapped object
    )
+
+
# --- `to_native` overloads: `strict` (deprecated) and `pass_through`
# spellings map each wrapper type back to its native parameter type. ---
@overload
def to_native(
    narwhals_object: DataFrame[IntoDataFrameT], *, strict: Literal[True] = ...
) -> IntoDataFrameT: ...
@overload
def to_native(
    narwhals_object: LazyFrame[IntoFrameT], *, strict: Literal[True] = ...
) -> IntoFrameT: ...
@overload
def to_native(
    narwhals_object: Series[IntoSeriesT], *, strict: Literal[True] = ...
) -> IntoSeriesT: ...
@overload
def to_native(narwhals_object: Any, *, strict: bool) -> Any: ...
@overload
def to_native(
    narwhals_object: DataFrame[IntoDataFrameT], *, pass_through: Literal[False] = ...
) -> IntoDataFrameT: ...
@overload
def to_native(
    narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ...
) -> IntoFrameT: ...
@overload
def to_native(
    narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ...
) -> IntoSeriesT: ...
@overload
def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ...
+
+
def to_native(
    narwhals_object: DataFrame[IntoDataFrameT]
    | LazyFrame[IntoFrameT]
    | Series[IntoSeriesT],
    *,
    strict: bool | None = None,
    pass_through: bool | None = None,
) -> IntoFrameT | IntoSeriesT | Any:
    """Convert Narwhals object to native one.

    Arguments:
        narwhals_object: Narwhals object.
        strict: Determine what happens if `narwhals_object` isn't a Narwhals class

            - `True` (default): raise an error
            - `False`: pass object through as-is

            *Deprecated* (v1.13.0)

            Please use `pass_through` instead. Note that `strict` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).
        pass_through: Determine what happens if `narwhals_object` isn't a Narwhals class

            - `False` (default): raise an error
            - `True`: pass object through as-is

    Returns:
        Object of class that user started with.
    """
    # Imported locally to avoid import cycles at module load time.
    from narwhals._utils import validate_strict_and_pass_though
    from narwhals.dataframe import BaseFrame
    from narwhals.series import Series

    pass_through = validate_strict_and_pass_though(
        strict, pass_through, pass_through_default=False, emit_deprecation_warning=False
    )

    # Unwrap frames and series; anything else is either passed through
    # (pass_through=True) or rejected.
    if isinstance(narwhals_object, BaseFrame):
        return narwhals_object._compliant_frame._native_frame
    if isinstance(narwhals_object, Series):
        return narwhals_object._compliant_series.native
    if pass_through:
        return narwhals_object
    msg = f"Expected Narwhals object, got {type(narwhals_object)}."
    raise TypeError(msg)
+
+
def narwhalify(
    func: Callable[..., Any] | None = None,
    *,
    strict: bool | None = None,
    pass_through: bool | None = None,
    eager_only: bool = False,
    eager_or_interchange_only: bool = False,
    series_only: bool = False,
    allow_series: bool | None = True,
) -> Callable[..., Any]:
    """Decorate function so it becomes dataframe-agnostic.

    This will try to convert any dataframe/series-like object into the Narwhals
    respective DataFrame/Series, while leaving the other parameters as they are.
    Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
    converted back to the original dataframe/series type, while if the output is another
    type it will be left as is.
    By setting `pass_through=False`, then every input and every output will be required to be a
    dataframe/series-like object.

    Arguments:
        func: Function to wrap in a `from_native`-`to_native` block.
        strict: Determine what happens if the object can't be converted to Narwhals

            *Deprecated* (v1.13.0)

            Please use `pass_through` instead. Note that `strict` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).

            - `True` or `None` (default): raise an error
            - `False`: pass object through as-is
        pass_through: Determine what happens if the object can't be converted to Narwhals

            - `False` or `None` (default): raise an error
            - `True`: pass object through as-is
        eager_only: Whether to only allow eager objects

            - `False` (default): don't require `native_object` to be eager
            - `True`: only convert to Narwhals if `native_object` is eager
        eager_or_interchange_only: Whether to only allow eager objects or objects which
            have interchange-level support in Narwhals

            - `False` (default): don't require `native_object` to either be eager or to
                have interchange-level support in Narwhals
            - `True`: only convert to Narwhals if `native_object` is eager or has
                interchange-level support in Narwhals

            See [interchange-only support](../extending.md/#interchange-only-support)
            for more details.
        series_only: Whether to only allow Series

            - `False` (default): don't require `native_object` to be a Series
            - `True`: only convert to Narwhals if `native_object` is a Series
        allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)

            - `False` or `None`: don't convert to Narwhals if `native_object` is a Series
            - `True` (default): allow `native_object` to be a Series

    Returns:
        Decorated function.
    """
    # NOTE: unlike `from_native`, the default here is pass_through=True, so
    # non-frame arguments flow through the wrapped function untouched.
    pass_through = validate_strict_and_pass_though(
        strict, pass_through, pass_through_default=True, emit_deprecation_warning=False
    )

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Wrap every positional and keyword argument that Narwhals recognises.
            args = [
                from_native(
                    arg,
                    pass_through=pass_through,
                    eager_only=eager_only,
                    eager_or_interchange_only=eager_or_interchange_only,
                    series_only=series_only,
                    allow_series=allow_series,
                )
                for arg in args
            ]  # type: ignore[assignment]

            kwargs = {
                name: from_native(
                    value,
                    pass_through=pass_through,
                    eager_only=eager_only,
                    eager_or_interchange_only=eager_or_interchange_only,
                    series_only=series_only,
                    allow_series=allow_series,
                )
                for name, value in kwargs.items()
            }

            # Collect the native namespace of each wrapped argument;
            # `__native_namespace__` is a method, hence the `b()` call.
            backends = {
                b()
                for v in (*args, *kwargs.values())
                if (b := getattr(v, "__native_namespace__", None))
            }

            # NOTE: `__len__()` is called directly (instead of `len(backends)`)
            # because this module shadows the builtin `len` with the narwhals
            # `len()` expression function defined later in the file.
            if backends.__len__() > 1:
                msg = "Found multiple backends. Make sure that all dataframe/series inputs come from the same backend."
                raise ValueError(msg)

            result = func(*args, **kwargs)

            return to_native(result, pass_through=pass_through)

        return wrapper

    if func is None:
        return decorator
    else:
        # func is not None: the decorator was applied without parentheses,
        # e.g. `@narwhalify` rather than `@narwhalify(...)`.
        return decorator(func)
+
+
def all() -> Expr:
    """Instantiate an expression representing all columns.

    Returns:
        A new expression.
    """
    unstable = nw.all()
    return _stableify(unstable)
+
+
def col(*names: str | Iterable[str]) -> Expr:
    """Creates an expression that references one or more columns by their name(s).

    Arguments:
        names: Name(s) of the columns to use.

    Returns:
        A new expression.
    """
    unstable = nw.col(*names)
    return _stableify(unstable)
+
+
def exclude(*names: str | Iterable[str]) -> Expr:
    """Creates an expression that excludes columns by their name(s).

    Arguments:
        names: Name(s) of the columns to exclude.

    Returns:
        A new expression.
    """
    unstable = nw.exclude(*names)
    return _stableify(unstable)
+
+
def nth(*indices: int | Sequence[int]) -> Expr:
    """Creates an expression that references one or more columns by their index(es).

    Notes:
        `nth` is not supported for Polars version<1.0.0. Please use
        [`narwhals.col`][] instead.

    Arguments:
        indices: One or more indices representing the columns to retrieve.

    Returns:
        A new expression.
    """
    unstable = nw.nth(*indices)
    return _stableify(unstable)
+
+
def len() -> Expr:
    """Return the number of rows.

    NOTE: intentionally shadows the builtin `len` within this module, matching
    the Polars-style API.

    Returns:
        A new expression.
    """
    unstable = nw.len()
    return _stableify(unstable)
+
+
def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr:
    """Return an expression representing a literal value.

    Arguments:
        value: The value to use as literal.
        dtype: The data type of the literal value. If not provided, the data type will
            be inferred by the native library.

    Returns:
        A new expression.
    """
    unstable = nw.lit(value, dtype)
    return _stableify(unstable)
+
+
def min(*columns: str) -> Expr:
    """Return the minimum value.

    Note:
        Syntactic sugar for ``nw.col(columns).min()``.

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function.

    Returns:
        A new expression.
    """
    unstable = nw.min(*columns)
    return _stableify(unstable)
+
+
def max(*columns: str) -> Expr:
    """Return the maximum value.

    Note:
        Syntactic sugar for ``nw.col(columns).max()``.

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function.

    Returns:
        A new expression.
    """
    unstable = nw.max(*columns)
    return _stableify(unstable)
+
+
def mean(*columns: str) -> Expr:
    """Get the mean value.

    Note:
        Syntactic sugar for ``nw.col(columns).mean()``

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function

    Returns:
        A new expression.
    """
    unstable = nw.mean(*columns)
    return _stableify(unstable)
+
+
def median(*columns: str) -> Expr:
    """Get the median value.

    Notes:
        - Syntactic sugar for ``nw.col(columns).median()``
        - Results might slightly differ across backends due to differences in the
          underlying algorithms used to compute the median.

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function

    Returns:
        A new expression.
    """
    unstable = nw.median(*columns)
    return _stableify(unstable)
+
+
def sum(*columns: str) -> Expr:
    """Sum all values.

    Note:
        Syntactic sugar for ``nw.col(columns).sum()``

    Arguments:
        columns: Name(s) of the columns to use in the aggregation function

    Returns:
        A new expression.
    """
    unstable = nw.sum(*columns)
    return _stableify(unstable)
+
+
def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Sum all values horizontally across columns.

    Warning:
        Unlike Polars, we support horizontal sum over numeric columns only.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.
    """
    unstable = nw.sum_horizontal(*exprs)
    return _stableify(unstable)
+
+
def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    r"""Compute the bitwise AND horizontally across columns.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.
    """
    unstable = nw.all_horizontal(*exprs)
    return _stableify(unstable)
+
+
def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    r"""Compute the bitwise OR horizontally across columns.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.
    """
    unstable = nw.any_horizontal(*exprs)
    return _stableify(unstable)
+
+
def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Compute the mean of all values horizontally across columns.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.
    """
    unstable = nw.mean_horizontal(*exprs)
    return _stableify(unstable)
+
+
def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Get the minimum value horizontally across columns.

    Notes:
        We support `min_horizontal` over numeric columns only.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.
    """
    unstable = nw.min_horizontal(*exprs)
    return _stableify(unstable)
+
+
def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
    """Get the maximum value horizontally across columns.

    Notes:
        We support `max_horizontal` over numeric columns only.

    Arguments:
        exprs: Name(s) of the columns to use in the aggregation function. Accepts
            expression input.

    Returns:
        A new expression.
    """
    unstable = nw.max_horizontal(*exprs)
    return _stableify(unstable)
+
+
def concat_str(
    exprs: IntoExpr | Iterable[IntoExpr],
    *more_exprs: IntoExpr,
    separator: str = "",
    ignore_nulls: bool = False,
) -> Expr:
    r"""Horizontally concatenate columns into a single string column.

    Arguments:
        exprs: Columns to concatenate into a single string column. Accepts expression
            input. Strings are parsed as column names, other non-expression inputs are
            parsed as literals. Non-`String` columns are cast to `String`.
        *more_exprs: Additional columns to concatenate into a single string column,
            specified as positional arguments.
        separator: String that will be used to separate the values of each column.
        ignore_nulls: Ignore null values (default is `False`).
            If set to `False`, null values will be propagated and if the row contains any
            null values, the output is null.

    Returns:
        A new expression.
    """
    unstable = nw.concat_str(
        exprs, *more_exprs, separator=separator, ignore_nulls=ignore_nulls
    )
    return _stableify(unstable)
+
+
class When(nw_f.When):
    """`stable.v1` subclass of `narwhals.functions.When`.

    Overrides `then` so that the rest of the chain stays within the
    stable.v1 wrapper classes.
    """

    @classmethod
    def from_when(cls, when: nw_f.When) -> When:
        # Re-wrap an unstable `When` by copying its predicate.
        return cls(when._predicate)

    def then(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Then:
        return Then.from_then(super().then(value))
+
+
class Then(nw_f.Then, Expr):
    """`stable.v1` subclass of `narwhals.functions.Then`.

    Also inherits from the stable `Expr` so a `then(...)` result can be used
    anywhere an expression is expected; `otherwise` returns a stable `Expr`.
    """

    @classmethod
    def from_then(cls, then: nw_f.Then) -> Then:
        # Re-wrap an unstable `Then`, preserving its compliant-expr factory and metadata.
        return cls(then._to_compliant_expr, then._metadata)

    def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr:
        return _stableify(super().otherwise(value))
+
+
def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When:
    """Start a `when-then-otherwise` expression.

    Expression similar to an `if-else` statement in Python. Always initiated by a
    `pl.when(<condition>).then(<value if condition>)`, and optionally followed by a
    `.otherwise(<value if condition is false>)` can be appended at the end. If not
    appended, and the condition is not `True`, `None` will be returned.

    Info:
        Chaining multiple `.when(<condition>).then(<value>)` statements is currently
        not supported.
        See [Narwhals#668](https://github.com/narwhals-dev/narwhals/issues/668).

    Arguments:
        predicates: Condition(s) that must be met in order to apply the subsequent
            statement. Accepts one or more boolean expressions, which are implicitly
            combined with `&`. String input is parsed as a column name.

    Returns:
        A "when" object, which `.then` can be called on.
    """
    unstable_when = nw_f.when(*predicates)
    return When.from_when(unstable_when)
+
+
@deprecate_native_namespace(required=True)
def new_series(
    name: str,
    values: Any,
    dtype: IntoDType | None = None,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
) -> Series[Any]:
    """Instantiate Narwhals Series from iterable (e.g. list or array).

    Arguments:
        name: Name of resulting Series.
        values: Values of make Series from.
        dtype: (Narwhals) dtype. If not provided, the native library
            may auto-infer it from `values`.
        backend: specifies which eager backend instantiate to. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.31.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).

    Returns:
        A new Series
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    series = _new_series_impl(
        name, values, dtype, backend=cast("ModuleType | Implementation | str", backend)
    )
    return _stableify(series)
+
+
@deprecate_native_namespace(required=True)
def from_arrow(
    native_frame: IntoArrowTable,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
) -> DataFrame[Any]:
    """Construct a DataFrame from an object which supports the PyCapsule Interface.

    Arguments:
        native_frame: Object which implements `__arrow_c_stream__`.
        backend: specifies which eager backend instantiate to. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.31.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).

    Returns:
        A new DataFrame.
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    frame = nw_f.from_arrow(
        native_frame, backend=cast("ModuleType | Implementation | str", backend)
    )
    return _stableify(frame)
+
+
@deprecate_native_namespace()
def from_dict(
    data: Mapping[str, Any],
    schema: Mapping[str, DType] | Schema | None = None,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
) -> DataFrame[Any]:
    """Instantiate DataFrame from dictionary.

    Indexes (if present, for pandas-like backends) are aligned following
    the [left-hand-rule](../concepts/pandas_index.md/).

    Notes:
        For pandas-like dataframes, conversion to schema is applied after dataframe
        creation.

    Arguments:
        data: Dictionary to create DataFrame from.
        schema: The DataFrame schema as Schema or dict of {name: type}. If not
            specified, the schema will be inferred by the native library.
        backend: specifies which eager backend instantiate to. Only
            necessary if inputs are not Narwhals Series. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.26.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).

    Returns:
        A new DataFrame.
    """
    # `backend` may legitimately be None here (inferred from Narwhals Series inputs).
    frame = nw_f.from_dict(data, schema, backend=backend)
    return _stableify(frame)
+
+
@deprecate_native_namespace(required=True)
def from_numpy(
    data: _2DArray,
    schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
) -> DataFrame[Any]:
    """Construct a DataFrame from a NumPy ndarray.

    Notes:
        Only row orientation is currently supported.

        For pandas-like dataframes, conversion to schema is applied after dataframe
        creation.

    Arguments:
        data: Two-dimensional data represented as a NumPy ndarray.
        schema: The DataFrame schema as Schema, dict of {name: type}, or a sequence of str.
        backend: specifies which eager backend instantiate to. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.31.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).

    Returns:
        A new DataFrame.
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    frame = nw_f.from_numpy(
        data, schema, backend=cast("ModuleType | Implementation | str", backend)
    )
    return _stableify(frame)
+
+
@deprecate_native_namespace(required=True)
def read_csv(
    source: str,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
    **kwargs: Any,
) -> DataFrame[Any]:
    """Read a CSV file into a DataFrame.

    Arguments:
        source: Path to a file.
        backend: The eager backend for DataFrame creation. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.27.2)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).
        kwargs: Extra keyword arguments which are passed to the native CSV reader.
            For example, you could use
            `nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.

    Returns:
        DataFrame.
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    frame = nw_f.read_csv(
        source, backend=cast("ModuleType | Implementation | str", backend), **kwargs
    )
    return _stableify(frame)
+
+
@deprecate_native_namespace(required=True)
def scan_csv(
    source: str,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
    **kwargs: Any,
) -> LazyFrame[Any]:
    """Lazily read from a CSV file.

    For the libraries that do not support lazy dataframes, the function reads
    a csv file eagerly and then converts the resulting dataframe to a lazyframe.

    Arguments:
        source: Path to a file.
        backend: The eager backend for DataFrame creation. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.31.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).
        kwargs: Extra keyword arguments which are passed to the native CSV reader.
            For example, you could use
            `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.

    Returns:
        LazyFrame.
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    frame = nw_f.scan_csv(
        source, backend=cast("ModuleType | Implementation | str", backend), **kwargs
    )
    return _stableify(frame)
+
+
@deprecate_native_namespace(required=True)
def read_parquet(
    source: str,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
    **kwargs: Any,
) -> DataFrame[Any]:
    """Read into a DataFrame from a parquet file.

    Arguments:
        source: Path to a file.
        backend: The eager backend for DataFrame creation. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`
            or `CUDF`), as the equivalent lowercase string, or directly as the
            backend module.
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.31.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).
        kwargs: Extra keyword arguments which are passed to the native parquet reader.
            For example, you could use
            `nw.read_parquet('file.parquet', backend=pd, engine='pyarrow')`.

    Returns:
        DataFrame.
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    frame = nw_f.read_parquet(
        source, backend=cast("ModuleType | Implementation | str", backend), **kwargs
    )
    return _stableify(frame)
+
+
@deprecate_native_namespace(required=True)
def scan_parquet(
    source: str,
    *,
    backend: ModuleType | Implementation | str | None = None,
    native_namespace: ModuleType | None = None,  # noqa: ARG001
    **kwargs: Any,
) -> LazyFrame[Any]:
    """Lazily read from a parquet file.

    For the libraries that do not support lazy dataframes, the function reads
    a parquet file eagerly and then converts the resulting dataframe to a lazyframe.

    Note:
        Spark like backends require a session object to be passed in `kwargs`.

        For instance:

        ```py
        import narwhals as nw
        from sqlframe.duckdb import DuckDBSession

        nw.scan_parquet(source, backend="sqlframe", session=DuckDBSession())
        ```

    Arguments:
        source: Path to a file.
        backend: The eager backend for DataFrame creation. May be given as
            `Implementation.<BACKEND>` (`PANDAS`, `PYARROW`, `POLARS`, `MODIN`,
            `CUDF`, `PYSPARK` or `SQLFRAME`), as the equivalent lowercase
            string, or directly as the backend module (`pandas`, `pyarrow`,
            `polars`, `modin`, `cudf`, `pyspark.sql` or `sqlframe`).
        native_namespace: The native library to use for DataFrame creation.

            *Deprecated* (v1.31.0)

            Please use `backend` instead. Note that `native_namespace` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).
        kwargs: Extra keyword arguments which are passed to the native parquet reader.
            For example, you could use
            `nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`.

    Returns:
        LazyFrame.
    """
    # The decorator guarantees `backend` is populated, so narrow the type here.
    frame = nw_f.scan_parquet(
        source, backend=cast("ModuleType | Implementation | str", backend), **kwargs
    )
    return _stableify(frame)
+
+
# Public API of this module, kept in ASCII sort order (classes before functions).
__all__ = [
    "Array",
    "Binary",
    "Boolean",
    "Categorical",
    "DataFrame",
    "Date",
    "Datetime",
    "Decimal",
    "Duration",
    "Enum",
    "Expr",
    "Field",
    "Float32",
    "Float64",
    "Implementation",
    "Int8",
    "Int16",
    "Int32",
    "Int64",
    "Int128",
    "LazyFrame",
    "List",
    "Object",
    "Schema",
    "Series",
    "String",
    "Struct",
    "Time",
    "UInt8",
    "UInt16",
    "UInt32",
    "UInt64",
    "UInt128",
    "Unknown",
    "all",
    "all_horizontal",
    "any_horizontal",
    "col",
    "concat",
    "concat_str",
    "dependencies",
    "dtypes",
    "exceptions",
    "exclude",
    "from_arrow",
    "from_dict",
    "from_native",
    "from_numpy",
    "generate_temporary_column_name",
    "get_level",
    "get_native_namespace",
    "is_ordered_categorical",
    "len",
    "lit",
    "max",
    "max_horizontal",
    "maybe_align_index",
    "maybe_convert_dtypes",
    "maybe_get_index",
    "maybe_reset_index",
    "maybe_set_index",
    "mean",
    "mean_horizontal",
    "median",
    "min",
    "min_horizontal",
    "narwhalify",
    "new_series",
    "nth",
    "read_csv",
    "read_parquet",
    "scan_csv",
    "scan_parquet",
    "selectors",
    "show_versions",
    "sum",
    "sum_horizontal",
    "to_native",
    "to_py_scalar",
    "when",
]
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py
new file mode 100644
index 0000000..060980c
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from narwhals._utils import inherit_doc
+from narwhals.dtypes import (
+ Array,
+ Binary,
+ Boolean,
+ Categorical,
+ Date,
+ Datetime as NwDatetime,
+ Decimal,
+ DType,
+ Duration as NwDuration,
+ Enum as NwEnum,
+ Field,
+ Float32,
+ Float64,
+ FloatType,
+ Int8,
+ Int16,
+ Int32,
+ Int64,
+ Int128,
+ IntegerType,
+ List,
+ NestedType,
+ NumericType,
+ Object,
+ SignedIntegerType,
+ String,
+ Struct,
+ Time,
+ UInt8,
+ UInt16,
+ UInt32,
+ UInt64,
+ UInt128,
+ Unknown,
+ UnsignedIntegerType,
+)
+
+if TYPE_CHECKING:
+ from datetime import timezone
+
+ from narwhals.typing import TimeUnit
+
+
+# stable.v1 variant of the main-namespace `Datetime` dtype.  The constructor
+# is inherited verbatim; the only difference is `__hash__`, which hashes by
+# class alone so every parametrization (`time_unit`, `time_zone`) hashes equal.
+class Datetime(NwDatetime):
+    @inherit_doc(NwDatetime)  # reuse the parent class's docstring unchanged
+    def __init__(
+        self, time_unit: TimeUnit = "us", time_zone: str | timezone | None = None
+    ) -> None:
+        super().__init__(time_unit, time_zone)
+
+    def __hash__(self) -> int:
+        # Deliberately ignores time_unit/time_zone: hash on the class object
+        # only, so e.g. `Datetime("ms")` and `Datetime("us")` collide in sets.
+        return hash(self.__class__)
+
+
+# stable.v1 variant of the main-namespace `Duration` dtype.  Same constructor;
+# `__hash__` is overridden to hash by class only (see `Datetime` pattern: all
+# `time_unit` parametrizations hash equal).
+class Duration(NwDuration):
+    @inherit_doc(NwDuration)  # reuse the parent class's docstring unchanged
+    def __init__(self, time_unit: TimeUnit = "us") -> None:
+        super().__init__(time_unit)
+
+    def __hash__(self) -> int:
+        # Class-only hash: ignores the `time_unit` attribute.
+        return hash(self.__class__)
+
+
+class Enum(NwEnum):
+    """A fixed categorical encoding of a unique set of strings.
+
+    Polars has an Enum data type, while pandas and PyArrow do not.
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals.stable.v1 as nw
+        >>> data = ["beluga", "narwhal", "orca"]
+        >>> s_native = pl.Series(data, dtype=pl.Enum(data))
+        >>> nw.from_native(s_native, series_only=True).dtype
+        Enum
+    """
+
+    def __init__(self) -> None:
+        # `super(NwEnum, self)` starts MRO lookup *after* NwEnum, i.e. it
+        # deliberately skips NwEnum's own __init__ and calls the next class
+        # in the MRO.  NOTE(review): presumably because the main-namespace
+        # Enum takes arguments (categories) that the v1 API does not expose
+        # -- confirm against `narwhals.dtypes.Enum`.
+        super(NwEnum, self).__init__()
+
+    def __eq__(self, other: DType | type[DType]) -> bool:  # type: ignore[override]
+        # Class-level comparison (`dtype == nw.Enum`): equal to this exact
+        # class or to the main-namespace NwEnum.
+        if type(other) is type:
+            return other in {type(self), NwEnum}
+        # Instance-level comparison: equal to any instance of this class
+        # (isinstance, so subclasses compare equal too).
+        return isinstance(other, type(self))
+
+    def __hash__(self) -> int:  # pragma: no cover
+        # Same MRO-skip as __init__: bypass NwEnum's __hash__.
+        return super(NwEnum, self).__hash__()
+
+    def __repr__(self) -> str:  # pragma: no cover
+        # Same MRO-skip as __init__: bypass NwEnum's __repr__.
+        return super(NwEnum, self).__repr__()
+
+
+__all__ = [
+ "Array",
+ "Binary",
+ "Boolean",
+ "Categorical",
+ "DType",
+ "Date",
+ "Datetime",
+ "Decimal",
+ "Duration",
+ "Enum",
+ "Field",
+ "Float32",
+ "Float64",
+ "FloatType",
+ "Int8",
+ "Int16",
+ "Int32",
+ "Int64",
+ "Int128",
+ "IntegerType",
+ "List",
+ "NestedType",
+ "NumericType",
+ "Object",
+ "SignedIntegerType",
+ "String",
+ "Struct",
+ "Time",
+ "UInt8",
+ "UInt16",
+ "UInt32",
+ "UInt64",
+ "UInt128",
+ "Unknown",
+ "UnsignedIntegerType",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py
new file mode 100644
index 0000000..8f11f01
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+from narwhals._compliant.typing import CompliantNamespaceT_co
+from narwhals._namespace import Namespace as NwNamespace
+from narwhals._utils import Version
+
+__all__ = ["Namespace"]
+
+
+# stable.v1 `Namespace`: behaviorally identical to the main implementation,
+# pinned to the V1 API via the `version=` class-definition keyword argument
+# (consumed by NwNamespace's `__init_subclass__`).
+class Namespace(NwNamespace[CompliantNamespaceT_co], version=Version.V1): ...
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py
new file mode 100644
index 0000000..ad57042
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+from narwhals.dependencies import (
+ get_cudf,
+ get_ibis,
+ get_modin,
+ get_numpy,
+ get_pandas,
+ get_polars,
+ get_pyarrow,
+ is_cudf_dataframe,
+ is_cudf_series,
+ is_dask_dataframe,
+ is_ibis_table,
+ is_into_dataframe,
+ is_into_series,
+ is_modin_dataframe,
+ is_modin_series,
+ is_narwhals_dataframe,
+ is_narwhals_lazyframe,
+ is_narwhals_series,
+ is_numpy_array,
+ is_pandas_dataframe,
+ is_pandas_index,
+ is_pandas_like_dataframe,
+ is_pandas_like_series,
+ is_pandas_series,
+ is_polars_dataframe,
+ is_polars_lazyframe,
+ is_polars_series,
+ is_pyarrow_chunked_array,
+ is_pyarrow_table,
+)
+
+__all__ = [
+ "get_cudf",
+ "get_ibis",
+ "get_modin",
+ "get_numpy",
+ "get_pandas",
+ "get_polars",
+ "get_pyarrow",
+ "is_cudf_dataframe",
+ "is_cudf_series",
+ "is_dask_dataframe",
+ "is_ibis_table",
+ "is_into_dataframe",
+ "is_into_series",
+ "is_modin_dataframe",
+ "is_modin_series",
+ "is_narwhals_dataframe",
+ "is_narwhals_lazyframe",
+ "is_narwhals_series",
+ "is_numpy_array",
+ "is_pandas_dataframe",
+ "is_pandas_index",
+ "is_pandas_like_dataframe",
+ "is_pandas_like_series",
+ "is_pandas_series",
+ "is_polars_dataframe",
+ "is_polars_lazyframe",
+ "is_polars_series",
+ "is_pyarrow_chunked_array",
+ "is_pyarrow_table",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py
new file mode 100644
index 0000000..a292be8
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from narwhals.stable.v1._dtypes import (
+ Array,
+ Binary,
+ Boolean,
+ Categorical,
+ Date,
+ Datetime,
+ Decimal,
+ DType,
+ Duration,
+ Enum,
+ Field,
+ Float32,
+ Float64,
+ FloatType,
+ Int8,
+ Int16,
+ Int32,
+ Int64,
+ Int128,
+ IntegerType,
+ List,
+ NestedType,
+ NumericType,
+ Object,
+ SignedIntegerType,
+ String,
+ Struct,
+ Time,
+ UInt8,
+ UInt16,
+ UInt32,
+ UInt64,
+ UInt128,
+ Unknown,
+ UnsignedIntegerType,
+)
+
+__all__ = [
+ "Array",
+ "Binary",
+ "Boolean",
+ "Categorical",
+ "DType",
+ "Date",
+ "Datetime",
+ "Decimal",
+ "Duration",
+ "Enum",
+ "Field",
+ "Float32",
+ "Float64",
+ "FloatType",
+ "Int8",
+ "Int16",
+ "Int32",
+ "Int64",
+ "Int128",
+ "IntegerType",
+ "List",
+ "NestedType",
+ "NumericType",
+ "Object",
+ "SignedIntegerType",
+ "String",
+ "Struct",
+ "Time",
+ "UInt8",
+ "UInt16",
+ "UInt32",
+ "UInt64",
+ "UInt128",
+ "Unknown",
+ "UnsignedIntegerType",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py
new file mode 100644
index 0000000..ede128e
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+from narwhals.selectors import (
+ all,
+ boolean,
+ by_dtype,
+ categorical,
+ datetime,
+ matches,
+ numeric,
+ string,
+)
+
+__all__ = [
+ "all",
+ "boolean",
+ "by_dtype",
+ "categorical",
+ "datetime",
+ "matches",
+ "numeric",
+ "string",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py
new file mode 100644
index 0000000..b55d13e
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py
@@ -0,0 +1,209 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union
+
+if TYPE_CHECKING:
+ import sys
+
+ from narwhals.stable.v1 import DataFrame, LazyFrame
+
+ if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+ else:
+ from typing_extensions import TypeAlias
+
+ from narwhals.stable.v1 import Expr, Series, dtypes
+
+ # All dataframes supported by Narwhals have a
+ # `columns` property. Their similarities don't extend
+ # _that_ much further unfortunately...
+ class NativeFrame(Protocol):
+ @property
+ def columns(self) -> Any: ...
+
+ def join(self, *args: Any, **kwargs: Any) -> Any: ...
+
+ class NativeSeries(Protocol):
+ def __len__(self) -> int: ...
+
+ class DataFrameLike(Protocol):
+ def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ...
+
+
+IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"]
+"""Anything which can be converted to an expression.
+
+Use this to mean "either a Narwhals expression, or something
+which can be converted into one". For example, `exprs` in `DataFrame.select` is
+typed to accept `IntoExpr`, as it can either accept a `nw.Expr`
+(e.g. `df.select(nw.col('a'))`) or a string which will be interpreted as a
+`nw.Expr`, e.g. `df.select('a')`.
+"""
+
+IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "DataFrameLike"]
+"""Anything which can be converted to a Narwhals DataFrame.
+
+Use this if your function accepts a narwhalifiable object but doesn't care about its backend.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoDataFrame
+ >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
+ ... df = nw.from_native(df_native, eager_only=True)
+ ... return df.shape
+"""
+
+IntoFrame: TypeAlias = Union[
+ "NativeFrame", "DataFrame[Any]", "LazyFrame[Any]", "DataFrameLike"
+]
+"""Anything which can be converted to a Narwhals DataFrame or LazyFrame.
+
+Use this if your function can accept an object which can be converted to either
+`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoFrame
+ >>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+ ... df = nw.from_native(df_native)
+ ... return df.collect_schema().names()
+"""
+
+Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"]
+"""Narwhals DataFrame or Narwhals LazyFrame.
+
+Use this if your function can work with either and your function doesn't care
+about its backend.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import Frame
+ >>> @nw.narwhalify
+ ... def agnostic_columns(df: Frame) -> list[str]:
+ ... return df.columns
+"""
+
+IntoSeries: TypeAlias = Union["Series[Any]", "NativeSeries"]
+"""Anything which can be converted to a Narwhals Series.
+
+Use this if your function can accept an object which can be converted to `nw.Series`
+and it doesn't care about its backend.
+
+Examples:
+ >>> from typing import Any
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoSeries
+ >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
+ ... s = nw.from_native(s_native)
+ ... return s.to_list()
+"""
+
+IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame")
+"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.
+
+Use this if your function accepts an object which is convertible to `nw.DataFrame`
+or `nw.LazyFrame` and returns an object of the same type.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoFrameT
+ >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
+ ... df = nw.from_native(df_native)
+ ... return df.with_columns(c=nw.col("a") + 1).to_native()
+"""
+
+IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame")
+"""TypeVar bound to object convertible to Narwhals DataFrame.
+
+Use this if your function accepts an object which can be converted to `nw.DataFrame`
+and returns an object of the same class.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoDataFrameT
+ >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
+ ... df = nw.from_native(df_native, eager_only=True)
+ ... return df.with_columns(c=df["a"] + 1).to_native()
+"""
+
+FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]")
+"""TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame.
+
+Use this if your function accepts either `nw.DataFrame` or `nw.LazyFrame` and returns
+an object of the same kind.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import FrameT
+ >>> @nw.narwhalify
+ ... def agnostic_func(df: FrameT) -> FrameT:
+ ... return df.with_columns(c=nw.col("a") + 1)
+"""
+
+DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
+"""TypeVar bound to Narwhals DataFrame.
+
+Use this if your function can accept a Narwhals DataFrame and returns a Narwhals
+DataFrame backed by the same backend.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import DataFrameT
+ >>> @nw.narwhalify
+    ... def func(df: DataFrameT) -> DataFrameT:
+ ... return df.with_columns(c=df["a"] + 1)
+"""
+
+IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries")
+"""TypeVar bound to object convertible to Narwhals Series.
+
+Use this if your function accepts an object which can be converted to `nw.Series`
+and returns an object of the same class.
+
+Examples:
+ >>> import narwhals as nw
+ >>> from narwhals.typing import IntoSeriesT
+ >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
+ ... s = nw.from_native(s_native, series_only=True)
+ ... return s.abs().to_native()
+"""
+
+
+class DTypes:
+    """Bundle of dtype classes, declared as class-level annotations only.
+
+    NOTE(review): there are no assignments here, only annotations -- this
+    looks like a static-typing aid describing a "namespace of dtypes"
+    shape rather than a runtime container; confirm against its users.
+    """
+
+    Int64: type[dtypes.Int64]
+    Int32: type[dtypes.Int32]
+    Int16: type[dtypes.Int16]
+    Int8: type[dtypes.Int8]
+    UInt64: type[dtypes.UInt64]
+    UInt32: type[dtypes.UInt32]
+    UInt16: type[dtypes.UInt16]
+    UInt8: type[dtypes.UInt8]
+    Float64: type[dtypes.Float64]
+    Float32: type[dtypes.Float32]
+    String: type[dtypes.String]
+    Boolean: type[dtypes.Boolean]
+    Object: type[dtypes.Object]
+    Categorical: type[dtypes.Categorical]
+    Enum: type[dtypes.Enum]
+    Datetime: type[dtypes.Datetime]
+    Duration: type[dtypes.Duration]
+    Date: type[dtypes.Date]
+    Field: type[dtypes.Field]
+    Struct: type[dtypes.Struct]
+    List: type[dtypes.List]
+    Array: type[dtypes.Array]
+    Unknown: type[dtypes.Unknown]
+
+
+__all__ = [
+ "DataFrameT",
+ "Frame",
+ "FrameT",
+ "IntoDataFrame",
+ "IntoDataFrameT",
+ "IntoExpr",
+ "IntoFrame",
+ "IntoFrameT",
+ "IntoSeries",
+ "IntoSeriesT",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/this.py b/venv/lib/python3.8/site-packages/narwhals/this.py
new file mode 100644
index 0000000..8ba7aa2
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/this.py
@@ -0,0 +1,17 @@
+# ruff: noqa
+# Easter-egg module, mirroring CPython's `this` module: importing
+# `narwhals.this` prints the "Zen of Narwhals" ASCII/braille art below.
+ZEN = """\
+⣿⣿⣿⣿⣿⠘⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ THE ZEN OF NARWHALS
+⣿⣿⣿⣿⣿⠠⢹⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ Keep it simple
+⣿⣿⣿⣿⣿⡀⡄⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ Move slowly and keep things working
+⣿⣿⣿⣿⣿⡇⡼⡘⠛⠿⠿⠿⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ A good API is an honest one
+⣿⣿⣿⡿⣫⡄⠾⣣⠹⣿⣿⣿⣶⣮⣙⠻⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ Yes, that needs documenting
+⣿⣿⢋⣴⣿⣷⣬⣭⣾⣿⣿⣿⣿⣿⣿⣿⣦⡙⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ People learn better from examples
+⣿⢃⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⡌⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ than from explanations⠀
+⡏⠀⢰⠄⢻⣿⣿⣿⣿⡿⠋⢉⠻⣿⣿⣿⣿⣿⣿⡜⣿⣿⡿⢁⢻⣿⣿⣿⣿⣿ If in doubt, better to say 'no'
+⡇⣌⣀⣠⣾⣿⣿⣿⣿⣇⠶⠉⢁⣿⣿⣿⣿⣿⣿⣧⡹⣿⡇⣿⣧⠻⠿⠿⠿⠿ than to risk causing a commotion⠀
+⡧⢹⣿⣿⣿⣜⣟⣸⣿⣿⣷⣶⣿⡿⣿⣿⣝⢿⣿⣿⣷⣬⣥⣿⣿⣿⣿⣿⡟⣰ Yes, we need a test for that
+⢡⣆⢻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣧⡙⣿⣿⡇⣿⣿⣿⣿⠟⣋⣭⣛⠻⣋⣴⣿ If you want users
+⣶⣤⣤⣙⠻⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣦⣍⣡⣿⡿⢋⣴⣿⣿⣿⣿⣿⣿⣿⣿ you need good docs⠀
+⣿⣿⣿⣿⣿⣶⣬⣙⣛⠻⠿⠿⠿⠿⠿⠟⣛⣩⣥⣶⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ Our code is not irreplaceable"""
+
+# Side effect on import, by design (like `import this` in CPython).
+print(ZEN)
diff --git a/venv/lib/python3.8/site-packages/narwhals/translate.py b/venv/lib/python3.8/site-packages/narwhals/translate.py
new file mode 100644
index 0000000..5c7db91
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/translate.py
@@ -0,0 +1,809 @@
+from __future__ import annotations
+
+import datetime as dt
+from decimal import Decimal
+from functools import wraps
+from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, overload
+
+from narwhals._namespace import (
+ is_native_arrow,
+ is_native_pandas_like,
+ is_native_polars,
+ is_native_spark_like,
+)
+from narwhals._utils import Version
+from narwhals.dependencies import (
+ get_dask,
+ get_dask_expr,
+ get_numpy,
+ get_pandas,
+ is_cupy_scalar,
+ is_dask_dataframe,
+ is_duckdb_relation,
+ is_ibis_table,
+ is_numpy_scalar,
+ is_pandas_like_dataframe,
+ is_polars_lazyframe,
+ is_polars_series,
+ is_pyarrow_scalar,
+ is_pyarrow_table,
+)
+
+if TYPE_CHECKING:
+ from narwhals.dataframe import DataFrame, LazyFrame
+ from narwhals.series import Series
+ from narwhals.typing import (
+ DataFrameT,
+ IntoDataFrameT,
+ IntoFrame,
+ IntoFrameT,
+ IntoLazyFrameT,
+ IntoSeries,
+ IntoSeriesT,
+ LazyFrameT,
+ SeriesT,
+ )
+
+T = TypeVar("T")
+
+NON_TEMPORAL_SCALAR_TYPES = (bool, bytes, str, int, float, complex, Decimal)
+TEMPORAL_SCALAR_TYPES = (dt.date, dt.timedelta, dt.time)
+
+
+@overload
+def to_native(
+ narwhals_object: DataFrame[IntoDataFrameT], *, pass_through: Literal[False] = ...
+) -> IntoDataFrameT: ...
+@overload
+def to_native(
+ narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ...
+) -> IntoFrameT: ...
+@overload
+def to_native(
+ narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ...
+) -> IntoSeriesT: ...
+@overload
+def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ...
+
+
+def to_native(
+    narwhals_object: DataFrame[IntoDataFrameT]
+    | LazyFrame[IntoFrameT]
+    | Series[IntoSeriesT],
+    *,
+    strict: bool | None = None,
+    pass_through: bool | None = None,
+) -> IntoDataFrameT | IntoFrameT | IntoSeriesT | Any:
+    """Convert Narwhals object to native one.
+
+    Arguments:
+        narwhals_object: Narwhals object.
+        strict: Determine what happens if `narwhals_object` isn't a Narwhals class
+
+            - `True` (default): raise an error
+            - `False`: pass object through as-is
+
+            *Deprecated* (v1.13.0)
+
+            Please use `pass_through` instead. Note that `strict` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+        pass_through: Determine what happens if `narwhals_object` isn't a Narwhals class
+
+            - `False` (default): raise an error
+            - `True`: pass object through as-is
+
+    Returns:
+        Object of class that user started with.
+    """
+    # Function-scope imports -- NOTE(review): presumably to avoid circular
+    # imports between `translate`, `dataframe` and `series`; confirm.
+    from narwhals._utils import validate_strict_and_pass_though
+    from narwhals.dataframe import BaseFrame
+    from narwhals.series import Series
+
+    # Reconcile the deprecated `strict` with `pass_through`
+    # (emits a deprecation warning when `strict` is used).
+    pass_through = validate_strict_and_pass_though(
+        strict, pass_through, pass_through_default=False, emit_deprecation_warning=True
+    )
+
+    # Both DataFrame and LazyFrame derive from BaseFrame, so one check
+    # covers both; Series is unwrapped via its compliant wrapper instead.
+    if isinstance(narwhals_object, BaseFrame):
+        return narwhals_object._compliant_frame._native_frame
+    if isinstance(narwhals_object, Series):
+        return narwhals_object._compliant_series.native
+
+    # Not a Narwhals object: raise unless the caller opted into pass-through.
+    if not pass_through:
+        msg = f"Expected Narwhals object, got {type(narwhals_object)}."
+        raise TypeError(msg)
+    return narwhals_object
+
+
+@overload
+def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ...
+
+
+@overload
+def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ...
+
+
+@overload
+def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT | IntoSeriesT,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: Literal[True],
+) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoDataFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: T,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> T: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoDataFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: T,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> T: ...
+
+
+@overload
+def from_native(
+ native_object: IntoFrameT | IntoLazyFrameT | IntoSeriesT,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: Literal[True],
+) -> DataFrame[IntoFrameT] | LazyFrame[IntoLazyFrameT] | Series[IntoSeriesT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoSeriesT,
+ *,
+ pass_through: Literal[True],
+ eager_only: Literal[False] = ...,
+ series_only: Literal[True],
+ allow_series: None = ...,
+) -> Series[IntoSeriesT]: ...
+
+
+# NOTE: Seems like `mypy` is giving a false positive
+# Following this advice will introduce overlapping overloads?
+# > note: Flipping the order of overloads will fix this error
+@overload
+def from_native( # type: ignore[overload-overlap]
+ native_object: IntoLazyFrameT,
+ *,
+ pass_through: Literal[False] = ...,
+ eager_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> LazyFrame[IntoLazyFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT,
+ *,
+ pass_through: Literal[False] = ...,
+ eager_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoDataFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoDataFrameT,
+ *,
+ pass_through: Literal[False] = ...,
+ eager_only: Literal[True],
+ series_only: Literal[False] = ...,
+ allow_series: None = ...,
+) -> DataFrame[IntoDataFrameT]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoFrame | IntoSeries,
+ *,
+ pass_through: Literal[False] = ...,
+ eager_only: Literal[False] = ...,
+ series_only: Literal[False] = ...,
+ allow_series: Literal[True],
+) -> DataFrame[Any] | LazyFrame[Any] | Series[Any]: ...
+
+
+@overload
+def from_native(
+ native_object: IntoSeriesT,
+ *,
+ pass_through: Literal[False] = ...,
+ eager_only: Literal[False] = ...,
+ series_only: Literal[True],
+ allow_series: None = ...,
+) -> Series[IntoSeriesT]: ...
+
+
+# All params passed in as variables
+@overload
+def from_native(
+ native_object: Any,
+ *,
+ pass_through: bool,
+ eager_only: bool,
+ series_only: bool,
+ allow_series: bool | None,
+) -> Any: ...
+
+
+def from_native(  # noqa: D417
+    native_object: IntoLazyFrameT | IntoFrameT | IntoSeriesT | IntoFrame | IntoSeries | T,
+    *,
+    strict: bool | None = None,
+    pass_through: bool | None = None,
+    eager_only: bool = False,
+    series_only: bool = False,
+    allow_series: bool | None = None,
+    **kwds: Any,
+) -> LazyFrame[IntoLazyFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T:
+    """Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series.
+
+    Arguments:
+        native_object: Raw object from user.
+            Depending on the other arguments, input object can be
+
+            - a Dataframe / Lazyframe / Series supported by Narwhals (pandas, Polars, PyArrow, ...)
+            - an object which implements `__narwhals_dataframe__`, `__narwhals_lazyframe__`,
+              or `__narwhals_series__`
+        strict: Determine what happens if the object can't be converted to Narwhals
+
+            - `True` or `None` (default): raise an error
+            - `False`: pass object through as-is
+
+            *Deprecated* (v1.13.0)
+
+            Please use `pass_through` instead. Note that `strict` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](../backcompat.md/).
+        pass_through: Determine what happens if the object can't be converted to Narwhals
+
+            - `False` or `None` (default): raise an error
+            - `True`: pass object through as-is
+        eager_only: Whether to only allow eager objects
+
+            - `False` (default): don't require `native_object` to be eager
+            - `True`: only convert to Narwhals if `native_object` is eager
+        series_only: Whether to only allow Series
+
+            - `False` (default): don't require `native_object` to be a Series
+            - `True`: only convert to Narwhals if `native_object` is a Series
+        allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)
+
+            - `False` or `None` (default): don't convert to Narwhals if `native_object` is a Series
+            - `True`: allow `native_object` to be a Series
+
+    Returns:
+        DataFrame, LazyFrame, Series, or original object, depending
+        on which combination of parameters was passed.
+    """
+    from narwhals._utils import validate_strict_and_pass_though
+
+    # Reconcile the deprecated `strict` with `pass_through` (with a
+    # deprecation warning if `strict` was passed).
+    pass_through = validate_strict_and_pass_though(
+        strict, pass_through, pass_through_default=False, emit_deprecation_warning=True
+    )
+    # `**kwds` exists only so that unexpected keyword arguments raise a
+    # uniform TypeError naming the offending keyword.
+    if kwds:
+        msg = f"from_native() got an unexpected keyword argument {next(iter(kwds))!r}"
+        raise TypeError(msg)
+
+    return _from_native_impl(  # type: ignore[no-any-return]
+        native_object,
+        pass_through=pass_through,
+        eager_only=eager_only,
+        # Hard-coded False: the interchange level is not exposed via the
+        # main namespace (only stable.v1 still supports it).
+        eager_or_interchange_only=False,
+        series_only=series_only,
+        allow_series=allow_series,
+        version=Version.MAIN,
+    )
+
+
+def _from_native_impl(  # noqa: C901, PLR0911, PLR0912, PLR0915
+    native_object: Any,
+    *,
+    pass_through: bool = False,
+    eager_only: bool = False,
+    # Interchange-level was removed after v1
+    eager_or_interchange_only: bool = False,
+    series_only: bool = False,
+    allow_series: bool | None = None,
+    version: Version,
+) -> Any:
+    """Backend-dispatch core shared by `from_native` (main and stable.v1).
+
+    Probes `native_object` against each supported backend family in turn
+    and wraps it in the matching Narwhals class.  Every rejection branch
+    has the same shape: raise `TypeError` unless `pass_through` is set,
+    in which case the object is returned unchanged.
+    """
+    from narwhals._utils import (
+        _supports_dataframe_interchange,
+        is_compliant_dataframe,
+        is_compliant_lazyframe,
+        is_compliant_series,
+        parse_version,
+    )
+    from narwhals.dataframe import DataFrame, LazyFrame
+    from narwhals.series import Series
+
+    # Early returns: objects that are already Narwhals-wrapped pass through
+    # untouched, provided they satisfy the frame/series restriction.
+    if isinstance(native_object, (DataFrame, LazyFrame)) and not series_only:
+        return native_object
+    if isinstance(native_object, Series) and (series_only or allow_series):
+        return native_object
+
+    # `series_only=True` implies `allow_series=True`; the explicit
+    # combination with `allow_series=False` is contradictory.
+    if series_only:
+        if allow_series is False:
+            msg = "Invalid parameter combination: `series_only=True` and `allow_series=False`"
+            raise ValueError(msg)
+        allow_series = True
+    if eager_only and eager_or_interchange_only:
+        msg = "Invalid parameter combination: `eager_only=True` and `eager_or_interchange_only=True`"
+        raise ValueError(msg)
+
+    # Extensions: third-party objects opting in via the `__narwhals_*__`
+    # protocol methods.
+    if is_compliant_dataframe(native_object):
+        if series_only:
+            if not pass_through:
+                msg = "Cannot only use `series_only` with dataframe"
+                raise TypeError(msg)
+            return native_object
+        return version.dataframe(
+            native_object.__narwhals_dataframe__()._with_version(version), level="full"
+        )
+    elif is_compliant_lazyframe(native_object):
+        if series_only:
+            if not pass_through:
+                msg = "Cannot only use `series_only` with lazyframe"
+                raise TypeError(msg)
+            return native_object
+        if eager_only or eager_or_interchange_only:
+            if not pass_through:
+                msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe"
+                raise TypeError(msg)
+            return native_object
+        return version.lazyframe(
+            native_object.__narwhals_lazyframe__()._with_version(version), level="full"
+        )
+    elif is_compliant_series(native_object):
+        if not allow_series:
+            if not pass_through:
+                msg = "Please set `allow_series=True` or `series_only=True`"
+                raise TypeError(msg)
+            return native_object
+        return version.series(
+            native_object.__narwhals_series__()._with_version(version), level="full"
+        )
+
+    # Polars
+    elif is_native_polars(native_object):
+        if series_only and not is_polars_series(native_object):
+            if not pass_through:
+                msg = f"Cannot only use `series_only` with {type(native_object).__qualname__}"
+                raise TypeError(msg)
+            return native_object
+        if (eager_only or eager_or_interchange_only) and is_polars_lazyframe(
+            native_object
+        ):
+            if not pass_through:
+                msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with polars.LazyFrame"
+                raise TypeError(msg)
+            return native_object
+        if (not allow_series) and is_polars_series(native_object):
+            if not pass_through:
+                msg = "Please set `allow_series=True` or `series_only=True`"
+                raise TypeError(msg)
+            return native_object
+        return (
+            version.namespace.from_native_object(native_object)
+            .compliant.from_native(native_object)
+            .to_narwhals()
+        )
+
+    # PandasLike (pandas / modin / cudf)
+    elif is_native_pandas_like(native_object):
+        if is_pandas_like_dataframe(native_object):
+            if series_only:
+                if not pass_through:
+                    msg = f"Cannot only use `series_only` with {type(native_object).__qualname__}"
+                    raise TypeError(msg)
+                return native_object
+        # else: a pandas-like Series, which needs `allow_series`.
+        elif not allow_series:
+            if not pass_through:
+                msg = "Please set `allow_series=True` or `series_only=True`"
+                raise TypeError(msg)
+            return native_object
+        return (
+            version.namespace.from_native_object(native_object)
+            .compliant.from_native(native_object)
+            .to_narwhals()
+        )
+
+    # PyArrow
+    elif is_native_arrow(native_object):
+        if is_pyarrow_table(native_object):
+            if series_only:
+                if not pass_through:
+                    msg = f"Cannot only use `series_only` with {type(native_object).__qualname__}"
+                    raise TypeError(msg)
+                return native_object
+        # else: a chunked array (series-like), which needs `allow_series`.
+        elif not allow_series:
+            if not pass_through:
+                msg = "Please set `allow_series=True` or `series_only=True`"
+                raise TypeError(msg)
+            return native_object
+        return (
+            version.namespace.from_native_object(native_object)
+            .compliant.from_native(native_object)
+            .to_narwhals()
+        )
+
+    # Dask
+    elif is_dask_dataframe(native_object):
+        from narwhals._dask.namespace import DaskNamespace
+
+        if series_only:
+            if not pass_through:
+                msg = "Cannot only use `series_only` with dask DataFrame"
+                raise TypeError(msg)
+            return native_object
+        if eager_only or eager_or_interchange_only:
+            if not pass_through:
+                msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with dask DataFrame"
+                raise TypeError(msg)
+            return native_object
+        # Older dask builds need the separate dask-expr package.
+        dask_version = parse_version(get_dask())
+        if dask_version <= (2024, 12, 1) and get_dask_expr() is None:  # pragma: no cover
+            msg = "Please install dask-expr"
+            raise ImportError(msg)
+        return (
+            DaskNamespace(backend_version=dask_version, version=version)
+            .from_native(native_object)
+            .to_narwhals()
+        )
+
+    # DuckDB
+    elif is_duckdb_relation(native_object):
+        if eager_only or series_only:  # pragma: no cover
+            if not pass_through:
+                # NOTE(review): message text looks off -- `eager_only=False`
+                # presumably should read `eager_only=True`; message-only issue.
+                msg = "Cannot only use `series_only=True` or `eager_only=False` with DuckDBPyRelation"
+                raise TypeError(msg)
+            return native_object
+        return (
+            version.namespace.from_native_object(native_object)
+            .compliant.from_native(native_object)
+            .to_narwhals()
+        )
+
+    # Ibis
+    elif is_ibis_table(native_object):
+        if eager_only or series_only:  # pragma: no cover
+            if not pass_through:
+                # NOTE(review): same suspected `eager_only=False` message typo
+                # as the DuckDB branch above.
+                msg = "Cannot only use `series_only=True` or `eager_only=False` with ibis.Table"
+                raise TypeError(msg)
+            return native_object
+        return (
+            version.namespace.from_native_object(native_object)
+            .compliant.from_native(native_object)
+            .to_narwhals()
+        )
+
+    # PySpark
+    elif is_native_spark_like(native_object):  # pragma: no cover
+        ns_spark = version.namespace.from_native_object(native_object)
+        if series_only or eager_only or eager_or_interchange_only:
+            if not pass_through:
+                msg = (
+                    "Cannot only use `series_only`, `eager_only` or `eager_or_interchange_only` "
+                    f"with {ns_spark.implementation} DataFrame"
+                )
+                raise TypeError(msg)
+            return native_object
+        return ns_spark.compliant.from_native(native_object).to_narwhals()
+
+    # Interchange protocol: last resort, objects only implementing
+    # `__dataframe__`.  Only stable.v1 still wraps these.
+    elif _supports_dataframe_interchange(native_object):
+        from narwhals._interchange.dataframe import InterchangeFrame
+
+        if eager_only or series_only:
+            if not pass_through:
+                # NOTE(review): same suspected `eager_only=False` message typo
+                # as the DuckDB branch above.
+                msg = (
+                    "Cannot only use `series_only=True` or `eager_only=False` "
+                    "with object which only implements __dataframe__"
+                )
+                raise TypeError(msg)
+            return native_object
+        if version is not Version.V1:
+            if pass_through:
+                return native_object
+            msg = (
+                "The Dataframe Interchange Protocol is no longer supported in the main `narwhals` namespace.\n\n"
+                "You may want to:\n"
+                " - Use `narwhals.stable.v1`, where it is still supported.\n"
+                " - See https://narwhals-dev.github.io/narwhals/backcompat\n"
+                " - Use `pass_through=True` to pass the object through without raising."
+            )
+            raise TypeError(msg)
+        return Version.V1.dataframe(InterchangeFrame(native_object), level="interchange")
+
+    # Nothing matched: unknown object.
+    elif not pass_through:
+        msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(native_object)}"
+        raise TypeError(msg)
+    return native_object
+
+
+def get_native_namespace(
+    *obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries,
+) -> Any:
+    """Get native namespace from object.
+
+    Arguments:
+        obj: Dataframe, Lazyframe, or Series. Multiple objects can be
+            passed positionally, in which case they must all have the
+            same native namespace (else an error is raised).
+
+    Returns:
+        Native module.
+
+    Examples:
+        >>> import polars as pl
+        >>> import pandas as pd
+        >>> import narwhals as nw
+        >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+        >>> nw.get_native_namespace(df)
+        <module 'pandas'...>
+        >>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))
+        >>> nw.get_native_namespace(df)
+        <module 'polars'...>
+    """
+    if not obj:
+        msg = "At least one object must be passed to `get_native_namespace`."
+        raise ValueError(msg)
+    # Set comprehension deduplicates: all objects sharing one namespace
+    # collapse to a single-element set.
+    result = {_get_native_namespace_single_obj(x) for x in obj}
+    if len(result) != 1:
+        msg = f"Found objects with different native namespaces: {result}."
+        raise ValueError(msg)
+    return result.pop()
+
+
def _get_native_namespace_single_obj(
    obj: DataFrame[Any] | LazyFrame[Any] | Series[Any] | IntoFrame | IntoSeries,
) -> Any:
    # Resolve the native module for a single object: try Narwhals' own
    # dispatch first, then fall back to the object's `__native_namespace__`.
    from contextlib import suppress

    from narwhals._utils import has_native_namespace

    with suppress(TypeError, AssertionError):
        compliant = Version.MAIN.namespace.from_native_object(obj)
        return compliant.implementation.to_native_namespace()

    if not has_native_namespace(obj):
        msg = f"Could not get native namespace from object of type: {type(obj)}"
        raise TypeError(msg)
    return obj.__native_namespace__()
+
+
def narwhalify(
    func: Callable[..., Any] | None = None,
    *,
    strict: bool | None = None,
    pass_through: bool | None = None,
    eager_only: bool = False,
    series_only: bool = False,
    allow_series: bool | None = True,
) -> Callable[..., Any]:
    """Decorate function so it becomes dataframe-agnostic.

    This will try to convert any dataframe/series-like object into the Narwhals
    respective DataFrame/Series, while leaving the other parameters as they are.
    Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
    converted back to the original dataframe/series type, while if the output is another
    type it will be left as is.
    By setting `pass_through=False`, then every input and every output will be required to be a
    dataframe/series-like object.

    Arguments:
        func: Function to wrap in a `from_native`-`to_native` block.
        strict: Determine what happens if the object can't be converted to Narwhals

            *Deprecated* (v1.13.0)

            Please use `pass_through` instead. Note that `strict` is still available
            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
            see [perfect backwards compatibility policy](../backcompat.md/).

            - `True` or `None` (default): raise an error
            - `False`: pass object through as-is
        pass_through: Determine what happens if the object can't be converted to Narwhals

            - `False` or `None` (default): raise an error
            - `True`: pass object through as-is
        eager_only: Whether to only allow eager objects

            - `False` (default): don't require `native_object` to be eager
            - `True`: only convert to Narwhals if `native_object` is eager
        series_only: Whether to only allow Series

            - `False` (default): don't require `native_object` to be a Series
            - `True`: only convert to Narwhals if `native_object` is a Series
        allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)

            - `False` or `None`: don't convert to Narwhals if `native_object` is a Series
            - `True` (default): allow `native_object` to be a Series

    Returns:
        Decorated function.

    Examples:
        Instead of writing

        >>> import narwhals as nw
        >>> def agnostic_group_by_sum(df):
        ...     df = nw.from_native(df, pass_through=True)
        ...     df = df.group_by("a").agg(nw.col("b").sum())
        ...     return nw.to_native(df)

        you can just write

        >>> @nw.narwhalify
        ... def agnostic_group_by_sum(df):
        ...     return df.group_by("a").agg(nw.col("b").sum())
    """
    from narwhals._utils import validate_strict_and_pass_though

    # `strict` is deprecated in favour of `pass_through`; this helper reconciles
    # the two (emitting a deprecation warning when `strict` is supplied).
    pass_through = validate_strict_and_pass_though(
        strict, pass_through, pass_through_default=True, emit_deprecation_warning=True
    )

    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            def _convert(value: Any) -> Any:
                # Narwhalify a single argument (non-frame values pass through).
                return from_native(
                    value,
                    pass_through=pass_through,
                    eager_only=eager_only,
                    series_only=series_only,
                    allow_series=allow_series,
                )

            converted_args = tuple(_convert(arg) for arg in args)
            converted_kwargs = {
                name: _convert(value) for name, value in kwargs.items()
            }

            # Collect the native namespaces of all frame/series inputs; mixing
            # backends within one call is not supported.
            backends = set()
            for value in (*converted_args, *converted_kwargs.values()):
                namespace_getter = getattr(value, "__native_namespace__", None)
                if namespace_getter:
                    backends.add(namespace_getter())

            if len(backends) > 1:
                msg = "Found multiple backends. Make sure that all dataframe/series inputs come from the same backend."
                raise ValueError(msg)

            result = fn(*converted_args, **converted_kwargs)
            # Convert any Narwhals object in the result back to its native type.
            return to_native(result, pass_through=pass_through)

        return wrapper

    # Support both bare `@narwhalify` usage (func is the decorated function)
    # and parametrized `@narwhalify(...)` usage (func is None).
    return decorator if func is None else decorator(func)
+
+
def to_py_scalar(scalar_like: Any) -> Any:
    """If a scalar is not Python native, converts it to Python native.

    Arguments:
        scalar_like: Scalar-like value.

    Returns:
        Python scalar.

    Raises:
        ValueError: If the object is not convertible to a scalar.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
        >>> nw.to_py_scalar(df["a"].item(0))
        1
        >>> import pyarrow as pa
        >>> df = nw.from_native(pa.table({"a": [1, 2, 3]}))
        >>> nw.to_py_scalar(df["a"].item(0))
        1
        >>> nw.to_py_scalar(1)
        1
    """
    # NOTE: branch ORDER in this chain is load-bearing — more specific checks
    # must run before more general ones (see inline comments below).
    scalar: Any
    pd = get_pandas()
    if scalar_like is None or isinstance(scalar_like, NON_TEMPORAL_SCALAR_TYPES):
        # Already a native Python non-temporal scalar (or None): return as-is.
        scalar = scalar_like
    elif (
        (np := get_numpy())
        and isinstance(scalar_like, np.datetime64)
        and scalar_like.dtype == "datetime64[ns]"
    ):
        # datetime64[ns] must be handled before the generic numpy-scalar branch:
        # `.item()` on it yields raw nanoseconds (an int), not a datetime.
        # NOTE: despite its name, `ms` holds *microseconds* (ns // 1000),
        # matching Python datetime's microsecond resolution.
        ms = scalar_like.item() // 1000
        scalar = dt.datetime(1970, 1, 1) + dt.timedelta(microseconds=ms)
    elif is_numpy_scalar(scalar_like) or is_cupy_scalar(scalar_like):
        # Generic numpy/cupy scalar: `.item()` converts to the Python equivalent.
        scalar = scalar_like.item()
    elif pd and isinstance(scalar_like, pd.Timestamp):
        scalar = scalar_like.to_pydatetime()
    elif pd and isinstance(scalar_like, pd.Timedelta):
        scalar = scalar_like.to_pytimedelta()
    # pd.Timestamp and pd.Timedelta subclass datetime and timedelta,
    # so we need to check this separately
    elif isinstance(scalar_like, TEMPORAL_SCALAR_TYPES):
        scalar = scalar_like
    elif _is_pandas_na(scalar_like):
        # pandas' NA / NaT sentinels normalise to None.
        scalar = None
    elif is_pyarrow_scalar(scalar_like):
        scalar = scalar_like.as_py()
    else:
        msg = (
            f"Expected object convertible to a scalar, found {type(scalar_like)}.\n"
            f"{scalar_like!r}"
        )
        raise ValueError(msg)
    return scalar
+
+
def _is_pandas_na(obj: Any) -> bool:
    """Return True if pandas is installed and `obj` is a scalar pandas-missing value."""
    pd = get_pandas()
    if not pd:
        # pandas not installed: nothing can be a pandas NA.
        return False
    return bool(pd.api.types.is_scalar(obj) and pd.isna(obj))
+
+
+__all__ = ["get_native_namespace", "narwhalify", "to_native", "to_py_scalar"]
diff --git a/venv/lib/python3.8/site-packages/narwhals/typing.py b/venv/lib/python3.8/site-packages/narwhals/typing.py
new file mode 100644
index 0000000..fe43757
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/typing.py
@@ -0,0 +1,428 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Literal, Protocol, Sequence, TypeVar, Union
+
+from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries
+
if TYPE_CHECKING:
    import datetime as dt
    from decimal import Decimal
    from types import ModuleType
    from typing import Iterable, Sized

    import numpy as np
    from typing_extensions import TypeAlias

    from narwhals import dtypes
    from narwhals.dataframe import DataFrame, LazyFrame
    from narwhals.expr import Expr
    from narwhals.series import Series

    # All dataframes supported by Narwhals have a
    # `columns` property. Their similarities don't extend
    # _that_ much further unfortunately...
    class NativeFrame(Protocol):
        """Structural type for native frames: exposes `columns` and `join`."""

        @property
        def columns(self) -> Any: ...

        def join(self, *args: Any, **kwargs: Any) -> Any: ...

    class NativeLazyFrame(NativeFrame, Protocol):
        """Structural type for native lazy frames, distinguished by `explain`."""

        def explain(self, *args: Any, **kwargs: Any) -> Any: ...

    class NativeSeries(Sized, Iterable[Any], Protocol):
        """Structural type for native series: sized, iterable, filterable."""

        def filter(self, *args: Any, **kwargs: Any) -> Any: ...

    class DataFrameLike(Protocol):
        """Object implementing the DataFrame Interchange Protocol."""

        def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ...

    class SupportsNativeNamespace(Protocol):
        """Object which can report the native module backing it."""

        def __native_namespace__(self) -> ModuleType: ...

    # ruff: noqa: N802
    class DTypes(Protocol):
        """Protocol exposing every Narwhals dtype class as a property.

        Property names are capitalized to mirror the dtype class names,
        hence the `N802` suppression above.
        """

        @property
        def Decimal(self) -> type[dtypes.Decimal]: ...
        @property
        def Int128(self) -> type[dtypes.Int128]: ...
        @property
        def Int64(self) -> type[dtypes.Int64]: ...
        @property
        def Int32(self) -> type[dtypes.Int32]: ...
        @property
        def Int16(self) -> type[dtypes.Int16]: ...
        @property
        def Int8(self) -> type[dtypes.Int8]: ...
        @property
        def UInt128(self) -> type[dtypes.UInt128]: ...
        @property
        def UInt64(self) -> type[dtypes.UInt64]: ...
        @property
        def UInt32(self) -> type[dtypes.UInt32]: ...
        @property
        def UInt16(self) -> type[dtypes.UInt16]: ...
        @property
        def UInt8(self) -> type[dtypes.UInt8]: ...
        @property
        def Float64(self) -> type[dtypes.Float64]: ...
        @property
        def Float32(self) -> type[dtypes.Float32]: ...
        @property
        def String(self) -> type[dtypes.String]: ...
        @property
        def Boolean(self) -> type[dtypes.Boolean]: ...
        @property
        def Object(self) -> type[dtypes.Object]: ...
        @property
        def Categorical(self) -> type[dtypes.Categorical]: ...
        @property
        def Enum(self) -> type[dtypes.Enum]: ...
        @property
        def Datetime(self) -> type[dtypes.Datetime]: ...
        @property
        def Duration(self) -> type[dtypes.Duration]: ...
        @property
        def Date(self) -> type[dtypes.Date]: ...
        @property
        def Field(self) -> type[dtypes.Field]: ...
        @property
        def Struct(self) -> type[dtypes.Struct]: ...
        @property
        def List(self) -> type[dtypes.List]: ...
        @property
        def Array(self) -> type[dtypes.Array]: ...
        @property
        def Unknown(self) -> type[dtypes.Unknown]: ...
        @property
        def Time(self) -> type[dtypes.Time]: ...
        @property
        def Binary(self) -> type[dtypes.Binary]: ...
+
+
IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"]
"""Anything which can be converted to an expression.

Use this to mean "either a Narwhals expression, or something which can be converted
into one". For example, `exprs` in `DataFrame.select` is typed to accept `IntoExpr`,
as it can either accept a `nw.Expr` (e.g. `df.select(nw.col('a'))`) or a string
which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`.
"""

IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"]
"""Anything which can be converted to a Narwhals DataFrame.

Use this if your function accepts a narwhalifiable object but doesn't care about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrame
    >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.shape
"""

# Anything which can be converted to a Narwhals LazyFrame (i.e. a native lazy frame).
IntoLazyFrame: TypeAlias = "NativeLazyFrame"

IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"]
"""Anything which can be converted to a Narwhals DataFrame or LazyFrame.

Use this if your function can accept an object which can be converted to either
`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrame
    >>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
    ...     df = nw.from_native(df_native)
    ...     return df.collect_schema().names()
"""

Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"]
"""Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function can work with either and your function doesn't care
about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import Frame
    >>> @nw.narwhalify
    ... def agnostic_columns(df: Frame) -> list[str]:
    ...     return df.columns
"""

IntoSeries: TypeAlias = "NativeSeries"
"""Anything which can be converted to a Narwhals Series.

Use this if your function can accept an object which can be converted to `nw.Series`
and it doesn't care about its backend.

Examples:
    >>> from typing import Any
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeries
    >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
    ...     s = nw.from_native(s_native)
    ...     return s.to_list()
"""
+
IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame")
"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts an object which is convertible to `nw.DataFrame`
or `nw.LazyFrame` and returns an object of the same type.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrameT
    >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
    ...     df = nw.from_native(df_native)
    ...     return df.with_columns(c=nw.col("a") + 1).to_native()
"""

IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame")
"""TypeVar bound to object convertible to Narwhals DataFrame.

Use this if your function accepts an object which can be converted to `nw.DataFrame`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrameT
    >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.with_columns(c=df["a"] + 1).to_native()
"""

# TypeVar bound to object convertible to Narwhals LazyFrame.
IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame")

FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]")
"""TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts either `nw.DataFrame` or `nw.LazyFrame` and returns
an object of the same kind.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import FrameT
    >>> @nw.narwhalify
    ... def agnostic_func(df: FrameT) -> FrameT:
    ...     return df.with_columns(c=nw.col("a") + 1)
"""

DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
"""TypeVar bound to Narwhals DataFrame.

Use this if your function can accept a Narwhals DataFrame and returns a Narwhals
DataFrame backed by the same backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import DataFrameT
    >>> @nw.narwhalify
    ... def func(df: DataFrameT) -> DataFrameT:
    ...     return df.with_columns(c=df["a"] + 1)
"""

# TypeVar bound to a Narwhals LazyFrame.
LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")
# TypeVar bound to a Narwhals Series.
SeriesT = TypeVar("SeriesT", bound="Series[Any]")

IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries")
"""TypeVar bound to object convertible to Narwhals Series.

Use this if your function accepts an object which can be converted to `nw.Series`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeriesT
    >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
    ...     s = nw.from_native(s_native, series_only=True)
    ...     return s.abs().to_native()
"""
+
# Backend for pandas-like dtype conversion; `None` selects the default
# numpy-backed dtypes.
DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None'
SizeUnit: TypeAlias = Literal[
    "b",
    "kb",
    "mb",
    "gb",
    "tb",
    "bytes",
    "kilobytes",
    "megabytes",
    "gigabytes",
    "terabytes",
]

# Temporal resolution: nanoseconds, microseconds, milliseconds, or seconds.
TimeUnit: TypeAlias = Literal["ns", "us", "ms", "s"]

AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"]
"""Join strategy.

- *"backward"*: Selects the last row in the right DataFrame whose `on` key
  is less than or equal to the left's key.
- *"forward"*: Selects the first row in the right DataFrame whose `on` key
  is greater than or equal to the left's key.
- *"nearest"*: Search selects the last row in the right DataFrame whose value
  is nearest to the left's key.
"""

ClosedInterval: TypeAlias = Literal["left", "right", "none", "both"]
"""Define which sides of the interval are closed (inclusive)."""

ConcatMethod: TypeAlias = Literal["horizontal", "vertical", "diagonal"]
"""Concatenating strategy.

- *"vertical"*: Concatenate vertically. Column names must match.
- *"horizontal"*: Concatenate horizontally. If lengths don't match, then
  missing rows are filled with null values.
- *"diagonal"*: Finds a union between the column schemas and fills missing
  column values with null.
"""

FillNullStrategy: TypeAlias = Literal["forward", "backward"]
"""Strategy used to fill null values."""

JoinStrategy: TypeAlias = Literal["inner", "left", "full", "cross", "semi", "anti"]
"""Join strategy.

- *"inner"*: Returns rows that have matching values in both tables.
- *"left"*: Returns all rows from the left table, and the matched rows from
  the right table.
- *"full"*: Returns all rows in both dataframes, with the `suffix` appended to
  the right join keys.
- *"cross"*: Returns the Cartesian product of rows from both tables.
- *"semi"*: Filter rows that have a match in the right table.
- *"anti"*: Filter rows that do not have a match in the right table.
"""

PivotAgg: TypeAlias = Literal[
    "min", "max", "first", "last", "sum", "mean", "median", "len"
]
"""A predefined aggregate function string."""

RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal"]
"""The method used to assign ranks to tied elements.

- *"average"*: The average of the ranks that would have been assigned to
  all the tied values is assigned to each value.
- *"min"*: The minimum of the ranks that would have been assigned to all
  the tied values is assigned to each value. (This is also referred to
  as "competition" ranking.)
- *"max"*: The maximum of the ranks that would have been assigned to all
  the tied values is assigned to each value.
- *"dense"*: Like "min", but the rank of the next highest element is
  assigned the rank immediately after those assigned to the tied elements.
- *"ordinal"*: All values are given a distinct rank, corresponding to the
  order that the values occur in the Series.
"""

RollingInterpolationMethod: TypeAlias = Literal[
    "nearest", "higher", "lower", "midpoint", "linear"
]
"""Interpolation method."""

UniqueKeepStrategy: TypeAlias = Literal["any", "first", "last", "none"]
"""Which of the duplicate rows to keep.

- *"any"*: Does not give any guarantee of which row is kept.
  This allows more optimizations.
- *"none"*: Don't keep duplicate rows.
- *"first"*: Keep first unique row.
- *"last"*: Keep last unique row.
"""

LazyUniqueKeepStrategy: TypeAlias = Literal["any", "none"]
"""Which of the duplicate rows to keep.

- *"any"*: Does not give any guarantee of which row is kept.
- *"none"*: Don't keep duplicate rows.
"""
+
+
# Private, shape-parameterised numpy array aliases (internal use only).
_ShapeT = TypeVar("_ShapeT", bound="tuple[int, ...]")
_NDArray: TypeAlias = "np.ndarray[_ShapeT, Any]"
_1DArray: TypeAlias = "_NDArray[tuple[int]]"  # noqa: PYI042
_1DArrayInt: TypeAlias = "np.ndarray[tuple[int], np.dtype[np.integer[Any]]]"  # noqa: PYI042
_2DArray: TypeAlias = "_NDArray[tuple[int, int]]"  # noqa: PYI042, PYI047
_AnyDArray: TypeAlias = "_NDArray[tuple[int, ...]]"  # noqa: PYI047
_NumpyScalar: TypeAlias = "np.generic[Any]"
Into1DArray: TypeAlias = "_1DArray | _NumpyScalar"
"""A 1-dimensional `numpy.ndarray` or scalar that can be converted into one."""


# Plain-Python literal unions accepted as scalar values throughout the API.
NumericLiteral: TypeAlias = "int | float | Decimal"
TemporalLiteral: TypeAlias = "dt.date | dt.datetime | dt.time | dt.timedelta"
NonNestedLiteral: TypeAlias = (
    "NumericLiteral | TemporalLiteral | str | bool | bytes | None"
)
PythonLiteral: TypeAlias = "NonNestedLiteral | list[Any] | tuple[Any, ...]"

NonNestedDType: TypeAlias = "dtypes.NumericType | dtypes.TemporalType | dtypes.String | dtypes.Boolean | dtypes.Binary | dtypes.Categorical | dtypes.Unknown | dtypes.Object"
"""Any Narwhals DType that does not have required arguments."""

IntoDType: TypeAlias = "dtypes.DType | type[NonNestedDType]"
"""Anything that can be converted into a Narwhals DType.

Examples:
    >>> import polars as pl
    >>> import narwhals as nw
    >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
    >>> df = nw.from_native(df_native)
    >>> df.select(
    ...     nw.col("a").cast(nw.Int32),
    ...     nw.col("b").cast(nw.String()).str.split(".").cast(nw.List(nw.Int8)),
    ... )
    ┌──────────────────┐
    |Narwhals DataFrame|
    |------------------|
    |shape: (3, 2)     |
    |┌─────┬──────────┐|
    |│ a   ┆ b        │|
    |│ --- ┆ ---      │|
    |│ i32 ┆ list[i8] │|
    |╞═════╪══════════╡|
    |│ 1   ┆ [4, 0]   │|
    |│ 2   ┆ [5, 0]   │|
    |│ 3   ┆ [6, 0]   │|
    |└─────┴──────────┘|
    └──────────────────┘
"""


# Annotations for `__getitem__` methods
_T = TypeVar("_T")
_Slice: TypeAlias = "slice[_T, Any, Any] | slice[Any, _T, Any] | slice[None, None, _T]"
_SliceNone: TypeAlias = "slice[None, None, None]"
# Index/column positions
SingleIndexSelector: TypeAlias = int
_SliceIndex: TypeAlias = "_Slice[int] | _SliceNone"
"""E.g. `[1:]` or `[:3]` or `[::2]`."""
SizedMultiIndexSelector: TypeAlias = "Sequence[int] | _T | _1DArrayInt"
MultiIndexSelector: TypeAlias = "_SliceIndex | SizedMultiIndexSelector[_T]"
# Labels/column names
SingleNameSelector: TypeAlias = str
_SliceName: TypeAlias = "_Slice[str] | _SliceNone"
SizedMultiNameSelector: TypeAlias = "Sequence[str] | _T | _1DArray"
MultiNameSelector: TypeAlias = "_SliceName | SizedMultiNameSelector[_T]"
# Mixed selectors
SingleColSelector: TypeAlias = "SingleIndexSelector | SingleNameSelector"
MultiColSelector: TypeAlias = "MultiIndexSelector[_T] | MultiNameSelector[_T]"
+
+
+__all__ = [
+ "CompliantDataFrame",
+ "CompliantLazyFrame",
+ "CompliantSeries",
+ "DataFrameT",
+ "Frame",
+ "FrameT",
+ "IntoDataFrame",
+ "IntoDataFrameT",
+ "IntoExpr",
+ "IntoFrame",
+ "IntoFrameT",
+ "IntoSeries",
+ "IntoSeriesT",
+]
diff --git a/venv/lib/python3.8/site-packages/narwhals/utils.py b/venv/lib/python3.8/site-packages/narwhals/utils.py
new file mode 100644
index 0000000..83e2687
--- /dev/null
+++ b/venv/lib/python3.8/site-packages/narwhals/utils.py
@@ -0,0 +1,6 @@
# Public re-exports: expose selected helpers from the private `narwhals._utils`
# module under the stable `narwhals.utils` name.
from __future__ import annotations

from narwhals._utils import Implementation, Version, parse_version

__all__ = ["Implementation", "Version", "parse_version"]