From 5bf22fc7e3c392c8bd44315ca2d06d7dca7d084e Mon Sep 17 00:00:00 2001 From: sotech117 Date: Thu, 31 Jul 2025 17:27:24 -0400 Subject: add code for analysis of data --- .../site-packages/narwhals/stable/__init__.py | 5 + .../site-packages/narwhals/stable/v1/__init__.py | 1875 ++++++++++++++++++++ .../site-packages/narwhals/stable/v1/_dtypes.py | 135 ++ .../site-packages/narwhals/stable/v1/_namespace.py | 10 + .../narwhals/stable/v1/dependencies.py | 65 + .../site-packages/narwhals/stable/v1/dtypes.py | 77 + .../site-packages/narwhals/stable/v1/selectors.py | 23 + .../site-packages/narwhals/stable/v1/typing.py | 209 +++ 8 files changed, 2399 insertions(+) create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/__init__.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py create mode 100644 venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py (limited to 'venv/lib/python3.8/site-packages/narwhals/stable') diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/__init__.py b/venv/lib/python3.8/site-packages/narwhals/stable/__init__.py new file mode 100644 index 0000000..60bc872 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from narwhals.stable import v1 + +__all__ = ["v1"] diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py new file mode 100644 index 0000000..3259be1 --- /dev/null +++ 
# b/venv/lib/python3.8/site-packages/narwhals/stable/v1/__init__.py @@ -0,0 +1,1875 @@
from __future__ import annotations

from functools import wraps
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Iterable,
    Literal,
    Mapping,
    Sequence,
    cast,
    overload,
)
from warnings import warn

import narwhals as nw
from narwhals import dependencies, exceptions, functions as nw_f, selectors
from narwhals._typing_compat import TypeVar
from narwhals._utils import (
    Implementation,
    Version,
    deprecate_native_namespace,
    find_stacklevel,
    generate_temporary_column_name,
    inherit_doc,
    is_ordered_categorical,
    maybe_align_index,
    maybe_convert_dtypes,
    maybe_get_index,
    maybe_reset_index,
    maybe_set_index,
    validate_strict_and_pass_though,
)
from narwhals.dataframe import DataFrame as NwDataFrame, LazyFrame as NwLazyFrame
from narwhals.dependencies import get_polars
from narwhals.exceptions import InvalidIntoExprError
from narwhals.expr import Expr as NwExpr
from narwhals.functions import _new_series_impl, concat, get_level, show_versions
from narwhals.schema import Schema as NwSchema
from narwhals.series import Series as NwSeries
from narwhals.stable.v1 import dtypes
from narwhals.stable.v1.dtypes import (
    Array,
    Binary,
    Boolean,
    Categorical,
    Date,
    Datetime,
    Decimal,
    Duration,
    Enum,
    Field,
    Float32,
    Float64,
    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    List,
    Object,
    String,
    Struct,
    Time,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    Unknown,
)
from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar
from narwhals.typing import IntoDataFrameT, IntoFrameT

if TYPE_CHECKING:
    from types import ModuleType

    from typing_extensions import ParamSpec, Self

    from narwhals._translate import IntoArrowTable
    from narwhals.dataframe import MultiColSelector, MultiIndexSelector
    from narwhals.dtypes import DType
    from narwhals.typing import (
        IntoDType,
        IntoExpr,
        IntoFrame,
        IntoLazyFrameT,
        IntoSeries,
        NonNestedLiteral,
        SingleColSelector,
        SingleIndexSelector,
        _1DArray,
        _2DArray,
    )

    DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
    LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")
    SeriesT = TypeVar("SeriesT", bound="Series[Any]")
    T = TypeVar("T", default=Any)
    P = ParamSpec("P")
    R = TypeVar("R")

IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any)


# Stable (v1) DataFrame: subclasses the main-namespace DataFrame and only
# accepts compliant frames tagged with `Version.V1` (see the assert below).
class DataFrame(NwDataFrame[IntoDataFrameT]):
    @inherit_doc(NwDataFrame)
    def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
        assert df._version is Version.V1  # noqa: S101
        super().__init__(df, level=level)

    # We need to override any method which doesn't return Self so that type
    # annotations are correct.

    # The stable `Series` / `LazyFrame` classes defined in this module.
    # NOTE(review): presumably consulted by the base class when it builds
    # related objects - confirm against `narwhals.dataframe`.
    @property
    def _series(self) -> type[Series[Any]]:
        return cast("type[Series[Any]]", Series)

    @property
    def _lazyframe(self) -> type[LazyFrame[Any]]:
        return cast("type[LazyFrame[Any]]", LazyFrame)

    # Typing-only overloads; the implementation below simply defers to the
    # parent `__getitem__`.
    @overload
    def __getitem__(self, item: tuple[SingleIndexSelector, SingleColSelector]) -> Any: ...

    @overload
    def __getitem__(  # type: ignore[overload-overlap]
        self, item: str | tuple[MultiIndexSelector, SingleColSelector]
    ) -> Series[Any]: ...

    @overload
    def __getitem__(
        self,
        item: (
            SingleIndexSelector
            | MultiIndexSelector
            | MultiColSelector
            | tuple[SingleIndexSelector, MultiColSelector]
            | tuple[MultiIndexSelector, MultiColSelector]
        ),
    ) -> Self: ...
    def __getitem__(
        self,
        item: (
            SingleIndexSelector
            | SingleColSelector
            | MultiColSelector
            | MultiIndexSelector
            | tuple[SingleIndexSelector, SingleColSelector]
            | tuple[SingleIndexSelector, MultiColSelector]
            | tuple[MultiIndexSelector, SingleColSelector]
            | tuple[MultiIndexSelector, MultiColSelector]
        ),
    ) -> Series[Any] | Self | Any:
        return super().__getitem__(item)

    # Re-wrap the parent's result in the stable `LazyFrame` class.
    def lazy(
        self, backend: ModuleType | Implementation | str | None = None
    ) -> LazyFrame[Any]:
        return _stableify(super().lazy(backend=backend))

    @overload  # type: ignore[override]
    def to_dict(self, *, as_series: Literal[True] = ...) -> dict[str, Series[Any]]: ...
    @overload
    def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
    @overload
    def to_dict(
        self, *, as_series: bool
    ) -> dict[str, Series[Any]] | dict[str, list[Any]]: ...
    def to_dict(
        self, *, as_series: bool = True
    ) -> dict[str, Series[Any]] | dict[str, list[Any]]:
        # Type checkers complain that `nw.Series` is not assignable to `nw.stable.v1.Series`.
        # However the return type actually is `nw.stable.v1.Series`, check `tests/v1_test.py::test_to_dict_as_series`.
        return super().to_dict(as_series=as_series)  # type: ignore[return-value]

    # The two methods below re-wrap the parent's Series result as a stable `Series`.
    def is_duplicated(self) -> Series[Any]:
        return _stableify(super().is_duplicated())

    def is_unique(self) -> Series[Any]:
        return _stableify(super().is_unique())

    def _l1_norm(self) -> Self:
        """Private, just used to test the stable API.

        Returns:
            A new DataFrame.
        """
        # `all` is this module's `all()` expression constructor, not the builtin.
        return self.select(all()._l1_norm())


# Stable (v1) LazyFrame; like `DataFrame` above it requires a `Version.V1`
# compliant frame.
class LazyFrame(NwLazyFrame[IntoFrameT]):
    @inherit_doc(NwLazyFrame)
    def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
        assert df._version is Version.V1  # noqa: S101
        super().__init__(df, level=level)

    @property
    def _dataframe(self) -> type[DataFrame[Any]]:
        return DataFrame

    # Translate one user-supplied argument into its compliant counterpart:
    # frames -> compliant frame, expressions -> compliant expression,
    # strings -> `col(<name>)`. Series and anything else raise.
    def _extract_compliant(self, arg: Any) -> Any:
        # After v1, we raise when passing order-dependent or length-changing
        # expressions to LazyFrame
        # These local imports bind the main-namespace `Expr` / `Series`,
        # shadowing the stable classes of the same name defined in this module.
        from narwhals.dataframe import BaseFrame
        from narwhals.expr import Expr
        from narwhals.series import Series

        if isinstance(arg, BaseFrame):
            return arg._compliant_frame
        if isinstance(arg, Series):  # pragma: no cover
            msg = "Mixing Series with LazyFrame is not supported."
            raise TypeError(msg)
        if isinstance(arg, Expr):
            # After stable.v1, we raise for order-dependent exprs or filtrations
            return arg._to_compliant_expr(self.__narwhals_namespace__())
        if isinstance(arg, str):
            plx = self.__narwhals_namespace__()
            return plx.col(arg)
        # Friendlier error when a raw Polars object was passed by mistake.
        if get_polars() is not None and "polars" in str(type(arg)):  # pragma: no cover
            msg = (
                f"Expected Narwhals object, got: {type(arg)}.\n\n"
                "Perhaps you:\n"
                "- Forgot a `nw.from_native` somewhere?\n"
                "- Used `pl.col` instead of `nw.col`?"
            )
            raise TypeError(msg)
        raise InvalidIntoExprError.from_invalid_type(type(arg))

    # Re-wrap the collected frame in the stable `DataFrame` class.
    def collect(
        self, backend: ModuleType | Implementation | str | None = None, **kwargs: Any
    ) -> DataFrame[Any]:
        return _stableify(super().collect(backend=backend, **kwargs))

    def _l1_norm(self) -> Self:
        """Private, just used to test the stable API.

        Returns:
            A new lazyframe.
        """
        return self.select(all()._l1_norm())

    def tail(self, n: int = 5) -> Self:
        r"""Get the last `n` rows.

        Arguments:
            n: Number of rows to return.

        Returns:
            A subset of the LazyFrame of shape (n, n_columns).
        """
        return super().tail(n)

    def gather_every(self, n: int, offset: int = 0) -> Self:
        r"""Take every nth row in the LazyFrame and return as a new LazyFrame.

        Arguments:
            n: Gather every *n*-th row.
            offset: Starting index.

        Returns:
            The LazyFrame containing only the selected rows.
        """
        # Calls the compliant frame directly rather than a parent-class method.
        return self._with_compliant(
            self._compliant_frame.gather_every(n=n, offset=offset)
        )


# Stable (v1) Series; requires a `Version.V1` compliant series.
class Series(NwSeries[IntoSeriesT]):
    @inherit_doc(NwSeries)
    def __init__(
        self, series: Any, *, level: Literal["full", "lazy", "interchange"]
    ) -> None:
        assert series._version is Version.V1  # noqa: S101
        super().__init__(series, level=level)

    # We need to override any method which doesn't return Self so that type
    # annotations are correct.

    @property
    def _dataframe(self) -> type[DataFrame[Any]]:
        return DataFrame

    def to_frame(self) -> DataFrame[Any]:
        return _stableify(super().to_frame())

    def value_counts(
        self,
        *,
        sort: bool = False,
        parallel: bool = False,
        name: str | None = None,
        normalize: bool = False,
    ) -> DataFrame[Any]:
        return _stableify(
            super().value_counts(
                sort=sort, parallel=parallel, name=name, normalize=normalize
            )
        )

    # Always emits `NarwhalsUnstableWarning` before delegating to the parent.
    def hist(
        self,
        bins: list[float | int] | None = None,
        *,
        bin_count: int | None = None,
        include_breakpoint: bool = True,
    ) -> DataFrame[Any]:
        from narwhals._utils import find_stacklevel
        from narwhals.exceptions import NarwhalsUnstableWarning

        msg = (
            "`Series.hist` is being called from the stable API although considered "
            "an unstable feature."
        )
        warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel())
        return _stableify(
            super().hist(
                bins=bins, bin_count=bin_count, include_breakpoint=include_breakpoint
            )
        )


# Stable (v1) Expr. Each method below builds its result through one of the
# `_with_*` helpers, which are defined outside this chunk.
class Expr(NwExpr):
    # Stable-API name for the parent's `_taxicab_norm`.
    def _l1_norm(self) -> Self:
        return super()._taxicab_norm()

    def head(self, n: int = 10) -> Self:
        r"""Get the first `n` rows.

        Arguments:
            n: Number of rows to return.

        Returns:
            A new expression.
        """
        return self._with_orderable_filtration(
            lambda plx: self._to_compliant_expr(plx).head(n)
        )

    def tail(self, n: int = 10) -> Self:
        r"""Get the last `n` rows.

        Arguments:
            n: Number of rows to return.

        Returns:
            A new expression.
        """
        return self._with_orderable_filtration(
            lambda plx: self._to_compliant_expr(plx).tail(n)
        )

    def gather_every(self, n: int, offset: int = 0) -> Self:
        r"""Take every nth value of this expression.

        Arguments:
            n: Gather every *n*-th row.
            offset: Starting index.

        Returns:
            A new expression.
        """
        return self._with_orderable_filtration(
            lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset)
        )

    def unique(self, *, maintain_order: bool | None = None) -> Self:
        """Return unique values of this expression.

        Arguments:
            maintain_order: Keep the same order as the original expression.
                This is deprecated and will be removed in a future version,
                but will still be kept around in `narwhals.stable.v1`.

        Returns:
            A new expression.
        """
        # Any non-None value only triggers the warning; it is never forwarded
        # to the compliant `unique()` call below.
        if maintain_order is not None:
            msg = (
                "`maintain_order` has no effect and is only kept around for backwards-compatibility. "
                "You can safely remove this argument."
            )
            warn(message=msg, category=UserWarning, stacklevel=find_stacklevel())
        return self._with_filtration(lambda plx: self._to_compliant_expr(plx).unique())

    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self:
        """Sort this column. Place null values first.

        Arguments:
            descending: Sort in descending order.
            nulls_last: Place null values last instead of first.

        Returns:
            A new expression.
        """
        return self._with_unorderable_window(
            lambda plx: self._to_compliant_expr(plx).sort(
                descending=descending, nulls_last=nulls_last
            )
        )

    def arg_true(self) -> Self:
        """Find elements where boolean expression is True.

        Returns:
            A new expression.
        """
        return self._with_orderable_filtration(
            lambda plx: self._to_compliant_expr(plx).arg_true()
        )

    def sample(
        self,
        n: int | None = None,
        *,
        fraction: float | None = None,
        with_replacement: bool = False,
        seed: int | None = None,
    ) -> Self:
        """Sample randomly from this expression.

        Arguments:
            n: Number of items to return. Cannot be used with fraction.
            fraction: Fraction of items to return. Cannot be used with n.
            with_replacement: Allow values to be sampled more than once.
            seed: Seed for the random number generator. If set to None (default), a random
                seed is generated for each sample operation.

        Returns:
            A new expression.
        """
        return self._with_filtration(
            lambda plx: self._to_compliant_expr(plx).sample(
                n, fraction=fraction, with_replacement=with_replacement, seed=seed
            )
        )


# Stable (v1) Schema: identical constructor, but tagged with `Version.V1`.
class Schema(NwSchema):
    _version = Version.V1

    @inherit_doc(NwSchema)
    def __init__(
        self, schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None = None
    ) -> None:
        super().__init__(schema)


# Typing-only overloads: each main-namespace class maps to the stable class
# of the same kind.
@overload
def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ...
@overload
def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ...
@overload
def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ...
@overload
def _stableify(obj: NwExpr) -> Expr: ...


def _stableify(
    obj: NwDataFrame[IntoFrameT]
    | NwLazyFrame[IntoFrameT]
    | NwSeries[IntoSeriesT]
    | NwExpr,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr:
    """Wrap a main-namespace Narwhals object in its stable (v1) counterpart.

    Frames and series have their compliant object re-tagged with
    `Version.V1` and keep their `level`; expressions are rebuilt from their
    compliant-expression factory and metadata.

    Arguments:
        obj: A Narwhals DataFrame, LazyFrame, Series or Expr.

    Returns:
        The equivalent object from `narwhals.stable.v1`.

    Raises:
        AssertionError: If `obj` is none of the supported classes.
    """
    if isinstance(obj, NwDataFrame):
        v1_eager_frame = obj._compliant_frame._with_version(Version.V1)
        return DataFrame(v1_eager_frame, level=obj._level)
    if isinstance(obj, NwLazyFrame):
        v1_lazy_frame = obj._compliant_frame._with_version(Version.V1)
        return LazyFrame(v1_lazy_frame, level=obj._level)
    if isinstance(obj, NwSeries):
        v1_series = obj._compliant_series._with_version(Version.V1)
        return Series(v1_series, level=obj._level)
    if not isinstance(obj, NwExpr):
        msg = f"Expected DataFrame, LazyFrame, Series, or Expr, got: {type(obj)}"  # pragma: no cover
        raise AssertionError(msg)
    return Expr(obj._to_compliant_expr, obj._metadata)


@overload
def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ...


@overload
def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ...


@overload
def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ...


@overload
def from_native(
    native_object: DataFrameT | LazyFrameT, **kwds: Any
) -> DataFrameT | LazyFrameT: ...


@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...


# strict=False, eager_or_interchange_only=True, Series not allowed:
# a dataframe-like input is typed as a stable DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# Same flags, any other input: typed as returned unchanged.
@overload
def from_native(
    native_object: T,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


# strict=False, eager_only=True, Series not allowed: DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[False],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# Same flags, any other input: typed as returned unchanged.
@overload
def from_native(
    native_object: T,
    *,
    strict: Literal[False],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


# strict=False, no eager restriction, allow_series=True:
# DataFrame, LazyFrame or Series.
@overload
def from_native(
    native_object: IntoFrameT | IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ...


# strict=False, series_only=True: Series.
@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


# strict=False, all other flags at their defaults: DataFrame or LazyFrame.
@overload
def from_native(
    native_object: IntoFrameT,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


# strict=False, all other flags at their defaults, any other input:
# typed as returned unchanged.
@overload
def from_native(
    native_object: T,
    *,
    strict: Literal[False],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


# strict=True (or omitted), eager_or_interchange_only=True: DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# strict=True (or omitted), eager_only=True: DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# strict=True (or omitted), allow_series=True: DataFrame, LazyFrame or
# Series, each parametrised with `Any`.
@overload
def from_native(
    native_object: IntoFrame | IntoSeries,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[Any] | LazyFrame[Any] | Series[Any]: ...


# strict=True (or omitted), series_only=True: Series.
@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


# strict=True (or omitted), lazy-frame input: LazyFrame.
@overload
def from_native(
    native_object: IntoLazyFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> LazyFrame[IntoLazyFrameT]: ...


# NOTE: `pl.LazyFrame` originally matched here
# strict=True (or omitted), all other flags at their defaults:
# DataFrame or LazyFrame.
@overload
def from_native(
    native_object: IntoFrameT,
    *,
    strict: Literal[True] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


# pass_through=True, eager_or_interchange_only=True, allow_series=True:
# DataFrame or Series.
# This overload previously accepted a series-like input but declared only
# `DataFrame[IntoDataFrameT]` as its return type. It now mirrors the
# equivalent `strict=False` overload and the `eager_only=True` overload
# directly below. Only the static types change.
@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...


# pass_through=True, eager_only=True, allow_series=True: DataFrame or Series.
@overload
def from_native(
    native_object: IntoDataFrameT | IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...


# pass_through=True, eager_or_interchange_only=True, Series not allowed:
# DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# Same flags, any other input: typed as returned unchanged.
@overload
def from_native(
    native_object: T,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


# pass_through=True, eager_only=True, Series not allowed: DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


# pass_through=True, eager_only=True, any other input:
# typed as returned unchanged.
@overload
def from_native(
    native_object: T,
    *,
    pass_through: Literal[True],
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


# pass_through=True, no eager restriction, allow_series=True:
# DataFrame, LazyFrame or Series.
@overload
def from_native(
    native_object: IntoFrameT | IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]: ...


# pass_through=True, series_only=True: Series.
@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


# pass_through=True, all other flags at their defaults: DataFrame or LazyFrame.
@overload
def from_native(
    native_object: IntoFrameT,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


# Same flags, any other input: typed as returned unchanged.
@overload
def from_native(
    native_object: T,
    *,
    pass_through: Literal[True],
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> T: ...


# pass_through=False (or omitted), eager_or_interchange_only=True: DataFrame.
@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[True],
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoDataFrameT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[True],
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...


@overload
def from_native(
    native_object: IntoFrame | IntoSeries,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: Literal[True],
) -> DataFrame[Any] | LazyFrame[Any] | Series[Any]: ...


@overload
def from_native(
    native_object: IntoSeriesT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[True],
    allow_series: None = ...,
) -> Series[IntoSeriesT]: ...


@overload
def from_native(
    native_object: IntoFrameT,
    *,
    pass_through: Literal[False] = ...,
    eager_only: Literal[False] = ...,
    eager_or_interchange_only: Literal[False] = ...,
    series_only: Literal[False] = ...,
    allow_series: None = ...,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]: ...


# All params passed in as variables
@overload
def from_native(
    native_object: Any,
    *,
    pass_through: bool,
    eager_only: bool,
    eager_or_interchange_only: bool = False,
    series_only: bool,
    allow_series: bool | None,
) -> Any: ...


def from_native(  # noqa: D417
    native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T,
    *,
    strict: bool | None = None,
    pass_through: bool | None = None,
    eager_only: bool = False,
    eager_or_interchange_only: bool = False,
    series_only: bool = False,
    allow_series: bool | None = None,
    **kwds: Any,
) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T:
    """Wrap `native_object` as a stable (v1) Narwhals DataFrame, LazyFrame or Series.

    Objects that already are stable Narwhals frames (or, when Series are
    permitted, stable Narwhals Series) are handed back as-is.

    Arguments:
        native_object: Raw object from user.
            Depending on the other arguments, this may be

            - a Dataframe / Lazyframe / Series supported by Narwhals
              (pandas, Polars, PyArrow, ...)
            - an object which implements `__narwhals_dataframe__`,
              `__narwhals_lazyframe__`, or `__narwhals_series__`
        strict: What to do when the object can't be converted to Narwhals

            - `True` or `None` (default): raise an error
            - `False`: pass object through as-is

            *Deprecated* (v1.13.0)

            Prefer `pass_through`. Inside `narwhals.stable.v1`, `strict`
            remains available and does not emit a deprecation warning, see
            [perfect backwards compatibility policy](../backcompat.md/).
        pass_through: What to do when the object can't be converted to Narwhals

            - `False` or `None` (default): raise an error
            - `True`: pass object through as-is
        eager_only: Whether to only allow eager objects

            - `False` (default): `native_object` need not be eager
            - `True`: only convert to Narwhals if `native_object` is eager
        eager_or_interchange_only: Whether to only allow eager objects or
            objects which have interchange-level support in Narwhals

            - `False` (default): no such requirement
            - `True`: only convert to Narwhals if `native_object` is eager
              or has interchange-level support in Narwhals

            See [interchange-only support](../extending.md/#interchange-only-support)
            for more details.
        series_only: Whether to only allow Series

            - `False` (default): `native_object` need not be a Series
            - `True`: only convert to Narwhals if `native_object` is a Series
        allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)

            - `False` or `None` (default): don't convert to Narwhals if
              `native_object` is a Series
            - `True`: allow `native_object` to be a Series

    Returns:
        DataFrame, LazyFrame, Series, or original object, depending
        on which combination of parameters was passed.

    Raises:
        TypeError: If any unexpected keyword argument is supplied and the
            input was not already returned by the early exits.
    """
    # Early exits: the input is already a stable Narwhals object of an
    # acceptable kind.
    is_stable_frame = isinstance(native_object, (DataFrame, LazyFrame))
    if is_stable_frame and not series_only:
        return native_object
    is_stable_series = isinstance(native_object, Series)
    if is_stable_series and (series_only or allow_series):
        return native_object

    # Reconcile the legacy `strict` flag with `pass_through` first; unknown
    # keywords are only rejected afterwards.
    pass_through = validate_strict_and_pass_though(
        strict, pass_through, pass_through_default=False, emit_deprecation_warning=False
    )
    if kwds:
        first_unexpected = next(iter(kwds))
        msg = f"from_native() got an unexpected keyword argument {first_unexpected!r}"
        raise TypeError(msg)

    converted = _from_native_impl(
        native_object,
        pass_through=pass_through,
        eager_only=eager_only,
        eager_or_interchange_only=eager_or_interchange_only,
        series_only=series_only,
        allow_series=allow_series,
        version=Version.V1,
    )
    return converted  # type: ignore[no-any-return]


@overload
def to_native(
    narwhals_object: DataFrame[IntoDataFrameT], *, strict: Literal[True] = ...
) -> IntoDataFrameT: ...
@overload
def to_native(
    narwhals_object: LazyFrame[IntoFrameT], *, strict: Literal[True] = ...
) -> IntoFrameT: ...
@overload
def to_native(
    narwhals_object: Series[IntoSeriesT], *, strict: Literal[True] = ...
) -> IntoSeriesT: ...
@overload
def to_native(narwhals_object: Any, *, strict: bool) -> Any: ...
@overload
def to_native(
    narwhals_object: DataFrame[IntoDataFrameT], *, pass_through: Literal[False] = ...
) -> IntoDataFrameT: ...
@overload
def to_native(
    narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ...
) -> IntoFrameT: ...
+@overload +def to_native( + narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ... +) -> IntoSeriesT: ... +@overload +def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ... + + +def to_native( + narwhals_object: DataFrame[IntoDataFrameT] + | LazyFrame[IntoFrameT] + | Series[IntoSeriesT], + *, + strict: bool | None = None, + pass_through: bool | None = None, +) -> IntoFrameT | IntoSeriesT | Any: + """Convert Narwhals object to native one. + + Arguments: + narwhals_object: Narwhals object. + strict: Determine what happens if `narwhals_object` isn't a Narwhals class + + - `True` (default): raise an error + - `False`: pass object through as-is + + *Deprecated* (v1.13.0) + + Please use `pass_through` instead. Note that `strict` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + pass_through: Determine what happens if `narwhals_object` isn't a Narwhals class + + - `False` (default): raise an error + - `True`: pass object through as-is + + Returns: + Object of class that user started with. + """ + from narwhals._utils import validate_strict_and_pass_though + from narwhals.dataframe import BaseFrame + from narwhals.series import Series + + pass_through = validate_strict_and_pass_though( + strict, pass_through, pass_through_default=False, emit_deprecation_warning=False + ) + + if isinstance(narwhals_object, BaseFrame): + return narwhals_object._compliant_frame._native_frame + if isinstance(narwhals_object, Series): + return narwhals_object._compliant_series.native + + if not pass_through: + msg = f"Expected Narwhals object, got {type(narwhals_object)}." 
+ raise TypeError(msg) + return narwhals_object + + +def narwhalify( + func: Callable[..., Any] | None = None, + *, + strict: bool | None = None, + pass_through: bool | None = None, + eager_only: bool = False, + eager_or_interchange_only: bool = False, + series_only: bool = False, + allow_series: bool | None = True, +) -> Callable[..., Any]: + """Decorate function so it becomes dataframe-agnostic. + + This will try to convert any dataframe/series-like object into the Narwhals + respective DataFrame/Series, while leaving the other parameters as they are. + Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be + converted back to the original dataframe/series type, while if the output is another + type it will be left as is. + By setting `pass_through=False`, then every input and every output will be required to be a + dataframe/series-like object. + + Arguments: + func: Function to wrap in a `from_native`-`to_native` block. + strict: Determine what happens if the object can't be converted to Narwhals + + *Deprecated* (v1.13.0) + + Please use `pass_through` instead. Note that `strict` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). 
+ + - `True` or `None` (default): raise an error + - `False`: pass object through as-is + pass_through: Determine what happens if the object can't be converted to Narwhals + + - `False` or `None` (default): raise an error + - `True`: pass object through as-is + eager_only: Whether to only allow eager objects + + - `False` (default): don't require `native_object` to be eager + - `True`: only convert to Narwhals if `native_object` is eager + eager_or_interchange_only: Whether to only allow eager objects or objects which + have interchange-level support in Narwhals + + - `False` (default): don't require `native_object` to either be eager or to + have interchange-level support in Narwhals + - `True`: only convert to Narwhals if `native_object` is eager or has + interchange-level support in Narwhals + + See [interchange-only support](../extending.md/#interchange-only-support) + for more details. + series_only: Whether to only allow Series + + - `False` (default): don't require `native_object` to be a Series + - `True`: only convert to Narwhals if `native_object` is a Series + allow_series: Whether to allow Series (default is only Dataframe / Lazyframe) + + - `False` or `None`: don't convert to Narwhals if `native_object` is a Series + - `True` (default): allow `native_object` to be a Series + + Returns: + Decorated function. 
+ """ + pass_through = validate_strict_and_pass_though( + strict, pass_through, pass_through_default=True, emit_deprecation_warning=False + ) + + def decorator(func: Callable[..., Any]) -> Callable[..., Any]: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + args = [ + from_native( + arg, + pass_through=pass_through, + eager_only=eager_only, + eager_or_interchange_only=eager_or_interchange_only, + series_only=series_only, + allow_series=allow_series, + ) + for arg in args + ] # type: ignore[assignment] + + kwargs = { + name: from_native( + value, + pass_through=pass_through, + eager_only=eager_only, + eager_or_interchange_only=eager_or_interchange_only, + series_only=series_only, + allow_series=allow_series, + ) + for name, value in kwargs.items() + } + + backends = { + b() + for v in (*args, *kwargs.values()) + if (b := getattr(v, "__native_namespace__", None)) + } + + if backends.__len__() > 1: + msg = "Found multiple backends. Make sure that all dataframe/series inputs come from the same backend." + raise ValueError(msg) + + result = func(*args, **kwargs) + + return to_native(result, pass_through=pass_through) + + return wrapper + + if func is None: + return decorator + else: + # If func is not None, it means the decorator is used without arguments + return decorator(func) + + +def all() -> Expr: + """Instantiate an expression representing all columns. + + Returns: + A new expression. + """ + return _stableify(nw.all()) + + +def col(*names: str | Iterable[str]) -> Expr: + """Creates an expression that references one or more columns by their name(s). + + Arguments: + names: Name(s) of the columns to use. + + Returns: + A new expression. + """ + return _stableify(nw.col(*names)) + + +def exclude(*names: str | Iterable[str]) -> Expr: + """Creates an expression that excludes columns by their name(s). + + Arguments: + names: Name(s) of the columns to exclude. + + Returns: + A new expression. 
+ """ + return _stableify(nw.exclude(*names)) + + +def nth(*indices: int | Sequence[int]) -> Expr: + """Creates an expression that references one or more columns by their index(es). + + Notes: + `nth` is not supported for Polars version<1.0.0. Please use + [`narwhals.col`][] instead. + + Arguments: + indices: One or more indices representing the columns to retrieve. + + Returns: + A new expression. + """ + return _stableify(nw.nth(*indices)) + + +def len() -> Expr: + """Return the number of rows. + + Returns: + A new expression. + """ + return _stableify(nw.len()) + + +def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr: + """Return an expression representing a literal value. + + Arguments: + value: The value to use as literal. + dtype: The data type of the literal value. If not provided, the data type will + be inferred by the native library. + + Returns: + A new expression. + """ + return _stableify(nw.lit(value, dtype)) + + +def min(*columns: str) -> Expr: + """Return the minimum value. + + Note: + Syntactic sugar for ``nw.col(columns).min()``. + + Arguments: + columns: Name(s) of the columns to use in the aggregation function. + + Returns: + A new expression. + """ + return _stableify(nw.min(*columns)) + + +def max(*columns: str) -> Expr: + """Return the maximum value. + + Note: + Syntactic sugar for ``nw.col(columns).max()``. + + Arguments: + columns: Name(s) of the columns to use in the aggregation function. + + Returns: + A new expression. + """ + return _stableify(nw.max(*columns)) + + +def mean(*columns: str) -> Expr: + """Get the mean value. + + Note: + Syntactic sugar for ``nw.col(columns).mean()`` + + Arguments: + columns: Name(s) of the columns to use in the aggregation function + + Returns: + A new expression. + """ + return _stableify(nw.mean(*columns)) + + +def median(*columns: str) -> Expr: + """Get the median value. 
+ + Notes: + - Syntactic sugar for ``nw.col(columns).median()`` + - Results might slightly differ across backends due to differences in the + underlying algorithms used to compute the median. + + Arguments: + columns: Name(s) of the columns to use in the aggregation function + + Returns: + A new expression. + """ + return _stableify(nw.median(*columns)) + + +def sum(*columns: str) -> Expr: + """Sum all values. + + Note: + Syntactic sugar for ``nw.col(columns).sum()`` + + Arguments: + columns: Name(s) of the columns to use in the aggregation function + + Returns: + A new expression. + """ + return _stableify(nw.sum(*columns)) + + +def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: + """Sum all values horizontally across columns. + + Warning: + Unlike Polars, we support horizontal sum over numeric columns only. + + Arguments: + exprs: Name(s) of the columns to use in the aggregation function. Accepts + expression input. + + Returns: + A new expression. + """ + return _stableify(nw.sum_horizontal(*exprs)) + + +def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: + r"""Compute the bitwise AND horizontally across columns. + + Arguments: + exprs: Name(s) of the columns to use in the aggregation function. Accepts + expression input. + + Returns: + A new expression. + """ + return _stableify(nw.all_horizontal(*exprs)) + + +def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: + r"""Compute the bitwise OR horizontally across columns. + + Arguments: + exprs: Name(s) of the columns to use in the aggregation function. Accepts + expression input. + + Returns: + A new expression. + """ + return _stableify(nw.any_horizontal(*exprs)) + + +def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: + """Compute the mean of all values horizontally across columns. + + Arguments: + exprs: Name(s) of the columns to use in the aggregation function. Accepts + expression input. + + Returns: + A new expression. 
+ """ + return _stableify(nw.mean_horizontal(*exprs)) + + +def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: + """Get the minimum value horizontally across columns. + + Notes: + We support `min_horizontal` over numeric columns only. + + Arguments: + exprs: Name(s) of the columns to use in the aggregation function. Accepts + expression input. + + Returns: + A new expression. + """ + return _stableify(nw.min_horizontal(*exprs)) + + +def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: + """Get the maximum value horizontally across columns. + + Notes: + We support `max_horizontal` over numeric columns only. + + Arguments: + exprs: Name(s) of the columns to use in the aggregation function. Accepts + expression input. + + Returns: + A new expression. + """ + return _stableify(nw.max_horizontal(*exprs)) + + +def concat_str( + exprs: IntoExpr | Iterable[IntoExpr], + *more_exprs: IntoExpr, + separator: str = "", + ignore_nulls: bool = False, +) -> Expr: + r"""Horizontally concatenate columns into a single string column. + + Arguments: + exprs: Columns to concatenate into a single string column. Accepts expression + input. Strings are parsed as column names, other non-expression inputs are + parsed as literals. Non-`String` columns are cast to `String`. + *more_exprs: Additional columns to concatenate into a single string column, + specified as positional arguments. + separator: String that will be used to separate the values of each column. + ignore_nulls: Ignore null values (default is `False`). + If set to `False`, null values will be propagated and if the row contains any + null values, the output is null. + + Returns: + A new expression. 
+ """ + return _stableify( + nw.concat_str(exprs, *more_exprs, separator=separator, ignore_nulls=ignore_nulls) + ) + + +class When(nw_f.When): + @classmethod + def from_when(cls, when: nw_f.When) -> When: + return cls(when._predicate) + + def then(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Then: + return Then.from_then(super().then(value)) + + +class Then(nw_f.Then, Expr): + @classmethod + def from_then(cls, then: nw_f.Then) -> Then: + return cls(then._to_compliant_expr, then._metadata) + + def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr: + return _stableify(super().otherwise(value)) + + +def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: + """Start a `when-then-otherwise` expression. + + Expression similar to an `if-else` statement in Python. Always initiated by a + `pl.when(<condition>).then(<value if condition>)`, and optionally followed by + `.otherwise(<value if condition is false>)` appended at the end. If not + appended, and the condition is not `True`, `None` will be returned. + + Info: + Chaining multiple `.when(<condition>).then(<value>)` statements is currently + not supported. + See [Narwhals#668](https://github.com/narwhals-dev/narwhals/issues/668). + + Arguments: + predicates: Condition(s) that must be met in order to apply the subsequent + statement. Accepts one or more boolean expressions, which are implicitly + combined with `&`. String input is parsed as a column name. + + Returns: + A "when" object, which `.then` can be called on. + """ + return When.from_when(nw_f.when(*predicates)) + + +@deprecate_native_namespace(required=True) +def new_series( + name: str, + values: Any, + dtype: IntoDType | None = None, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 +) -> Series[Any]: + """Instantiate Narwhals Series from iterable (e.g. list or array). + + Arguments: + name: Name of resulting Series. + values: Values to make Series from. + dtype: (Narwhals) dtype.
If not provided, the native library + may auto-infer it from `values`. + backend: specifies which eager backend to instantiate to. + + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for Series creation. + + *Deprecated* (v1.31.0) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + + Returns: + A new Series + """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(_new_series_impl(name, values, dtype, backend=backend)) + + +@deprecate_native_namespace(required=True) +def from_arrow( + native_frame: IntoArrowTable, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 +) -> DataFrame[Any]: + """Construct a DataFrame from an object which supports the PyCapsule Interface. + + Arguments: + native_frame: Object which implements `__arrow_c_stream__`. + backend: specifies which eager backend to instantiate to. + + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.31.0) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/).
+ + Returns: + A new DataFrame. + """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(nw_f.from_arrow(native_frame, backend=backend)) + + +@deprecate_native_namespace() +def from_dict( + data: Mapping[str, Any], + schema: Mapping[str, DType] | Schema | None = None, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 +) -> DataFrame[Any]: + """Instantiate DataFrame from dictionary. + + Indexes (if present, for pandas-like backends) are aligned following + the [left-hand-rule](../concepts/pandas_index.md/). + + Notes: + For pandas-like dataframes, conversion to schema is applied after dataframe + creation. + + Arguments: + data: Dictionary to create DataFrame from. + schema: The DataFrame schema as Schema or dict of {name: type}. If not + specified, the schema will be inferred by the native library. + backend: specifies which eager backend to instantiate to. Only + necessary if inputs are not Narwhals Series. + + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.26.0) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + + Returns: + A new DataFrame.
+ """ + return _stableify(nw_f.from_dict(data, schema, backend=backend)) + + +@deprecate_native_namespace(required=True) +def from_numpy( + data: _2DArray, + schema: Mapping[str, DType] | Schema | Sequence[str] | None = None, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 +) -> DataFrame[Any]: + """Construct a DataFrame from a NumPy ndarray. + + Notes: + Only row orientation is currently supported. + + For pandas-like dataframes, conversion to schema is applied after dataframe + creation. + + Arguments: + data: Two-dimensional data represented as a NumPy ndarray. + schema: The DataFrame schema as Schema, dict of {name: type}, or a sequence of str. + backend: specifies which eager backend to instantiate to. + + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.31.0) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + + Returns: + A new DataFrame. + """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(nw_f.from_numpy(data, schema, backend=backend)) + + +@deprecate_native_namespace(required=True) +def read_csv( + source: str, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 + **kwargs: Any, +) -> DataFrame[Any]: + """Read a CSV file into a DataFrame. + + Arguments: + source: Path to a file. + backend: The eager backend for DataFrame creation.
+ `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.27.2) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + kwargs: Extra keyword arguments which are passed to the native CSV reader. + For example, you could use + `nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`. + + Returns: + DataFrame. + """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(nw_f.read_csv(source, backend=backend, **kwargs)) + + +@deprecate_native_namespace(required=True) +def scan_csv( + source: str, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 + **kwargs: Any, +) -> LazyFrame[Any]: + """Lazily read from a CSV file. + + For the libraries that do not support lazy dataframes, the function reads + a csv file eagerly and then converts the resulting dataframe to a lazyframe. + + Arguments: + source: Path to a file. + backend: The eager backend for DataFrame creation. + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.31.0) + + Please use `backend` instead.
Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + kwargs: Extra keyword arguments which are passed to the native CSV reader. + For example, you could use + `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`. + + Returns: + LazyFrame. + """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(nw_f.scan_csv(source, backend=backend, **kwargs)) + + +@deprecate_native_namespace(required=True) +def read_parquet( + source: str, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 + **kwargs: Any, +) -> DataFrame[Any]: + """Read into a DataFrame from a parquet file. + + Arguments: + source: Path to a file. + backend: The eager backend for DataFrame creation. + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN` or `CUDF`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.31.0) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + kwargs: Extra keyword arguments which are passed to the native parquet reader. + For example, you could use + `nw.read_parquet('file.parquet', backend=pd, engine='pyarrow')`. + + Returns: + DataFrame.
+ """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(nw_f.read_parquet(source, backend=backend, **kwargs)) + + +@deprecate_native_namespace(required=True) +def scan_parquet( + source: str, + *, + backend: ModuleType | Implementation | str | None = None, + native_namespace: ModuleType | None = None, # noqa: ARG001 + **kwargs: Any, +) -> LazyFrame[Any]: + """Lazily read from a parquet file. + + For the libraries that do not support lazy dataframes, the function reads + a parquet file eagerly and then converts the resulting dataframe to a lazyframe. + + Note: + Spark-like backends require a session object to be passed in `kwargs`. + + For instance: + + ```py + import narwhals as nw + from sqlframe.duckdb import DuckDBSession + + nw.scan_parquet(source, backend="sqlframe", session=DuckDBSession()) + ``` + + Arguments: + source: Path to a file. + backend: The eager backend for DataFrame creation. + `backend` can be specified in various ways + + - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`, + `POLARS`, `MODIN`, `CUDF`, `PYSPARK` or `SQLFRAME`. + - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"`, `"cudf"`, + `"pyspark"` or `"sqlframe"`. + - Directly as a module `pandas`, `pyarrow`, `polars`, `modin`, `cudf`, + `pyspark.sql` or `sqlframe`. + native_namespace: The native library to use for DataFrame creation. + + *Deprecated* (v1.31.0) + + Please use `backend` instead. Note that `native_namespace` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](../backcompat.md/). + kwargs: Extra keyword arguments which are passed to the native parquet reader. + For example, you could use + `nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`. + + Returns: + LazyFrame.
+ """ + backend = cast("ModuleType | Implementation | str", backend) + return _stableify(nw_f.scan_parquet(source, backend=backend, **kwargs)) + + +__all__ = [ + "Array", + "Binary", + "Boolean", + "Categorical", + "DataFrame", + "Date", + "Datetime", + "Decimal", + "Duration", + "Enum", + "Expr", + "Field", + "Float32", + "Float64", + "Implementation", + "Int8", + "Int16", + "Int32", + "Int64", + "Int128", + "LazyFrame", + "List", + "Object", + "Schema", + "Series", + "String", + "Struct", + "Time", + "UInt8", + "UInt16", + "UInt32", + "UInt64", + "UInt128", + "Unknown", + "all", + "all_horizontal", + "any_horizontal", + "col", + "concat", + "concat_str", + "dependencies", + "dtypes", + "exceptions", + "exclude", + "from_arrow", + "from_dict", + "from_native", + "from_numpy", + "generate_temporary_column_name", + "get_level", + "get_native_namespace", + "is_ordered_categorical", + "len", + "lit", + "max", + "max_horizontal", + "maybe_align_index", + "maybe_convert_dtypes", + "maybe_get_index", + "maybe_reset_index", + "maybe_set_index", + "mean", + "mean_horizontal", + "median", + "min", + "min_horizontal", + "narwhalify", + "new_series", + "nth", + "read_csv", + "read_parquet", + "scan_csv", + "scan_parquet", + "selectors", + "show_versions", + "sum", + "sum_horizontal", + "to_native", + "to_py_scalar", + "when", +] diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py new file mode 100644 index 0000000..060980c --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_dtypes.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from narwhals._utils import inherit_doc +from narwhals.dtypes import ( + Array, + Binary, + Boolean, + Categorical, + Date, + Datetime as NwDatetime, + Decimal, + DType, + Duration as NwDuration, + Enum as NwEnum, + Field, + Float32, + Float64, + FloatType, + Int8, + Int16, + Int32, + Int64, + 
Int128, + IntegerType, + List, + NestedType, + NumericType, + Object, + SignedIntegerType, + String, + Struct, + Time, + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Unknown, + UnsignedIntegerType, +) + +if TYPE_CHECKING: + from datetime import timezone + + from narwhals.typing import TimeUnit + + +class Datetime(NwDatetime): + @inherit_doc(NwDatetime) + def __init__( + self, time_unit: TimeUnit = "us", time_zone: str | timezone | None = None + ) -> None: + super().__init__(time_unit, time_zone) + + def __hash__(self) -> int: + return hash(self.__class__) + + +class Duration(NwDuration): + @inherit_doc(NwDuration) + def __init__(self, time_unit: TimeUnit = "us") -> None: + super().__init__(time_unit) + + def __hash__(self) -> int: + return hash(self.__class__) + + +class Enum(NwEnum): + """A fixed categorical encoding of a unique set of strings. + + Polars has an Enum data type, while pandas and PyArrow do not. + + Examples: + >>> import polars as pl + >>> import narwhals.stable.v1 as nw + >>> data = ["beluga", "narwhal", "orca"] + >>> s_native = pl.Series(data, dtype=pl.Enum(data)) + >>> nw.from_native(s_native, series_only=True).dtype + Enum + """ + + def __init__(self) -> None: + super(NwEnum, self).__init__() + + def __eq__(self, other: DType | type[DType]) -> bool: # type: ignore[override] + if type(other) is type: + return other in {type(self), NwEnum} + return isinstance(other, type(self)) + + def __hash__(self) -> int: # pragma: no cover + return super(NwEnum, self).__hash__() + + def __repr__(self) -> str: # pragma: no cover + return super(NwEnum, self).__repr__() + + +__all__ = [ + "Array", + "Binary", + "Boolean", + "Categorical", + "DType", + "Date", + "Datetime", + "Decimal", + "Duration", + "Enum", + "Field", + "Float32", + "Float64", + "FloatType", + "Int8", + "Int16", + "Int32", + "Int64", + "Int128", + "IntegerType", + "List", + "NestedType", + "NumericType", + "Object", + "SignedIntegerType", + "String", + "Struct", + "Time", + "UInt8", + 
"UInt16", + "UInt32", + "UInt64", + "UInt128", + "Unknown", + "UnsignedIntegerType", +] diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py new file mode 100644 index 0000000..8f11f01 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/_namespace.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from narwhals._compliant.typing import CompliantNamespaceT_co +from narwhals._namespace import Namespace as NwNamespace +from narwhals._utils import Version + +__all__ = ["Namespace"] + + +class Namespace(NwNamespace[CompliantNamespaceT_co], version=Version.V1): ... diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py new file mode 100644 index 0000000..ad57042 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dependencies.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from narwhals.dependencies import ( + get_cudf, + get_ibis, + get_modin, + get_numpy, + get_pandas, + get_polars, + get_pyarrow, + is_cudf_dataframe, + is_cudf_series, + is_dask_dataframe, + is_ibis_table, + is_into_dataframe, + is_into_series, + is_modin_dataframe, + is_modin_series, + is_narwhals_dataframe, + is_narwhals_lazyframe, + is_narwhals_series, + is_numpy_array, + is_pandas_dataframe, + is_pandas_index, + is_pandas_like_dataframe, + is_pandas_like_series, + is_pandas_series, + is_polars_dataframe, + is_polars_lazyframe, + is_polars_series, + is_pyarrow_chunked_array, + is_pyarrow_table, +) + +__all__ = [ + "get_cudf", + "get_ibis", + "get_modin", + "get_numpy", + "get_pandas", + "get_polars", + "get_pyarrow", + "is_cudf_dataframe", + "is_cudf_series", + "is_dask_dataframe", + "is_ibis_table", + "is_into_dataframe", + "is_into_series", + "is_modin_dataframe", + "is_modin_series", + "is_narwhals_dataframe", + "is_narwhals_lazyframe", + 
"is_narwhals_series", + "is_numpy_array", + "is_pandas_dataframe", + "is_pandas_index", + "is_pandas_like_dataframe", + "is_pandas_like_series", + "is_pandas_series", + "is_polars_dataframe", + "is_polars_lazyframe", + "is_polars_series", + "is_pyarrow_chunked_array", + "is_pyarrow_table", +] diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py new file mode 100644 index 0000000..a292be8 --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/dtypes.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from narwhals.stable.v1._dtypes import ( + Array, + Binary, + Boolean, + Categorical, + Date, + Datetime, + Decimal, + DType, + Duration, + Enum, + Field, + Float32, + Float64, + FloatType, + Int8, + Int16, + Int32, + Int64, + Int128, + IntegerType, + List, + NestedType, + NumericType, + Object, + SignedIntegerType, + String, + Struct, + Time, + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Unknown, + UnsignedIntegerType, +) + +__all__ = [ + "Array", + "Binary", + "Boolean", + "Categorical", + "DType", + "Date", + "Datetime", + "Decimal", + "Duration", + "Enum", + "Field", + "Float32", + "Float64", + "FloatType", + "Int8", + "Int16", + "Int32", + "Int64", + "Int128", + "IntegerType", + "List", + "NestedType", + "NumericType", + "Object", + "SignedIntegerType", + "String", + "Struct", + "Time", + "UInt8", + "UInt16", + "UInt32", + "UInt64", + "UInt128", + "Unknown", + "UnsignedIntegerType", +] diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py new file mode 100644 index 0000000..ede128e --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/selectors.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from narwhals.selectors import ( + all, + boolean, + by_dtype, + categorical, + datetime, + matches, + numeric, + string, +) + +__all__ = [ + 
"all", + "boolean", + "by_dtype", + "categorical", + "datetime", + "matches", + "numeric", + "string", +] diff --git a/venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py b/venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py new file mode 100644 index 0000000..b55d13e --- /dev/null +++ b/venv/lib/python3.8/site-packages/narwhals/stable/v1/typing.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Protocol, TypeVar, Union + +if TYPE_CHECKING: + import sys + + from narwhals.stable.v1 import DataFrame, LazyFrame + + if sys.version_info >= (3, 10): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias + + from narwhals.stable.v1 import Expr, Series, dtypes + + # All dataframes supported by Narwhals have a + # `columns` property. Their similarities don't extend + # _that_ much further unfortunately... + class NativeFrame(Protocol): + @property + def columns(self) -> Any: ... + + def join(self, *args: Any, **kwargs: Any) -> Any: ... + + class NativeSeries(Protocol): + def __len__(self) -> int: ... + + class DataFrameLike(Protocol): + def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... + + +IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] +"""Anything which can be converted to an expression. + +Use this to mean "either a Narwhals expression, or something +which can be converted into one". For example, `exprs` in `DataFrame.select` is +typed to accept `IntoExpr`, as it can either accept a `nw.Expr` +(e.g. `df.select(nw.col('a'))`) or a string which will be interpreted as a +`nw.Expr`, e.g. `df.select('a')`. +""" + +IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrame[Any]", "DataFrameLike"] +"""Anything which can be converted to a Narwhals DataFrame. + +Use this if your function accepts a narwhalifiable object but doesn't care about its backend. 
+ +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.shape +""" + +IntoFrame: TypeAlias = Union[ + "NativeFrame", "DataFrame[Any]", "LazyFrame[Any]", "DataFrameLike" +] +"""Anything which can be converted to a Narwhals DataFrame or LazyFrame. + +Use this if your function can accept an object which can be converted to either +`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrame + >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: + ... df = nw.from_native(df_native) + ... return df.collect_schema().names() +""" + +Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] +"""Narwhals DataFrame or Narwhals LazyFrame. + +Use this if your function can work with either and your function doesn't care +about its backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import Frame + >>> @nw.narwhalify + ... def agnostic_columns(df: Frame) -> list[str]: + ... return df.columns +""" + +IntoSeries: TypeAlias = Union["Series[Any]", "NativeSeries"] +"""Anything which can be converted to a Narwhals Series. + +Use this if your function can accept an object which can be converted to `nw.Series` +and it doesn't care about its backend. + +Examples: + >>> from typing import Any + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: + ... s = nw.from_native(s_native) + ... return s.to_list() +""" + +IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") +"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. + +Use this if your function accepts an object which is convertible to `nw.DataFrame` +or `nw.LazyFrame` and returns an object of the same type. 
+ +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoFrameT + >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: + ... df = nw.from_native(df_native) + ... return df.with_columns(c=nw.col("a") + 1).to_native() +""" + +IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") +"""TypeVar bound to object convertible to Narwhals DataFrame. + +Use this if your function accepts an object which can be converted to `nw.DataFrame` +and returns an object of the same class. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrameT + >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: + ... df = nw.from_native(df_native, eager_only=True) + ... return df.with_columns(c=df["a"] + 1).to_native() +""" + +FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") +"""TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. + +Use this if your function accepts either `nw.DataFrame` or `nw.LazyFrame` and returns +an object of the same kind. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import FrameT + >>> @nw.narwhalify + ... def agnostic_func(df: FrameT) -> FrameT: + ... return df.with_columns(c=nw.col("a") + 1) +""" + +DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]") +"""TypeVar bound to Narwhals DataFrame. + +Use this if your function can accept a Narwhals DataFrame and returns a Narwhals +DataFrame backed by the same backend. + +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import DataFrameT + >>> @nw.narwhalify + >>> def func(df: DataFrameT) -> DataFrameT: + ... return df.with_columns(c=df["a"] + 1) +""" + +IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") +"""TypeVar bound to object convertible to Narwhals Series. + +Use this if your function accepts an object which can be converted to `nw.Series` +and returns an object of the same class. 
+ +Examples: + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... return s.abs().to_native() +""" + + +class DTypes: + Int64: type[dtypes.Int64] + Int32: type[dtypes.Int32] + Int16: type[dtypes.Int16] + Int8: type[dtypes.Int8] + UInt64: type[dtypes.UInt64] + UInt32: type[dtypes.UInt32] + UInt16: type[dtypes.UInt16] + UInt8: type[dtypes.UInt8] + Float64: type[dtypes.Float64] + Float32: type[dtypes.Float32] + String: type[dtypes.String] + Boolean: type[dtypes.Boolean] + Object: type[dtypes.Object] + Categorical: type[dtypes.Categorical] + Enum: type[dtypes.Enum] + Datetime: type[dtypes.Datetime] + Duration: type[dtypes.Duration] + Date: type[dtypes.Date] + Field: type[dtypes.Field] + Struct: type[dtypes.Struct] + List: type[dtypes.List] + Array: type[dtypes.Array] + Unknown: type[dtypes.Unknown] + + +__all__ = [ + "DataFrameT", + "Frame", + "FrameT", + "IntoDataFrame", + "IntoDataFrameT", + "IntoExpr", + "IntoFrame", + "IntoFrameT", + "IntoSeries", + "IntoSeriesT", +] -- cgit v1.2.3-70-g09d2