diff --git a/python/pyarrow-stubs/pyarrow/_acero.pyi b/python/pyarrow-stubs/pyarrow/_acero.pyi new file mode 100644 index 00000000000..85ed9683e7e --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_acero.pyi @@ -0,0 +1,163 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys +from collections.abc import Iterable, Collection, Sequence + +if sys.version_info >= (3, 11): + from typing import Self, LiteralString +else: + from typing_extensions import Self, LiteralString +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Literal + +from . import lib +from .compute import Expression +from .dataset import InMemoryDataset, Dataset +from .table import Aggregation, AggregateOptions + +_StrOrExpr: TypeAlias = str | Expression + +IntoField: TypeAlias = str | int | Expression +Target: TypeAlias = ( + IntoField + | tuple[IntoField, ...] 
+ | list[str] + | list[int] + | list[Expression] + | list[IntoField] +) + +UserDefinedAggregation: TypeAlias = LiteralString +OutputName: TypeAlias = str +AggregationSpec: TypeAlias = tuple[ + Target, Aggregation | UserDefinedAggregation, AggregateOptions | None, OutputName +] + + +class Declaration(lib._Weakrefable): + def __init__( + self, + factory_name: str, + options: ExecNodeOptions, + inputs: list[Declaration] | None = None, + ) -> None: ... + @classmethod + def from_sequence(cls, decls: Iterable[Declaration]) -> Self: ... + def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ... + def to_table(self, use_threads: bool = True) -> lib.Table: ... + + +class ExecNodeOptions(lib._Weakrefable): + ... + + +class TableSourceNodeOptions(ExecNodeOptions): + def __init__(self, table: lib.Table | lib.RecordBatch | None) -> None: ... + + +class FilterNodeOptions(ExecNodeOptions): + def __init__(self, filter_expression: Expression | None) -> None: ... + + +class ProjectNodeOptions(ExecNodeOptions): + def __init__(self, expressions: Collection[Expression], + names: Collection[str] | None = None) -> None: ... + + +class AggregateNodeOptions(ExecNodeOptions): + def __init__( + self, + aggregates: Iterable[ + tuple[ + Target, + Aggregation | UserDefinedAggregation, + AggregateOptions | None, + OutputName, + ] + ], + keys: Iterable[str | Expression] | None = None, + ) -> None: ... + + +class OrderByNodeOptions(ExecNodeOptions): + def __init__( + self, + sort_keys: + Iterable[tuple[str | Expression | int, Literal["ascending", "descending"]]] + = (), + *, + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> None: ... 
+ + +class HashJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + join_type: Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", + ], + left_keys: _StrOrExpr | Sequence[_StrOrExpr], + right_keys: _StrOrExpr | Sequence[_StrOrExpr], + left_output: Sequence[_StrOrExpr] | None = None, + right_output: Sequence[_StrOrExpr] | None = None, + output_suffix_for_left: str = "", + output_suffix_for_right: str = "", + filter_expression: + lib.BooleanScalar | lib.BooleanArray | Expression | None = None, + ) -> None: ... + + +class AsofJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + left_on: _StrOrExpr, + left_by: _StrOrExpr | Sequence[_StrOrExpr], + right_on: _StrOrExpr, + right_by: _StrOrExpr | Sequence[_StrOrExpr], + tolerance: int, + ) -> None: ... + + +def _perform_join( + join_type: str, + left_operand: lib.Table | Dataset, + left_keys: str | list[str], + right_operand: lib.Table | Dataset, + right_keys: str | list[str], + left_suffix: str, + right_suffix: str, + use_threads: bool, + coalesce_keys: bool, + output_type: type[lib.Table | InMemoryDataset] = lib.Table, + filter_expression: Expression | None = None, +) -> lib.Table | InMemoryDataset: ... + + +def _filter_table( + table: lib.Table | lib.RecordBatch, filter_expression: Expression, + use_threads: bool = True) -> lib.Table | lib.RecordBatch: ... diff --git a/python/pyarrow-stubs/pyarrow/_compute.pyi b/python/pyarrow-stubs/pyarrow/_compute.pyi new file mode 100644 index 00000000000..dfe46908c08 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_compute.pyi @@ -0,0 +1,671 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import types as stdlib_types +from collections.abc import ( + Callable, + Iterable, + Mapping, + Sequence, +) + +from typing import ( + Any, + Literal, + TypeAlias, + TypedDict, +) + +from . import lib + +_Order: TypeAlias = Literal["ascending", "descending"] +_Placement: TypeAlias = Literal["at_start", "at_end"] + + +class Kernel(lib._Weakrefable): + ... + + +class Function(lib._Weakrefable): + @property + def arity(self) -> int | stdlib_types.EllipsisType: ... + + @property + def kind( + self, + ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: ... + @property + def name(self) -> str: ... + @property + def num_kernels(self) -> int: ... + + @property + def kernels( + self, + ) -> list[ + ScalarKernel | VectorKernel | ScalarAggregateKernel | HashAggregateKernel + ]: ... + + def call( + self, + args: Iterable, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, + ) -> Any: ... + + +class FunctionOptions(lib._Weakrefable): + def serialize(self) -> lib.Buffer: ... + @classmethod + def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... + + +class FunctionRegistry(lib._Weakrefable): + def get_function(self, name: str) -> Function: ... + def list_functions(self) -> list[str]: ... + + +class HashAggregateFunction(Function): + ... + + +class HashAggregateKernel(Kernel): + ... 
+ + +class ScalarAggregateFunction(Function): + ... + + +class ScalarAggregateKernel(Kernel): + ... + + +class ScalarFunction(Function): + ... + + +class ScalarKernel(Kernel): + ... + + +class VectorFunction(Function): + ... + + +class VectorKernel(Kernel): + ... + +# ==================== _compute.pyx Option classes ==================== + + +class ArraySortOptions(FunctionOptions): + def __init__( + self, + order: _Order = "ascending", + null_placement: _Placement = "at_end", + ) -> None: ... + + +class AssumeTimezoneOptions(FunctionOptions): + def __init__( + self, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + ) -> None: ... + + +class CastOptions(FunctionOptions): + allow_int_overflow: bool + allow_time_truncate: bool + allow_time_overflow: bool + allow_decimal_truncate: bool + allow_float_truncate: bool + allow_invalid_utf8: bool + + def __init__( + self, + target_type: lib.DataType | None = None, + *, + allow_int_overflow: bool | None = None, + allow_time_truncate: bool | None = None, + allow_time_overflow: bool | None = None, + allow_decimal_truncate: bool | None = None, + allow_float_truncate: bool | None = None, + allow_invalid_utf8: bool | None = None, + ) -> None: ... + @staticmethod + def safe(target_type: lib.DataType | None = None) -> CastOptions: ... + @staticmethod + def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... + def is_safe(self) -> bool: ... + + +class CountOptions(FunctionOptions): + def __init__(self, mode: Literal["only_valid", + "only_null", "all"] = "only_valid") -> None: ... + + +class CumulativeOptions(FunctionOptions): + def __init__(self, start: lib.Scalar | None = None, + *, skip_nulls: bool = False) -> None: ... + + +class CumulativeSumOptions(FunctionOptions): + def __init__(self, start: lib.Scalar | None = None, + *, skip_nulls: bool = False) -> None: ... 
+ + +class DayOfWeekOptions(FunctionOptions): + def __init__(self, *, count_from_zero: bool = True, + week_start: int = 1) -> None: ... + + +class DictionaryEncodeOptions(FunctionOptions): + def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ... + + +class RunEndEncodeOptions(FunctionOptions): + # TODO: default is DataType(int32) + def __init__(self, run_end_type: lib.DataType | str = ...) -> None: ... + + +class ElementWiseAggregateOptions(FunctionOptions): + def __init__(self, *, skip_nulls: bool = True) -> None: ... + + +class ExtractRegexOptions(FunctionOptions): + def __init__(self, pattern: str) -> None: ... + + +class ExtractRegexSpanOptions(FunctionOptions): + def __init__(self, pattern: str) -> None: ... + + +class FilterOptions(FunctionOptions): + def __init__(self, + null_selection_behavior: Literal["drop", + "emit_null"] = "drop") -> None: ... + + +class IndexOptions(FunctionOptions): + def __init__(self, value: lib.Scalar) -> None: ... + + +class JoinOptions(FunctionOptions): + def __init__( + self, + null_handling: + Literal["emit_null", "skip", "replace"] + = "emit_null", *, null_replacement: str = "") -> None: ... + + +class ListSliceOptions(FunctionOptions): + def __init__( + self, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + ) -> None: ... + + +class ListFlattenOptions(FunctionOptions): + def __init__(self, recursive: bool = False) -> None: ... + + +class MakeStructOptions(FunctionOptions): + def __init__( + self, + field_names: Sequence[str] = (), + *, + field_nullability: Sequence[bool] | None = None, + field_metadata: Sequence[lib.KeyValueMetadata] | None = None, + ) -> None: ... + + +class MapLookupOptions(FunctionOptions): + # TODO: query_key: Scalar or Object can be converted to Scalar + def __init__( + self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] + ) -> None: ... 
+ + +class MatchSubstringOptions(FunctionOptions): + def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... + + +class ModeOptions(FunctionOptions): + def __init__(self, n: int = 1, *, skip_nulls: bool = True, + min_count: int = 0) -> None: ... + + +class NullOptions(FunctionOptions): + def __init__(self, *, nan_is_null: bool = False) -> None: ... + + +class PadOptions(FunctionOptions): + def __init__( + self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True + ) -> None: ... + + +class PairwiseOptions(FunctionOptions): + def __init__(self, period: int = 1) -> None: ... + + +class PartitionNthOptions(FunctionOptions): + def __init__(self, pivot: int, *, + null_placement: _Placement = "at_end") -> None: ... + + +class WinsorizeOptions(FunctionOptions): + def __init__(self, lower_limit: float, upper_limit: float) -> None: ... + + +class QuantileOptions(FunctionOptions): + def __init__( + self, + q: float | Sequence[float] = 0.5, + *, + interpolation: Literal["linear", "lower", + "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + + +class RandomOptions(FunctionOptions): + def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... + + +class RankOptions(FunctionOptions): + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + ) -> None: ... + + +class RankQuantileOptions(FunctionOptions): + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + ) -> None: ... + + +class PivotWiderOptions(FunctionOptions): + def __init__( + self, + key_names: Sequence[str], + *, + unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", + ) -> None: ... 
+ + +class ReplaceSliceOptions(FunctionOptions): + def __init__(self, start: int, stop: int, replacement: str) -> None: ... + + +class ReplaceSubstringOptions(FunctionOptions): + def __init__( + self, pattern: str, replacement: str, *, max_replacements: int | None = None + ) -> None: ... + + +_RoundMode: TypeAlias = Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", +] + + +class RoundBinaryOptions(FunctionOptions): + def __init__( + self, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + + +class RoundOptions(FunctionOptions): + def __init__( + self, + ndigits: int = 0, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + + +_DateTimeUint: TypeAlias = Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", +] + + +class RoundTemporalOptions(FunctionOptions): + def __init__( + self, + multiple: int = 1, + unit: _DateTimeUint = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + ) -> None: ... + + +class RoundToMultipleOptions(FunctionOptions): + def __init__(self, multiple: int | float | lib.Scalar = 1.0, + round_mode: _RoundMode = "half_to_even") -> None: ... + + +class ScalarAggregateOptions(FunctionOptions): + def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ... + + +class SelectKOptions(FunctionOptions): + def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ... + + +class SetLookupOptions(FunctionOptions): + def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ... + + +class SliceOptions(FunctionOptions): + def __init__( + self, start: int, stop: int | None = None, step: int = 1) -> None: ... 
+ + +class SortOptions(FunctionOptions): + def __init__( + self, + sort_keys: Sequence[tuple[str, _Order]], + *, + null_placement: _Placement = "at_end" + ) -> None: ... + + +class SplitOptions(FunctionOptions): + def __init__(self, *, max_splits: int | None = None, + reverse: bool = False) -> None: ... + + +class SplitPatternOptions(FunctionOptions): + def __init__( + self, pattern: str, *, max_splits: int | None = None, reverse: bool = False + ) -> None: ... + + +class StrftimeOptions(FunctionOptions): + def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C") -> None: ... + + +class StrptimeOptions(FunctionOptions): + def __init__(self, + format: str, + unit: Literal["s", + "ms", + "us", + "ns"], + error_is_null: bool = False) -> None: ... + + +class StructFieldOptions(FunctionOptions): + def __init__(self, indices: list[str] | list[bytes] | + list[int] | Expression | bytes | str | int) -> None: ... + + +class TakeOptions(FunctionOptions): + def __init__(self, boundscheck: bool = True) -> None: ... + + +class TDigestOptions(FunctionOptions): + def __init__( + self, + q: float | Sequence[float] = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + + +class TrimOptions(FunctionOptions): + def __init__(self, characters: str) -> None: ... + + +class Utf8NormalizeOptions(FunctionOptions): + def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... + + +class VarianceOptions(FunctionOptions): + def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, + min_count: int = 0) -> None: ... + + +class SkewOptions(FunctionOptions): + def __init__( + self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 + ) -> None: ... + + +class WeekOptions(FunctionOptions): + def __init__( + self, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + ) -> None: ... 
+ + +class ZeroFillOptions(FunctionOptions): + def __init__(self, width: int, padding: str = "0") -> None: ... + +# ==================== _compute.pyx Functions ==================== + + +def call_function( + name: str, + args: list, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, +) -> Any: ... +def function_registry() -> FunctionRegistry: ... +def get_function(name: str) -> Function: ... +def list_functions() -> list[str]: ... + +# ==================== _compute.pyx Udf ==================== + + +def call_tabular_function( + function_name: str, + args: Iterable | None = None, + func_registry: FunctionRegistry | None = None) -> lib.RecordBatchReader: ... + + +class _FunctionDoc(TypedDict): + summary: str + description: str + + +def register_scalar_function( + func: Callable | None, + function_name: str | None, + function_doc: _FunctionDoc | dict[str, str], + in_types: Mapping[str, lib.DataType] | None, + out_type: lib.DataType | None, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +def register_tabular_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc | dict[str, str], + in_types: Mapping[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +def register_aggregate_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc | dict[str, str], + in_types: Mapping[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +def register_vector_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc | dict[str, str], + in_types: Mapping[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: ... + + +class UdfContext: + @property + def batch_length(self) -> int: ... + @property + def memory_pool(self) -> lib.MemoryPool: ... 
+ + +def _get_udf_context(memory_pool: lib.MemoryPool, batch_length: int) -> UdfContext: ... + +# ==================== _compute.pyx Expression ==================== + + +class Expression(lib._Weakrefable): + @staticmethod + def from_substrait(buffer: bytes | lib.Buffer) -> Expression: ... + + def to_substrait(self, schema: lib.Schema, + allow_arrow_extensions: bool = False) -> lib.Buffer: ... + + @staticmethod + def _call( + func_name: str, args: list, options: FunctionOptions | None = None + ) -> Expression: ... + + @staticmethod + def _field(name_or_index: str | int) -> Expression: ... + + @staticmethod + def _nested_field(name: str) -> Expression: ... + + @staticmethod + def _scalar(value: Any) -> Expression: ... + + def __invert__(self) -> Expression: ... + + def __and__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __rand__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __or__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __ror__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __add__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __radd__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __mul__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __rmul__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __sub__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __rsub__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __eq__(self, value: object) -> Expression: ... # type: ignore[override] + def __ne__(self, value: object) -> Expression: ... # type: ignore[override] + def __gt__(self, value: object) -> Expression: ... 
+ def __lt__(self, value: object) -> Expression: ... + def __ge__(self, value: object) -> Expression: ... + def __le__(self, value: object) -> Expression: ... + + def __truediv__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def __rtruediv__( + self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ... + + def is_valid(self) -> Expression: ... + def is_null(self, nan_is_null: bool = False) -> Expression: ... + def is_nan(self) -> Expression: ... + + def cast( + self, + type: lib.DataType | str, safe: bool = True, options: CastOptions | None = None + ) -> Expression: ... + + def isin(self, values: lib.Array | Iterable | Any) -> Expression: ... + def equals(self, other: object) -> bool: ... + + # Attributes and methods for materialized expressions (used in tests) + @property + def type(self) -> lib.DataType: ... + def to_pylist(self) -> list: ... + def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> Any: ... + def to_pandas(self, **kwargs) -> Any: ... + def as_py(self) -> Any: ... + def tolist(self) -> list: ... + def slice(self, offset: int = 0, length: int | None = None) -> Expression: ... + +# ==================== _compute.py ==================== diff --git a/python/pyarrow-stubs/pyarrow/_compute_docstring.pyi b/python/pyarrow-stubs/pyarrow/_compute_docstring.pyi new file mode 100644 index 00000000000..514a4e4269c --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_compute_docstring.pyi @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +function_doc_additions: dict[str, str] diff --git a/python/pyarrow-stubs/pyarrow/compute.pyi b/python/pyarrow-stubs/pyarrow/compute.pyi new file mode 100644 index 00000000000..809bccd1b92 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/compute.pyi @@ -0,0 +1,1834 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from collections.abc import Callable, Hashable, Iterable, Sequence, Mapping +from typing import Literal, TypeAlias, TypeVar, Any, ParamSpec + +import numpy as np + +# Option classes +from pyarrow._compute import ArraySortOptions as ArraySortOptions +from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions +from pyarrow._compute import CastOptions as CastOptions +from pyarrow._compute import CountOptions as CountOptions +from pyarrow._compute import CumulativeOptions as CumulativeOptions # noqa: F401 +from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions +from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions +from pyarrow._compute import ( # noqa: F401 + DictionaryEncodeOptions as DictionaryEncodeOptions) +from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions + +# Expressions +from pyarrow._compute import Expression as Expression +from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions +from pyarrow._compute import ( # noqa: F401 + ExtractRegexSpanOptions as ExtractRegexSpanOptions) +from pyarrow._compute import FilterOptions as FilterOptions +from pyarrow._compute import FunctionOptions as FunctionOptions # noqa: F401 +from pyarrow._compute import IndexOptions as IndexOptions # noqa: F401 +from pyarrow._compute import JoinOptions as JoinOptions # noqa: F401 +from pyarrow._compute import ListFlattenOptions as ListFlattenOptions +from pyarrow._compute import ListSliceOptions as ListSliceOptions +from pyarrow._compute import MakeStructOptions as MakeStructOptions +from pyarrow._compute import MapLookupOptions as MapLookupOptions +from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions +from pyarrow._compute import ModeOptions as ModeOptions +from pyarrow._compute import NullOptions as NullOptions +from pyarrow._compute import PadOptions as PadOptions +from pyarrow._compute import PairwiseOptions as PairwiseOptions +from pyarrow._compute import 
PartitionNthOptions as PartitionNthOptions +from pyarrow._compute import PivotWiderOptions as PivotWiderOptions +from pyarrow._compute import QuantileOptions as QuantileOptions +from pyarrow._compute import RandomOptions as RandomOptions +from pyarrow._compute import RankOptions as RankOptions +from pyarrow._compute import RankQuantileOptions as RankQuantileOptions +from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions +from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions +from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions +from pyarrow._compute import RoundOptions as RoundOptions +from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions +from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions +from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions +from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions +from pyarrow._compute import SelectKOptions as SelectKOptions +from pyarrow._compute import SetLookupOptions as SetLookupOptions +from pyarrow._compute import SkewOptions as SkewOptions +from pyarrow._compute import SliceOptions as SliceOptions +from pyarrow._compute import SortOptions as SortOptions +from pyarrow._compute import SplitOptions as SplitOptions +from pyarrow._compute import SplitPatternOptions as SplitPatternOptions # noqa: F401 +from pyarrow._compute import StrftimeOptions as StrftimeOptions +from pyarrow._compute import StrptimeOptions as StrptimeOptions +from pyarrow._compute import StructFieldOptions as StructFieldOptions +from pyarrow._compute import TakeOptions as TakeOptions +from pyarrow._compute import TDigestOptions as TDigestOptions +from pyarrow._compute import TrimOptions as TrimOptions +from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions +from pyarrow._compute import VarianceOptions as VarianceOptions +from pyarrow._compute import WeekOptions as WeekOptions +from 
pyarrow._compute import WinsorizeOptions as WinsorizeOptions +from pyarrow._compute import ZeroFillOptions as ZeroFillOptions + +# Functions +from pyarrow._compute import call_function as call_function # noqa: F401 +from pyarrow._compute import ( # noqa: F401 + call_tabular_function as call_tabular_function) +from pyarrow._compute import get_function as get_function # noqa: F401 +from pyarrow._compute import list_functions as list_functions # noqa: F401 +from pyarrow._compute import ( # noqa: F401 + register_scalar_function as register_scalar_function) +from pyarrow._compute import ( # noqa: F401 + register_aggregate_function as register_aggregate_function) +from pyarrow._compute import ( # noqa: F401 + register_vector_function as register_vector_function) +from pyarrow._compute import ( # noqa: F401 + register_tabular_function as register_tabular_function) + +# Function and Kernel classes +from pyarrow._compute import Function as Function # noqa: F401 +from pyarrow._compute import Kernel as Kernel # noqa: F401 +from pyarrow._compute import ScalarFunction as ScalarFunction # noqa: F401 +from pyarrow._compute import ScalarKernel as ScalarKernel # noqa: F401 +from pyarrow._compute import VectorFunction as VectorFunction # noqa: F401 +from pyarrow._compute import VectorKernel as VectorKernel # noqa: F401 +from pyarrow._compute import ( # noqa: F401 + ScalarAggregateFunction as ScalarAggregateFunction) +from pyarrow._compute import ( # noqa: F401 + ScalarAggregateKernel as ScalarAggregateKernel) +from pyarrow._compute import ( # noqa: F401 + HashAggregateFunction as HashAggregateFunction) +from pyarrow._compute import HashAggregateKernel as HashAggregateKernel # noqa: F401 + +# Udf + +from pyarrow._compute import _Order, _Placement +from pyarrow._stubs_typing import ArrayLike, ScalarLike, PyScalar, TimeUnit +from pyarrow._types import _RunEndType +from . 
import lib

_P = ParamSpec("_P")
_R = TypeVar("_R")


class _ExprComparable(Expression):
    # Narrowed comparison overloads: comparing expressions yields an
    # Expression (deferred computation), not a bool.
    def __ge__(self, other: Any) -> Expression: ...
    def __le__(self, other: Any) -> Expression: ...
    def __gt__(self, other: Any) -> Expression: ...
    def __lt__(self, other: Any) -> Expression: ...


def field(*name_or_index: str | bytes | tuple[str | int, ...] | int) -> Expression: ...


# NOTE(review): a stray module-level ``def __ge__(self, ...)`` previously
# followed ``field`` — a copy-paste of the _ExprComparable method; removed.
# ``lib.int64()`` (a call expression, invalid as a stub annotation) was also
# dropped from ``scalar``; ``lib.Scalar[Any]`` already covers int64 scalars.
def scalar(value: PyScalar | lib.Scalar[Any] | Mapping) -> Expression: ...


# Returns a stub whose signature mirrors ``f`` — used below to declare the
# many compute functions that share another function's exact signature.
def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ...


# ============= compute functions =============
_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType)
_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True)
_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar)
_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray)
_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array |
                          lib.Scalar | lib.ChunkedArray)
ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]
ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT

SignedIntegerScalar: TypeAlias = (
    lib.Scalar[lib.Int8Type]
    | lib.Scalar[lib.Int16Type]
    | lib.Scalar[lib.Int32Type]
    | lib.Scalar[lib.Int64Type]
)
UnsignedIntegerScalar: TypeAlias = (
    lib.Scalar[lib.UInt8Type]
    | lib.Scalar[lib.UInt16Type]
    | lib.Scalar[lib.UInt32Type]
    | lib.Scalar[lib.UInt64Type]
)
IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar
FloatScalar: TypeAlias = (lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type]
                          | lib.Scalar[lib.Float64Type])
DecimalScalar: TypeAlias = (
    lib.Scalar[lib.Decimal32Type]
    | lib.Scalar[lib.Decimal64Type]
    | lib.Scalar[lib.Decimal128Type]
    | lib.Scalar[lib.Decimal256Type]
)
NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar
NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar
BinaryScalar: TypeAlias = (
    lib.Scalar[lib.BinaryType]
    | lib.Scalar[lib.LargeBinaryType]
    | lib.Scalar[lib.FixedSizeBinaryType]
)
StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType]
StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar
_ListScalar: TypeAlias = (
    lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any]
)
_LargeListScalar: TypeAlias = (
    lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT]
)
ListScalar: TypeAlias = (
    lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT]
)
TemporalScalar: TypeAlias = (
    lib.Date32Scalar
    | lib.Date64Scalar
    | lib.Time32Scalar[Any]
    | lib.Time64Scalar[Any]
    | lib.TimestampScalar[Any]
    | lib.DurationScalar[Any]
    | lib.MonthDayNanoIntervalScalar
)
NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar
NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar

# TypeVars (``_XxxT``) preserve the concrete input type in return positions;
# the plain aliases are used where any member of the union is acceptable.
_NumericOrTemporalScalarT = TypeVar(
    "_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar)
_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar)
NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT]
_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray)
_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar)
NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar]
_NumericOrDurationArrayT = TypeVar(
    "_NumericOrDurationArrayT", bound=NumericOrDurationArray)
NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT]
_NumericOrTemporalArrayT = TypeVar(
    "_NumericOrTemporalArrayT", bound=NumericOrTemporalArray)
BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar]
_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray)
IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar]
_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar)
FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar]
_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray)
_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar)
StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar]
_StringArrayT = TypeVar("_StringArrayT", bound=StringArray)
_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar)
BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar]
_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray)
_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar)
StringOrBinaryArray: TypeAlias = StringArray | BinaryArray
_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray)
_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar)
TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar]
_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray)
_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]]
_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]]
ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]]

# =============================== 1. Aggregation ===============================


# Take elements by index; the array input type is preserved in the return.
def array_take(
    array: _ArrayT | lib.Scalar | lib.Table | Expression,
    indices: list[int]
    | list[int | None]
    | lib.Int16Array
    | lib.Int32Array
    | lib.Int64Array
    | lib.UInt64Array
    | lib.ChunkedArray[lib.Int16Scalar]
    | lib.ChunkedArray[lib.Int32Scalar]
    | lib.ChunkedArray[lib.Int64Scalar]
    | np.ndarray
    | Expression,
    /,
    *,
    boundscheck: bool | None = None,
    options: TakeOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _ArrayT | Expression: ...
# ========================= 1.1 functions =========================


# Boolean reduction: true iff every non-null element is true.
# Shadows builtins ``all``/``any`` deliberately — this mirrors the
# pyarrow.compute public API.
def all(
    array: lib.BooleanScalar | BooleanArray,
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.BooleanScalar: ...


any = _clone_signature(all)


# Always reduces to a float64 scalar regardless of numeric input type.
def approximate_median(
    array: NumericScalar | NumericArray,
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleScalar: ...


# ``mode`` selects which elements are counted; note it is positional-or-keyword
# here while most other option-backed parameters below are keyword-only.
def count(
    array: lib.Array | lib.ChunkedArray,
    /,
    mode: Literal["only_valid", "only_null", "all"] = "only_valid",
    *,
    options: CountOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Int64Scalar: ...


def count_distinct(
    array: lib.Array | lib.ChunkedArray,
    /,
    mode: Literal["only_valid", "only_null", "all"] = "only_valid",
    *,
    options: CountOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Int64Scalar: ...


# Returns the first element; the element's scalar type is preserved.
def first(
    array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT],
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _ScalarT: ...

last = _clone_signature(first)

# Returns a struct scalar holding both the first and last elements.
def first_last(
    array: lib.Array[Any] | lib.ChunkedArray[Any] | list[Any],
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | Mapping[Any, Any] | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.StructScalar: ...


# Index of the first occurrence of ``value`` in ``data`` (-1 when absent —
# presumed from pyarrow semantics; verify against pyarrow.compute.index docs).
def index(
    data: lib.Array[Any] | lib.ChunkedArray[Any],
    value: ScalarLike,
    start: int | None = None,
    end: int | None = None,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Int64Scalar: ...
# NOTE(review): ``last = _clone_signature(first)`` was already declared in the
# section above, directly after ``first``; the duplicate binding here
# (a mypy ``no-redef`` error in a stub) has been removed.
max = _clone_signature(first)
min = _clone_signature(first)
min_max = _clone_signature(first_last)


# Arithmetic mean; result scalar type depends on the input type.
def mean(
    array: FloatScalar | FloatArray
    | lib.NumericArray[lib.Scalar[Any]]
    | lib.ChunkedArray[lib.Scalar[Any]]
    | lib.Scalar[Any],
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Scalar[Any]: ...


# Returns the ``n`` most common values and their counts as a struct array.
def mode(
    array: NumericScalar | NumericArray,
    /,
    n: int = 1,
    *,
    skip_nulls: bool = True,
    min_count: int = 0,
    options: ModeOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.StructArray: ...


# Product of all elements; preserves the input scalar type.
def product(
    array: _ScalarT | lib.NumericArray[_ScalarT],
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _ScalarT: ...


# One quantile per entry of ``q``, always as a float64 array.
def quantile(
    array: NumericScalar | NumericArray,
    /,
    q: float | Sequence[float] = 0.5,
    *,
    interpolation: Literal["linear", "lower",
                           "higher", "nearest", "midpoint"] = "linear",
    skip_nulls: bool = True,
    min_count: int = 0,
    options: QuantileOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleArray: ...


# NOTE(review): ``ddof`` was annotated ``float`` here but ``int`` on the
# sibling ``variance`` stub; VarianceOptions takes an integer delta degrees
# of freedom, so it is declared ``int`` for consistency.
def stddev(
    array: NumericScalar | NumericArray,
    /,
    *,
    ddof: int = 0,
    skip_nulls: bool = True,
    min_count: int = 0,
    options: VarianceOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleScalar: ...


# Sum of all elements; preserves the numeric scalar type of the input.
def sum(
    array: _NumericScalarT | NumericArray[_NumericScalarT] | lib.Expression,
    /,
    *,
    skip_nulls: bool = True,
    min_count: int = 1,
    options: ScalarAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericScalarT | lib.Expression: ...
# Approximate quantiles via the t-digest algorithm; one float64 per ``q``.
def tdigest(
    array: NumericScalar | NumericArray,
    /,
    q: float | Sequence[float] = 0.5,
    *,
    delta: int = 100,
    buffer_size: int = 500,
    skip_nulls: bool = True,
    min_count: int = 0,
    options: TDigestOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleArray: ...


def variance(
    array: NumericScalar | NumericArray | ArrayLike,
    /,
    *,
    ddof: int = 0,
    skip_nulls: bool = True,
    min_count: int = 0,
    options: VarianceOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleScalar: ...


# Clamps values outside the [lower_limit, upper_limit] quantile range;
# returns the same array type it was given.
def winsorize(
    array: _NumericArrayT,
    /,
    lower_limit: float = 0.0,
    upper_limit: float = 1.0,
    *,
    options: WinsorizeOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericArrayT: ...


def skew(
    array: NumericScalar | NumericArray | ArrayLike,
    /,
    *,
    skip_nulls: bool = True,
    biased: bool = True,
    min_count: int = 0,
    options: SkewOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleScalar: ...


# ``kurtosis`` shares SkewOptions with ``skew`` — this matches the
# pyarrow.compute API, which has no separate KurtosisOptions.
def kurtosis(
    array: NumericScalar | NumericArray | ArrayLike,
    /,
    *,
    skip_nulls: bool = True,
    biased: bool = True,
    min_count: int = 0,
    options: SkewOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.DoubleScalar: ...


def top_k_unstable(
    values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table,
    k: int,
    sort_keys: list | None = None,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Array: ...


def bottom_k_unstable(
    values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table,
    k: int,
    sort_keys: list | None = None,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Array: ...


# ========================= 2. Element-wise (“scalar”) functions =========

# ========================= 2.1 Arithmetic =========================
# Shadows the ``abs`` builtin deliberately, mirroring pyarrow.compute.
def abs(x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *,
        memory_pool: lib.MemoryPool | None = None) -> (
        _NumericOrDurationT | _NumericOrDurationArrayT | Expression): ...


abs_checked = _clone_signature(abs)


# Binary arithmetic: the concrete scalar/array type of the operands is
# threaded through to the result via the TypeVars.
def add(
    x: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
        | ArrayLike | int | Expression),
    y: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
        | ArrayLike | int | Expression),
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ...


add_checked = _clone_signature(add)


def divide(
    x: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
        | Expression),
    y: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
        | Expression),
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ...


divide_checked = _clone_signature(divide)


# Float inputs keep their type; non-float numerics are promoted to float64.
def exp(
    exponent: _FloatArrayT | ArrayOrChunkedArray[NonFloatNumericScalar] | _FloatScalarT
    | NonFloatNumericScalar | lib.DoubleScalar | Expression,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> (
    _FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression): ...


expm1 = _clone_signature(exp)
multiply = _clone_signature(add)
multiply_checked = _clone_signature(add)


def negate(
    x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None) -> (
    _NumericOrDurationT | _NumericOrDurationArrayT | Expression): ...


negate_checked = _clone_signature(negate)


def power(
    base: _NumericScalarT | Expression | _NumericArrayT | NumericScalar,
    exponent: _NumericScalarT | Expression | _NumericArrayT | NumericScalar,
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericScalarT | _NumericArrayT | Expression: ...


power_checked = _clone_signature(power)


# Sign is int8 for integer inputs, same float type for float inputs.
def sign(
    x: NumericOrDurationArray | NumericOrDurationScalar | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> (
    lib.NumericArray[lib.Int8Scalar]
    | lib.NumericArray[lib.FloatScalar]
    | lib.NumericArray[lib.DoubleScalar]
    | lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar | Expression
): ...


def sqrt(
    x: NumericArray | NumericScalar | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None) -> (
    FloatArray | FloatScalar | Expression): ...


sqrt_checked = _clone_signature(sqrt)

subtract = _clone_signature(add)
subtract_checked = _clone_signature(add)

# ========================= 2.1 Bit-wise functions =========================


def bit_wise_and(
    x: _NumericScalarT | _NumericArrayT | NumericScalar | Expression
    | ArrayOrChunkedArray[NumericScalar],
    y: _NumericScalarT | _NumericArrayT | NumericScalar | Expression
    | ArrayOrChunkedArray[NumericScalar],
    /, *, memory_pool: lib.MemoryPool | None = None
) -> _NumericScalarT | _NumericArrayT | Expression: ...


def bit_wise_not(
    x: _NumericScalarT | _NumericArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> _NumericScalarT | _NumericArrayT | Expression: ...
# All the shift/or/xor variants share bit_wise_and's binary signature.
bit_wise_or = _clone_signature(bit_wise_and)
bit_wise_xor = _clone_signature(bit_wise_and)
shift_left = _clone_signature(bit_wise_and)
shift_left_checked = _clone_signature(bit_wise_and)
shift_right = _clone_signature(bit_wise_and)
shift_right_checked = _clone_signature(bit_wise_and)

# ========================= 2.2 Rounding functions =========================


# ceil/floor/trunc only apply to floating-point inputs; type is preserved.
def ceil(
    x: _FloatScalarT | _FloatArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> _FloatScalarT | _FloatArrayT | Expression: ...


floor = _clone_signature(ceil)


# Shadows the ``round`` builtin deliberately, mirroring pyarrow.compute.
def round(
    x: _NumericScalarT | _NumericArrayT | Expression | list,
    /,
    ndigits: int = 0,
    round_mode: Literal[
        "down",
        "up",
        "towards_zero",
        "towards_infinity",
        "half_down",
        "half_up",
        "half_towards_zero",
        "half_towards_infinity",
        "half_to_even",
        "half_to_odd",
    ] = "half_to_even",
    *,
    options: RoundOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericScalarT | _NumericArrayT | Expression: ...


def round_to_multiple(
    x: _NumericScalarT | _NumericArrayT | list | Expression,
    /,
    multiple: int | float | NumericScalar = 1.0,
    round_mode: Literal[
        "down",
        "up",
        "towards_zero",
        "towards_infinity",
        "half_down",
        "half_up",
        "half_towards_zero",
        "half_towards_infinity",
        "half_to_even",
        "half_to_odd",
    ] = "half_to_even",
    *,
    options: RoundToMultipleOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _NumericScalarT | _NumericArrayT | Expression: ...


# Binary rounding: ``s`` supplies a per-element number of digits.
def round_binary(
    x: _NumericScalarT | _NumericArrayT | float | list | Expression,
    s: lib.Int8Scalar
    | lib.Int16Scalar
    | lib.Int32Scalar
    | lib.Int64Scalar
    | lib.Scalar
    | Iterable
    | float
    | Expression,
    /,
    round_mode: Literal[
        "down",
        "up",
        "towards_zero",
        "towards_infinity",
        "half_down",
        "half_up",
        "half_towards_zero",
        "half_towards_infinity",
        "half_to_even",
        "half_to_odd",
    ] = "half_to_even",
    *,
    options: RoundBinaryOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> (
    _NumericScalarT | lib.NumericArray[_NumericScalarT] | _NumericArrayT
    | Expression): ...


trunc = _clone_signature(ceil)

# ========================= 2.3 Logarithmic functions =========================


# ``ln`` is the signature template for all log and trig functions below:
# float32 stays float32, everything else widens to float64.
def ln(
    x: FloatScalar | FloatArray | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> (
    lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar]
    | lib.NumericArray[lib.DoubleScalar] | Expression): ...


ln_checked = _clone_signature(ln)
log10 = _clone_signature(ln)
log10_checked = _clone_signature(ln)
log1p = _clone_signature(ln)
log1p_checked = _clone_signature(ln)
log2 = _clone_signature(ln)
log2_checked = _clone_signature(ln)


def logb(
    x: FloatScalar | FloatArray | Expression | Any,
    b: FloatScalar | FloatArray | Expression | Any,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> (
    lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar]
    | lib.NumericArray[lib.DoubleScalar] | Expression | Any): ...


logb_checked = _clone_signature(logb)

# ========================= 2.4 Trigonometric functions =========================
acos = _clone_signature(ln)
acos_checked = _clone_signature(ln)
acosh = _clone_signature(ln)
acosh_checked = _clone_signature(ln)
asin = _clone_signature(ln)
asin_checked = _clone_signature(ln)
asinh = _clone_signature(ln)
atan = _clone_signature(ln)
atanh_checked = _clone_signature(ln)
atanh = _clone_signature(ln)
cos = _clone_signature(ln)
cos_checked = _clone_signature(ln)
cosh = _clone_signature(ln)
sin = _clone_signature(ln)
sin_checked = _clone_signature(ln)
sinh = _clone_signature(ln)
tan = _clone_signature(ln)
tan_checked = _clone_signature(ln)
tanh = _clone_signature(ln)


# Note the (y, x) argument order, matching math.atan2 and pyarrow.compute.
def atan2(
    y: FloatScalar | FloatArray | Expression | Any,
    x: FloatScalar | FloatArray | Expression | Any,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> (
    lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar]
    | lib.NumericArray[lib.DoubleScalar] | Expression): ...


# ========================= 2.5 Comparisons functions =========================
def equal(
    x: lib.Scalar | lib.Array | lib.ChunkedArray | list | Expression | Any,
    y: lib.Scalar | lib.Array | lib.ChunkedArray | list | Expression | Any,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...


greater = _clone_signature(equal)
greater_equal = _clone_signature(equal)
less = _clone_signature(equal)
less_equal = _clone_signature(equal)
not_equal = _clone_signature(equal)


# Variadic element-wise max; preserves the common scalar type of the inputs.
def max_element_wise(
    *args: ScalarOrArray[_Scalar_CoT] | Expression | ScalarLike | ArrayLike,
    skip_nulls: bool = True,
    options: ElementWiseAggregateOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _Scalar_CoT | Expression | lib.Scalar | lib.Array: ...


min_element_wise = _clone_signature(max_element_wise)

# ========================= 2.6 Logical functions =========================


# ``and_``/``or_`` use trailing underscores to avoid the Python keywords.
def and_(
    x: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar],
    y: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar],
    /, *, memory_pool: lib.MemoryPool | None = None
) -> (
    lib.BooleanScalar | lib.BooleanArray | Expression
    | ScalarOrArray[lib.BooleanScalar]): ...


and_kleene = _clone_signature(and_)
and_not = _clone_signature(and_)
and_not_kleene = _clone_signature(and_)
or_ = _clone_signature(and_)
or_kleene = _clone_signature(and_)
xor = _clone_signature(and_)


def invert(
    x: lib.BooleanScalar | _BooleanArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> lib.BooleanScalar | _BooleanArrayT | Expression: ...


# ========================= 2.10 String predicates =========================
# ``ascii_is_alnum`` is the signature template for every predicate below.
def ascii_is_alnum(
    strings: StringScalar | StringArray | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
ascii_is_alpha = _clone_signature(ascii_is_alnum)
ascii_is_decimal = _clone_signature(ascii_is_alnum)
ascii_is_lower = _clone_signature(ascii_is_alnum)
ascii_is_printable = _clone_signature(ascii_is_alnum)
ascii_is_space = _clone_signature(ascii_is_alnum)
ascii_is_upper = _clone_signature(ascii_is_alnum)
utf8_is_alnum = _clone_signature(ascii_is_alnum)
utf8_is_alpha = _clone_signature(ascii_is_alnum)
utf8_is_decimal = _clone_signature(ascii_is_alnum)
utf8_is_digit = _clone_signature(ascii_is_alnum)
utf8_is_lower = _clone_signature(ascii_is_alnum)
utf8_is_numeric = _clone_signature(ascii_is_alnum)
utf8_is_printable = _clone_signature(ascii_is_alnum)
utf8_is_space = _clone_signature(ascii_is_alnum)
utf8_is_upper = _clone_signature(ascii_is_alnum)
ascii_is_title = _clone_signature(ascii_is_alnum)
utf8_is_title = _clone_signature(ascii_is_alnum)
string_is_ascii = _clone_signature(ascii_is_alnum)

# ========================= 2.11 String transforms =========================


# Transform template: input string scalar/array type is preserved.
def ascii_capitalize(
    strings: _StringScalarT | _StringArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> _StringScalarT | _StringArrayT | Expression: ...


ascii_lower = _clone_signature(ascii_capitalize)
ascii_reverse = _clone_signature(ascii_capitalize)
ascii_swapcase = _clone_signature(ascii_capitalize)
ascii_title = _clone_signature(ascii_capitalize)
ascii_upper = _clone_signature(ascii_capitalize)


# Int32 for regular string/binary, Int64 for the ``Large*`` variants.
def binary_length(
    strings: ScalarOrArray[StringOrBinaryScalar] | Expression,
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> (
    lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array
    | Expression
): ...


def binary_repeat(
    strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression,
    num_repeats: int | list[int] | list[int | None],
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> (
    _StringOrBinaryScalarT | lib.Array[_StringOrBinaryScalarT] | _StringOrBinaryArrayT
    | Expression): ...


def binary_replace_slice(
    strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression,
    /,
    start: int,
    stop: int,
    replacement: str | bytes,
    *,
    options: ReplaceSliceOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ...


def binary_reverse(
    strings: _BinaryScalarT | _BinaryArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> _BinaryScalarT | _BinaryArrayT | Expression: ...


def replace_substring(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    pattern: str | bytes,
    replacement: str | bytes,
    *,
    max_replacements: int | None = None,
    options: ReplaceSubstringOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


replace_substring_regex = _clone_signature(replace_substring)


def utf8_capitalize(
    strings: _StringScalarT | _StringArrayT | Expression, /, *,
    memory_pool: lib.MemoryPool | None = None
) -> _StringScalarT | _StringArrayT | Expression: ...


# Counts codepoints (not bytes, unlike ``binary_length``).
def utf8_length(
    strings: lib.StringScalar | lib.LargeStringScalar | lib.StringArray
    | lib.ChunkedArray[lib.StringScalar] | lib.LargeStringArray
    | lib.ChunkedArray[lib.LargeStringScalar] | Expression,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> (
    lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array
    | Expression): ...


utf8_lower = _clone_signature(utf8_capitalize)


def utf8_replace_slice(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    start: int,
    stop: int,
    replacement: str | bytes,
    *,
    options: ReplaceSliceOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


utf8_reverse = _clone_signature(utf8_capitalize)
utf8_swapcase = _clone_signature(utf8_capitalize)
utf8_title = _clone_signature(utf8_capitalize)
utf8_upper = _clone_signature(utf8_capitalize)

# ========================= 2.12 String padding =========================


# Padding template shared by lpad/rpad/center, ascii and utf8 variants.
def ascii_center(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    width: int | None = None,
    padding: str = " ",
    lean_left_on_odd_padding: bool = True,
    *,
    options: PadOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


ascii_lpad = _clone_signature(ascii_center)
ascii_rpad = _clone_signature(ascii_center)
utf8_center = _clone_signature(ascii_center)
utf8_lpad = _clone_signature(ascii_center)
utf8_rpad = _clone_signature(ascii_center)


def utf8_zero_fill(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    width: int | None = None,
    padding: str = "0",
    *,
    options: ZeroFillOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


# Plain alias (not a clone): zfill is the same function under another name.
utf8_zfill = utf8_zero_fill

# ========================= 2.13 String trimming =========================


# Trim template: ``characters`` is the set of characters to strip.
def ascii_ltrim(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    characters: str,
    *,
    options: TrimOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


ascii_rtrim = _clone_signature(ascii_ltrim)
ascii_trim = _clone_signature(ascii_ltrim)
utf8_ltrim = _clone_signature(ascii_ltrim)
utf8_rtrim = _clone_signature(ascii_ltrim)
utf8_trim = _clone_signature(ascii_ltrim)


def ascii_ltrim_whitespace(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    *,
    options: TrimOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace)
ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace)
utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace)
utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace)
utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace)

# ========================= 2.14 String splitting =========================


# Splitting yields a list array whose values keep the input string type.
def ascii_split_whitespace(
    strings: _StringScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression,
    /,
    *,
    max_splits: int | None = None,
    reverse: bool = False,
    options: SplitOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> (
    lib.ListArray[_StringScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]]
    | Expression): ...


def split_pattern(
    strings: _StringOrBinaryScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression,
    /,
    pattern: str,
    *,
    max_splits: int | None = None,
    reverse: bool = False,
    options: SplitOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> (
    lib.ListArray[_StringOrBinaryScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]]
    | Expression): ...
split_pattern_regex = _clone_signature(split_pattern)
utf8_split_whitespace = _clone_signature(ascii_split_whitespace)

# ========================= 2.15 String component extraction =========================


# One struct field per named capture group in ``pattern``.
def extract_regex(
    strings: StringOrBinaryScalar | StringOrBinaryArray | Expression,
    /,
    pattern: str,
    *,
    options: ExtractRegexOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.StructScalar | lib.StructArray | Expression: ...


extract_regex_span = _clone_signature(extract_regex)


# ========================= 2.16 String join =========================
# Joins each list of strings with ``separator``.
def binary_join(
    strings: ListArray[Any] | lib.ListScalar[Any] | Expression,
    separator: str | StringScalar | StringArray | Expression,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> StringScalar | StringArray: ...


# The last positional argument is the separator; ``null_handling`` controls
# what happens when any input is null.
def binary_join_element_wise(
    *strings: str
    | bytes
    | _StringOrBinaryScalarT
    | _StringOrBinaryArrayT
    | Expression
    | list,
    null_handling: Literal["emit_null", "skip", "replace"] = "emit_null",
    null_replacement: str = "",
    options: JoinOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ...


# ========================= 2.17 String Slicing =========================
def binary_slice(
    strings: _BinaryScalarT | _BinaryArrayT | Expression | lib.Scalar,
    /,
    start: int,
    stop: int | None = None,
    step: int = 1,
    *,
    options: SliceOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _BinaryScalarT | _BinaryArrayT | Expression: ...


def utf8_slice_codeunits(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    start: int,
    stop: int | None = None,
    step: int = 1,
    *,
    options: SliceOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


def utf8_normalize(
    strings: _StringScalarT | _StringArrayT | Expression,
    /,
    form: Literal["NFC", "NFKC", "NFD", "NFKD"] = "NFC",
    *,
    options: Utf8NormalizeOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> _StringScalarT | _StringArrayT | Expression: ...


# ========================= 2.18 Containment tests =========================
# Int32 for regular string/binary inputs, Int64 for ``Large*`` inputs.
def count_substring(
    strings: lib.StringScalar | lib.BinaryScalar | lib.LargeStringScalar
    | lib.LargeBinaryScalar | lib.StringArray | lib.BinaryArray
    | lib.ChunkedArray[lib.StringScalar] | lib.ChunkedArray[lib.BinaryScalar]
    | lib.LargeStringArray | lib.LargeBinaryArray
    | lib.ChunkedArray[lib.LargeStringScalar] | lib.ChunkedArray[lib.LargeBinaryScalar]
    | Expression,
    /,
    pattern: str,
    *,
    ignore_case: bool = False,
    options: MatchSubstringOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> (
    lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array
    | Expression): ...


count_substring_regex = _clone_signature(count_substring)


# Boolean match template, also cloned for starts_with/match_substring/etc.
def ends_with(
    strings: StringScalar | BinaryScalar | StringArray | BinaryArray | Expression,
    /,
    pattern: str,
    *,
    ignore_case: bool = False,
    options: MatchSubstringOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...


find_substring = _clone_signature(count_substring)
find_substring_regex = _clone_signature(count_substring)


# Index of each value within ``value_set`` (null when absent).
def index_in(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    /,
    value_set: lib.Array | lib.ChunkedArray | Expression,
    *,
    skip_nulls: bool = False,
    options: SetLookupOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
# Meta-binary variant: ``value_set`` is passed positionally instead of via
# SetLookupOptions.
def index_in_meta_binary(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    value_set: lib.Array | lib.ChunkedArray | Expression,
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Int32Scalar | lib.Int32Array | Expression: ...


# Membership test: boolean result, one entry per element of ``values``.
def is_in(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    /,
    value_set: lib.Array | lib.ChunkedArray | Expression,
    *,
    skip_nulls: bool = False,
    options: SetLookupOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...


# NOTE(review): this was ``_clone_signature(index_in_meta_binary)``, which
# gave the boolean predicate an Int32 return type. Declared explicitly with
# the boolean return that pyarrow.compute.is_in_meta_binary actually has.
def is_in_meta_binary(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    value_set: lib.Array | lib.ChunkedArray | Expression,
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...


match_like = _clone_signature(ends_with)
match_substring = _clone_signature(ends_with)
match_substring_regex = _clone_signature(ends_with)
starts_with = _clone_signature(ends_with)

# ========================= 2.19 Categorizations =========================


# Template for is_inf/is_nan as well: boolean per element.
def is_finite(
    values: NumericScalar | lib.NullScalar | NumericArray | lib.NullArray | Expression,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...


is_inf = _clone_signature(is_finite)
is_nan = _clone_signature(is_finite)


def is_null(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    /,
    *,
    nan_is_null: bool = False,
    options: NullOptions | None = None,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...


def is_valid(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression | ArrayLike,
    /, *, memory_pool: lib.MemoryPool | None = None
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+ + +true_unless_null = _clone_signature(is_valid) + +# ========================= 2.20 Selecting / multiplexing ========================= + + +def case_when( + cond: lib.StructScalar + | lib.StructArray + | lib.ChunkedArray[lib.StructScalar] + | Expression, + /, + *cases: _ScalarOrArrayT | ArrayLike, memory_pool: lib.MemoryPool | None = None +) -> _ScalarOrArrayT | lib.Array | Expression: ... + + +def choose( + indices: ArrayLike | ScalarLike, + /, + *values: ArrayLike | ScalarLike, + memory_pool: lib.MemoryPool | None = None, +) -> ArrayLike | ScalarLike: ... + + +def coalesce( + *values: _ScalarOrArrayT | Expression, memory_pool: lib.MemoryPool | None = None +) -> _ScalarOrArrayT | Expression: ... + + +def fill_null( + values: _ScalarOrArrayT | ScalarLike, fill_value: ArrayLike | ScalarLike +) -> _ScalarOrArrayT | ScalarLike: ... + + +def if_else( + cond: ArrayLike | ScalarLike, + left: ArrayLike | ScalarLike, + right: ArrayLike | ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ArrayLike | ScalarLike: ... + + +# ========================= 2.21 Structural transforms ========================= + +def list_value_length( + lists: _ListArray[Any] | _LargeListArray[Any] | ListArray[Any] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array | lib.Int64Array | Expression: ... + + +def make_struct( + *args: lib.Scalar | lib.Array | lib.ChunkedArray | Expression | ArrayLike, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar | lib.StructArray | Expression: ... 
+ + +# ========================= 2.22 Conversions ========================= +def ceil_temporal( + timestamps: _TemporalScalarT | _TemporalArrayT | Expression, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalScalarT | _TemporalArrayT | Expression: ... + + +floor_temporal = _clone_signature(ceil_temporal) +round_temporal = _clone_signature(ceil_temporal) + + +def cast( + arr: lib.Scalar | lib.Array | lib.ChunkedArray | lib.Table, + target_type: _DataTypeT | str | None = None, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> ( + lib.Scalar[_DataTypeT] | lib.Scalar[Any] | lib.Array[lib.Scalar[_DataTypeT]] + | lib.Array[lib.Scalar[Any]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]] + | lib.ChunkedArray[lib.Scalar[Any]] | lib.Table +): ... + + +def strftime( + timestamps: TemporalScalar | TemporalArray | Expression, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringScalar | lib.StringArray | Expression: ... + + +def strptime( + strings: StringScalar | StringArray | Expression, + /, + format: str, + unit: TimeUnit, + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar | lib.TimestampArray | Expression: ... 
+ + +# ========================= 2.23 Temporal component extraction ========================= +def day( + values: TemporalScalar | TemporalArray | Expression, /, *, + memory_pool: lib.MemoryPool | None = None) -> ( + lib.Int64Scalar | lib.Int64Array | Expression +): ... + + +def day_of_week( + values: TemporalScalar | TemporalArray | Expression, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +day_of_year = _clone_signature(day) + + +def hour( + values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any] + | lib.TimestampArray[Any] | lib.Time32Array[Any] | lib.Time64Array[Any] + | lib.ChunkedArray[lib.TimestampScalar[Any]] + | lib.ChunkedArray[lib.Time32Scalar[Any]] + | lib.ChunkedArray[lib.Time64Scalar[Any]] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +def is_dst( + values: lib.TimestampScalar | lib.TimestampArray[Any] + | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + + +def iso_week( + values: lib.TimestampScalar | lib.TimestampArray[Any] + | lib.ChunkedArray[lib.TimestampScalar[Any]] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +iso_year = _clone_signature(iso_week) + + +def is_leap_year( + values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar + | lib.TimestampArray + | lib.Date32Array + | lib.Date64Array + | lib.ChunkedArray[lib.TimestampScalar] + | lib.ChunkedArray[lib.Date32Scalar] + | lib.ChunkedArray[lib.Date64Scalar] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... 
+ + +microsecond = _clone_signature(iso_week) +millisecond = _clone_signature(iso_week) +minute = _clone_signature(iso_week) +month = _clone_signature(day_of_week) +nanosecond = _clone_signature(hour) +quarter = _clone_signature(day_of_week) +second = _clone_signature(hour) +subsecond = _clone_signature(hour) +us_week = _clone_signature(iso_week) +us_year = _clone_signature(iso_week) +year = _clone_signature(iso_week) + + +def week( + values: lib.TimestampScalar | lib.TimestampArray + | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + + +def year_month_day( + values: TemporalScalar | TemporalArray | Expression, /, *, + memory_pool: lib.MemoryPool | None = None +) -> lib.StructScalar | lib.StructArray | Expression: ... + + +iso_calendar = _clone_signature(year_month_day) + + +# ========================= 2.24 Temporal difference ========================= +def day_time_interval_between(start, end, /, *, + memory_pool: lib.MemoryPool | None = None): ... + + +def days_between( + start, end, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array: ... + + +hours_between = _clone_signature(days_between) +microseconds_between = _clone_signature(days_between) +milliseconds_between = _clone_signature(days_between) +minutes_between = _clone_signature(days_between) + + +def month_day_nano_interval_between( + start, end, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: ... + + +def month_interval_between(start, end, /, *, + memory_pool: lib.MemoryPool | None = None): ... 
+ + +nanoseconds_between = _clone_signature(days_between) +quarters_between = _clone_signature(days_between) +seconds_between = _clone_signature(days_between) + + +def weeks_between( + start, + end, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array: ... + + +years_between = _clone_signature(days_between) + +# ========================= 2.25 Timezone handling ========================= + + +def assume_timezone( + timestamps: lib.TimestampScalar | lib.Scalar[lib.TimestampType] | lib.TimestampArray + | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, + timezone: str | None = None, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> ( + lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] + | Expression +): ... + + +def local_timestamp( + timestamps: lib.TimestampScalar | lib.TimestampArray + | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.TimestampScalar | lib.TimestampArray | Expression: ... + + +# ========================= 2.26 Random number generation ========================= +def random( + n: int, + *, + initializer: Hashable = "system", + options: RandomOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... + + +# ========================= 3. 
Array-wise (“vector”) functions ========================= + +# ========================= 3.1 Cumulative Functions ========================= +def cumulative_sum( + values: _NumericArrayT | ArrayLike | Expression, + /, + start: int | float | lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT | Expression | lib.Array: ... + + +cumulative_sum_checked = _clone_signature(cumulative_sum) +cumulative_prod = _clone_signature(cumulative_sum) +cumulative_prod_checked = _clone_signature(cumulative_sum) +cumulative_max = _clone_signature(cumulative_sum) +cumulative_min = _clone_signature(cumulative_sum) +cumulative_mean = _clone_signature(cumulative_sum) +# ========================= 3.2 Associative transforms ========================= + + +def dictionary_encode( + array: _ScalarOrArrayT | Expression, + /, + null_encoding: Literal["mask", "encode"] = "mask", + *, + options=None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | Expression: ... + + +def dictionary_decode( + array: _ScalarOrArrayT | Expression, + /, + *, + options=None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | Expression: ... + + +def unique(array: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool | + None = None) -> _ArrayT | Expression: ... + + +def value_counts( + array: lib.Array | lib.ChunkedArray | Expression, /, *, + memory_pool: lib.MemoryPool | None = None +) -> lib.StructArray | Expression: ... + +# ========================= 3.3 Selections ========================= + + +def array_filter( + array: _ArrayT | Expression, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT | Expression: ... 
+ + +def drop_null(input: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool | + None = None) -> _ArrayT | Expression: ... + + +filter = array_filter +take = array_take + +# ========================= 3.4 Containment tests ========================= + + +def indices_nonzero( + values: lib.BooleanArray + | lib.NullArray + | NumericArray + | lib.Decimal128Array + | lib.Decimal256Array | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +# ========================= 3.5 Sorts and partitions ========================= +def array_sort_indices( + array: lib.Array | lib.ChunkedArray | Expression, + /, + order: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: ArraySortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +def partition_nth_indices( + array: lib.Array | lib.ChunkedArray | Expression | Iterable, + /, + pivot: int, + *, + null_placement: _Placement = "at_end", + options: PartitionNthOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +def pivot_wider( + keys: lib.Array | lib.ChunkedArray | Sequence[str], + values: lib.Array | lib.ChunkedArray | Sequence[Any], + /, + key_names: Sequence[str] | None = None, + *, + unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", + options: PivotWiderOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: ... + + +def rank( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + options: RankOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... 
+ + +def rank_quantile( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: RankQuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... + + +def rank_normal( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: RankQuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... + + +def select_k_unstable( + input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression, + /, + k: int | None = None, + sort_keys: Sequence[tuple[str | Expression, str]] | None = None, + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +def sort_indices( + input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression, + /, + sort_keys: Sequence[tuple[str | Expression, _Order]] | None = None, + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: ... + + +# ========================= 3.6 Structural transforms ========================= +def list_element( + lists: lib.Array[ListScalar[_DataTypeT]] | lib.ChunkedArray[ListScalar[_DataTypeT]] + | ListScalar[_DataTypeT] | Expression, + index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None +) -> (lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]] + | _DataTypeT | Expression): ... + + +def list_flatten( + lists: ArrayOrChunkedArray[ListScalar[Any]] | Expression, + /, + recursive: bool = False, + *, + options: ListFlattenOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[Any] | Expression: ... 
+ + +def list_parent_indices( + lists: ArrayOrChunkedArray[Any] | Expression, /, *, + memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Array | Expression: ... + + +def list_slice( + lists: ArrayOrChunkedArray[Any] | Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + *, + options: ListSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[Any] | Expression: ... + + +def map_lookup( + container, + /, + query_key, + occurrence: str, + *, + options: MapLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): ... + + +def struct_field( + values, + /, + indices, + *, + options: StructFieldOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): ... + + +def fill_null_backward( + values: _ScalarOrArrayT | ScalarLike | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | ScalarLike | Expression: ... + + +def fill_null_forward( + values: _ScalarOrArrayT | ScalarLike | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | ScalarLike | Expression: ... + + +def replace_with_mask( + values: _ScalarOrArrayT | Expression, + mask: list[bool] | list[bool | None] | BooleanArray, + replacements, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | Expression: ... + + +# ========================= 3.7 Pairwise functions ========================= +def pairwise_diff( + input: _NumericOrTemporalArrayT | Expression, + /, + period: int = 1, + *, + options: PairwiseOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT | Expression: ... + + +def run_end_encode( + input: _NumericOrTemporalArrayT | Expression, + /, + *, + run_end_type: _RunEndType | None = None, + options: RunEndEncodeOptions | None = None, + memory_pool: lib.MemoryPool | None = None +) -> _NumericOrTemporalArrayT | Expression: ... 
+ + +def run_end_decode( + input: _NumericOrTemporalArrayT | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None +) -> _NumericOrTemporalArrayT | Expression: ... + + +pairwise_diff_checked = _clone_signature(pairwise_diff) diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py index e475e8db5c2..cd99a1bbc53 100644 --- a/python/pyarrow/acero.py +++ b/python/pyarrow/acero.py @@ -22,7 +22,7 @@ # distutils: language = c++ # cython: language_level = 3 -from pyarrow.lib import Table, RecordBatch, array +from pyarrow.lib import Table, RecordBatch, array, Schema from pyarrow.compute import Expression, field try: @@ -49,11 +49,14 @@ except ImportError: class DatasetModuleStub: class Dataset: - pass + @property + def schema(self): + return Schema() class InMemoryDataset: - pass - ds = DatasetModuleStub + def __init__(self, source): + pass + ds = DatasetModuleStub # type: ignore[assignment] def _dataset_to_decl(dataset, use_threads=True, implicit_ordering=False): @@ -306,7 +309,7 @@ def _perform_join_asof(left_operand, left_on, left_by, # AsofJoin does not return on or by columns for right_operand. 
right_columns = [ col for col in right_operand.schema.names - if col not in [right_on] + right_by + if col not in [right_on] + right_by # type: ignore[reportOperatorIssue] ] columns_collisions = set(left_operand.schema.names) & set(right_columns) if columns_collisions: diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 8177948aaeb..b8206a54fdd 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -108,7 +108,7 @@ import warnings import pyarrow as pa -from pyarrow import _compute_docstrings +from pyarrow import _compute_docstrings # type: ignore[reportAttributeAccessIssue] from pyarrow.vendored import docscrape @@ -243,7 +243,7 @@ def _handle_options(name, options_class, options, args, kwargs): def _make_generic_wrapper(func_name, func, options_class, arity): if options_class is None: - def wrapper(*args, memory_pool=None): + def wrapper(*args, memory_pool=None): # type: ignore[misc] if arity is not Ellipsis and len(args) != arity: raise TypeError( f"{func_name} takes {arity} positional argument(s), " @@ -253,7 +253,8 @@ def wrapper(*args, memory_pool=None): return Expression._call(func_name, list(args)) return func.call(args, None, memory_pool) else: - def wrapper(*args, memory_pool=None, options=None, **kwargs): + def wrapper( # type: ignore[misc] + *args, memory_pool=None, options=None, **kwargs): if arity is not Ellipsis: if len(args) < arity: raise TypeError( @@ -610,7 +611,7 @@ def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None): sort_keys.append(("dummy", "descending")) else: sort_keys = map(lambda key_name: (key_name, "descending"), sort_keys) - options = SelectKOptions(k, sort_keys) + options = SelectKOptions(k, sort_keys) # type: ignore[reportArgumentType] return call_function("select_k_unstable", [values], options, memory_pool) @@ -657,7 +658,7 @@ def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None): sort_keys.append(("dummy", "ascending")) else: sort_keys = map(lambda key_name: 
(key_name, "ascending"), sort_keys) - options = SelectKOptions(k, sort_keys) + options = SelectKOptions(k, sort_keys) # type: ignore[reportArgumentType] return call_function("select_k_unstable", [values], options, memory_pool) @@ -683,7 +684,8 @@ def random(n, *, initializer='system', options=None, memory_pool=None): memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. """ - options = RandomOptions(initializer=initializer) + options = RandomOptions( + initializer=initializer) # type: ignore[reportArgumentType] return call_function("random", [], options, memory_pool, length=n) @@ -725,7 +727,7 @@ def field(*name_or_index): if isinstance(name_or_index[0], (str, int)): return Expression._field(name_or_index[0]) elif isinstance(name_or_index[0], tuple): - return Expression._nested_field(name_or_index[0]) + return Expression._nested_field(name_or_index[0]) # type: ignore else: raise TypeError( "field reference should be str, multiple str, tuple or " @@ -733,7 +735,7 @@ def field(*name_or_index): ) # In case of multiple strings not supplied in a tuple else: - return Expression._nested_field(name_or_index) + return Expression._nested_field(name_or_index) # type: ignore def scalar(value): diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py index cb97e3849fd..1285534d08a 100644 --- a/python/pyarrow/tests/test_acero.py +++ b/python/pyarrow/tests/test_acero.py @@ -16,6 +16,7 @@ # under the License. 
import pytest +from typing import Literal, cast import pyarrow as pa import pyarrow.compute as pc @@ -37,9 +38,10 @@ try: import pyarrow.dataset as ds - from pyarrow.acero import ScanNodeOptions + from pyarrow._dataset import ScanNodeOptions except ImportError: - ds = None + ds = None # type: ignore[assignment] + ScanNodeOptions = None # type: ignore[assignment, misc] pytestmark = pytest.mark.acero @@ -53,7 +55,6 @@ def table_source(): def test_declaration(): - table = pa.table({'a': [1, 2, 3], 'b': [4, 5, 6]}) table_opts = TableSourceNodeOptions(table) filter_opts = FilterNodeOptions(field('a') > 1) @@ -89,7 +90,8 @@ def test_declaration_to_reader(table_source): def test_table_source(): with pytest.raises(TypeError): - TableSourceNodeOptions(pa.record_batch([pa.array([1, 2, 3])], ["a"])) + TableSourceNodeOptions(pa.record_batch( + [pa.array([1, 2, 3])], ["a"])) table_source = TableSourceNodeOptions(None) decl = Declaration("table_source", table_source) @@ -110,9 +112,9 @@ def test_filter(table_source): # requires a pyarrow Expression with pytest.raises(TypeError): - FilterNodeOptions(pa.array([True, False, True])) + FilterNodeOptions(pa.array([True, False, True])) # type: ignore[arg-type] with pytest.raises(TypeError): - FilterNodeOptions(None) + FilterNodeOptions(None) # type: ignore[arg-type] @pytest.mark.parametrize('source', [ @@ -267,19 +269,23 @@ def test_order_by(): table = pa.table({'a': [1, 2, 3, 4], 'b': [1, 3, None, 2]}) table_source = Declaration("table_source", TableSourceNodeOptions(table)) - ord_opts = OrderByNodeOptions([("b", "ascending")]) + sort_keys = [("b", "ascending")] + sort_keys = cast(list[tuple[str, Literal["ascending", "descending"]]], sort_keys) + ord_opts = OrderByNodeOptions(sort_keys) decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)]) result = decl.to_table() expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]}) assert result.equals(expected) - ord_opts = OrderByNodeOptions([(field("b"), 
"descending")]) + ord_opts = OrderByNodeOptions( + [(field("b"), "descending")]) # type: ignore[arg-type] decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)]) result = decl.to_table() expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]}) assert result.equals(expected) - ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start") + ord_opts = OrderByNodeOptions( + [(1, "descending")], null_placement="at_start") # type: ignore[arg-type] decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)]) result = decl.to_table() expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]}) @@ -294,10 +300,12 @@ def test_order_by(): _ = decl.to_table() with pytest.raises(ValueError, match="\"decreasing\" is not a valid sort order"): - _ = OrderByNodeOptions([("b", "decreasing")]) + _ = OrderByNodeOptions([("b", "decreasing")]) # type: ignore[arg-type] with pytest.raises(ValueError, match="\"start\" is not a valid null placement"): - _ = OrderByNodeOptions([("b", "ascending")], null_placement="start") + _ = OrderByNodeOptions( + [("b", "ascending")], null_placement="start" # type: ignore[arg-type] + ) def test_hash_join(): @@ -382,7 +390,9 @@ def test_hash_join_with_residual_filter(): # test filter expression referencing columns from both side join_opts = HashJoinNodeOptions( "left outer", left_keys="key", right_keys="key", - filter_expression=pc.equal(pc.field("a"), 5) | pc.equal(pc.field("b"), 10) + filter_expression=( + pc.equal(pc.field("a"), 5) + | pc.equal(pc.field("b"), 10)) # type: ignore[reportOperatorIssue] ) joined = Declaration( "hashjoin", options=join_opts, inputs=[left_source, right_source]) @@ -462,6 +472,8 @@ def test_asof_join(): @pytest.mark.dataset def test_scan(tempdir): + assert ds is not None + assert ScanNodeOptions is not None table = pa.table({'a': [1, 2, 3], 'b': [4, 5, 6]}) ds.write_dataset(table, tempdir / "dataset", format="parquet") dataset = ds.dataset(tempdir / 
"dataset", format="parquet") @@ -486,11 +498,10 @@ def test_scan(tempdir): assert decl.to_table().num_rows == 0 # projection scan option - scan_opts = ScanNodeOptions(dataset, columns={"a2": pc.multiply(field("a"), 2)}) decl = Declaration("scan", scan_opts) result = decl.to_table() # "a" is included in the result (needed later on for the actual projection) assert result["a"].to_pylist() == [1, 2, 3] # "b" is still included, but without data as it will be removed by the projection - assert pc.all(result["b"].is_null()).as_py() + assert pc.all(result.column("b").is_null()).as_py() diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 2ef14ff39be..1682409193b 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -31,12 +31,12 @@ try: import numpy as np except ImportError: - np = None + pass try: import pandas as pd except ImportError: - pd = None + pass import pyarrow as pa import pyarrow.compute as pc @@ -45,7 +45,7 @@ try: import pyarrow.substrait as pas except ImportError: - pas = None + pas = None # type: ignore[assignment] exported_functions = [ func for (name, func) in sorted(pc.__dict__.items()) @@ -329,9 +329,11 @@ def test_function_attributes(): def test_input_type_conversion(): # Automatic array conversion from Python arr = pc.add([1, 2], [4, None]) + assert isinstance(arr, pa.Array) assert arr.to_pylist() == [5, None] # Automatic scalar conversion from Python arr = pc.add([1, 2], 4) + assert isinstance(arr, pa.Array) assert arr.to_pylist() == [5, 6] # Other scalar type assert pc.equal(["foo", "bar", None], @@ -779,9 +781,11 @@ def test_min_max(): assert s.as_py() == {'min': 1, 'max': 6} s = pc.min_max(data, options=pc.ScalarAggregateOptions()) assert s.as_py() == {'min': 1, 'max': 6} - s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True)) + s = pc.min_max(data, options=pc.ScalarAggregateOptions( + skip_nulls=True)) assert s.as_py() == {'min': 1, 'max': 6} - 
s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False)) + s = pc.min_max(data, options=pc.ScalarAggregateOptions( + skip_nulls=False)) assert s.as_py() == {'min': None, 'max': None} # Options as dict of kwargs @@ -799,11 +803,11 @@ def test_min_max(): # Wrong options type options = pc.TakeOptions() with pytest.raises(TypeError): - s = pc.min_max(data, options=options) + s = pc.min_max(data, options=options) # type: ignore[arg-type] # Missing argument with pytest.raises(TypeError, match="min_max takes 1 positional"): - s = pc.min_max() + s = pc.min_max() # type: ignore[call-arg] def test_any(): @@ -844,12 +848,12 @@ def test_all(): assert pc.all(a, options=options).as_py() is None a = pa.chunked_array([[True], [True, None]]) - assert pc.all(a).as_py() is True - assert pc.all(a, options=options).as_py() is None + assert pc.all(a).as_py() is True # type: ignore[arg-type] + assert pc.all(a, options=options).as_py() is None # type: ignore[arg-type] a = pa.chunked_array([[True], [False]]) - assert pc.all(a).as_py() is False - assert pc.all(a, options=options).as_py() is False + assert pc.all(a).as_py() is False # type: ignore[arg-type] + assert pc.all(a, options=options).as_py() is False # type: ignore[arg-type] def test_is_valid(): @@ -858,7 +862,7 @@ def test_is_valid(): assert pc.is_valid(data).to_pylist() == [True, True, False] with pytest.raises(TypeError): - pc.is_valid(data, options=None) + pc.is_valid(data, options=None) # type: ignore[call-arg] def test_generated_docstrings(): @@ -1069,21 +1073,6 @@ def find_new_unicode_codepoints(): 0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c, 0x2c7c, 0x2c7d, 0xa69c, 0xa69d, 0xa770, 0xa7f8, 0xa7f9, 0xab5c, 0xab5d, 0xab5e, 0xab5f, } -# utf8proc does not store if a codepoint is numeric -numeric_info_missing = { - 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03, - 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96, - 0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70, - 0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341, - 
0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2, - 0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a, - 0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10, - 0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e, - 0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621, - 0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973, - 0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, 0x10fc5, - 0x10fc6, 0x10fc7, 0x10fc8, 0x10fc9, 0x10fca, - 0x10fcb, } # utf8proc has no no digit/numeric information digit_info_missing = { 0xb2, 0xb3, 0xb9, 0x1369, 0x136a, 0x136b, 0x136c, @@ -1102,6 +1091,7 @@ def find_new_unicode_codepoints(): 0x278f, 0x2790, 0x2791, 0x2792, 0x10a40, 0x10a41, 0x10a42, 0x10a43, 0x10e60, 0x10e61, 0x10e62, 0x10e63, 0x10e64, 0x10e65, 0x10e66, 0x10e67, 0x10e68, } +# utf8proc does not store if a codepoint is numeric numeric_info_missing = { 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03, 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96, @@ -1136,7 +1126,7 @@ def test_string_py_compat_boolean(function_name, variant): py_name = function_name.replace('_', '') ignore = codepoints_ignore.get(function_name, set()) | \ find_new_unicode_codepoints() - for i in range(128 if ascii else 0x11000): + for i in range(128 if ascii else 0x11000): # type: ignore[truthy-function] if i in range(0xD800, 0xE000): continue # bug? 
pyarrow doesn't allow utf16 surrogates # the issues we know of, we skip @@ -1657,10 +1647,10 @@ def test_scatter(): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_array(typ): if typ == "array": - def con(values): + def con(values): # type: ignore[no-redef] return pa.array(values) else: - def con(values): + def con(values): # type: ignore[no-redef] return pa.chunked_array([values]) arr1 = con([1, 2, 3, 4, None]) @@ -1688,10 +1678,10 @@ def con(values): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_string_scalar(typ): if typ == "array": - def con(values): + def con(values): # type: ignore[no-redef] return pa.array(values) else: - def con(values): + def con(values): # type: ignore[no-redef] return pa.chunked_array([values]) arr = con(['a', 'b', 'c', None]) @@ -1725,10 +1715,10 @@ def con(values): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_scalar(typ): if typ == "array": - def con(values): + def con(values): # type: ignore[no-redef] return pa.array(values) else: - def con(values): + def con(values): # type: ignore[no-redef] return pa.chunked_array([values]) arr = con([1, 2, 3, None]) @@ -1821,8 +1811,9 @@ def test_round_to_integer(ty): "half_to_odd": [3, 3, 4, 5, -3, -3, -4, None], } for round_mode, expected in rmode_and_expected.items(): - options = RoundOptions(round_mode=round_mode) - result = round(values, options=options) + options = RoundOptions( + round_mode=round_mode) # type: ignore[arg-type] + result = round(values, options=options) # type: ignore[arg-type] expected_array = pa.array(expected, type=pa.float64()) assert expected_array.equals(result) @@ -1840,7 +1831,9 @@ def test_round(): for ndigits, expected in ndigits_and_expected.items(): options = pc.RoundOptions(ndigits, "half_towards_infinity") result = pc.round(values, options=options) - np.testing.assert_allclose(result, pa.array(expected), equal_nan=True) + assert isinstance(result, pa.Array) + 
np.testing.assert_allclose( + result, pa.array(expected), equal_nan=True) assert pc.round(values, ndigits, round_mode="half_towards_infinity") == result assert pc.round(values, ndigits, "half_towards_infinity") == result @@ -1860,6 +1853,7 @@ def test_round_to_multiple(): for multiple, expected in multiple_and_expected.items(): options = pc.RoundToMultipleOptions(multiple, "half_towards_infinity") result = pc.round_to_multiple(values, options=options) + assert isinstance(result, pa.Array) np.testing.assert_allclose(result, pa.array(expected), equal_nan=True) assert pc.round_to_multiple(values, multiple, "half_towards_infinity") == result @@ -1867,11 +1861,11 @@ def test_round_to_multiple(): for multiple in [0, -2, pa.scalar(-10.4)]: with pytest.raises(pa.ArrowInvalid, match="Rounding multiple must be positive"): - pc.round_to_multiple(values, multiple=multiple) + pc.round_to_multiple(values, multiple=multiple) # type: ignore[arg-type] for multiple in [object, 99999999999999999999999]: with pytest.raises(TypeError, match="is not a valid multiple type"): - pc.round_to_multiple(values, multiple=multiple) + pc.round_to_multiple(values, multiple=multiple) # type: ignore[arg-type] def test_round_binary(): @@ -2056,7 +2050,8 @@ def test_logical(): def test_dictionary_decode(): array = pa.array(["a", "a", "b", "c", "b"]) dictionary_array = array.dictionary_encode() - dictionary_array_decode = pc.dictionary_decode(dictionary_array) + dictionary_array_decode = pc.dictionary_decode( + dictionary_array) assert array != dictionary_array @@ -2236,7 +2231,7 @@ def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx, # Round `expected` to `scale` digits after the decimal point expected = expected.quantize(decimal.Decimal(1).scaleb(-decimal_ty.scale)) s = pa.scalar(float_val, type=float_ty) - actual = pc.cast(s, decimal_ty).as_py() + actual = pc.cast(s, decimal_ty).as_py() # type: ignore[union-attr] if actual != expected: # Allow the last digit to vary. 
The tolerance is higher for # very high precisions as rounding errors can accumulate in @@ -2328,8 +2323,9 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): expected = decimal.Decimal(mantissa) / 2**-float_exp expected_as_int = round(expected.scaleb(scale)) actual = pc.cast( - pa.scalar(float_val, type=float_ty), decimal_ty).as_py() - actual_as_int = round(actual.scaleb(scale)) + pa.scalar(float_val, type=float_ty), decimal_ty + ).as_py() # type: ignore[union-attr] + actual_as_int = round(actual.scaleb(scale)) # type: ignore[union-attr] # We allow for a minor rounding error between expected and actual assert abs(actual_as_int - expected_as_int) <= 1 @@ -2365,7 +2361,7 @@ def test_strptime(): @pytest.mark.pandas @pytest.mark.timezone_data def test_strftime(): - times = ["2018-03-10 09:00", "2038-01-31 12:23", None] + times: list[str | None] = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I", @@ -2375,14 +2371,15 @@ def test_strftime(): formats.extend(["%c", "%x", "%X"]) for timezone in timezones: - ts = pd.to_datetime(times).tz_localize(timezone) + ts = pd.to_datetime(times).tz_localize(timezone) # type: ignore[no-matching-overload] for unit in ["s", "ms", "us", "ns"]: tsa = pa.array(ts, type=pa.timestamp(unit, timezone)) for fmt in formats: options = pc.StrftimeOptions(fmt) result = pc.strftime(tsa, options=options) + st = ts.strftime(fmt) # type: ignore[call-non-callable] # cast to the same type as result to ignore string vs large_string - expected = pa.array(ts.strftime(fmt)).cast(result.type) + expected = pa.array(st).cast(result.type) assert result.equals(expected) fmt = "%Y-%m-%dT%H:%M:%S" @@ -2390,42 +2387,48 @@ def test_strftime(): # Default format tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions()) - expected = pa.array(ts.strftime(fmt)).cast(result.type) + st = 
ts.strftime(fmt) # type: ignore[call-non-callable] + expected = pa.array(st).cast(result.type) assert result.equals(expected) # Default format plus timezone tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) - expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) + st = ts.strftime(fmt + "%Z") # type: ignore[call-non-callable] + expected = pa.array(st).cast(result.type) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions("%S") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime("%S")).cast(result.type) + st = ts.strftime("%S") # type: ignore[call-non-callable] + expected = pa.array(st).cast(result.type) assert result.equals(expected) # Pandas %S.%f is equivalent to %S in arrow for unit="us" tsa = pa.array(ts, type=pa.timestamp("us", timezone)) options = pc.StrftimeOptions("%S") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime("%S.%f")).cast(result.type) + st = ts.strftime("%S.%f") # type: ignore[call-non-callable] + expected = pa.array(st).cast(result.type) assert result.equals(expected) # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions(fmt, locale="C") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime(fmt)).cast(result.type) + st = ts.strftime(fmt) # type: ignore[call-non-callable] + expected = pa.array(st).cast(result.type) assert result.equals(expected) # Test timestamps without timezone fmt = "%Y-%m-%dT%H:%M:%S" - ts = pd.to_datetime(times) + ts = pd.to_datetime(times) # type: ignore[no-matching-overload] tsa = pa.array(ts, type=pa.timestamp("s")) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) - expected = pa.array(ts.strftime(fmt)).cast(result.type) + st = ts.strftime(fmt) # type: ignore[call-non-callable] + expected = 
pa.array(st).cast(result.type) # Positional format assert pc.strftime(tsa, fmt) == result @@ -2554,10 +2557,11 @@ def test_extract_datetime_components(request): def test_offset_timezone(): - arr = pc.strptime(["2012-12-12T12:12:12"], format="%Y-%m-%dT%H:%M:%S", unit="s") + arr = pc.strptime(pa.array(["2012-12-12T12:12:12"]), + format="%Y-%m-%dT%H:%M:%S", unit="s") zoned_arr = arr.cast(pa.timestamp("s", tz="+05:30")) - assert pc.hour(zoned_arr)[0].as_py() == 17 - assert pc.minute(zoned_arr)[0].as_py() == 42 + assert pc.hour(zoned_arr)[0].as_py() == 17 # type: ignore[index,arg-type] + assert pc.minute(zoned_arr)[0].as_py() == 42 # type: ignore[index,arg-type] @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) @@ -2654,12 +2658,14 @@ def test_assume_timezone(): f"timezone '{timezone}'"): pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise) - expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True]) + expected = ambiguous.tz_localize( + timezone, ambiguous=np.array([True, True, True])) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_earliest) result.equals(pa.array(expected)) - expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False]) + expected = ambiguous.tz_localize( + timezone, ambiguous=np.array([False, False, False])) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_latest) result.equals(pa.array(expected)) @@ -2748,7 +2754,9 @@ def _check_temporal_rounding(ts, values, unit): expected = np.where( expected == ts, - expected + pd.Timedelta(value, unit_shorthand[unit]), + expected + pd.Timedelta( + value, unit_shorthand[unit] # type: ignore[arg-type] + ), expected) np.testing.assert_array_equal(result, expected) @@ -2810,7 +2818,7 @@ def test_count(): with pytest.raises(ValueError, match='"something else" is not a valid count mode'): - pc.count(arr, 'something else') + pc.count(arr, 'something else') # type: ignore[arg-type] def test_index(): @@ -2860,7 +2868,7 @@ 
def test_partition_nth(): with pytest.raises( ValueError, match="'partition_nth_indices' cannot be called without options"): - pc.partition_nth_indices(data) + pc.partition_nth_indices(data) # type: ignore[call-arg] def test_partition_nth_null_placement(): @@ -2982,7 +2990,7 @@ def test_array_sort_indices(): assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="not a valid sort order"): - pc.array_sort_indices(arr, order="nonscending") + pc.array_sort_indices(arr, order="nonscending") # type: ignore[arg-type] def test_sort_indices_array(): @@ -3045,23 +3053,29 @@ def test_sort_indices_table(): pc.sort_indices(table, sort_keys=[("unknown", "ascending")]) with pytest.raises(ValueError, match="not a valid sort order"): - pc.sort_indices(table, sort_keys=[("a", "nonscending")]) + pc.sort_indices( + table, sort_keys=[("a", "nonscending")] # type: ignore[list-item] + ) def test_is_in(): arr = pa.array([1, 2, None, 1, 2, 3]) result = pc.is_in(arr, value_set=pa.array([1, 3, None])) - assert result.to_pylist() == [True, False, True, True, False, True] + assert result.to_pylist() == [True, False, True, True, + False, True] result = pc.is_in(arr, value_set=pa.array([1, 3, None]), skip_nulls=True) - assert result.to_pylist() == [True, False, False, True, False, True] + assert result.to_pylist() == [True, False, False, True, + False, True] result = pc.is_in(arr, value_set=pa.array([1, 3])) - assert result.to_pylist() == [True, False, False, True, False, True] + assert result.to_pylist() == [True, False, False, True, + False, True] result = pc.is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True) - assert result.to_pylist() == [True, False, False, True, False, True] + assert result.to_pylist() == [True, False, False, True, + False, True] def test_index_in(): @@ -3125,7 +3139,7 @@ def test_quantile(): with pytest.raises(ValueError, match="Quantile must be between 0 and 1"): pc.quantile(arr, q=1.1) with pytest.raises(ValueError, match="not a valid 
quantile interpolation"): - pc.quantile(arr, interpolation='zzz') + pc.quantile(arr, interpolation='zzz') # type: ignore[arg-type] def test_tdigest(): @@ -3234,12 +3248,13 @@ def test_cumulative_sum(start, skip_nulls): # Add `start` offset to expected array before comparing expected = pc.add(expected_arrays[i], strt if strt is not None else 0) + assert isinstance(expected, pa.Array) np.testing.assert_array_almost_equal(result.to_numpy( zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_sum([1, 2, 3], start=strt) + pc.cumulative_sum([1, 2, 3], start=strt) # type: ignore[arg-type] @pytest.mark.numpy @@ -3289,6 +3304,7 @@ def test_cumulative_prod(start, skip_nulls): # Multiply `start` offset to expected array before comparing expected = pc.multiply(expected_arrays[i], strt if strt is not None else 1) + assert isinstance(expected, pa.Array) np.testing.assert_array_almost_equal(result.to_numpy( zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) @@ -3347,8 +3363,10 @@ def test_cumulative_max(start, skip_nulls): expected = pc.max_element_wise( expected_arrays[i], strt if strt is not None else -1e9, skip_nulls=False) - np.testing.assert_array_almost_equal(result.to_numpy( - zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + np.testing.assert_array_almost_equal( + result.to_numpy(zero_copy_only=False), + expected.to_numpy(zero_copy_only=False) + ) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): @@ -3405,8 +3423,10 @@ def test_cumulative_min(start, skip_nulls): expected = pc.min_element_wise( expected_arrays[i], strt if strt is not None else 1e9, skip_nulls=False) - np.testing.assert_array_almost_equal(result.to_numpy( - zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + np.testing.assert_array_almost_equal( + result.to_numpy(zero_copy_only=False), + expected.to_numpy(zero_copy_only=False) + 
) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): @@ -3484,7 +3504,7 @@ def test_struct_fields_options(): pc.struct_field(arr, '.a.foo') with pytest.raises(pa.ArrowInvalid, match="cannot be called without options"): - pc.struct_field(arr) + pc.struct_field(arr) # type: ignore[call-arg] def test_case_when(): @@ -3536,7 +3556,7 @@ def test_utf8_normalize(): with pytest.raises( ValueError, match='"NFZ" is not a valid Unicode normalization form'): - pc.utf8_normalize(arr, form="NFZ") + pc.utf8_normalize(arr, form="NFZ") # type: ignore[arg-type] def test_random(): @@ -3563,7 +3583,7 @@ def test_random(): with pytest.raises(TypeError, match=r"initializer should be 'system', an integer, " r"or a hashable object; got \[\]"): - pc.random(100, initializer=[]) + pc.random(100, initializer=[]) # type: ignore[arg-type] @pytest.mark.parametrize( @@ -3613,7 +3633,7 @@ def test_rank_options(): match=r'"NonExisting" is not a valid tiebreaker'): pc.RankOptions(sort_keys="descending", null_placement="at_end", - tiebreaker="NonExisting") + tiebreaker="NonExisting") # type: ignore[arg-type] def test_rank_quantile_options(): @@ -3643,7 +3663,7 @@ def test_rank_quantile_options(): assert result.equals(expected_descending) with pytest.raises(ValueError, match="not a valid sort order"): - pc.rank_quantile(arr, sort_keys="XXX") + pc.rank_quantile(arr, sort_keys="XXX") # type: ignore[arg-type] def test_rank_normal_options(): @@ -3829,21 +3849,21 @@ def test_expression_construction(): nested_field = pc.field(("nested", "field")) nested_field2 = pc.field("nested", "field") - zero | one == string - ~true == false + _ = zero | one == string + _ = ~true == false for typ in ("bool", pa.bool_()): - field.cast(typ) == true + _ = field.cast(typ) == true - field.isin([1, 2]) - nested_mixed_types.isin(["foo", "bar"]) + _ = field.isin([1, 2]) + _ = nested_mixed_types.isin(["foo", "bar"]) nested_field.isin(["foo", "bar"]) nested_field2.isin(["foo", "bar"]) with 
pytest.raises(TypeError): - field.isin(1) + field.isin(1) # type: ignore[arg-type] with pytest.raises(pa.ArrowInvalid): - field != object() + _ = field != object() def test_expression_boolean_operators(): @@ -3852,16 +3872,16 @@ def test_expression_boolean_operators(): false = pc.scalar(False) with pytest.raises(ValueError, match="cannot be evaluated to python True"): - true and false + _ = true and false with pytest.raises(ValueError, match="cannot be evaluated to python True"): - true or false + _ = true or false with pytest.raises(ValueError, match="cannot be evaluated to python True"): bool(true) with pytest.raises(ValueError, match="cannot be evaluated to python True"): - not true + _ = not true def test_expression_call_function(): @@ -3890,7 +3910,7 @@ def test_cast_table_raises(): table = pa.table({'a': [1, 2]}) with pytest.raises(pa.lib.ArrowTypeError): - pc.cast(table, pa.int64()) + pc.cast(table, pa.int64()) # type: ignore[arg-type] @pytest.mark.parametrize("start,stop,expected", ( diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py index 93004a30618..e028f1c0484 100644 --- a/python/pyarrow/tests/test_udf.py +++ b/python/pyarrow/tests/test_udf.py @@ -21,7 +21,7 @@ try: import numpy as np except ImportError: - np = None + pass import pyarrow as pa from pyarrow import compute as pc @@ -35,7 +35,7 @@ try: import pyarrow.dataset as ds except ImportError: - ds = None + pass def mock_udf_context(batch_length=10): @@ -381,6 +381,7 @@ def check_scalar_function(func_fixture, func = pc.get_function(name) assert func.name == name + assert batch_length is not None result = pc.call_function(name, inputs, length=batch_length) expected_output = function(mock_udf_context(batch_length), *inputs) @@ -580,8 +581,8 @@ def identity(ctx, val): } with pytest.raises(TypeError, match="DataType expected, got "): - pc.register_scalar_function(identity, func_name, - doc, in_types, out_type) + pc.register_scalar_function( + identity, func_name, doc, 
in_types, out_type) # type: ignore[arg-type] def test_wrong_input_type_declaration(): @@ -597,8 +598,9 @@ def identity(ctx, val): } with pytest.raises(TypeError, match="DataType expected, got "): - pc.register_scalar_function(identity, func_name, doc, - in_types, out_type) + pc.register_scalar_function( + identity, func_name, doc, in_types, # type: ignore[arg-type] + out_type) def test_scalar_udf_context(unary_func_fixture):