Skip to content

Commit d32274d

Browse files
committed
fix: make column.unnest().topk() work
Also simplifies the implementation of Column.topk() and makes it re-use the implementation of Table.topk() This does NOT fix `column.unnest().topk(by=<anything besides None>)`. This is a rare case so I decided to just punt on implementing it
1 parent 431488a commit d32274d

File tree

7 files changed

+126
-64
lines changed

7 files changed

+126
-64
lines changed

ibis/backends/tests/conftest.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from __future__ import annotations
2+
3+
import pytest
4+
5+
import ibis.common.exceptions as com
6+
from ibis.backends.tests.errors import MySQLOperationalError
7+
8+
9+
def combine_marks(marks: list) -> callable:
10+
def decorator(func):
11+
for mark in reversed(marks):
12+
func = mark(func)
13+
return func
14+
15+
return decorator
16+
17+
18+
NO_ARRAY_SUPPORT_MARKS = [
19+
pytest.mark.never(
20+
["sqlite", "mysql", "exasol"], reason="No array support", raises=Exception
21+
),
22+
pytest.mark.never(
23+
["mssql"],
24+
reason="No array support",
25+
raises=(
26+
com.UnsupportedBackendType,
27+
com.OperationNotDefinedError,
28+
AssertionError,
29+
),
30+
),
31+
pytest.mark.never(
32+
["mysql"],
33+
reason="No array support",
34+
raises=(
35+
com.UnsupportedBackendType,
36+
com.OperationNotDefinedError,
37+
MySQLOperationalError,
38+
),
39+
),
40+
pytest.mark.notyet(
41+
["impala"],
42+
reason="No array support",
43+
raises=(
44+
com.UnsupportedBackendType,
45+
com.OperationNotDefinedError,
46+
com.TableNotFound,
47+
),
48+
),
49+
pytest.mark.notimpl(["druid", "oracle"], raises=Exception),
50+
]
51+
NO_ARRAY_SUPPORT = combine_marks(NO_ARRAY_SUPPORT_MARKS)
52+
53+
54+
NO_STRUCT_SUPPORT_MARKS = [
55+
pytest.mark.never(["mysql", "sqlite", "mssql"], reason="No struct support"),
56+
pytest.mark.notyet(["impala"]),
57+
pytest.mark.notimpl(["druid", "oracle", "exasol"]),
58+
]
59+
NO_STRUCT_SUPPORT = combine_marks(NO_STRUCT_SUPPORT_MARKS)
60+
61+
NO_MAP_SUPPORT_MARKS = [
62+
pytest.mark.never(
63+
["sqlite", "mysql", "mssql"], reason="Unlikely to ever add map support"
64+
),
65+
pytest.mark.notyet(
66+
["bigquery", "impala"], reason="Backend doesn't yet implement map types"
67+
),
68+
pytest.mark.notimpl(
69+
["exasol", "polars", "druid", "oracle"],
70+
reason="Not yet implemented in ibis",
71+
),
72+
]
73+
NO_MAP_SUPPORT = combine_marks(NO_MAP_SUPPORT_MARKS)
74+
75+
NO_JSON_SUPPORT_MARKS = [
76+
pytest.mark.never(["impala"], reason="doesn't support JSON and never will"),
77+
pytest.mark.notyet(["clickhouse"], reason="upstream is broken"),
78+
pytest.mark.notimpl(["datafusion", "exasol", "mssql", "druid", "oracle"]),
79+
]
80+
NO_JSON_SUPPORT = combine_marks(NO_JSON_SUPPORT_MARKS)

ibis/backends/tests/test_array.py

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import ibis.expr.datashape as ds
1616
import ibis.expr.datatypes as dt
1717
import ibis.expr.types as ir
18+
from ibis.backends.tests.conftest import NO_ARRAY_SUPPORT_MARKS
1819
from ibis.backends.tests.errors import (
1920
ClickHouseDatabaseError,
2021
DatabricksServerOperationError,
@@ -39,39 +40,7 @@
3940
pd = pytest.importorskip("pandas")
4041
tm = pytest.importorskip("pandas.testing")
4142

42-
pytestmark = [
43-
pytest.mark.never(
44-
["sqlite", "mysql", "exasol"], reason="No array support", raises=Exception
45-
),
46-
pytest.mark.never(
47-
["mssql"],
48-
reason="No array support",
49-
raises=(
50-
com.UnsupportedBackendType,
51-
com.OperationNotDefinedError,
52-
AssertionError,
53-
),
54-
),
55-
pytest.mark.never(
56-
["mysql"],
57-
reason="No array support",
58-
raises=(
59-
com.UnsupportedBackendType,
60-
com.OperationNotDefinedError,
61-
MySQLOperationalError,
62-
),
63-
),
64-
pytest.mark.notyet(
65-
["impala"],
66-
reason="No array support",
67-
raises=(
68-
com.UnsupportedBackendType,
69-
com.OperationNotDefinedError,
70-
com.TableNotFound,
71-
),
72-
),
73-
pytest.mark.notimpl(["druid", "oracle"], raises=Exception),
74-
]
43+
pytestmark = NO_ARRAY_SUPPORT_MARKS
7544

7645
# NB: We don't check whether results are numpy arrays or lists because this
7746
# varies across backends. At some point we should unify the result type to be

ibis/backends/tests/test_generic.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import ibis.expr.datatypes as dt
1717
import ibis.selectors as s
1818
from ibis import _
19+
from ibis.backends.tests.conftest import NO_ARRAY_SUPPORT
1920
from ibis.backends.tests.errors import (
2021
ClickHouseDatabaseError,
2122
ExaQueryError,
@@ -2390,6 +2391,38 @@ def test_topk_counts_null(con):
23902391
assert result[0].as_py() == 1
23912392

23922393

2394+
@NO_ARRAY_SUPPORT
2395+
def test_topk_unnest_count(con: ibis.BaseBackend):
2396+
t = con.create_table(
2397+
ibis.util.gen_name("topk_unnest_count"),
2398+
{"x": [[1, 2, 3], [1, 2, None], []]},
2399+
temp=True,
2400+
)
2401+
tk = t.x.unnest().topk(name="n")
2402+
n_1s = tk.filter(_.x == 1)["n"].as_scalar()
2403+
result = con.to_pyarrow(n_1s).as_py()
2404+
assert result == 2
2405+
2406+
tk = t.x.unnest().topk()
2407+
n_1s = tk.filter(_.x == 1)["x_count"].as_scalar()
2408+
result = con.to_pyarrow(n_1s).as_py()
2409+
assert result == 2
2410+
2411+
2412+
@pytest.mark.xfail(reason="The unnest is not placed in the right place in the query")
2413+
def test_topk_unnest_max(con: ibis.BaseBackend):
2414+
t = con.create_table(
2415+
ibis.util.gen_name("topk_counts_unnest"),
2416+
{"x": [[1, 2, 3], [1, 2, None], []]},
2417+
temp=True,
2418+
)
2419+
v = t.x.unnest()
2420+
tk = v.topk(by=v.max(), name="n")
2421+
n_1s = tk.filter(_.x == 1)["n"].as_scalar()
2422+
result = con.to_pyarrow(n_1s).as_py()
2423+
assert result == 1
2424+
2425+
23932426
@pytest.mark.notyet(
23942427
"clickhouse",
23952428
raises=AssertionError,

ibis/backends/tests/test_json.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,14 @@
88
from packaging.version import parse as vparse
99

1010
import ibis.expr.types as ir
11+
from ibis.backends.tests.conftest import NO_JSON_SUPPORT_MARKS
1112
from ibis.backends.tests.errors import PySparkPythonException
1213
from ibis.conftest import IS_SPARK_REMOTE
1314

1415
np = pytest.importorskip("numpy")
1516
pd = pytest.importorskip("pandas")
1617

17-
pytestmark = [
18-
pytest.mark.never(["impala"], reason="doesn't support JSON and never will"),
19-
pytest.mark.notyet(["clickhouse"], reason="upstream is broken"),
20-
pytest.mark.notimpl(["datafusion", "exasol", "mssql", "druid", "oracle"]),
21-
]
18+
pytestmark = NO_JSON_SUPPORT_MARKS
2219

2320

2421
@pytest.mark.notyet(

ibis/backends/tests/test_map.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import ibis
77
import ibis.common.exceptions as exc
88
import ibis.expr.datatypes as dt
9+
from ibis.backends.tests.conftest import NO_MAP_SUPPORT_MARKS
910
from ibis.backends.tests.errors import (
1011
PsycoPg2InternalError,
1112
Py4JJavaError,
@@ -17,18 +18,7 @@
1718
tm = pytest.importorskip("pandas.testing")
1819
pa = pytest.importorskip("pyarrow")
1920

20-
pytestmark = [
21-
pytest.mark.never(
22-
["sqlite", "mysql", "mssql"], reason="Unlikely to ever add map support"
23-
),
24-
pytest.mark.notyet(
25-
["bigquery", "impala"], reason="Backend doesn't yet implement map types"
26-
),
27-
pytest.mark.notimpl(
28-
["exasol", "polars", "druid", "oracle"],
29-
reason="Not yet implemented in ibis",
30-
),
31-
]
21+
pytestmark = NO_MAP_SUPPORT_MARKS
3222

3323
mark_notyet_postgres = pytest.mark.notyet(
3424
"postgres", reason="only supports string -> string"

ibis/backends/tests/test_struct.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import ibis
1010
import ibis.expr.datatypes as dt
1111
from ibis import util
12+
from ibis.backends.tests.conftest import NO_STRUCT_SUPPORT_MARKS
1213
from ibis.backends.tests.errors import (
1314
DatabricksServerOperationError,
1415
PolarsColumnNotFoundError,
@@ -25,11 +26,7 @@
2526
pd = pytest.importorskip("pandas")
2627
tm = pytest.importorskip("pandas.testing")
2728

28-
pytestmark = [
29-
pytest.mark.never(["mysql", "sqlite", "mssql"], reason="No struct support"),
30-
pytest.mark.notyet(["impala"]),
31-
pytest.mark.notimpl(["druid", "oracle", "exasol"]),
32-
]
29+
pytestmark = NO_STRUCT_SUPPORT_MARKS
3330

3431

3532
@pytest.mark.parametrize(

ibis/expr/types/generic.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2215,18 +2215,14 @@ def topk(
22152215
"""
22162216
from ibis.expr.types.relations import bind
22172217

2218+
if by is None:
2219+
return self.as_table().topk(k=k, name=name)
2220+
22182221
try:
2219-
(table,) = self.op().relations
2222+
(table_op,) = self.op().relations
22202223
except ValueError:
22212224
raise com.IbisTypeError("TopK must depend on exactly one table.")
2222-
2223-
table = table.to_expr()
2224-
2225-
if by is None and name is None:
2226-
# if `by` is something more complex, the _count doesn't make sense.
2227-
name = f"{self.get_name()}_count"
2228-
if by is None:
2229-
by = lambda t: t.count()
2225+
table: ibis.Table = table_op.to_expr()
22302226

22312227
(metric,) = bind(table, by)
22322228
if name is not None:

0 commit comments

Comments
 (0)