feat(ux): include basename of path in generated table names in read_*() #10522
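For context on the titular feature: when no `table_name` is passed, the `read_*()` methods generate one, and this PR makes that generated name include the basename of the source path. A hypothetical before/after sketch (the exact name format, including any uniquifying suffix, is an assumption for illustration, not the PR's literal output):

```python
import ibis

con = ibis.duckdb.connect()  # any backend with read_csv support

# No explicit table_name, so a name is generated for the view/table.
t = con.read_csv("data/diamonds.csv")

# Before: an opaque generated name, e.g. "ibis_read_csv_<random>".
# After this PR: the basename is embedded, e.g. "ibis_read_csv_diamonds_<random>",
# making the entry in con.list_tables() recognizable at a glance.
print(con.list_tables())
```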

Open · wants to merge 5 commits into main

Changes from 1 commit
chore: update register tests to not rely on generated table names
Generated table names are an unstable API and shouldn't be relied on.
NickCrews committed Dec 5, 2024
commit 99e6fa56d6bb058d28d92ed0cfb51e254e6c4a19
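The updated tests stop substring-matching generated names. Instead they snapshot `list_tables()` before registering and assert on the set difference, checking an exact name only when one was explicitly requested. A minimal standalone sketch of that pattern (`con`, `fname`, and `table_name` stand in for the test fixtures and parameters):

```python
import pytest

tables_before = set(con.list_tables())

with pytest.warns(FutureWarning, match="v9.1"):  # register() is deprecated
    table = con.register(fname, table_name=table_name)

# Exactly one new table should appear, whatever it is named.
new_tables = set(con.list_tables()) - tables_before
assert len(new_tables) == 1

# Only an explicitly requested name is asserted; generated names are
# treated as unstable and deliberately left unchecked.
if table_name is not None:
    assert new_tables.pop() == table_name
```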
ibis/backends/tests/test_register.py (53 changes: 24 additions & 29 deletions)
```diff
@@ -50,13 +50,12 @@ def gzip_csv(data_dir, tmp_path):
 
 # TODO: rewrite or delete test when register api is removed
 @pytest.mark.parametrize(
-    ("fname", "in_table_name", "out_table_name"),
+    ("fname", "table_name"),
     [
-        param("diamonds.csv", None, "ibis_read_csv_", id="default"),
+        param("diamonds.csv", None, id="default"),
         param(
             "csv://diamonds.csv",
             "Diamonds2",
-            "Diamonds2",
             id="csv_name",
             marks=pytest.mark.notyet(
                 ["pyspark"], reason="pyspark lowercases view names"
@@ -65,13 +64,11 @@ def gzip_csv(data_dir, tmp_path):
         param(
             "file://diamonds.csv",
             "fancy_stones",
-            "fancy_stones",
             id="file_name",
         ),
         param(
             "file://diamonds.csv",
             "fancy stones",
-            "fancy stones",
             id="file_atypical_name",
             marks=pytest.mark.notyet(
                 ["pyspark"], reason="no spaces allowed in view names"
@@ -80,7 +77,6 @@ def gzip_csv(data_dir, tmp_path):
         param(
             ["file://diamonds.csv", "diamonds.csv"],
             "fancy_stones2",
-            "fancy_stones2",
             id="multi_csv",
             marks=pytest.mark.notyet(
                 ["datafusion"],
@@ -105,12 +101,16 @@ def gzip_csv(data_dir, tmp_path):
         "databricks",
     ]
 )
-def test_register_csv(con, data_dir, fname, in_table_name, out_table_name):
+def test_register_csv(con, data_dir, fname, table_name):
+    tables_before = set(con.list_tables())
     with pushd(data_dir / "csv"):
         with pytest.warns(FutureWarning, match="v9.1"):
-            table = con.register(fname, table_name=in_table_name)
+            table = con.register(fname, table_name=table_name)
+    new_tables = set(con.list_tables()) - tables_before
+    assert len(new_tables) == 1
+    if table_name is not None:
+        assert new_tables.pop() == table_name
 
-    assert any(out_table_name in t for t in con.list_tables())
     if con.name != "datafusion":
         table.count().execute()
 
@@ -185,18 +185,12 @@ def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
 
 # TODO: rewrite or delete test when register api is removed
 @pytest.mark.parametrize(
-    ("fname", "in_table_name", "out_table_name"),
+    ("fname", "table_name"),
     [
-        param(
-            "parquet://functional_alltypes.parquet", None, "ibis_read_parquet", id="url"
-        ),
-        param("functional_alltypes.parquet", "funk_all", "funk_all", id="basename"),
-        param(
-            "parquet://functional_alltypes.parq", "funk_all", "funk_all", id="url_parq"
-        ),
-        param(
-            "parquet://functional_alltypes", None, "ibis_read_parquet", id="url_no_ext"
-        ),
+        param("parquet://functional_alltypes.parquet", None, id="url"),
+        param("functional_alltypes.parquet", "my_table1", id="basename"),
+        param("parquet://functional_alltypes.parq", "my_table2", id="url_parq"),
+        param("parquet://functional_alltypes", None, id="url_no_ext"),
     ],
 )
 @pytest.mark.notyet(
@@ -214,22 +208,22 @@ def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
         "trino",
     ]
 )
-def test_register_parquet(
-    con, tmp_path, data_dir, fname, in_table_name, out_table_name
-):
+def test_register_parquet(con, tmp_path, data_dir, fname, table_name):
     pq = pytest.importorskip("pyarrow.parquet")
 
     fname = Path(fname)
     table = read_table(data_dir / "csv" / fname.name)
 
     pq.write_table(table, tmp_path / fname.name)
 
+    tables_before = set(con.list_tables())
     with pushd(tmp_path):
         with pytest.warns(FutureWarning, match="v9.1"):
-            table = con.register(f"parquet://{fname.name}", table_name=in_table_name)
-
-    assert any(out_table_name in t for t in con.list_tables())
-
+            table = con.register(f"parquet://{fname.name}", table_name=table_name)
+    new_tables = set(con.list_tables()) - tables_before
+    assert len(new_tables) == 1
+    if table_name is not None:
+        assert new_tables.pop() == table_name
     if con.name != "datafusion":
         table.count().execute()
 
@@ -263,6 +257,7 @@ def test_register_iterator_parquet(
 
     pq.write_table(table, tmp_path / "functional_alltypes.parquet")
 
+    tables_before = set(con.list_tables())
     with pushd(tmp_path):
         with pytest.warns(FutureWarning, match="v9.1"):
             table = con.register(
@@ -272,8 +267,8 @@ def test_register_iterator_parquet(
                 ],
                 table_name=None,
             )
-
-    assert any("ibis_read_parquet" in t for t in con.list_tables())
+    new_tables = set(con.list_tables()) - tables_before
+    assert len(new_tables) == 1
     assert table.count().execute()
```