Skip to content

Commit e9a77e0

Browse files
cj-zhukovSergey Zhukovalamb
authored
Add a hint about expected extension in error message in register_csv,… (apache#14168)
* Add a hint about expected extension in error message in register_csv, register_parquet, register_json, register_avro (apache#14144)
* Add tests for error
* fix test
* fmt
* Fix issues causing GitHub checks to fail
* revert datafusion-testing change

---------

Co-authored-by: Sergey Zhukov <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 4f58700 commit e9a77e0

File tree

6 files changed

+94
-2
lines changed

6 files changed

+94
-2
lines changed

datafusion/core/src/execution/context/avro.rs

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ impl SessionContext {
4646
let listing_options = options
4747
.to_listing_options(&self.copied_config(), self.copied_table_options());
4848

49+
self.register_type_check(table_path.as_ref(), &listing_options.file_extension)?;
50+
4951
self.register_listing_table(
5052
table_ref,
5153
table_path,

datafusion/core/src/execution/context/csv.rs

+2
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ impl SessionContext {
6262
let listing_options = options
6363
.to_listing_options(&self.copied_config(), self.copied_table_options());
6464

65+
self.register_type_check(table_path.as_ref(), &listing_options.file_extension)?;
66+
6567
self.register_listing_table(
6668
table_ref,
6769
table_path,

datafusion/core/src/execution/context/json.rs

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ impl SessionContext {
4848
let listing_options = options
4949
.to_listing_options(&self.copied_config(), self.copied_table_options());
5050

51+
self.register_type_check(table_path.as_ref(), &listing_options.file_extension)?;
52+
5153
self.register_listing_table(
5254
table_ref,
5355
table_path,

datafusion/core/src/execution/context/mod.rs

+23
Original file line numberDiff line numberDiff line change
@@ -1379,6 +1379,29 @@ impl SessionContext {
13791379
Ok(())
13801380
}
13811381

1382+
fn register_type_check<P: DataFilePaths>(
1383+
&self,
1384+
table_paths: P,
1385+
extension: impl AsRef<str>,
1386+
) -> Result<()> {
1387+
let table_paths = table_paths.to_urls()?;
1388+
if table_paths.is_empty() {
1389+
return exec_err!("No table paths were provided");
1390+
}
1391+
1392+
// check if the file extension matches the expected extension
1393+
let extension = extension.as_ref();
1394+
for path in &table_paths {
1395+
let file_path = path.as_str();
1396+
if !file_path.ends_with(extension) && !path.is_collection() {
1397+
return exec_err!(
1398+
"File path '{file_path}' does not match the expected extension '{extension}'"
1399+
);
1400+
}
1401+
}
1402+
Ok(())
1403+
}
1404+
13821405
/// Registers an Arrow file as a table that can be referenced from
13831406
/// SQL statements executed against this context.
13841407
pub async fn register_arrow(

datafusion/core/src/execution/context/parquet.rs

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ impl SessionContext {
5050
let listing_options = options
5151
.to_listing_options(&self.copied_config(), self.copied_table_options());
5252

53+
self.register_type_check(table_path.as_ref(), &listing_options.file_extension)?;
54+
5355
self.register_listing_table(
5456
table_ref,
5557
table_path,

datafusion/core/tests/dataframe/mod.rs

+63-2
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,9 @@ use datafusion::error::Result;
5454
use datafusion::execution::context::SessionContext;
5555
use datafusion::execution::session_state::SessionStateBuilder;
5656
use datafusion::logical_expr::{ColumnarValue, Volatility};
57-
use datafusion::prelude::{CsvReadOptions, ParquetReadOptions};
58-
use datafusion::prelude::{JoinType, NdJsonReadOptions};
57+
use datafusion::prelude::{
58+
AvroReadOptions, CsvReadOptions, JoinType, NdJsonReadOptions, ParquetReadOptions,
59+
};
5960
use datafusion::test_util::{
6061
parquet_test_data, populate_csv_partitions, register_aggregate_csv, test_table,
6162
test_table_with_name,
@@ -5121,3 +5122,63 @@ async fn test_alias_nested() -> Result<()> {
51215122
);
51225123
Ok(())
51235124
}
5125+
5126+
#[tokio::test]
5127+
async fn register_non_json_file() {
5128+
let ctx = SessionContext::new();
5129+
let err = ctx
5130+
.register_json(
5131+
"data",
5132+
"tests/data/test_binary.parquet",
5133+
NdJsonReadOptions::default(),
5134+
)
5135+
.await;
5136+
assert_contains!(
5137+
err.unwrap_err().to_string(),
5138+
"test_binary.parquet' does not match the expected extension '.json'"
5139+
);
5140+
}
5141+
5142+
#[tokio::test]
5143+
async fn register_non_csv_file() {
5144+
let ctx = SessionContext::new();
5145+
let err = ctx
5146+
.register_csv(
5147+
"data",
5148+
"tests/data/test_binary.parquet",
5149+
CsvReadOptions::default(),
5150+
)
5151+
.await;
5152+
assert_contains!(
5153+
err.unwrap_err().to_string(),
5154+
"test_binary.parquet' does not match the expected extension '.csv'"
5155+
);
5156+
}
5157+
5158+
#[tokio::test]
5159+
async fn register_non_avro_file() {
5160+
let ctx = SessionContext::new();
5161+
let err = ctx
5162+
.register_avro(
5163+
"data",
5164+
"tests/data/test_binary.parquet",
5165+
AvroReadOptions::default(),
5166+
)
5167+
.await;
5168+
assert_contains!(
5169+
err.unwrap_err().to_string(),
5170+
"test_binary.parquet' does not match the expected extension '.avro'"
5171+
);
5172+
}
5173+
5174+
#[tokio::test]
5175+
async fn register_non_parquet_file() {
5176+
let ctx = SessionContext::new();
5177+
let err = ctx
5178+
.register_parquet("data", "tests/data/1.json", ParquetReadOptions::default())
5179+
.await;
5180+
assert_contains!(
5181+
err.unwrap_err().to_string(),
5182+
"1.json' does not match the expected extension '.parquet'"
5183+
);
5184+
}

0 commit comments

Comments
 (0)