Skip to content

Commit

Permalink
add: automatically detect the file type without relying on the file extension
Browse files Browse the repository at this point in the history
  • Loading branch information
matthias-Q committed Mar 21, 2024
1 parent 5c6730a commit 711952d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 17 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ comfy-table = "6.1.2"
datafusion = { version = "35.0", features = ["avro"] }
structopt = "0.3"
tokio = { version = "1.36", features = ["rt-multi-thread"] }
thiserror = "1"
thiserror = "1"
file-format = { version = "0.24.0", features = ["reader-txt"] }
32 changes: 16 additions & 16 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,29 @@ use datafusion::prelude::{
AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions,
SessionContext,
};
use file_format::FileFormat as DetectFileFormat;
use std::path::Path;

/// Maps the file named by `filename` to one of this crate's `FileFormat` variants,
/// or returns `Error::General` when the file is not a supported type.
///
/// NOTE(review): this listing looks like a diff rendered without +/- markers, so two
/// versions of the body are interleaved below and the text is not valid Rust as
/// shown. Confirm against the repository before editing.
pub fn file_format(filename: &str) -> Result<FileFormat, Error> {
// Variant 1 (presumably the removed lines): decide purely from the extension
// returned by `file_ending`; "parq" is accepted as an alias for "parquet".
match file_ending(filename)?.as_str() {
"avro" => Ok(FileFormat::Avro),
"csv" => Ok(FileFormat::Csv),
"json" => Ok(FileFormat::Json),
"parquet" | "parq" => Ok(FileFormat::Parquet),
other => Err(Error::General(format!(
"unsupported file extension '{}'",
other
))),
// Variant 2 (presumably the added lines): ask the `file_format` crate's detector
// (imported as `DetectFileFormat`) first. Avro and Parquet are mapped directly.
// For `PlainText` the extension is still consulted to tell JSON from CSV, so
// text files keep depending on `file_ending`.
match DetectFileFormat::from_file(filename)? {
DetectFileFormat::ApacheAvro => Ok(FileFormat::Avro),
DetectFileFormat::ApacheParquet => Ok(FileFormat::Parquet),
DetectFileFormat::PlainText => match file_ending(filename)?.as_str() {
"json" => Ok(FileFormat::Json),
"csv" => Ok(FileFormat::Csv),
other => Err(Error::General(format!(
"unsupported file extension '{}'",
other
))),
},
other => Err(Error::General(format!("unsupported file type '{}'", other))),
}
}

/// Returns the extension of `filename` (as reported by `Path::extension`) as an
/// owned `String`, converted with `to_string_lossy`.
///
/// NOTE(review): two versions of the body appear interleaved below (diff rendered
/// without +/- markers); they differ in how a missing extension is handled.
pub fn file_ending(filename: &str) -> Result<String, Error> {
// Variant 1 (presumably the removed lines): a path without an extension is an
// `Error::General`.
if let Some(ending) = std::path::Path::new(filename).extension() {
Ok(ending.to_string_lossy().to_string())
} else {
Err(Error::General(
"Could not determine file extension".to_string(),
))
}
// Variant 2 (presumably the added lines): a path without an extension yields an
// empty string, so this variant always returns `Ok`.
Ok(std::path::Path::new(filename)
.extension()
.map_or_else(|| "".to_owned(), |e| e.to_string_lossy().to_string()))
}

pub fn parse_filename(filename: &Path) -> Result<&str, Error> {
Expand Down

0 comments on commit 711952d

Please sign in to comment.