Skip to content

Commit

Permalink
Add -z/--zstd to enable Zstd compression instead of the default.
Browse files Browse the repository at this point in the history
  • Loading branch information
keltia committed Feb 19, 2024
1 parent 2ddae69 commit 10e9ea3
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
5 changes: 4 additions & 1 deletion src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ enum Command {
Convert {
#[structopt(short, long)]
single_file: bool,
#[structopt(short, long)]
zstd: bool,
#[structopt(parse(from_os_str))]
input: PathBuf,
#[structopt(parse(from_os_str))]
Expand Down Expand Up @@ -133,10 +135,11 @@ async fn execute_command(cmd: Command) -> Result<(), Error> {
single_file,
input,
output,
zstd,
} => {
let input_filename = parse_filename(&input)?;
let output_filename = parse_filename(&output)?;
convert_files(&ctx, input_filename, output_filename, single_file).await?;
convert_files(&ctx, input_filename, output_filename, single_file, zstd).await?;
}
Command::Query {
table,
Expand Down
20 changes: 17 additions & 3 deletions src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,24 @@ pub async fn convert_files(
ctx: &SessionContext,
input_filename: &str,
output_filename: &str,
one_file: bool
single_file: bool,
zstd: bool,
) -> Result<Vec<RecordBatch>, Error> {
let df = register_table(ctx, "t", input_filename).await?;
let write_options = DataFrameWriteOptions::default().with_single_file_output(one_file);
let write_options = DataFrameWriteOptions::default().with_single_file_output(single_file);
let props = if zstd {
WriterProperties::builder()
.set_created_by("bdt".to_string())
.set_encoding(Encoding::PLAIN)
.set_compression(Compression::ZSTD(ZstdLevel::try_new(8)?))
.build()
} else {
WriterProperties::builder()
.set_created_by("bdt".to_string())
.set_encoding(Encoding::PLAIN)
.build()
};

match file_format(output_filename)? {
FileFormat::Avro => Err(Error::General(
"Conversion to Avro is not supported".to_string(),
Expand All @@ -32,7 +46,7 @@ pub async fn convert_files(
.await
.map_err(|e| e.into()),
FileFormat::Parquet => df
.write_parquet(output_filename, write_options, None)
.write_parquet(output_filename, write_options, Some(props))
.await
.map_err(|e| e.into()),
FileFormat::Arrow => unimplemented!(),
Expand Down

0 comments on commit 10e9ea3

Please sign in to comment.