Skip to content

Commit 0761dc6

Browse files
authored
check offset overflow (#71)
1 parent fdee23b commit 0761dc6

File tree

2 files changed

+25 -2 lines changed

2 files changed

+25 -2 lines changed

src/array_decoder/string.rs

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,14 @@ use arrow::array::{ArrayRef, DictionaryArray, GenericByteArray, StringArray};
2323
use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer};
2424
use arrow::compute::kernels::cast;
2525
use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType, GenericStringType};
26-
use snafu::ResultExt;
26+
use snafu::{ensure, ResultExt};
2727

2828
use crate::array_decoder::derive_present_vec;
2929
use crate::column::Column;
3030
use crate::compression::Decompressor;
3131
use crate::encoding::integer::get_unsigned_int_decoder;
3232
use crate::encoding::PrimitiveValueDecoder;
33-
use crate::error::{ArrowSnafu, IoSnafu, Result};
33+
use crate::error::{ArrowSnafu, IoSnafu, OffsetOverflowSnafu, Result};
3434
use crate::proto::column_encoding::Kind as ColumnEncodingKind;
3535
use crate::proto::stream::Kind;
3636
use crate::stripe::Stripe;
@@ -123,6 +123,14 @@ impl<T: ByteArrayType> GenericByteArrayDecoder<T> {
123123
self.lengths.decode(&mut lengths)?;
124124
}
125125
let total_length: i64 = lengths.iter().sum();
126+
ensure!(
127+
total_length <= i32::MAX as i64,
128+
OffsetOverflowSnafu {
129+
total_length,
130+
max_size: i32::MAX,
131+
batch_size,
132+
}
133+
);
126134
// Fetch all data bytes at once
127135
let mut bytes = Vec::with_capacity(total_length as usize);
128136
self.bytes

src/error.rs

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,21 @@ pub enum OrcError {
7070
to_time_unit: TimeUnit,
7171
},
7272

73+
#[snafu(display(
74+
"String/Binary data size ({} bytes) exceeds maximum offset size ({}) \
75+
with current batch size {}. Please reduce the batch size to avoid offset overflow.",
76+
total_length,
77+
max_size,
78+
batch_size,
79+
))]
80+
OffsetOverflow {
81+
#[snafu(implicit)]
82+
location: Location,
83+
total_length: i64,
84+
max_size: i32,
85+
batch_size: usize,
86+
},
87+
7388
#[snafu(display("Failed to decode proto, source: {}", source))]
7489
DecodeProto {
7590
#[snafu(implicit)]

0 commit comments

Comments
 (0)