Skip to content

Commit bab6e6b

Browse files
committed
Report inputSizeInBytes in HiveDataSink
1 parent 97aa713 commit bab6e6b

File tree

3 files changed

+4
-3
lines changed

3 files changed

+4
-3
lines changed

velox/connectors/hive/HiveDataSink.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ void HiveDataSink::write(size_t index, RowVectorPtr input) {
488488
auto dataInput = makeDataInput(dataChannels_, input);
489489

490490
writers_[index]->write(dataInput);
491+
writerInfo_[index]->inputSizeInBytes += dataInput->estimateFlatSize();
491492
writerInfo_[index]->numWrittenRows += dataInput->size();
492493
}
493494

@@ -661,9 +662,7 @@ std::vector<std::string> HiveDataSink::close() {
661662
("targetFileName", info->writerParameters.targetFileName())
662663
("fileSize", ioStats_.at(i)->rawBytesWritten())))
663664
("rowCount", info->numWrittenRows)
664-
// TODO(gaoge): track and send the fields when inMemoryDataSizeInBytes
665-
// and containsNumberedFileNames are needed at coordinator when file_renaming_enabled are turned on.
666-
("inMemoryDataSizeInBytes", 0)
665+
("inMemoryDataSizeInBytes", info->inputSizeInBytes)
667666
("onDiskDataSizeInBytes", ioStats_.at(i)->rawBytesWritten())
668667
("containsNumberedFileNames", true));
669668
// clang-format on

velox/connectors/hive/HiveDataSink.h

+1
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ struct HiveWriterInfo {
384384
const std::shared_ptr<memory::MemoryPool> sinkPool;
385385
const std::shared_ptr<memory::MemoryPool> sortPool;
386386
int64_t numWrittenRows = 0;
387+
int64_t inputSizeInBytes = 0;
387388
};
388389

389390
/// Identifies a hive writer.

velox/exec/tests/TableWriteTest.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -2631,6 +2631,7 @@ TEST_P(AllTableWriterTest, tableWriteOutputCheck) {
26312631
std::filesystem::path path{writeFileFullPath};
26322632
const auto actualFileSize = fs::file_size(path);
26332633
ASSERT_EQ(obj["onDiskDataSizeInBytes"].asInt(), actualFileSize);
2634+
ASSERT_GT(obj["inMemoryDataSizeInBytes"].asInt(), 0);
26342635
ASSERT_EQ(writerInfoObj["fileSize"], actualFileSize);
26352636
if (commitStrategy_ == CommitStrategy::kNoCommit) {
26362637
ASSERT_EQ(writeFileName, targetFileName);

0 commit comments

Comments
 (0)