Skip to content

Commit c9f3d89

Browse files
weiting-chenkecookierzhztheplayerFelixYBW
authored
[VL] Branch-1.2: port PR #6503 PR #6515 PR #6554 (#6595)
* [GLUTEN-6501][VL] Fix the missing fileReadProperties when constructing a LocalFilesNode (#6503) * [GLUTEN-6477][VL] Fix occasional dead lock during spilling (#6515) * [VL] Add thread_safe to several VeloxRuntime classes (#6526) VeloxRuntime is shared by many threads, such as task threads or Parquet writer threads. We must make sure the objects held by VeloxRuntime are thread-safe. * [VL] Following #6526, minor fixes and improvements (#6554) --------- Co-authored-by: zhaokuo <[email protected]> Co-authored-by: Hongze Zhang <[email protected]> Co-authored-by: BInwei Yang <[email protected]>
1 parent 25f40d4 commit c9f3d89

File tree

13 files changed

+107
-69
lines changed

13 files changed

+107
-69
lines changed

backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHIteratorApi.scala

+4-3
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ class CHIteratorApi extends IteratorApi with Logging with LogLevelUtil {
122122
partition: InputPartition,
123123
partitionSchema: StructType,
124124
fileFormat: ReadFileFormat,
125-
metadataColumnNames: Seq[String]): SplitInfo = {
125+
metadataColumnNames: Seq[String],
126+
properties: Map[String, String]): SplitInfo = {
126127
partition match {
127128
case p: GlutenMergeTreePartition =>
128129
val partLists = new JArrayList[String]()
@@ -183,7 +184,8 @@ class CHIteratorApi extends IteratorApi with Logging with LogLevelUtil {
183184
partitionColumns,
184185
new JArrayList[JMap[String, String]](),
185186
fileFormat,
186-
preferredLocations.toList.asJava
187+
preferredLocations.toList.asJava,
188+
mapAsJavaMap(properties)
187189
)
188190
case _ =>
189191
throw new UnsupportedOperationException(s"Unsupported input partition: $partition.")
@@ -209,7 +211,6 @@ class CHIteratorApi extends IteratorApi with Logging with LogLevelUtil {
209211
split match {
210212
case filesNode: LocalFilesNode =>
211213
setFileSchemaForLocalFiles(filesNode, scans(i))
212-
filesNode.setFileReadProperties(mapAsJavaMap(scans(i).getProperties))
213214
filesNode.getPaths.forEach(f => files += f)
214215
filesNode.toProtobuf.toByteArray
215216
case extensionTableNode: ExtensionTableNode =>

backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala

+5-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ class VeloxIteratorApi extends IteratorApi with Logging {
5454
partition: InputPartition,
5555
partitionSchema: StructType,
5656
fileFormat: ReadFileFormat,
57-
metadataColumnNames: Seq[String]): SplitInfo = {
57+
metadataColumnNames: Seq[String],
58+
properties: Map[String, String]): SplitInfo = {
5859
partition match {
5960
case f: FilePartition =>
6061
val (
@@ -78,7 +79,9 @@ class VeloxIteratorApi extends IteratorApi with Logging {
7879
partitionColumns,
7980
metadataColumns,
8081
fileFormat,
81-
preferredLocations.toList.asJava)
82+
preferredLocations.toList.asJava,
83+
mapAsJavaMap(properties)
84+
)
8285
case _ =>
8386
throw new UnsupportedOperationException(s"Unsupported input partition.")
8487
}

cpp/core/jni/JniCommon.h

+21-8
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,10 @@ static inline gluten::CompressionMode getCompressionMode(JNIEnv* env, jstring co
362362
}
363363
}
364364

365+
/*
366+
NOTE: the class must be thread safe
367+
*/
368+
365369
class SparkAllocationListener final : public gluten::AllocationListener {
366370
public:
367371
SparkAllocationListener(JavaVM* vm, jobject jListenerLocalRef, jmethodID jReserveMethod, jmethodID jUnreserveMethod)
@@ -399,25 +403,34 @@ class SparkAllocationListener final : public gluten::AllocationListener {
399403
env->CallLongMethod(jListenerGlobalRef_, jReserveMethod_, size);
400404
checkException(env);
401405
}
402-
bytesReserved_ += size;
403-
maxBytesReserved_ = std::max(bytesReserved_, maxBytesReserved_);
406+
usedBytes_ += size;
407+
while (true) {
408+
int64_t savedPeakBytes = peakBytes_;
409+
if (usedBytes_ <= savedPeakBytes) {
410+
break;
411+
}
412+
// usedBytes_ > savedPeakBytes, update peak
413+
if (peakBytes_.compare_exchange_weak(savedPeakBytes, usedBytes_)) {
414+
break;
415+
}
416+
}
404417
}
405418

406419
int64_t currentBytes() override {
407-
return bytesReserved_;
420+
return usedBytes_;
408421
}
409422

410423
int64_t peakBytes() override {
411-
return maxBytesReserved_;
424+
return peakBytes_;
412425
}
413426

414427
private:
415428
JavaVM* vm_;
416429
jobject jListenerGlobalRef_;
417-
jmethodID jReserveMethod_;
418-
jmethodID jUnreserveMethod_;
419-
int64_t bytesReserved_ = 0L;
420-
int64_t maxBytesReserved_ = 0L;
430+
const jmethodID jReserveMethod_;
431+
const jmethodID jUnreserveMethod_;
432+
std::atomic_int64_t usedBytes_{0L};
433+
std::atomic_int64_t peakBytes_{0L};
421434
};
422435

423436
class BacktraceAllocationListener final : public gluten::AllocationListener {

cpp/core/memory/AllocationListener.h

+31-19
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <algorithm>
2121
#include <memory>
22+
#include <mutex>
2223

2324
namespace gluten {
2425

@@ -46,29 +47,21 @@ class AllocationListener {
4647
};
4748

4849
/// Memory changes are rounded to the specified block size, which aims to reduce the number of calls to the delegated listener.
50+
// The class must be thread safe
4951
class BlockAllocationListener final : public AllocationListener {
5052
public:
51-
BlockAllocationListener(AllocationListener* delegated, uint64_t blockSize)
53+
BlockAllocationListener(AllocationListener* delegated, int64_t blockSize)
5254
: delegated_(delegated), blockSize_(blockSize) {}
5355

5456
void allocationChanged(int64_t diff) override {
5557
if (diff == 0) {
5658
return;
5759
}
58-
if (diff > 0) {
59-
if (reservationBytes_ - usedBytes_ < diff) {
60-
auto roundSize = (diff + (blockSize_ - 1)) / blockSize_ * blockSize_;
61-
delegated_->allocationChanged(roundSize);
62-
reservationBytes_ += roundSize;
63-
peakBytes_ = std::max(peakBytes_, reservationBytes_);
64-
}
65-
usedBytes_ += diff;
66-
} else {
67-
usedBytes_ += diff;
68-
auto unreservedSize = (reservationBytes_ - usedBytes_) / blockSize_ * blockSize_;
69-
delegated_->allocationChanged(-unreservedSize);
70-
reservationBytes_ -= unreservedSize;
60+
int64_t granted = reserve(diff);
61+
if (granted == 0) {
62+
return;
7163
}
64+
delegated_->allocationChanged(granted);
7265
}
7366

7467
int64_t currentBytes() override {
@@ -80,11 +73,30 @@ class BlockAllocationListener final : public AllocationListener {
8073
}
8174

8275
private:
83-
AllocationListener* delegated_;
84-
uint64_t blockSize_{0L};
85-
uint64_t usedBytes_{0L};
86-
uint64_t peakBytes_{0L};
87-
uint64_t reservationBytes_{0L};
76+
inline int64_t reserve(int64_t diff) {
77+
std::lock_guard<std::mutex> lock(mutex_);
78+
usedBytes_ += diff;
79+
int64_t newBlockCount;
80+
if (usedBytes_ == 0) {
81+
newBlockCount = 0;
82+
} else {
83+
// ceil to get the required block number
84+
newBlockCount = (usedBytes_ - 1) / blockSize_ + 1;
85+
}
86+
int64_t bytesGranted = (newBlockCount - blocksReserved_) * blockSize_;
87+
blocksReserved_ = newBlockCount;
88+
peakBytes_ = std::max(peakBytes_, usedBytes_);
89+
return bytesGranted;
90+
}
91+
92+
AllocationListener* const delegated_;
93+
const uint64_t blockSize_;
94+
int64_t blocksReserved_{0L};
95+
int64_t usedBytes_{0L};
96+
int64_t peakBytes_{0L};
97+
int64_t reservationBytes_{0L};
98+
99+
mutable std::mutex mutex_;
88100
};
89101

90102
} // namespace gluten

cpp/core/memory/MemoryAllocator.cc

+10-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,16 @@ int64_t ListenableMemoryAllocator::peakBytes() const {
9393
void ListenableMemoryAllocator::updateUsage(int64_t size) {
9494
listener_->allocationChanged(size);
9595
usedBytes_ += size;
96-
peakBytes_ = std::max(peakBytes_, usedBytes_);
96+
while (true) {
97+
int64_t savedPeakBytes = peakBytes_;
98+
if (usedBytes_ <= savedPeakBytes) {
99+
break;
100+
}
101+
// usedBytes_ > savedPeakBytes, update peak
102+
if (peakBytes_.compare_exchange_weak(savedPeakBytes, usedBytes_)) {
103+
break;
104+
}
105+
}
97106
}
98107

99108
bool StdMemoryAllocator::allocate(int64_t size, void** out) {

cpp/core/memory/MemoryAllocator.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class MemoryAllocator {
4545
virtual int64_t peakBytes() const = 0;
4646
};
4747

48+
// The class must be thread safe
4849
class ListenableMemoryAllocator final : public MemoryAllocator {
4950
public:
5051
explicit ListenableMemoryAllocator(MemoryAllocator* delegated, AllocationListener* listener)
@@ -69,10 +70,10 @@ class ListenableMemoryAllocator final : public MemoryAllocator {
6970

7071
private:
7172
void updateUsage(int64_t size);
72-
MemoryAllocator* delegated_;
73-
AllocationListener* listener_;
74-
uint64_t usedBytes_{0L};
75-
uint64_t peakBytes_{0L};
73+
MemoryAllocator* const delegated_;
74+
AllocationListener* const listener_;
75+
std::atomic_int64_t usedBytes_{0L};
76+
std::atomic_int64_t peakBytes_{0L};
7677
};
7778

7879
class StdMemoryAllocator final : public MemoryAllocator {

cpp/velox/compute/WholeStageResultIterator.cc

+12-22
Original file line numberDiff line numberDiff line change
@@ -210,28 +210,25 @@ std::shared_ptr<ColumnarBatch> WholeStageResultIterator::next() {
210210
}
211211

212212
namespace {
213-
class ConditionalSuspendedSection {
213+
class SuspendedSection {
214214
public:
215-
ConditionalSuspendedSection(velox::exec::Driver* driver, bool condition) {
216-
if (condition) {
217-
section_ = new velox::exec::SuspendedSection(driver);
218-
}
215+
SuspendedSection() {
216+
reclaimer_->enterArbitration();
219217
}
220218

221-
virtual ~ConditionalSuspendedSection() {
222-
if (section_) {
223-
delete section_;
224-
}
219+
virtual ~SuspendedSection() {
220+
reclaimer_->leaveArbitration();
225221
}
226222

227223
// non-copyable and non-movable
228-
ConditionalSuspendedSection(const ConditionalSuspendedSection&) = delete;
229-
ConditionalSuspendedSection(ConditionalSuspendedSection&&) = delete;
230-
ConditionalSuspendedSection& operator=(const ConditionalSuspendedSection&) = delete;
231-
ConditionalSuspendedSection& operator=(ConditionalSuspendedSection&&) = delete;
224+
SuspendedSection(const SuspendedSection&) = delete;
225+
SuspendedSection(SuspendedSection&&) = delete;
226+
SuspendedSection& operator=(const SuspendedSection&) = delete;
227+
SuspendedSection& operator=(SuspendedSection&&) = delete;
232228

233229
private:
234-
velox::exec::SuspendedSection* section_ = nullptr;
230+
// We only use suspension APIs in exec::MemoryReclaimer.
231+
std::unique_ptr<velox::memory::MemoryReclaimer> reclaimer_{velox::exec::MemoryReclaimer::create()};
235232
};
236233
} // namespace
237234

@@ -244,15 +241,8 @@ int64_t WholeStageResultIterator::spillFixedSize(int64_t size) {
244241
if (spillStrategy_ == "auto") {
245242
int64_t remaining = size - shrunken;
246243
LOG(INFO) << logPrefix << "Trying to request spilling for " << remaining << " bytes...";
247-
// if we are on one of the driver of the spilled task, suspend it
248-
velox::exec::Driver* thisDriver = nullptr;
249-
task_->testingVisitDrivers([&](velox::exec::Driver* driver) {
250-
if (driver->isOnThread()) {
251-
thisDriver = driver;
252-
}
253-
});
254244
// suspend the driver when we are on it
255-
ConditionalSuspendedSection noCancel(thisDriver, thisDriver != nullptr);
245+
SuspendedSection suspender;
256246
velox::exec::MemoryReclaimer::Stats status;
257247
auto* mm = memoryManager_->getMemoryManager();
258248
uint64_t spilledOut = mm->arbitrator()->shrinkCapacity({pool}, remaining); // this conducts spilling

cpp/velox/memory/VeloxMemoryManager.h

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
namespace gluten {
2727

28+
// Make sure the class is thread safe
2829
class VeloxMemoryManager final : public MemoryManager {
2930
public:
3031
VeloxMemoryManager(std::unique_ptr<AllocationListener> listener);

gluten-core/src/main/java/org/apache/gluten/substrait/rel/LocalFilesBuilder.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ public static LocalFilesNode makeLocalFiles(
3232
List<Map<String, String>> partitionColumns,
3333
List<Map<String, String>> metadataColumns,
3434
LocalFilesNode.ReadFileFormat fileFormat,
35-
List<String> preferredLocations) {
35+
List<String> preferredLocations,
36+
Map<String, String> properties) {
3637
return new LocalFilesNode(
3738
index,
3839
paths,
@@ -43,7 +44,8 @@ public static LocalFilesNode makeLocalFiles(
4344
partitionColumns,
4445
metadataColumns,
4546
fileFormat,
46-
preferredLocations);
47+
preferredLocations,
48+
properties);
4749
}
4850

4951
public static LocalFilesNode makeLocalFiles(String iterPath) {

gluten-core/src/main/java/org/apache/gluten/substrait/rel/LocalFilesNode.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ public enum ReadFileFormat {
6767
List<Map<String, String>> partitionColumns,
6868
List<Map<String, String>> metadataColumns,
6969
ReadFileFormat fileFormat,
70-
List<String> preferredLocations) {
70+
List<String> preferredLocations,
71+
Map<String, String> properties) {
7172
this.index = index;
7273
this.paths.addAll(paths);
7374
this.starts.addAll(starts);
@@ -78,6 +79,7 @@ public enum ReadFileFormat {
7879
this.partitionColumns.addAll(partitionColumns);
7980
this.metadataColumns.addAll(metadataColumns);
8081
this.preferredLocations.addAll(preferredLocations);
82+
this.fileReadProperties = properties;
8183
}
8284

8385
LocalFilesNode(String iterPath) {
@@ -109,10 +111,6 @@ private NamedStruct buildNamedStruct() {
109111
return namedStructBuilder.build();
110112
}
111113

112-
public void setFileReadProperties(Map<String, String> fileReadProperties) {
113-
this.fileReadProperties = fileReadProperties;
114-
}
115-
116114
@Override
117115
public List<String> preferredLocations() {
118116
return this.preferredLocations;

gluten-core/src/main/scala/org/apache/gluten/backendsapi/IteratorApi.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ trait IteratorApi {
3636
partition: InputPartition,
3737
partitionSchema: StructType,
3838
fileFormat: ReadFileFormat,
39-
metadataColumnNames: Seq[String]): SplitInfo
39+
metadataColumnNames: Seq[String],
40+
properties: Map[String, String]): SplitInfo
4041

4142
/** Generate native row partition. */
4243
def genPartitions(

gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala

+6-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,12 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource
7070
def getSplitInfosFromPartitions(partitions: Seq[InputPartition]): Seq[SplitInfo] = {
7171
partitions.map(
7272
BackendsApiManager.getIteratorApiInstance
73-
.genSplitInfo(_, getPartitionSchema, fileFormat, getMetadataColumns.map(_.name)))
73+
.genSplitInfo(
74+
_,
75+
getPartitionSchema,
76+
fileFormat,
77+
getMetadataColumns.map(_.name),
78+
getProperties))
7479
}
7580

7681
override protected def doValidateInternal(): ValidationResult = {

gluten-iceberg/src/main/java/org/apache/gluten/substrait/rel/IcebergLocalFilesNode.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.iceberg.DeleteFile;
2323

2424
import java.util.ArrayList;
25+
import java.util.HashMap;
2526
import java.util.List;
2627
import java.util.Map;
2728

@@ -47,7 +48,8 @@ public class IcebergLocalFilesNode extends LocalFilesNode {
4748
partitionColumns,
4849
new ArrayList<>(),
4950
fileFormat,
50-
preferredLocations);
51+
preferredLocations,
52+
new HashMap<>());
5153
this.deleteFilesList = deleteFilesList;
5254
}
5355

0 commit comments

Comments
 (0)