Skip to content

Commit b66ec85

Browse files
author
xiaying
committed
OpenCL:Bugfix: Fix recordable bug for llm
1 parent 90671dc commit b66ec85

File tree

4 files changed

+38
-16
lines changed

4 files changed

+38
-16
lines changed

source/backend/opencl/core/OpenCLBackend.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ Backend::MemObj* OpenCLBackend::onAcquire(const Tensor* nativeTensor, StorageTyp
466466
}
467467

468468
bool OpenCLBackend::onSelectDynamicAllocator(int index, int maxIndex) {
469-
if (mUseRecordQueue && false == mDevideOpRecord){
469+
if (mUseRecordQueue && false == mDeviceOpRecord){
470470
return false;
471471
}
472472
if (maxIndex > 2) {
@@ -525,7 +525,7 @@ Execution* OpenCLBackend::onCreate(const std::vector<Tensor*>& inputs, const std
525525
return NULL;
526526
}
527527
if (iter == creators->end()) {
528-
mDevideOpRecord = true;
528+
mDeviceOpRecord = true;
529529
#ifdef OPENCL_FALLBACK_LOG
530530
if (nullptr != op->name()) {
531531
MNN_PRINT("Don't support type %s memObject:%d, %s\n", EnumNameOpType(op->type()), mMemType, op->name()->c_str());
@@ -565,7 +565,7 @@ Execution* OpenCLBackend::onCreate(const std::vector<Tensor*>& inputs, const std
565565
}
566566

567567
if (!valid) {
568-
mDevideOpRecord = true;
568+
mDeviceOpRecord = true;
569569
#ifdef OPENCL_FALLBACK_LOG
570570
for (auto t : inputs) {
571571
auto tensorShape = OpenCL::tensorShapeFormat(t);
@@ -589,7 +589,7 @@ Execution* OpenCLBackend::onCreate(const std::vector<Tensor*>& inputs, const std
589589

590590
auto exe = iter->second->onCreate(inputs, outputs, op, this);
591591
if (NULL == exe) {
592-
mDevideOpRecord = true;
592+
mDeviceOpRecord = true;
593593
#ifdef OPENCL_FALLBACK_LOG
594594
if (nullptr != op->name()) {
595595
MNN_PRINT("The Creator Don't support type %s, memObject:%d, %s\n", MNN::EnumNameOpType(op->type()), mMemType, op->name()->c_str());
@@ -1232,7 +1232,7 @@ int OpenCLBackend::fpBytes() {
12321232

12331233
void OpenCLBackend::clearRecord() const{
12341234
#if !defined(ENABLE_OPENCL_TIME_PROFILER) && defined(MNN_USE_LIB_WRAPPER)
1235-
if(mUseRecordQueue && mDevideOpRecord){
1235+
if(mUseRecordQueue && mDeviceOpRecord){
12361236
for(int i = 0; i < mRecordings.size(); ++i){
12371237
std::vector<cl_array_arg_qcom> update_kernel_args;
12381238
std::vector<cl_workgroup_qcom> update_global_size;
@@ -1263,7 +1263,7 @@ void OpenCLBackend::clearRecord() const{
12631263

12641264
void OpenCLBackend::enqeueRecord() const{
12651265
#if !defined(ENABLE_OPENCL_TIME_PROFILER) && defined(MNN_USE_LIB_WRAPPER)
1266-
if(mUseRecordQueue && !mDevideOpRecord){
1266+
if(mUseRecordQueue && !mDeviceOpRecord){
12671267
for(int i = 0; i < mRecordings.size(); ++i){
12681268
std::vector<cl_array_arg_qcom> update_kernel_args;
12691269
std::vector<cl_workgroup_qcom> update_global_size;
@@ -1290,7 +1290,7 @@ void OpenCLBackend::enqeueRecord() const{
12901290

12911291
void OpenCLBackend::releaseRecord(){
12921292
#if !defined(ENABLE_OPENCL_TIME_PROFILER) && defined(MNN_USE_LIB_WRAPPER)
1293-
if(mUseRecordQueue && !mDevideOpRecord){
1293+
if(mUseRecordQueue && !mDeviceOpRecord){
12941294
for(int i = 0; i < mRecordings.size(); ++i){
12951295
cl_int res = clReleaseRecordingQCOM(mRecordings[i].record);
12961296
MNN_CHECK_CL_SUCCESS(res, "clReleaseRecordingQCOM");
@@ -1309,7 +1309,7 @@ void OpenCLBackend::startRecord(cl_recording_qcom &recording){
13091309
MNN_PRINT("start startRecord !\n");
13101310
#endif
13111311
cl_int res = CL_SUCCESS;
1312-
if(mDevideOpRecord){
1312+
if(mDeviceOpRecord){
13131313
if(recording != NULL){
13141314
clReleaseRecordingQCOM(recording);
13151315
}
@@ -1330,7 +1330,7 @@ void OpenCLBackend::endRecord(cl_recording_qcom &recording, bool flag){
13301330
#ifdef LOG_VERBOSE
13311331
MNN_PRINT("start endRecord !\n");
13321332
#endif
1333-
if(mDevideOpRecord){
1333+
if(mDeviceOpRecord){
13341334
cl_int res = CL_SUCCESS;
13351335
res = clEndRecordingQCOM(recording);
13361336
MNN_CHECK_CL_SUCCESS(res, "clEndRecordingQCOM");
@@ -1349,7 +1349,7 @@ void OpenCLBackend::endRecord(cl_recording_qcom &recording, bool flag){
13491349
}
13501350

13511351
void OpenCLBackend::addRecord(cl_recording_qcom &record, std::vector<RecordUpdateInfo *>updateInfo){
1352-
if(mDevideOpRecord){
1352+
if(mDeviceOpRecord){
13531353
RecordInfo info;
13541354
info.record = record;
13551355
for(int i = 0; i < updateInfo.size(); ++i) {
@@ -1369,7 +1369,7 @@ void OpenCLBackend::recordKernel2d(const std::shared_ptr<KernelWrap> &kernelW, c
13691369
MNN_PRINT("start record2dKernel !\n");
13701370
#endif
13711371
cl_int res = CL_SUCCESS;
1372-
if(!mDevideOpRecord){
1372+
if(!mDeviceOpRecord){
13731373
RecordInfo info;
13741374
int recordNum = mRecordNums == mUseRecordableQueueSize ? 0 : mRecordNums;
13751375
if(updateInfo != nullptr){
@@ -1439,7 +1439,7 @@ void OpenCLBackend::recordKernel3d(const std::shared_ptr<KernelWrap> &kernelW, c
14391439
for (size_t i = 0; i < 3; ++i) {
14401440
internalGlobalWS[i] = ROUND_UP(gws[i], std::max((uint32_t)1, lws[i]));
14411441
}
1442-
if(!mDevideOpRecord){
1442+
if(!mDeviceOpRecord){
14431443
RecordInfo info;
14441444
int recordNum = mRecordNums == mUseRecordableQueueSize ? 0 : mRecordNums;
14451445
if(updateInfo != nullptr){
@@ -1547,12 +1547,12 @@ void OpenCLBackend::setGpuMode(const int cl_mode_num) {
15471547
mUseRecordQueue = ((cl_mode_num & MNN_GPU_RECORD_OP) || (cl_mode_num & MNN_GPU_RECORD_BATCH)) && mOpenCLRuntime->isSupportRecordQueue() && (mUseRecordableQueueSize > 0);
15481548
isSet = (cl_mode_num & MNN_GPU_RECORD_OP);
15491549
if(isSet) {
1550-
mDevideOpRecord = true;
1550+
mDeviceOpRecord = true;
15511551
totalSet++;
15521552
}
15531553
isSet = (cl_mode_num & MNN_GPU_RECORD_BATCH);
15541554
if(isSet) {
1555-
mDevideOpRecord = false;
1555+
mDeviceOpRecord = false;
15561556
totalSet++;
15571557
}
15581558
if(totalSet > 1) {

source/backend/opencl/core/OpenCLBackend.hpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ class OpenCLBackend : public Backend {
137137
return mUseRecordQueue;
138138
}
139139
bool isDevideOpRecord(){
140-
return mDevideOpRecord;
140+
return mDeviceOpRecord;
141141
}
142142
CLTuneLevel getCLTuneLevel() {
143143
return mTuneLevel;
@@ -185,7 +185,8 @@ class OpenCLBackend : public Backend {
185185
bool mIsCreateError{false};
186186
mutable std::vector<RecordInfo> mRecordings;
187187
bool mUseRecordQueue = false;
188-
bool mDevideOpRecord = false;
188+
bool mDeviceOpRecord = false;
189+
friend class setRecordClose;
189190
uint32_t mRecordNums = 0;
190191
uint32_t mUseRecordableQueueSize;
191192
private:
@@ -200,6 +201,25 @@ class OpenCLBackend : public Backend {
200201

201202
};
202203

204+
class setRecordClose{
205+
public:
206+
setRecordClose(OpenCLBackend *bn){
207+
backend = bn;
208+
if(backend->mUseRecordQueue){
209+
backend->mUseRecordQueue = false;
210+
needRecover = true;
211+
}
212+
}
213+
~setRecordClose(){
214+
if(needRecover){
215+
backend->mUseRecordQueue = true;
216+
}
217+
}
218+
private:
219+
bool needRecover = false;
220+
OpenCLBackend* backend;
221+
};
222+
203223
template <class T>
204224
class OpenCLCreatorRegister {
205225
public:

source/backend/opencl/execution/buffer/AttentionBufExecution.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1575,6 +1575,7 @@ ErrorCode AttentionBufExecution::onResize(const std::vector<Tensor *> &inputs, c
15751575
mLongPrefill = tuneInfo.first[0];
15761576
} else{
15771577
if (mOpenCLBackend->getCLTuneLevel() == Heavy || mOpenCLBackend->getCLTuneLevel() == Wide){
1578+
setRecordClose closeRecord(mOpenCLBackend);
15781579
// tuning: choose which prefill to use
15791580
prefillResize(inputs, outputs);
15801581
auto shortPrefillTime = getExecuteTime();

source/backend/opencl/execution/buffer/ConvBufLowMemoryExecution.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,7 @@ ErrorCode ConvBufLowMemoryExecution::onResize(const std::vector<Tensor *> &input
10081008
mUseFPWeight = tuneInfo.first[0];
10091009
} else{
10101010
if((mOpenCLBackend->getCLTuneLevel() == Heavy || mOpenCLBackend->getCLTuneLevel() == Wide)){
1011+
setRecordClose closeRecord(mOpenCLBackend);
10111012
tuneGemmLowMemory(input, output);
10121013
auto shortBatchTime = getExecuteTime();
10131014
mUseFPWeight = true;

0 commit comments

Comments
 (0)