Skip to content

Commit f87dc12

Browse files
cota and copybara-github
authored and committed
[xla:runtime] Do not duplicate single-use argument loads out of the entry block
Duplicating these loads is effectively deoptimizing hoisted loads out of loops. Removing the duplication allows XLA Runtime to achieve performance similar to XLA:CPU Classic in math microbenchmarks. The duplication was originally introduced to reduce the size of the entry block, which was causing overly long compilation times. It is likely that this problem has now been fixed elsewhere, making the deoptimization unnecessary.

PiperOrigin-RevId: 508392023
1 parent 8b1bfed commit f87dc12

File tree

1 file changed

+3
-32
lines changed

1 file changed

+3
-32
lines changed

xla/runtime/execution_engine.cc

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,7 @@ static absl::Status SetUpExportedFunction(llvm::Module &module,
130130
bb->insertInto(callee);
131131
builder.SetInsertPoint(bb);
132132

133-
// We collect all load instructions that load arguments from a single pointer,
134-
// and duplicate them into the basic blocks where the value is used. We do it
135-
// to avoid creating massive entry block with potentially tens of thousands of
136-
// loads, which puts a lot of pressure on instruction scheduling.
137-
//
138-
// TODO(ezhulenev): Currently we do it only for loads with a single use, we
139-
// should consider doing it for all loads with small number of uses.
140-
llvm::SmallVector<std::pair<llvm::LoadInst *, llvm::LoadInst *>> args;
133+
llvm::SmallVector<llvm::Value *> args;
141134
args.reserve(llvm::size(func->args()));
142135

143136
for (auto &indexed_arg : llvm::enumerate(func->args())) {
@@ -149,13 +142,11 @@ static absl::Status SetUpExportedFunction(llvm::Module &module,
149142
builder.CreateLoad(builder.getPtrTy(), arg_ptr_gep);
150143
llvm::LoadInst *arg_load = builder.CreateLoad(art_ty, arg_ptr_load);
151144

152-
args.emplace_back(arg_ptr_load, arg_load);
145+
args.emplace_back(arg_load);
153146
}
154147

155148
// Call the implementation function with the extracted arguments.
156-
llvm::SmallVector<llvm::Value *> args_values;
157-
for (auto &[_, arg] : args) args_values.push_back(arg);
158-
auto *call = builder.CreateCall(func, args_values);
149+
auto *call = builder.CreateCall(func, args);
159150
builder.CreateRetVoid();
160151

161152
// Make sure that we do not keep exported function in the binary if we do not
@@ -174,26 +165,6 @@ static absl::Status SetUpExportedFunction(llvm::Module &module,
174165
if (is_coro) callee->setPresplitCoroutine();
175166
}
176167

177-
// Clean up loads from the packed argument pointer.
178-
for (auto &[ptr_load, arg_load] : args) {
179-
// Dead argument elimination after inlining.
180-
if (arg_load->use_empty()) {
181-
arg_load->eraseFromParent();
182-
ptr_load->eraseFromParent();
183-
continue;
184-
}
185-
186-
// Move loads used only once into the entry block where they are used.
187-
if (!arg_load->hasOneUser()) continue;
188-
189-
for (llvm::User *user : arg_load->users()) {
190-
auto *inst = cast<llvm::Instruction>(user);
191-
if (llvm::isa<llvm::PHINode>(inst)) continue;
192-
arg_load->moveBefore(inst);
193-
ptr_load->moveBefore(arg_load);
194-
}
195-
}
196-
197168
// Always keep the frame pointer inside jit-compiled modules, so that we can
198169
// correctly walk the stack when collecting profiles at run time.
199170
for (llvm::Function &fn : module.functions()) {

0 commit comments

Comments
 (0)