// NOTE(review): This text is a unified diff (git patch) against three files --
// src/jitlayers.cpp, src/jitlayers.h and src/julia_internal.h -- whose line
// structure has been collapsed onto four physical lines. Text that would sit
// between angle brackets (the header names after `#include`, the parameter
// list after `template`, the arguments of `Expected`, `std::lock_guard`,
// `std::make_unique`, `StringSet`, ...) appears to be missing; presumably it
// was stripped during extraction -- confirm against the upstream commit
// before trying to apply this patch.
//
// What the visible hunks on the first physical line change in jitlayers.cpp:
//  * JITPointersT constructor: no longer takes an orc::ExecutionSession;
//    it now stores references to a SharedBytesT and a std::mutex.
//  * JITPointersT::operator(): inside the withModuleDo callback a
//    std::lock_guard named `locked` is constructed on `Lock` before the loop
//    over M.globals() that calls getSharedBytes(GV).
//  * A `private:` label is added ahead of getSharedBytes, and the comment
//    describing memoization through the ExecutionSession's string pool is
//    removed (its replacement is on the next physical line).
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index a22a5d8a6a2647..2de54559033756 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1410,10 +1410,12 @@ namespace { struct JITPointersT { - JITPointersT(orc::ExecutionSession &ES) JL_NOTSAFEPOINT : ES(ES) {} + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); for (auto &GV : make_early_inc_range(M.globals())) { if (auto *Shared = getSharedBytes(GV)) { ++InternedGlobals; @@ -1429,10 +1431,11 @@ namespace { return std::move(TSM); } + private: // optimize memory by turning long strings into memoized copies, instead of // making a copy per object file of output. - // we memoize them using the ExecutionSession's string pool; - // this makes it unsafe to call clearDeadEntries() on the pool. 
// Second physical line: remainder of the jitlayers.cpp diff, then the start
// of the jitlayers.h diff.
//  * getSharedBytes: `*ES.intern(Data)` is replaced by
//    `SharedBytes.insert(Data).first->getKey()`. Before inserting, it returns
//    nullptr when the global's alignment (GV.getAlign().valueOrOne()) is
//    greater than MaxAlignedAlloc::alignment(Data.size()); after inserting,
//    it asserts that Interned.data() is aligned to that same value.
//  * JITPointersT data members: `orc::ExecutionSession &ES` is replaced by
//    `SharedBytesT &SharedBytes` and `std::mutex &Lock`.
//  * JuliaOJIT::JuliaOJIT(): the JITPointersLayer transform is now built from
//    JITPointersT(SharedBytes, RLST_mutex) instead of JITPointersT(ES).
//  * jitlayers.h: two `#include` lines are added (header names not visible
//    here), followed by the leading context of the hunk that introduces
//    MaxAlignedAllocImpl.
+ // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { // We could probably technically get away with // interning even external linkage globals, @@ -1458,11 +1461,17 @@ namespace { // Cutoff, since we don't want to intern small strings return nullptr; } - auto Interned = *ES.intern(Data); + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); return literal_static_pointer_val(Interned.data(), GV.getType()); } - orc::ExecutionSession &ES; + SharedBytesT &SharedBytes; + std::mutex &Lock; }; } @@ -1696,7 +1705,7 @@ JuliaOJIT::JuliaOJIT() #endif LockLayer(ObjectLayer), CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), - JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(ES))), + JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))), OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers))), OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)), DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)), diff --git a/src/jitlayers.h b/src/jitlayers.h index 016f97b92140b6..f11fe9fc9a06fb 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -1,6 +1,8 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include +#include +#include #include #include @@ -292,6 +294,44 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL return jl_is_method(li->def.method) ? 
// Third physical line: the body of the jitlayers.h hunk that adds the
// MaxAlignedAllocImpl allocator class (derived from AllocatorBase) and the
// alias `MaxAlignedAlloc = MaxAlignedAllocImpl<>`.
//  * MaxAlignment is set to JL_CACHE_BYTE_ALIGNMENT.
//  * alignment(Size) returns Align(min(PowerOf2Ceil(Size), MaxAlignment)).
//  * Allocate(Size, Align) computes MaxAlign = alignment(Size), asserts the
//    requested Alignment is strictly less than MaxAlign, and returns
//    jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset). The size_t overload
//    forwards to it via Align(Alignment).
//  * Deallocate(...) calls abort().
//  * `offset` has no visible declaration; presumably it is the template
//    parameter whose list was stripped -- TODO confirm.
// NOTE(review): the assert uses `<`, so a request whose alignment equals
// alignment(Size) would trip it in assert-enabled builds -- confirm whether
// the strict comparison is intended (it may be, given the `offset` argument).
jl_symbol_name(li->def.method->name) : "top-level scope"; } +template +class MaxAlignedAllocImpl + : public AllocatorBase> { + // Define the maximum alignment we expect to require, from offset bytes off + // the returned pointer, this is >= alignof(std::max_align_t), which is too + // small often to actually use. + static const size_t MaxAlignment = JL_CACHE_BYTE_ALIGNMENT; + +public: + MaxAlignedAllocImpl() JL_NOTSAFEPOINT = default; + + static Align alignment(size_t Size) JL_NOTSAFEPOINT { + return Align(std::min((size_t)llvm::PowerOf2Ceil(Size), MaxAlignment)); + } + + LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) { + Align MaxAlign = alignment(Size); + assert(Alignment < MaxAlign); (void)Alignment; + return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); + } + + inline LLVM_ATTRIBUTE_RETURNS_NONNULL + void * Allocate(size_t Size, size_t Alignment) { + return Allocate(Size, Align(Alignment)); + } + + // Pull in base class overloads. + using AllocatorBase::Allocate; + + void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { abort(); } + + // Pull in base class overloads. + using AllocatorBase::Deallocate; + +private: +}; +using MaxAlignedAlloc = MaxAlignedAllocImpl<>; + typedef JITSymbol JL_JITSymbol; // The type that is similar to SymbolInfo on LLVM 4.0 is actually // `JITEvaluatedSymbol`. 
// Fourth physical line: the remaining jitlayers.h hunks and the
// julia_internal.h diff.
//  * Adds the alias SharedBytesT, a StringSet instantiation that mentions
//    `::MapEntryTy` (its template arguments are not fully visible here).
//  * Adds the data member `SharedBytesT SharedBytes;` to class JuliaOJIT,
//    after ReverseLocalSymbolTable.
//  * julia_internal.h: the declaration of jl_gc_perm_alloc gains the
//    JL_DLLEXPORT prefix; its parameters and JL_NOTSAFEPOINT annotation are
//    unchanged.
However, we only use this type when a JITSymbol @@ -300,6 +340,7 @@ typedef JITSymbol JL_SymbolInfo; using CompilerResultT = Expected>; using OptimizerResultT = Expected; +using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { public: @@ -516,6 +557,7 @@ class JuliaOJIT { // Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls void optimizeDLSyms(Module &M); + private: const std::unique_ptr TM; @@ -529,6 +571,7 @@ class JuliaOJIT { std::mutex RLST_mutex{}; int RLST_inc = 0; DenseMap ReverseLocalSymbolTable; + SharedBytesT SharedBytes; std::unique_ptr DLSymOpt; diff --git a/src/julia_internal.h b/src/julia_internal.h index ad3e46cd6307e8..c09b73f8a1052b 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -349,7 +349,7 @@ JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void *jl_gc_perm_alloc(size_t sz, int zero, +JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; void gc_sweep_sysimg(void);