diff --git a/README.md b/README.md index 39f01ab..ad1c523 100755 --- a/README.md +++ b/README.md @@ -5,18 +5,22 @@ trained to any project. The full_workflow.bash will autodetect, if your machine supports LBR or not and choose the correct script which suits to your hardware. -## How to build +## LLVM - git clone https://github.com/ptr1337/llvm-bolt-scripts.git - cd llvm-bolt-scripts - ./full_workflow.bash +### How to build + +Be sure to have jemalloc installed; it is used to improve llvm-bolt's memory handling. + +    git clone https://github.com/ptr1337/llvm-bolt-scripts.git +    cd llvm-bolt-scripts +    ./full_workflow.bash This sequence will give you (hopefully) a faster LLVM toolchain. Technologies used: -- LLVM Link Time Optimization (LTO) -- Binary Instrumentation and Profile-Guided-Optimization (PGO) -- perf-measurement and branch-sampling/profiling and final binary reordering (BOLT) +- LLVM Link Time Optimization (LTO) +- Binary Instrumentation and Profile-Guided-Optimization (PGO) +- perf-measurement and branch-sampling/profiling and final binary reordering (BOLT) The goal of the techniques is to utilize the CPU black magic better and layout the code in a way, that allows faster execution. @@ -25,4 +29,43 @@ Measure performance gains and evaluate if its worth the hazzle :) You can experiment with technologies, maybe `ThinLTO` is better then `FullLTO`, For the last bit of performance, you can run several different workloads and then merge the resulted profiles with 'merge-fdata \*.fdata > combined.fdata' and then optimize the libary with llvm-bolt again. - and nothing else! The same goes for `BOLT`. +and nothing else! The same goes for `BOLT`. + +## GCC + +If you want to bolt gcc, you need to disable the `lto` language when building gcc; you can still use gcc's lto function, but gcc itself won't be built with lto. Enabling lto will crash llvm-bolt. 
+ +You also need to add the following to your compile flags: + +``` +CXXFLAGS+="-fno-reorder-blocks-and-partition" +LDFLAGS+="--emit-relocs" +``` + +These compile flags should be used for every binary you compile and want to optimize with bolt. + +### Bolting other binaries/\*.so files + +I've included a script which makes it possible to bolt any binary or .so file that was compiled with --emit-relocs. +Just change the binary name, the path and the STAGE number to suit your setup. + +After you have run stage 2 (instrumentation), you need to run a workload with the instrumented binary/.so file. + +While the workload is running, you will see that many profiles are created in the FDATA path. These are merged in stage 3, and in stage 4 bolt uses the merged profile to optimize your binary/.so file. + +#### Example: + +We will take llvm as an example: + + - Compile it with relocations (LDFLAGS+="--emit-relocs") enabled + - Install your package + - Change BINARY=libLLVM.so and BINARYPATH=/usr/lib to match the target you want to optimize and set STAGE=2 + - Run the script + - After it has run, your file is backed up and the instrumented target is copied to the original path + - Run a workload with the target, e.g. compile something with clang + - While the workload runs, several files are written to the FDATA path. !!! ATTENTION !!! The size of the data can get quite big, so keep an eye on the folder + - After you're done with the workload, set STAGE=3 in the script and run it again to merge the data created by the instrumentation + - Then set STAGE=4 and run the script once more so that llvm-bolt uses the merged profile to optimize the target + - After that it will automatically copy the result over your system binary/library; a backup and the bolted binary can be found in the BOLTBIN path. + - That's it, now repeat the workflow for other targets you want to optimize. 
+ - Tip: if you for example instrumented libLLVM the profile is also useable for other llvm based files which where active in the recording process
diff --git a/bolt-anything.bash b/bolt-anything.bash
new file mode 100755
index 0000000..6f7456e
--- /dev/null
+++ b/bolt-anything.bash
@@ -0,0 +1,149 @@
+#!/bin/bash
+## Instrument and optimize an arbitrary binary or .so file with llvm-bolt; set STAGE below and run the script once per stage
+## STAGE 1 = build llvm-bolt
+## STAGE 2 = Instrument binary to run a workload with it to gather profiles for optimizing
+## STAGE 3 = Merge the created data into one file and remove the no longer needed data; see llvm-bolt issue https://github.com/llvm/llvm-project/issues/56209
+## STAGE 4 = Optimize the binary with the created profile
+STAGE=
+
+## File or binary you want to instrument and then bolt (can be overridden from the environment)
+: "${BINARY:=libLLVM-14.so}"
+
+## PATH to the target
+BINARYPATH=/usr/lib
+
+## PATH where llvm-bolt is
+BOLTPATH=~/toolchain/llvm/llvm-bolt/bin
+
+## BASEDIR for data
+TOPLEV=~/toolchain/bolt
+
+## Holds the optimized binaries, the merged fdata and the backup of your original binary/file
+BOLTBIN=${TOPLEV}/bin
+
+## PATH FOR INSTRUMENTED DATA
+## Use a separate PATH for it since instrumentation creates a lot of files
+FDATA=${TOPLEV}/fdata
+
+
+################################################################
+################################################################
+################################################################
+################################################################
+
+
+create_path() {
+    ## Create the directories for the raw profiles (FDATA) and for the bolt output (BOLTBIN)
+    mkdir -p "${FDATA}"
+    mkdir -p "${BOLTBIN}"
+}
+
+instrument() {
+    ## Write an instrumented copy of the target to BOLTBIN, back up the original, then install the instrumented copy in its place
+    echo "Instrument binary with llvm-bolt"
+    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" \
+        --instrument \
+        --instrumentation-file-append-pid \
+        --instrumentation-file="${FDATA}/${BINARY}.fdata" \
+        "${BINARYPATH}/${BINARY}" \
+        -o "${BOLTBIN}/${BINARY}" || { echo "Could not create instrumented binary" >&2; exit 1; }
+    ## Backup original file; abort if a backup fails so the original is never overwritten without one
+    sudo cp "${BINARYPATH}/${BINARY}" "${BOLTBIN}/${BINARY}.org" || exit 1
+    sudo cp "${BINARYPATH}/${BINARY}" "${BINARYPATH}/${BINARY}.org" || exit 1
+    ## Replace the original with the instrumented copy so a profile is gathered by simply using the target
+    sudo cp "${BOLTBIN}/${BINARY}" "${BINARYPATH}/${BINARY}"
+}
+
+merge_fdata() {
+    ## Merge all per-process profiles into one file; the raw profiles are only removed after a successful merge
+    echo "Merging generated profiles"
+    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/merge-fdata" "${FDATA}/${BINARY}"*.fdata > "${BOLTBIN}/${BINARY}-combined.fdata" || { echo "Could not merge fdate" >&2; exit 1; }
+    ## Removing not needed bloated fdata
+    rm -rf "${FDATA}/${BINARY}"*.fdata
+}
+
+optimize() {
+    ## Run llvm-bolt on the backed-up original with the merged profile; the result is written to ${BOLTBIN}/${BINARY}.bolt
+    echo "Optimizing binary with generated profile"
+    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${BOLTBIN}/${BINARY}.org" \
+        --data "${BOLTBIN}/${BINARY}-combined.fdata" \
+        -o "${BOLTBIN}/${BINARY}.bolt" \
+        -split-functions \
+        -split-all-cold \
+        -split-eh \
+        -dyno-stats \
+        -reorder-functions=hfsort+ \
+        -icp-eliminate-loads \
+        -reorder-blocks=ext-tsp \
+        -icf || { echo "Could not optimize the binary" >&2; exit 1; }
+}
+
+move_binary() {
+    ## Remove leftover raw profiles and install the bolted binary over the target in BINARYPATH
+    echo "You can find now your optimzed binary at ${BOLTBIN}"
+    sudo rm -rf "${FDATA}/${BINARY}".fdata*
+    sudo cp "${BOLTBIN}/${BINARY}.bolt" "${BINARYPATH}/${BINARY}"
+}
+
+build_llvm_bolt () {
+    ## Clone llvm-project (release/15.x) and build/install clang, lld, bolt and compiler-rt into ${TOPLEV}/llvm-bolt; reassigns the global TOPLEV
+    TOPLEV=~/toolchain/llvm
+    mkdir -p "${TOPLEV}"
+    cd "${TOPLEV}" || { echo "Could not enter ${TOPLEV} directory" >&2; exit 1; }
+    git clone --depth=1 -b release/15.x https://github.com/llvm/llvm-project.git
+
+    mkdir -p stage1 || { echo "Could not create stage1 directory" >&2; exit 1; }
+    cd stage1 || { echo "Could not enter stage 1 directory" >&2; exit 1; }
+
+    echo "== Configure Build"
+    echo "== Build with system clang"
+
+    cmake -G Ninja "${TOPLEV}/llvm-project/llvm" \
+        -DLLVM_BINUTILS_INCDIR=/usr/include \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCLANG_ENABLE_ARCMT=OFF \
+        -DCLANG_ENABLE_STATIC_ANALYZER=OFF \
+        -DCLANG_PLUGIN_SUPPORT=OFF \
+        -DLLVM_ENABLE_BINDINGS=OFF \
+        -DLLVM_ENABLE_OCAMLDOC=OFF \
+        -DLLVM_INCLUDE_DOCS=OFF \
+        -DLLVM_INCLUDE_EXAMPLES=OFF \
+        -DCMAKE_C_COMPILER=clang \
+        -DCMAKE_CXX_COMPILER=clang++ \
+        -DLLVM_USE_LINKER=lld \
+        -DLLVM_ENABLE_PROJECTS="clang;lld;bolt;compiler-rt" \
+        -DLLVM_TARGETS_TO_BUILD="X86" \
+        -DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DLLVM_BUILD_UTILS=OFF \
+        -DLLVM_ENABLE_BACKTRACES=OFF \
+        -DLLVM_ENABLE_WARNINGS=OFF \
+        -DLLVM_INCLUDE_TESTS=OFF \
+        -DLLVM_ENABLE_TERMINFO=OFF \
+        -DCMAKE_INSTALL_PREFIX="${TOPLEV}/llvm-bolt" || { echo "Could not configure project!" >&2; exit 1; }
+
+    echo "== Start Build"
+    ninja install || { echo "Could not build project!" >&2; exit 1; }
+
+}
+
+## Stage 1
+if [ "${STAGE}" = 1 ]; then
+    build_llvm_bolt
+fi
+
+## Stage 2
+if [ "${STAGE}" = 2 ]; then
+    create_path
+    instrument
+fi
+
+## Stage 3
+if [ "${STAGE}" = 3 ]; then
+    merge_fdata
+fi
+## Stage 4
+if [ "${STAGE}" = 4 ]; then
+    optimize
+    move_binary
+fi
diff --git a/bolt-gcc.bash b/bolt-gcc.bash
new file mode 100755
index 0000000..c399c0b
--- /dev/null
+++ b/bolt-gcc.bash
@@ -0,0 +1,153 @@
+#!/bin/bash
+## Instrument and optimize gcc's cc1 and cc1plus with llvm-bolt; set STAGE below and run the script once per stage
+## Change here to your gcc version, you can find it with gcc -v "/usr/lib/gcc/x86_64-pc-linux-gnu/12"
+GCCVER=12
+## Base
+TOPLEV=~/toolchain/gcc
+## PATH for instrument data, when bolting without perf
+DATA=${TOPLEV}/instrument
+## GCC binary path to bolt
+GCCPATH=/usr/lib/gcc/x86_64-pc-linux-gnu/${GCCVER}
+## PATH where bolt is (the install prefix used by build_stage1.bash and bolt-anything.bash)
+BOLTPATH=~/toolchain/llvm/llvm-bolt/bin
+## Change here the path to your perf.data if you have a cpu which supports LBR
+## You need before running the script the perf.data with that command example:
+## perf record -o perf.data -e cycles:u -j any,u -- 'command to run for example: make'
+PERFDATA=/home/foo/perf.data
+## Set here the stage you want to run
+## STAGE 1 creates a instrumented binary, with that you need to run a workload to get profile data
+## Stage 2 there we use llvm-bolt top optimize the binary
+STAGE=
+
+
+mkdir -p "${DATA}/cc1"
+mkdir -p "${DATA}/cc1plus"
+
+
+## Stage 1: instrument cc1 and cc1plus and install the instrumented copies; the originals are kept as *.org
+if [ "${STAGE}" = 1 ]; then
+    echo "Instrument clang with llvm-bolt"
+
+    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" \
+        --instrument \
+        --instrumentation-file-append-pid \
+        --instrumentation-file="${DATA}/cc1/cc1.fdata" \
+        "${GCCPATH}/cc1" \
+        -o "${DATA}/cc1/cc1" || { echo "Could not create instrumented binary for cc1" >&2; exit 1; }
+
+    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" \
+        --instrument \
+        --instrumentation-file-append-pid \
+        --instrumentation-file="${DATA}/cc1plus/cc1plus.fdata" \
+        "${GCCPATH}/cc1plus" \
+        -o "${DATA}/cc1plus/cc1plus" || { echo "Could not create instrumented binary for cc1plus" >&2; exit 1; }
+    #echo "mooving instrumented binary"
+    sudo mv "${GCCPATH}/cc1" "${GCCPATH}/cc1.org"
+    sudo mv "${DATA}/cc1/cc1" "${GCCPATH}/cc1"
+    #echo "mooving instrumented binary"
+    sudo mv "${GCCPATH}/cc1plus" "${GCCPATH}/cc1plus.org"
+    sudo mv "${DATA}/cc1plus/cc1plus" "${GCCPATH}/cc1plus"
+
+    echo "Now move the binarys to the gcc path"
+    echo "now do some instrument compiles for example compiling a kernel or GCC"
+fi
+
+if [ "${STAGE}" = 2 ]; then
+    echo "Instrument clang with llvm-bolt"
+
+    ## Check if perf can sample branches (-j any,u); otherwise fall back to the profiles written by the instrumented binaries
+    perf record -e cycles:u -j any,u -- sleep 1 &>/dev/null;
+
+    if [[ $? == "0" ]]; then
+        echo "BOLTING with Profile!"
+
+        LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/perf2bolt" "${GCCPATH}/cc1.org" \
+            -p "${PERFDATA}" \
+            -o "${DATA}/cc1.fdata" || { echo "Could not convert perf-data to bolt for cc1" >&2; exit 1; }
+
+        LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/perf2bolt" "${GCCPATH}/cc1plus.org" \
+            -p "${PERFDATA}" \
+            -o "${DATA}/cc1plus.fdata" || { echo "Could not convert perf-data to bolt for cc1plus" >&2; exit 1; }
+
+        echo "Optimizing cc1 with the generated profile"
+        cd "${TOPLEV}" || exit 1
+        LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${GCCPATH}/cc1.org" \
+            --data "${DATA}/cc1.fdata" \
+            -o "${TOPLEV}/cc1" \
+            -split-functions \
+            -split-all-cold \
+            -icf=1 \
+            -lite=1 \
+            -split-eh \
+            -use-gnu-stack \
+            -jump-tables=move \
+            -dyno-stats \
+            -reorder-functions=hfsort+ \
+            -reorder-blocks=ext-tsp \
+            -tail-duplication=cache || { echo "Could not optimize binary for cc1" >&2; exit 1; }
+
+        cd "${TOPLEV}" || exit 1
+        LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${GCCPATH}/cc1plus.org" \
+            --data "${DATA}/cc1plus.fdata" \
+            -o "${TOPLEV}/cc1plus" \
+            -split-functions \
+            -split-all-cold \
+            -icf=1 \
+            -lite=1 \
+            -split-eh \
+            -use-gnu-stack \
+            -jump-tables=move \
+            -dyno-stats \
+            -reorder-functions=hfsort+ \
+            -reorder-blocks=ext-tsp \
+            -tail-duplication=cache || { echo "Could not optimize binary for cc1plus" >&2; exit 1; }
+    else
+        echo "Merging generated profiles"
+        cd "${DATA}/cc1" || exit 1
+        "${BOLTPATH}/merge-fdata" *.fdata > cc1-combined.fdata || { echo "Could not merge fdata for cc1" >&2; exit 1; }
+        cd "${DATA}/cc1plus" || exit 1
+        "${BOLTPATH}/merge-fdata" *.fdata > cc1plus-combined.fdata || { echo "Could not merge fdata for cc1plus" >&2; exit 1; }
+
+        echo "Optimizing cc1 with the generated profile"
+        cd "${TOPLEV}" || exit 1
+        LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${GCCPATH}/cc1.org" \
+            --data "${DATA}/cc1/cc1-combined.fdata" \
+            -o "${TOPLEV}/cc1" \
+            -relocs \
+            -split-functions \
+            -split-all-cold \
+            -icf=1 \
+            -lite=1 \
+            -split-eh \
+            -use-gnu-stack \
+            -jump-tables=move \
+            -dyno-stats \
+            -reorder-functions=hfsort+ \
+            -reorder-blocks=ext-tsp \
+            -tail-duplication=cache || { echo "Could not optimize binary for cc1" >&2; exit 1; }
+
+        cd "${TOPLEV}" || exit 1
+        LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${GCCPATH}/cc1plus.org" \
+            --data "${DATA}/cc1plus/cc1plus-combined.fdata" \
+            -o "${TOPLEV}/cc1plus" \
+            -relocs \
+            -split-functions \
+            -split-all-cold \
+            -icf=1 \
+            -lite=1 \
+            -split-eh \
+            -use-gnu-stack \
+            -jump-tables=move \
+            -dyno-stats \
+            -reorder-functions=hfsort+ \
+            -reorder-blocks=ext-tsp \
+            -tail-duplication=cache || { echo "Could not optimize binary for cc1plus" >&2; exit 1; }
+
+        ## NOTE(review): only this branch installs the bolted binaries; the perf branch leaves them in ${TOPLEV} - confirm this is intended
+        echo "mooving bolted binary"
+        sudo mv "${TOPLEV}/cc1plus" "${GCCPATH}/cc1plus"
+        sudo mv "${TOPLEV}/cc1" "${GCCPATH}/cc1"
+        echo "Now you can move the bolted binarys to your ${GCCPATH}"
+    fi
+
+fi
diff --git a/build_stage1.bash b/build_stage1.bash index 884c8b8..8038f8a 100755 --- a/build_stage1.bash +++ b/build_stage1.bash @@ -1,7 +1,9 @@ #!/bin/bash -export TOPLEV=~/toolchain/llvm +TOPLEV=~/toolchain/llvm cd ${TOPLEV} || (echo "Could not enter ${TOPLEV} directory"; exit 1) +mkdir -p ${TOPLEV} +git clone --depth=1 -b release/15.x 
https://github.com/llvm/llvm-project.git mkdir -p stage1 || (echo "Could not create stage1 directory"; exit 1) cd stage1 || (echo "Could not enter stage 1 directory"; exit 1) @@ -24,16 +26,14 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \ -DLLVM_USE_LINKER=lld \ -DLLVM_ENABLE_PROJECTS="clang;lld;bolt;compiler-rt" \ -DLLVM_TARGETS_TO_BUILD="X86" \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_UTILS=OFF \ -DLLVM_ENABLE_BACKTRACES=OFF \ -DLLVM_ENABLE_WARNINGS=OFF \ -DLLVM_INCLUDE_TESTS=OFF \ -DLLVM_ENABLE_TERMINFO=OFF \ - -DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage1/install || (echo "Could not configure project!"; exit 1) + -DCMAKE_INSTALL_PREFIX=${TOPLEV}/llvm-bolt || (echo "Could not configure project!"; exit 1) echo "== Start Build" -ninja || (echo "Could not build project!"; exit 1) - -echo "== Install to $(pwd)/install" -ninja install || (echo "Could not install project!"; exit 1) +ninja install || (echo "Could not build project!"; exit 1) diff --git a/build_stage2-prof-generate.bash b/build_stage2-prof-generate.bash index 45e4d15..12c6ba5 100755 --- a/build_stage2-prof-generate.bash +++ b/build_stage2-prof-generate.bash @@ -1,11 +1,11 @@ #!/bin/bash -export TOPLEV=~/toolchain/llvm +TOPLEV=~/toolchain/llvm cd ${TOPLEV} mkdir ${TOPLEV}/stage2-prof-gen || (echo "Could not create stage2-prof-generate directory"; exit 1) cd ${TOPLEV}/stage2-prof-gen -CPATH=${TOPLEV}/stage1/bin +CPATH=${TOPLEV}/llvm-bolt/bin echo "== Configure Build" echo "== Build with stage1-tools -- $CPATH" @@ -19,12 +19,9 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \ -DLLVM_ENABLE_OCAMLDOC=OFF \ -DLLVM_INCLUDE_DOCS=OFF \ -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DCMAKE_AR=${CPATH}/llvm-ar \ -DCMAKE_C_COMPILER=${CPATH}/clang \ - -DCLANG_TABLEGEN=${CPATH}/clang-tblgen \ -DCMAKE_CXX_COMPILER=${CPATH}/clang++ \ -DLLVM_USE_LINKER=${CPATH}/ld.lld \ - -DCMAKE_RANLIB=${CPATH}/llvm-ranlib \ 
-DLLVM_ENABLE_PROJECTS="clang;lld" \ -DLLVM_TARGETS_TO_BUILD="X86" \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/build_stage2-prof-use-lto.bash b/build_stage2-prof-use-lto.bash index 3071098..1496a79 100755 --- a/build_stage2-prof-use-lto.bash +++ b/build_stage2-prof-use-lto.bash @@ -1,12 +1,12 @@ #!/bin/bash -export TOPLEV=~/toolchain/llvm +TOPLEV=~/toolchain/llvm cd ${TOPLEV} echo "Building Clang with PGO and LTO" mkdir ${TOPLEV}/stage2-prof-use-lto cd ${TOPLEV}/stage2-prof-use-lto -CPATH=${TOPLEV}/stage1/bin +CPATH=${TOPLEV}/llvm-bolt/bin echo "== Configure Build" echo "== Build with stage1-tools -- $CPATH" @@ -20,11 +20,9 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \ -DLLVM_ENABLE_OCAMLDOC=OFF \ -DLLVM_INCLUDE_DOCS=OFF \ -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DCMAKE_AR=${CPATH}/llvm-ar \ -DCMAKE_C_COMPILER=${CPATH}/clang \ -DCMAKE_CXX_COMPILER=${CPATH}/clang++ \ -DLLVM_USE_LINKER=${CPATH}/ld.lld \ - -DCMAKE_RANLIB=${CPATH}/llvm-ranlib \ -DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt;polly" \ -DLLVM_TARGETS_TO_BUILD="X86" \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/build_stage3-bolt-without-sampling.bash b/build_stage3-bolt-without-sampling.bash index fb4d8bf..462919b 100755 --- a/build_stage3-bolt-without-sampling.bash +++ b/build_stage3-bolt-without-sampling.bash @@ -1,12 +1,12 @@ #!/bin/bash -export TOPLEV=~/toolchain/llvm +TOPLEV=~/toolchain/llvm cd ${TOPLEV} mkdir -p ${TOPLEV}/stage3-without-sampling/intrumentdata || (echo "Could not create stage3-bolt directory"; exit 1) cd ${TOPLEV}/stage3-without-sampling CPATH=${TOPLEV}/stage2-prof-use-lto/install/bin -BOLTPATH=${TOPLEV}/stage1/bin +BOLTPATH=${TOPLEV}/llvm-bolt/bin echo "Instrument clang with llvm-bolt" @@ -41,18 +41,23 @@ ninja & read -t 240 || kill $! 
echo "Merging generated profiles" cd ${TOPLEV}/stage3-without-sampling/intrumentdata -${BOLTPATH}/merge-fdata *.fdata > combined.fdata +LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/merge-fdata *.fdata > combined.fdata echo "Optimizing Clang with the generated profile" -${BOLTPATH}/llvm-bolt ${CPATH}/clang-15.org \ +LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${CPATH}/clang-15.org \ --data combined.fdata \ -o ${CPATH}/clang-15 \ - -reorder-blocks=cache+ \ - -reorder-functions=hfsort+ \ - -split-functions=3 \ + -relocs \ + -split-functions \ -split-all-cold \ - -dyno-stats \ -icf=1 \ - -use-gnu-stack || (echo "Could not optimize binary for clang-15"; exit 1) + -lite=1 \ + -split-eh \ + -use-gnu-stack \ + -jump-tables=move \ + -dyno-stats \ + -reorder-functions=hfsort \ + -reorder-blocks=ext-tsp \ + -tail-duplication=cache || (echo "Could not optimize binary for clang"; exit 1) echo "You can now use the compiler with export PATH=${CPATH}:${PATH}" diff --git a/build_stage3-bolt.bash b/build_stage3-bolt.bash index f475e95..3ad1085 100755 --- a/build_stage3-bolt.bash +++ b/build_stage3-bolt.bash @@ -6,7 +6,7 @@ cd ${TOPLEV} mkdir ${TOPLEV}/stage3-bolt || (echo "Could not create stage3-bolt directory"; exit 1) cd ${TOPLEV}/stage3-bolt CPATH=${TOPLEV}/stage2-prof-use-lto/install/bin -BOLTPATH=${TOPLEV}/stage1/bin +BOLTPATH=${TOPLEV}/llvm-bolt/bin @@ -17,13 +17,9 @@ cmake -G Ninja \ -DLLVM_BINUTILS_INCDIR=/usr/include \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="$(pwd)/install" \ - -DCMAKE_AR=${CPATH}/llvm-ar \ -DCMAKE_C_COMPILER=${CPATH}/clang \ - -DCLANG_TABLEGEN=${CPATH}/clang-tblgen \ -DCMAKE_CXX_COMPILER=${CPATH}/clang++ \ -DLLVM_USE_LINKER=${CPATH}/ld.lld \ - -DLLVM_TABLEGEN=${CPATH}/llvm-tblgen \ - -DCMAKE_RANLIB=${CPATH}/llvm-ranlib \ -DLLVM_TARGETS_TO_BUILD="X86" \ -DLLVM_ENABLE_PROJECTS="clang" \ ../llvm-project/llvm || (echo "Could not configure project!"; exit 1) @@ -35,22 +31,27 @@ cd ${TOPLEV} echo "Converting profile to a more 
aggreated form suitable to be consumed by BOLT" -${BOLTPATH}/perf2bolt ${CPATH}/clang-15 \ - -p ${TOPLEV}perf.data \ +LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/perf2bolt ${CPATH}/clang-15 \ + -p ${TOPLEV}/perf.data \ -o ${TOPLEV}/clang-15.fdata || (echo "Could not convert perf-data to bolt for clang-15"; exit 1) echo "Optimizing Clang with the generated profile" -${BOLTPATH}/llvm-bolt ${CPATH}/clang-15 \ +LD_PRELOAD=/usr/lib/libjemalloc.so ${BOLTPATH}/llvm-bolt ${CPATH}/clang-15 \ -o ${CPATH}/clang-15.bolt \ --data ${TOPLEV}/clang-15.fdata \ - -reorder-blocks=cache+ \ - -reorder-functions=hfsort+ \ - -split-functions=3 \ + -relocs \ + -split-functions \ -split-all-cold \ - -dyno-stats \ -icf=1 \ - -use-gnu-stack || (echo "Could not optimize binary for clang-15"; exit 1) + -lite=1 \ + -split-eh \ + -use-gnu-stack \ + -jump-tables=move \ + -dyno-stats \ + -reorder-functions=hfsort \ + -reorder-blocks=ext-tsp \ + -tail-duplication=cache || (echo "Could not optimize binary for clang"; exit 1) echo "move bolted binary to clang-15" mv ${CPATH}/clang-15 ${CPATH}/clang-15.org diff --git a/build_stage3-train.bash b/build_stage3-train.bash index d41c134..2f53275 100755 --- a/build_stage3-train.bash +++ b/build_stage3-train.bash @@ -17,11 +17,9 @@ cmake -G Ninja ${TOPLEV}/llvm-project/llvm \ -DLLVM_ENABLE_OCAMLDOC=OFF \ -DLLVM_INCLUDE_DOCS=OFF \ -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DCMAKE_AR=${CPATH}/llvm-ar \ -DCMAKE_C_COMPILER=${CPATH}/clang \ -DCMAKE_CXX_COMPILER=${CPATH}/clang++ \ -DLLVM_USE_LINKER=${CPATH}/ld.lld \ - -DCMAKE_RANLIB=${CPATH}/llvm-ranlib \ -DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt;polly" \ -DLLVM_TARGETS_TO_BUILD="X86" \ -DCMAKE_BUILD_TYPE=Release \ @@ -35,4 +33,4 @@ ninja || (echo "Could not build project!"; exit 1) echo "Merging PGO-Profiles" cd ${TOPLEV}/stage2-prof-gen/profiles -${TOPLEV}/stage1/install/bin/llvm-profdata merge -output=clang.profdata * +${TOPLEV}/llvm-bolt/bin/llvm-profdata merge -output=clang.profdata * diff --git 
a/measure_build.bash b/measure_build.bash index 87311db..53443d4 100755 --- a/measure_build.bash +++ b/measure_build.bash @@ -1,6 +1,6 @@ #!/bin/bash -export TOPLEV=~/toolchain/llvm +TOPLEV=~/toolchain/llvm # Change your compiler PATH here to compare them diff --git a/setup_llvm_repo.bash b/setup_llvm_repo.bash index bcdc80b..c9bab0c 100755 --- a/setup_llvm_repo.bash +++ b/setup_llvm_repo.bash @@ -1,4 +1,5 @@ #!/bin/bash export TOPLEV=~/toolchain/llvm +mkdir -p ${TOPLEV} cd ${TOPLEV} -git clone --depth=1 https://github.com/llvm/llvm-project.git +git clone --depth=1 -b release/15.x https://github.com/llvm/llvm-project.git