-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add script for bolting gcc, change llvm-bolt command and add Cache-Aw…
…are Tail Duplication (#5) * Update build_stage1.bash * set all to executable * Update build_stage3-bolt.bash * Update build_stage2-prof-use-lto.bash * Update build_stage2-prof-use-lto-reloc.bash * added polly, bolt into stage1 * added function to bolt the binary without a profile * removed polly, improved script, autodetection for bolting without/with profile * add alternative workflow * increase link jobs * Update full_workflow-2.bash * fix perf record and llvm-bolt stage * Update full_workflow-2.bash * branch sampling frequency fix * small fixes and binutils for gold * add bolt for gcc * update deprecated cache+ * add Cache-Aware Tail Duplication for llvm-bolt * add script for bolting everything * add more to the readme for better understanding * update to latest changes from bolt, add more stages to bolt-anything.sh * add compiler-rt to stage1, fix readme.md * Update build_stage3-train.bash * use release/15.x branch Patch by @ptr1337 , thank you very much!
- Loading branch information
Showing
11 changed files
with
395 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
#!/bin/bash

## Every setting below is only a default: a value that is already present in
## the environment wins, e.g.  STAGE=2 BINARY=libfoo.so <this script>

## STAGE 1 = build llvm-bolt
## STAGE 2 = instrument the binary, then run a workload with it to gather profiles
## STAGE 3 = merge the gathered profiles into one file and remove the raw data
##           that is no longer needed (related to an llvm-bolt bug, see
##           https://github.com/llvm/llvm-project/issues/56209)
## STAGE 4 = optimize the binary with the merged profile
: "${STAGE:=}"

## File or binary you want to instrument and then bolt
: "${BINARY:=libLLVM-14.so}"

## Directory that contains the target
: "${BINARYPATH:=/usr/lib}"

## Directory that contains llvm-bolt and merge-fdata
: "${BOLTPATH:=${HOME}/toolchain/llvm/llvm-bolt/bin}"

## Base directory for all generated data
: "${TOPLEV:=${HOME}/toolchain/bolt}"

## Holds the optimized binaries, the merged fdata and a backup of the
## original binary/file
: "${BOLTBIN:=${TOPLEV}/bin}"

## Directory for the raw instrumentation data.
## It gets a directory of its own because instrumentation creates a lot of files.
: "${FDATA:=${TOPLEV}/fdata}"
|
||
|
||
################################################################ | ||
################################################################ | ||
################################################################ | ||
################################################################ | ||
|
||
|
||
## Create the working directories used by the later stages.
## Globals: FDATA (raw profile directory), BOLTBIN (result directory) - read
## Exits the script with status 1 when a directory cannot be created.
create_path() {
    local dir
    for dir in "${FDATA}" "${BOLTBIN}"; do
        # Quoted so that a path containing whitespace stays one argument.
        mkdir -p -- "${dir}" || { echo "Could not create directory ${dir}" >&2; exit 1; }
    done
}
|
||
## Instrument ${BINARYPATH}/${BINARY} with llvm-bolt and install the
## instrumented copy in place of the original, so that every normal use of the
## binary writes profile data to ${FDATA}.
## Globals: BOLTPATH, BINARYPATH, BINARY, BOLTBIN, FDATA - read
## Exits the script with status 1 as soon as one step fails, so the original
## binary is never replaced by a missing or stale instrumented file.
instrument() {
    local target="${BINARYPATH}/${BINARY}"
    local instrumented="${BOLTBIN}/${BINARY}"

    echo "Instrument binary with llvm-bolt"
    # `{ ...; }` instead of `( ... )`: an `exit` inside parentheses would only
    # leave the subshell and the function would carry on.
    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" \
        --instrument \
        --instrumentation-file-append-pid \
        --instrumentation-file="${FDATA}/${BINARY}.fdata" \
        "${target}" \
        -o "${instrumented}" || { echo "Could not create instrumented binary" >&2; exit 1; }

    ## Backup original file (one copy next to the results, one next to the target)
    sudo cp -- "${target}" "${instrumented}.org" \
        || { echo "Could not back up ${target} to ${instrumented}.org" >&2; exit 1; }
    sudo cp -- "${target}" "${target}.org" \
        || { echo "Could not back up ${target} to ${target}.org" >&2; exit 1; }

    ## Replace the original with the instrumented binary to make gathering a profile easier
    sudo cp -- "${instrumented}" "${target}" \
        || { echo "Could not install instrumented binary to ${target}" >&2; exit 1; }
}
|
||
## Merge all raw profiles of ${BINARY} into ${BOLTBIN}/${BINARY}-combined.fdata
## and delete the raw profiles afterwards.
## Globals: BOLTPATH, BOLTBIN, BINARY, FDATA - read
## Exits the script with status 1 when merging fails; the raw profiles are then
## left untouched so that no gathered data is lost.
merge_fdata() {
    local merged="${BOLTBIN}/${BINARY}-combined.fdata"

    echo "Merging generated profiles"
    # `{ ...; }` instead of `( ... )`: an `exit` inside parentheses would only
    # leave the subshell and the raw profiles below would still be deleted.
    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/merge-fdata" \
        "${FDATA}/${BINARY}"*.fdata > "${merged}" \
        || { echo "Could not merge fdate" >&2; exit 1; }

    ## Removing not needed bloated fdata
    # ${FDATA:?} aborts instead of globbing from "/" when FDATA is empty.
    rm -rf -- "${FDATA:?}/${BINARY}"*.fdata
}
|
||
## Optimize the backed-up original binary (${BOLTBIN}/${BINARY}.org) with the
## merged profile and write the result to ${BOLTBIN}/${BINARY}.bolt.
## Globals: BOLTPATH, BOLTBIN, BINARY - read
## Exits the script with status 1 when llvm-bolt fails, so that no later step
## installs a missing or stale result.
optimize() {
    # Optimization flags handed to llvm-bolt, kept in their original order.
    local -a bolt_flags=(
        -split-functions
        -split-all-cold
        -split-eh
        -dyno-stats
        -reorder-functions=hfsort+
        -icp-eliminate-loads
        -reorder-blocks=ext-tsp
        -icf
    )

    echo "Optimizing binary with generated profile"
    # `{ ...; }` instead of `( ... )`: an `exit` inside parentheses would only
    # leave the subshell and the script would continue after a failure.
    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${BOLTBIN}/${BINARY}.org" \
        --data "${BOLTBIN}/${BINARY}-combined.fdata" \
        -o "${BOLTBIN}/${BINARY}.bolt" \
        "${bolt_flags[@]}" || { echo "Could not optimize the binary" >&2; exit 1; }
}
|
||
## Install the optimized binary (${BOLTBIN}/${BINARY}.bolt) over
## ${BINARYPATH}/${BINARY} and remove leftover raw profile files.
## Globals: BOLTBIN, BINARY, BINARYPATH, FDATA - read
## Exits the script with status 1 when the optimized binary cannot be copied.
move_binary() {
    echo "You can find now your optimzed binary at ${BOLTBIN}"
    # ${FDATA:?} aborts instead of globbing from "/" when FDATA is empty.
    sudo rm -rf -- "${FDATA:?}/${BINARY}.fdata"*
    sudo cp -- "${BOLTBIN}/${BINARY}.bolt" "${BINARYPATH}/${BINARY}" \
        || { echo "Could not install ${BOLTBIN}/${BINARY}.bolt to ${BINARYPATH}/${BINARY}" >&2; exit 1; }
}
|
||
## Clone llvm-project (branch release/15.x, shallow) into ~/toolchain/llvm,
## configure it in ~/toolchain/llvm/stage1 with the system clang/lld and
## install clang, lld, bolt and compiler-rt to ~/toolchain/llvm/llvm-bolt.
## Side effect: changes the current directory to the stage1 build directory.
## Exits the script with status 1 as soon as one step fails.
build_llvm_bolt () {
    # Kept local so the script-wide TOPLEV setting is not overwritten.
    local llvm_root="${HOME}/toolchain/llvm"

    mkdir -p -- "${llvm_root}" \
        || { echo "Could not create ${llvm_root} directory" >&2; exit 1; }
    # `{ ...; }` instead of `( ... )`: an `exit` inside parentheses would only
    # leave the subshell and the build would continue in the wrong directory.
    cd "${llvm_root}" || { echo "Could not enter ${llvm_root} directory" >&2; exit 1; }

    # An existing checkout is reused; only a missing one is cloned.
    if [[ ! -d llvm-project ]]; then
        git clone --depth=1 -b release/15.x https://github.com/llvm/llvm-project.git \
            || { echo "Could not clone llvm-project" >&2; exit 1; }
    fi

    mkdir -p stage1 || { echo "Could not create stage1 directory" >&2; exit 1; }
    cd stage1 || { echo "Could not enter stage 1 directory" >&2; exit 1; }

    echo "== Configure Build"
    echo "== Build with system clang"

    # CMAKE_EXE_LINKER_FLAGS links the whole jemalloc_pic archive into every
    # executable that gets built.
    cmake -G Ninja "${llvm_root}/llvm-project/llvm" \
        -DLLVM_BINUTILS_INCDIR=/usr/include \
        -DCMAKE_BUILD_TYPE=Release \
        -DCLANG_ENABLE_ARCMT=OFF \
        -DCLANG_ENABLE_STATIC_ANALYZER=OFF \
        -DCLANG_PLUGIN_SUPPORT=OFF \
        -DLLVM_ENABLE_BINDINGS=OFF \
        -DLLVM_ENABLE_OCAMLDOC=OFF \
        -DLLVM_INCLUDE_DOCS=OFF \
        -DLLVM_INCLUDE_EXAMPLES=OFF \
        -DCMAKE_C_COMPILER=clang \
        -DCMAKE_CXX_COMPILER=clang++ \
        -DLLVM_USE_LINKER=lld \
        -DLLVM_ENABLE_PROJECTS="clang;lld;bolt;compiler-rt" \
        -DLLVM_TARGETS_TO_BUILD="X86" \
        -DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \
        -DLLVM_BUILD_UTILS=OFF \
        -DLLVM_ENABLE_BACKTRACES=OFF \
        -DLLVM_ENABLE_WARNINGS=OFF \
        -DLLVM_INCLUDE_TESTS=OFF \
        -DLLVM_ENABLE_TERMINFO=OFF \
        -DCMAKE_INSTALL_PREFIX="${llvm_root}/llvm-bolt" \
        || { echo "Could not configure project!" >&2; exit 1; }

    echo "== Start Build"
    ninja install || { echo "Could not build project!" >&2; exit 1; }
}
|
||
## Run the functions that belong to the stage selected through STAGE.
## Any other value (including an empty STAGE) runs nothing.
case "${STAGE}" in
    1)
        ## Stage 1: build llvm-bolt
        build_llvm_bolt
        ;;
    2)
        ## Stage 2: prepare the directories and instrument the binary
        create_path
        instrument
        ;;
    3)
        ## Stage 3: merge the gathered profiles
        merge_fdata
        ;;
    4)
        ## Stage 4: optimize with the merged profile and install the result
        optimize
        move_binary
        ;;
esac
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
#!/bin/bash

## Every setting below is only a default: a value that is already present in
## the environment wins, e.g.  STAGE=1 GCCVER=13 <this script>

## Change here to your gcc version; `gcc -v` shows it as the last component of
## a path such as "/usr/lib/gcc/x86_64-pc-linux-gnu/12"
: "${GCCVER:=12}"
## Base directory
: "${TOPLEV:=${HOME}/toolchain/gcc}"
## Directory for the instrumentation data, used when bolting without perf
: "${DATA:=${TOPLEV}/instrument}"
## Directory that contains the gcc binaries to bolt
: "${GCCPATH:=/usr/lib/gcc/x86_64-pc-linux-gnu/${GCCVER}}"
## Directory that contains the bolt tools
: "${BOLTPATH:=${HOME}/toolchain/llvm/stage1/install/bin}"
## Path to your perf.data if you have a cpu which supports LBR.
## The perf.data has to be recorded before running the script, for example:
## perf record -o perf.data -e cycles:u -j any,u -- 'command to run for example: make'
: "${PERFDATA:=/home/foo/perf.data}"
## Set here the stage you want to run
## STAGE 1 creates an instrumented binary; you then need to run a workload
##         with it to get profile data
## STAGE 2 uses llvm-bolt to optimize the binary
: "${STAGE:=}"

# One directory per compiler binary for its raw profiles.
mkdir -p -- "${DATA}/cc1" "${DATA}/cc1plus" \
    || { echo "Could not create the profile directories below ${DATA}" >&2; exit 1; }
|
||
|
||
|
||
## Instrument one gcc compiler binary with llvm-bolt.
## Arguments: $1 - name of the binary inside ${GCCPATH} (cc1 or cc1plus)
## Globals:   BOLTPATH, DATA, GCCPATH - read
## Writes the instrumented binary to ${DATA}/<name>/<name>; exits with status 1
## when llvm-bolt fails.
instrument_gcc_binary() {
    local name=$1

    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" \
        --instrument \
        --instrumentation-file-append-pid \
        --instrumentation-file="${DATA}/${name}/${name}.fdata" \
        "${GCCPATH}/${name}" \
        -o "${DATA}/${name}/${name}" \
        || { echo "Could not instrument ${name}" >&2; exit 1; }
}

## Keep the original binary as ${GCCPATH}/<name>.org and move the instrumented
## one into its place.
## Arguments: $1 - name of the binary inside ${GCCPATH} (cc1 or cc1plus)
## Globals:   DATA, GCCPATH - read
install_instrumented_gcc_binary() {
    local name=$1

    sudo mv -- "${GCCPATH}/${name}" "${GCCPATH}/${name}.org" \
        || { echo "Could not back up ${GCCPATH}/${name}" >&2; exit 1; }
    sudo mv -- "${DATA}/${name}/${name}" "${GCCPATH}/${name}" \
        || { echo "Could not install instrumented ${name}" >&2; exit 1; }
}

## Stage 1: instrument cc1 and cc1plus and swap them into ${GCCPATH}.
## Both binaries are instrumented before anything inside ${GCCPATH} is touched,
## so a failing llvm-bolt run leaves the installed compiler unchanged.
gcc_bolt_stage1() {
    echo "Instrument gcc (cc1 and cc1plus) with llvm-bolt"

    instrument_gcc_binary cc1
    instrument_gcc_binary cc1plus

    install_instrumented_gcc_binary cc1
    install_instrumented_gcc_binary cc1plus

    echo "Now move the binarys to the gcc path"
    echo "now do some instrument compiles for example compiling a kernel or GCC"
}

# Quoted comparison: an empty or unset STAGE simply does not match.
if [[ "${STAGE:-}" == 1 ]]; then
    gcc_bolt_stage1
fi
|
||
## Convert ${PERFDATA} into a bolt profile for one gcc compiler binary.
## Arguments: $1 - name of the binary (cc1 or cc1plus)
## Globals:   BOLTPATH, GCCPATH, PERFDATA, DATA - read
## Writes ${DATA}/<name>.fdata; exits with status 1 when perf2bolt fails.
convert_perf_profile() {
    local name=$1

    # Each binary is converted against its own original (<name>.org).
    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/perf2bolt" "${GCCPATH}/${name}.org" \
        -p "${PERFDATA}" \
        -o "${DATA}/${name}.fdata" \
        || { echo "Could not convert perf-data to bolt for ${name}" >&2; exit 1; }
}

## Merge the raw instrumentation profiles of one gcc compiler binary.
## Arguments: $1 - name of the binary (cc1 or cc1plus)
## Globals:   BOLTPATH, DATA - read
## Writes ${DATA}/<name>/<name>-combined.fdata; exits with status 1 on failure.
merge_gcc_profiles() {
    local name=$1
    local merged="${DATA}/${name}/${name}-combined.fdata"

    # A combined file left over from an earlier run matches *.fdata as well and
    # would be both input and output of the merge, so it is removed first.
    rm -f -- "${merged}"
    "${BOLTPATH}/merge-fdata" "${DATA}/${name}"/*.fdata > "${merged}" \
        || { echo "Could not merge profiles for ${name}" >&2; exit 1; }
}

## Optimize one gcc compiler binary with llvm-bolt.
## Arguments: $1  - name of the binary (cc1 or cc1plus)
##            $2  - profile (.fdata) to use
##            $3… - extra llvm-bolt flags, placed before the common flags
## Globals:   BOLTPATH, GCCPATH, TOPLEV - read
## Reads ${GCCPATH}/<name>.org, writes ${TOPLEV}/<name>; changes the current
## directory to ${TOPLEV}; exits with status 1 on failure.
optimize_gcc_binary() {
    local name=$1
    local profile=$2
    shift 2

    echo "Optimizing ${name} with the generated profile"
    cd "${TOPLEV}" || { echo "Could not enter ${TOPLEV} directory" >&2; exit 1; }
    # `{ ...; }` instead of `( ... )`: an `exit` inside parentheses would only
    # leave the subshell and the script would continue after a failure.
    LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${GCCPATH}/${name}.org" \
        --data "${profile}" \
        -o "${TOPLEV}/${name}" \
        "$@" \
        -split-functions \
        -split-all-cold \
        -icf=1 \
        -lite=1 \
        -split-eh \
        -use-gnu-stack \
        -jump-tables=move \
        -dyno-stats \
        -reorder-functions=hfsort+ \
        -reorder-blocks=ext-tsp \
        -tail-duplication=cache \
        || { echo "Could not optimize binary for ${name}" >&2; exit 1; }
}

## Stage 2: optimize cc1 and cc1plus.
## When `perf record` with branch sampling works on this machine, the profile
## is taken from ${PERFDATA}; otherwise the instrumentation profiles gathered
## after stage 1 are merged and used.
gcc_bolt_stage2() {
    echo "Optimizing gcc (cc1 and cc1plus) with llvm-bolt"

    ## Check if perf is available.
    # -o /dev/null keeps this probe from writing a perf.data file into the
    # current directory (which could be the one that holds ${PERFDATA}).
    if perf record -o /dev/null -e cycles:u -j any,u -- sleep 1 &>/dev/null; then
        echo "BOLTING with Profile!"

        convert_perf_profile cc1
        convert_perf_profile cc1plus

        optimize_gcc_binary cc1 "${DATA}/cc1.fdata"
        optimize_gcc_binary cc1plus "${DATA}/cc1plus.fdata"
        # NOTE(review): as in the original script, this branch leaves the
        # optimized binaries in ${TOPLEV} and does not install them - confirm
        # that this is intended.
    else
        echo "Merging generated profiles"
        merge_gcc_profiles cc1
        merge_gcc_profiles cc1plus

        optimize_gcc_binary cc1 "${DATA}/cc1/cc1-combined.fdata" -relocs
        optimize_gcc_binary cc1plus "${DATA}/cc1plus/cc1plus-combined.fdata" -relocs

        echo "mooving bolted binary"
        sudo mv -- "${TOPLEV}/cc1plus" "${GCCPATH}/cc1plus" \
            || { echo "Could not install bolted cc1plus" >&2; exit 1; }
        sudo mv -- "${TOPLEV}/cc1" "${GCCPATH}/cc1" \
            || { echo "Could not install bolted cc1" >&2; exit 1; }
        echo "Now you can move the bolted binarys to your ${GCCPATH}"
    fi
}

# Quoted comparison: an empty or unset STAGE simply does not match.
if [[ "${STAGE:-}" == 2 ]]; then
    gcc_bolt_stage2
fi
Oops, something went wrong.