Skip to content

Commit

Permalink
Merge pull request #118 from hartwork/fix-for-amd-k8
Browse files Browse the repository at this point in the history
Fix for AMD K8
  • Loading branch information
hartwork authored Dec 7, 2023
2 parents 3b57873 + f0b584d commit 737159a
Show file tree
Hide file tree
Showing 7 changed files with 583 additions and 5 deletions.
4 changes: 3 additions & 1 deletion resolve_march_native/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def _process_flags_explicit_has_more(self, target_set,
march_explicit_flag_set):
PREFIX_NO = '-mno-'
PREFIX_YES = '-m'
PREFIX_MTUNE = '-mtune='

explicit_more_flag_set = march_explicit_flag_set - march_native_flag_set
for flag in explicit_more_flag_set:
Expand All @@ -97,7 +98,8 @@ def _process_flags_explicit_has_more(self, target_set,
flag, file=sys.stderr)
continue

if not flag.startswith(PREFIX_NO) and flag.startswith(PREFIX_YES):
if not flag.startswith(PREFIX_NO) and flag.startswith(PREFIX_YES) \
and not flag.startswith(PREFIX_MTUNE):
# march=<explicit> enabled something (too much) that march=native disabled
opposite_flag = PREFIX_NO + flag[len(PREFIX_YES):]
target_set.add(opposite_flag)
Expand Down
8 changes: 8 additions & 0 deletions resolve_march_native/test/data/amd-k8--assembly--explicit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.file "tmp.5WRm9r3nVq.c"
# GNU C17 (Gentoo 13.2.1_p20230826 p7) version 13.2.1 20230826 (x86_64-pc-linux-gnu)
# compiled by GNU C version 13.2.1 20230826, GMP version 6.3.0, MPFR version 4.2.1, MPC version 1.3.1, isl version none
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -march=k8-sse3
.text
.ident "GCC: (Gentoo 13.2.1_p20230826 p7) 13.2.1 20230826"
.section .note.GNU-stack,"",@progbits
8 changes: 8 additions & 0 deletions resolve_march_native/test/data/amd-k8--assembly--native.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.file "tmp.Rh4WnkncPp.c"
# GNU C17 (Gentoo 13.2.1_p20230826 p7) version 13.2.1 20230826 (x86_64-pc-linux-gnu)
# compiled by GNU C version 13.2.1 20230826, GMP version 6.3.0, MPFR version 4.2.1, MPC version 1.3.1, isl version none
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -march=k8-sse3 -mmmx -mno-popcnt -msse -msse2 -msse3 -mno-ssse3 -mno-sse4.1 -mno-sse4.2 -mno-avx -mno-avx2 -mno-sse4a -mno-fma4 -mno-xop -mno-fma -mno-avx512f -mno-bmi -mno-bmi2 -mno-aes -mno-pclmul -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512vbmi -mno-avx512ifma -mno-avx5124vnniw -mno-avx5124fmaps -mno-avx512vpopcntdq -mno-avx512vbmi2 -mno-gfni -mno-vpclmulqdq -mno-avx512vnni -mno-avx512bitalg -mno-avx512bf16 -mno-avx512vp2intersect -m3dnow -mno-adx -mno-abm -mno-cldemote -mno-clflushopt -mno-clwb -mno-clzero -mcx16 -mno-enqcmd -mno-f16c -mno-fsgsbase -mfxsr -mno-hle -msahf -mno-lwp -mno-lzcnt -mno-movbe -mno-movdir64b -mno-movdiri -mno-mwaitx -mno-pconfig -mno-pku -mno-prefetchwt1 -mprfchw -mno-ptwrite -mno-rdpid -mno-rdrnd -mno-rdseed -mno-rtm -mno-serialize -mno-sgx -mno-sha -mno-shstk -mno-tbm -mno-tsxldtrk -mno-vaes -mno-waitpkg -mno-wbnoinvd -mno-xsave -mno-xsavec -mno-xsaveopt -mno-xsaves -mno-amx-tile -mno-amx-int8 -mno-amx-bf16 -mno-uintr -mno-hreset -mno-kl -mno-widekl -mno-avxvnni -mno-avx512fp16 -mno-avxifma -mno-avxvnniint8 -mno-avxneconvert -mno-cmpccxadd -mno-amx-fp16 -mno-prefetchi -mno-raoint -mno-amx-complex --param=l1-cache-size=64 --param=l1-cache-line-size=64 --param=l2-cache-size=512 -mtune=k8
.text
.ident "GCC: (Gentoo 13.2.1_p20230826 p7) 13.2.1 20230826"
.section .note.GNU-stack,"",@progbits
270 changes: 270 additions & 0 deletions resolve_march_native/test/data/amd-k8--target-help--explicit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
The following options are target specific:
-m128bit-long-double [enabled]
-m16 [disabled]
-m32 [disabled]
-m3dnow [enabled]
-m3dnowa [enabled]
-m64 [enabled]
-m80387 [enabled]
-m8bit-idiv [disabled]
-m96bit-long-double [disabled]
-mabi= sysv
-mabm [disabled]
-maccumulate-outgoing-args [enabled]
-maddress-mode= long
-madx [disabled]
-maes [disabled]
-malign-data= compat
-malign-double [disabled]
-malign-functions= 0
-malign-jumps= 0
-malign-loops= 0
-malign-stringops [enabled]
-mamx-bf16 [disabled]
-mamx-complex [disabled]
-mamx-fp16 [disabled]
-mamx-int8 [disabled]
-mamx-tile [disabled]
-mandroid [disabled]
-march= k8-sse3
-masm= att
-mavx [disabled]
-mavx2 [disabled]
-mavx256-split-unaligned-load [disabled]
-mavx256-split-unaligned-store [disabled]
-mavx5124fmaps [disabled]
-mavx5124vnniw [disabled]
-mavx512bf16 [disabled]
-mavx512bitalg [disabled]
-mavx512bw [disabled]
-mavx512cd [disabled]
-mavx512dq [disabled]
-mavx512er [disabled]
-mavx512f [disabled]
-mavx512fp16 [disabled]
-mavx512ifma [disabled]
-mavx512pf [disabled]
-mavx512vbmi [disabled]
-mavx512vbmi2 [disabled]
-mavx512vl [disabled]
-mavx512vnni [disabled]
-mavx512vp2intersect [disabled]
-mavx512vpopcntdq [disabled]
-mavxifma [disabled]
-mavxneconvert [disabled]
-mavxvnni [disabled]
-mavxvnniint8 [disabled]
-mbionic [disabled]
-mbmi [disabled]
-mbmi2 [disabled]
-mbranch-cost=<0,5> 3
-mcall-ms2sysv-xlogues [disabled]
-mcet-switch [disabled]
-mcld [disabled]
-mcldemote [disabled]
-mclflushopt [disabled]
-mclwb [disabled]
-mclzero [disabled]
-mcmodel= [default]
-mcmpccxadd [disabled]
-mcpu=
-mcrc32 [disabled]
-mcx16 [disabled]
-mdaz-ftz [disabled]
-mdirect-extern-access [enabled]
-mdispatch-scheduler [disabled]
-mdump-tune-features [disabled]
-menqcmd [disabled]
-mf16c [disabled]
-mfancy-math-387 [enabled]
-mfentry [disabled]
-mfentry-name=
-mfentry-section=
-mfma [disabled]
-mfma4 [disabled]
-mforce-drap [disabled]
-mforce-indirect-call [disabled]
-mfp-ret-in-387 [enabled]
-mfpmath= sse
-mfsgsbase [disabled]
-mfunction-return= keep
-mfused-madd -ffp-contract=fast
-mfxsr [enabled]
-mgather -mtune-ctrl=use_gather
-mgeneral-regs-only [disabled]
-mgfni [disabled]
-mglibc [enabled]
-mhard-float [enabled]
-mharden-sls= none
-mhle [disabled]
-mhreset [disabled]
-miamcu [disabled]
-mieee-fp [enabled]
-mincoming-stack-boundary= 0
-mindirect-branch-cs-prefix [disabled]
-mindirect-branch-register [disabled]
-mindirect-branch= keep
-minline-all-stringops [disabled]
-minline-stringops-dynamically [disabled]
-minstrument-return= none
-mintel-syntax -masm=intel
-mkl [disabled]
-mlam= none
-mlarge-data-threshold=<number> 65536
-mlong-double-128 [disabled]
-mlong-double-64 [disabled]
-mlong-double-80 [enabled]
-mlwp [disabled]
-mlzcnt [disabled]
-mmanual-endbr [disabled]
-mmemcpy-strategy=
-mmemset-strategy=
-mmitigate-rop [disabled]
-mmmx [enabled]
-mmovbe [disabled]
-mmovdir64b [disabled]
-mmovdiri [disabled]
-mmove-max= 128
-mmpx [disabled]
-mms-bitfields [disabled]
-mmusl [disabled]
-mmwait [enabled]
-mmwaitx [disabled]
-mneeded [disabled]
-mno-align-stringops [disabled]
-mno-default [disabled]
-mno-fancy-math-387 [disabled]
-mno-push-args [disabled]
-mno-red-zone [disabled]
-mno-sse4 [enabled]
-mnop-mcount [disabled]
-momit-leaf-frame-pointer [disabled]
-mpc32 [disabled]
-mpc64 [disabled]
-mpc80 [disabled]
-mpclmul [disabled]
-mpcommit [disabled]
-mpconfig [disabled]
-mpku [disabled]
-mpopcnt [disabled]
-mprefer-avx128 -mprefer-vector-width=128
-mprefer-vector-width= none
-mpreferred-stack-boundary= 0
-mprefetchi [disabled]
-mprefetchwt1 [disabled]
-mprfchw [disabled]
-mptwrite [disabled]
-mpush-args [enabled]
-mraoint [disabled]
-mrdpid [disabled]
-mrdrnd [disabled]
-mrdseed [disabled]
-mrecip [disabled]
-mrecip=
-mrecord-mcount [disabled]
-mrecord-return [disabled]
-mred-zone [enabled]
-mregparm= 6
-mrelax-cmpxchg-loop [disabled]
-mrtd [disabled]
-mrtm [disabled]
-msahf [disabled]
-mscatter -mtune-ctrl=use_scatter
-mserialize [disabled]
-msgx [disabled]
-msha [disabled]
-mshstk [disabled]
-mskip-rax-setup [disabled]
-msoft-float [disabled]
-msse [enabled]
-msse2 [enabled]
-msse2avx [disabled]
-msse3 [enabled]
-msse4 [disabled]
-msse4.1 [disabled]
-msse4.2 [disabled]
-msse4a [disabled]
-msse5 -mavx
-msseregparm [disabled]
-mssse3 [disabled]
-mstack-arg-probe [disabled]
-mstack-protector-guard-offset=
-mstack-protector-guard-reg=
-mstack-protector-guard-symbol=
-mstack-protector-guard= tls
-mstackrealign [disabled]
-mstore-max= 128
-mstringop-strategy= [default]
-mstv [enabled]
-mtbm [disabled]
-mtls-dialect= gnu
-mtls-direct-seg-refs [enabled]
-mtsxldtrk [disabled]
-mtune-ctrl=
-mtune= k8-sse3
-muclibc [disabled]
-muintr [disabled]
-munroll-only-small-loops [disabled]
-mvaes [disabled]
-mveclibabi= [default]
-mvect8-ret-in-mem [disabled]
-mvpclmulqdq [disabled]
-mvzeroupper [disabled]
-mwaitpkg [disabled]
-mwbnoinvd [disabled]
-mwidekl [disabled]
-mx32 [disabled]
-mxop [disabled]
-mxsave [disabled]
-mxsavec [disabled]
-mxsaveopt [disabled]
-mxsaves [disabled]

Known assembler dialects (for use with the -masm= option):
att intel

Known ABIs (for use with the -mabi= option):
ms sysv

Known code models (for use with the -mcmodel= option):
32 kernel large medium small

Valid arguments to -mfpmath=:
387 387+sse 387,sse both sse sse+387 sse,387

Known choices for mitigation against straight line speculation with -mharden-sls=:
all indirect-jmp none return

Known indirect branch choices (for use with the -mindirect-branch=/-mfunction-return= options):
keep thunk thunk-extern thunk-inline

Known choices for return instrumentation with -minstrument-return=:
call none nop5

Known data alignment choices (for use with the -malign-data= option):
abi cacheline compat

Known vectorization library ABIs (for use with the -mveclibabi= option):
acml svml

Known address mode (for use with the -maddress-mode= option):
long short

Known preferred register vector length (to use with the -mprefer-vector-width= option):
128 256 512 none

Known stack protector guard (for use with the -mstack-protector-guard= option):
global tls

Valid arguments to -mstringop-strategy=:
byte_loop libcall loop rep_4byte rep_8byte rep_byte unrolled_loop vector_loop

Known TLS dialects (for use with the -mtls-dialect= option):
gnu gnu2

Known valid arguments for -march= option:
i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 pentium3m pentium-m pentium4 pentium4m prescott nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client rocketlake icelake-server cascadelake tigerlake cooperlake sapphirerapids emeraldrapids alderlake raptorlake meteorlake graniterapids graniterapids-d bonnell atom silvermont slm goldmont goldmont-plus tremont gracemont sierraforest grandridge knl knm intel geode k6 k6-2 k6-3 athlon athlon-tbird athlon-4 athlon-xp athlon-mp x86-64 x86-64-v2 x86-64-v3 x86-64-v4 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 lujiazui k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 znver3 znver4 btver1 btver2 generic native

Known valid arguments for -mtune= option:
generic i386 i486 pentium lakemont pentiumpro pentium4 nocona core2 nehalem sandybridge haswell bonnell silvermont goldmont goldmont-plus tremont sierraforest grandridge knl knm skylake skylake-avx512 cannonlake icelake-client icelake-server cascadelake tigerlake cooperlake sapphirerapids alderlake rocketlake graniterapids graniterapids-d intel lujiazui geode k6 athlon k8 amdfam10 bdver1 bdver2 bdver3 bdver4 btver1 btver2 znver1 znver2 znver3 znver4

Loading

0 comments on commit 737159a

Please sign in to comment.