Skip to content

Commit b02bdb3

Browse files
committed
cpucache for Win
1 parent 3ae3da6 commit b02bdb3

File tree

2 files changed

+179
-0
lines changed

2 files changed

+179
-0
lines changed

sample/cpucache.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#include <xbyak/xbyak_cpucache.h>
2+
3+
int main()
4+
{
5+
Xbyak::util::intel::CpuCache cpuCache;
6+
if (!cpuCache.init()) {
7+
puts("cpuCache.init err");
8+
}
9+
cpuCache.put();
10+
}

xbyak/xbyak_cpucache.h

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
#pragma once
2+
#include <stdint.h>
3+
#include <stdio.h>
4+
5+
namespace Xbyak { namespace util { namespace intel {
6+
7+
// Kind of core; the values are used as indices into CpuCache::cpuType_
// and into the label table of CpuCache::put().
enum CoreType {
	Ecore = 0, // printed as "E-core"
	Pcore = 1, // printed as "P-core"
	MAX_CORE_TYPE = Pcore + 1 // number of core kinds
};
12+
13+
// Cache slot; the values are used as indices into Cache::v_.
enum CacheType {
	L1i, // 0: level-1 instruction cache
	L1d, // 1: level-1 data cache
	L2,  // 2: level-2 cache
	L3,  // 3: level-3 cache
	MAX_CACHE_TYPE = L3 + 1 // number of cache slots
};
20+
21+
// Description of a single cache; both fields start at 0.
struct CacheSize {
	uint32_t byteSize = 0;  // cache size in bytes
	uint32_t sharedNum = 0; // number of bits set in the cache's affinity mask
	CacheSize() {}
};
26+
27+
struct Cache {
28+
CacheSize v_[MAX_CACHE_TYPE]; // L1i, L1d, L2, L3
29+
};
30+
31+
struct CpuCache {
32+
int PcoreIdx_ = -1;
33+
int EcoreIdx_ = -1;
34+
int PcoreNum_ = 0;
35+
int EcoreNum_ = 0;
36+
int physicalCoreNum_ = 0;
37+
int logicalCoreNum_ = 0;
38+
Cache cpuType_[MAX_CORE_TYPE]; // Ecore, Pcore
39+
void put() const {
40+
printf("Physical cores: %d, Logical cores: %d\n", physicalCoreNum_, logicalCoreNum_);
41+
printf("P-cores: %d, E-cores: %d\n", PcoreNum_, EcoreNum_);
42+
const char* coreNameTbl[] = { "E-core", "P-core" };
43+
const char* cacheNameTbl[] = { "L1i", "L1d", "L2", "L3" };
44+
for (int cti = 0; cti < MAX_CORE_TYPE; cti++) {
45+
printf("%s:\n", coreNameTbl[cti]);
46+
for (int cci = 0; cci < MAX_CACHE_TYPE; cci++) {
47+
const CacheSize& cs = cpuType_[cti].v_[cci];
48+
printf(" %s: %u bytes, shared=%u\n",
49+
cacheNameTbl[cci],
50+
cs.byteSize,
51+
cs.sharedNum);
52+
}
53+
}
54+
}
55+
bool init();
56+
};
57+
58+
}}} // namespace Xbyak::util::intel
59+
60+
#ifdef _WIN32
61+
#define WIN32_LEAN_AND_MEAN
62+
#include <windows.h>
63+
#include <stdio.h>
64+
#include <stdint.h>
65+
#include <memory>
66+
67+
namespace Xbyak { namespace util { namespace intel { namespace impl {
68+
69+
// Return the position (0..63) of the lowest set bit of mask, or -1 when mask is 0.
static inline int getIdx(uint64_t mask) {
	if (mask == 0) return -1;
	int pos = 0;
	// shift the mask right until its lowest set bit reaches bit 0
	while ((mask & 1) == 0) {
		mask >>= 1;
		++pos;
	}
	return pos;
}
77+
// Return true when bit idx of mask is set.
// Any idx outside 0..63 (including the -1 "not found" marker) yields false.
static inline bool maskHasIdx(uint64_t mask, int idx) {
	// shifting a 64-bit value by 64 or more is undefined, so reject it up front
	if (idx < 0 || idx >= 64) return false;
	return ((mask >> idx) & 1) != 0;
}
80+
81+
// return true if success
82+
// PcoreIdx, EcoreIdx(or old CPU): -1 if not found
83+
bool getCoreIdx(int *PcoreIdx, int *EcoreIdx, int *physicalCoreNum, int *logicalCoreNum, int *PcoreNum, int *EcoreNum) {
84+
typedef SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Info;
85+
DWORD len = 0;
86+
GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &len);
87+
std::unique_ptr<Info, decltype(&free)> buf(static_cast<Info*>(malloc(len)), &free);
88+
if (!buf) return false;
89+
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, buf.get(), &len)) return false;
90+
// get core indices
91+
*PcoreIdx = -1;
92+
*EcoreIdx = -1;
93+
char *ptr = reinterpret_cast<char*>(buf.get());
94+
const char *end = ptr + len;
95+
*physicalCoreNum = 0;
96+
*logicalCoreNum = 0;
97+
*PcoreNum = 0;
98+
*EcoreNum = 0;
99+
while (ptr < end) {
100+
const auto& entry = *reinterpret_cast<Info*>(ptr);
101+
const PROCESSOR_RELATIONSHIP& core = entry.Processor;
102+
uint64_t mask = core.GroupMask[0].Mask;
103+
if (core.EfficiencyClass > 0) {
104+
if (*PcoreIdx == -1) *PcoreIdx = getIdx(mask);
105+
(*EcoreNum)++;
106+
} else {
107+
if (*EcoreIdx == -1) *EcoreIdx = getIdx(mask);
108+
(*PcoreNum)++;
109+
}
110+
ptr += entry.Size;
111+
(*physicalCoreNum)++;
112+
(*logicalCoreNum) += int(__popcnt64(mask));
113+
}
114+
return *PcoreIdx >= 0 || *EcoreIdx >= 0;
115+
}
116+
117+
} // Xbyak::util::intel::impl
118+
119+
// return true if success
120+
bool CpuCache::init() {
121+
typedef SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Info;
122+
using namespace impl;
123+
if (!getCoreIdx(&PcoreIdx_, &EcoreIdx_, &physicalCoreNum_, &logicalCoreNum_, &PcoreNum_, &EcoreNum_)) return false;
124+
DWORD len = 0;
125+
GetLogicalProcessorInformationEx(RelationCache, nullptr, &len);
126+
std::unique_ptr<Info, decltype(&free)> buf(static_cast<Info*>(malloc(len)), &free);
127+
if (!buf) return false;
128+
if (!GetLogicalProcessorInformationEx(RelationCache, buf.get(), &len)) return false;
129+
char *ptr = reinterpret_cast<char*>(buf.get());
130+
const char *end = ptr + len;
131+
while (ptr < end) {
132+
const auto& entry = *reinterpret_cast<Info*>(ptr);
133+
if (entry.Relationship == RelationCache) {
134+
const CACHE_RELATIONSHIP& cache = entry.Cache;
135+
uint64_t mask = cache.GroupMask.Mask;
136+
if (maskHasIdx(mask, PcoreIdx_) || maskHasIdx(mask, EcoreIdx_)) {
137+
int cacheType = -1;
138+
if (cache.Level == 1) {
139+
if (cache.Type == CacheInstruction) {
140+
cacheType = L1i;
141+
} else if (cache.Type == CacheData) {
142+
cacheType = L1d;
143+
}
144+
} else if (cache.Level == 2) {
145+
cacheType = L2;
146+
} else if (cache.Level == 3) {
147+
cacheType = L3;
148+
}
149+
if (cacheType >= 0) {
150+
CacheSize cs;
151+
cs.byteSize = cache.CacheSize;
152+
cs.sharedNum = uint32_t(__popcnt64(mask));
153+
if (maskHasIdx(mask, PcoreIdx_)) {
154+
cpuType_[Pcore].v_[cacheType] = cs;
155+
}
156+
if (maskHasIdx(mask, EcoreIdx_)) {
157+
cpuType_[Ecore].v_[cacheType] = cs;
158+
}
159+
}
160+
}
161+
}
162+
ptr += entry.Size;
163+
}
164+
return true;
165+
}
166+
167+
}}} // namespace Xbyak::util::intel
168+
169+
#endif

0 commit comments

Comments
 (0)