Skip to content

Commit 3571d7e

Browse files
authored
Support better API to detect big little core in windows after win7 (Tencent#5927)
1 parent c9e0c87 commit 3571d7e

File tree

2 files changed

+157
-28
lines changed

2 files changed

+157
-28
lines changed

src/cpu.cpp

Lines changed: 129 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,45 +1557,146 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
15571557
}
15581558

15591559
#if defined _WIN32
1560-
// get max freq mhz for all cores
1561-
int max_freq_mhz_min = INT_MAX;
1562-
int max_freq_mhz_max = 0;
1563-
std::vector<int> cpu_max_freq_mhz = get_max_freq_mhz();
1564-
for (int i = 0; i < g_cpucount; i++)
1565-
{
1566-
int max_freq_mhz = cpu_max_freq_mhz[i];
1567-
1568-
// NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);
1569-
1570-
if (max_freq_mhz > max_freq_mhz_max)
1571-
max_freq_mhz_max = max_freq_mhz;
1572-
if (max_freq_mhz < max_freq_mhz_min)
1573-
max_freq_mhz_min = max_freq_mhz;
1574-
}
1560+
// Check SDK >= Win7
1561+
#if _WIN32_WINNT >= _WIN32_WINNT_WIN7 // win7
15751562

1576-
int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2;
1577-
if (max_freq_mhz_medium == max_freq_mhz_max)
1563+
// Load GetLogicalProcessorInformationEx
1564+
HMODULE kernel32 = LoadLibrary(TEXT("kernel32.dll"));
1565+
if (!kernel32)
15781566
{
1579-
mask_little.disable_all();
1580-
mask_big = mask_all;
1567+
NCNN_LOGE("LoadLibrary kernel32.dll failed");
15811568
return;
15821569
}
15831570

1584-
ncnn::CpuSet smt_cpu_mask = get_smt_cpu_mask();
1571+
typedef BOOL(WINAPI * LPFN_GLPIE)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
1572+
LPFN_GLPIE glpie = (LPFN_GLPIE)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
15851573

1586-
for (int i = 0; i < g_cpucount; i++)
1574+
if (glpie != NULL)
15871575
{
1588-
if (smt_cpu_mask.is_enabled(i))
1576+
DWORD bufferSize = 0;
1577+
glpie(RelationProcessorCore, nullptr, &bufferSize);
1578+
std::vector<BYTE> buffer(bufferSize);
1579+
if (!GetLogicalProcessorInformationEx(RelationProcessorCore,
1580+
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(buffer.data()), &bufferSize))
15891581
{
1590-
// always treat smt core as big core
1591-
mask_big.enable(i);
1592-
continue;
1582+
NCNN_LOGE("GetLogicalProcessorInformationEx failed");
1583+
return;
15931584
}
15941585

1595-
if (cpu_max_freq_mhz[i] < max_freq_mhz_medium)
1596-
mask_little.enable(i);
1586+
// A map from processor number to whether it is an E core
1587+
std::vector<std::pair<DWORD, bool> > processorCoreType;
1588+
BYTE maxEfficiencyClass = 0; // In a system without E cores, all cores EfficiencyClass is 0
1589+
1590+
BYTE* ptr = buffer.data();
1591+
while (ptr < buffer.data() + bufferSize)
1592+
{
1593+
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr;
1594+
if (info->Relationship == RelationProcessorCore)
1595+
{
1596+
// Mingw and some old MSVC do not have EfficiencyClass in PROCESSOR_RELATIONSHIP
1597+
// So we should redefine PROCESSOR_RELATIONSHIP
1598+
// Because ncnn needs to support C++98, we can't use newer C++11 features
1599+
// So we fall back to an ugly implementation
1600+
1601+
BYTE efficiencyClass = ((BYTE*)&info->Processor)[1];
1602+
1603+
bool isECore = (efficiencyClass == 0);
1604+
maxEfficiencyClass = (std::max)(maxEfficiencyClass, efficiencyClass);
1605+
1606+
for (WORD g = 0; g < info->Processor.GroupCount; ++g)
1607+
{
1608+
const GROUP_AFFINITY& ga = info->Processor.GroupMask[g];
1609+
KAFFINITY mask = ga.Mask;
1610+
WORD group = ga.Group;
1611+
for (int bit = 0; bit < 64; ++bit)
1612+
{ // for each bit in the mask
1613+
if (mask & (static_cast<KAFFINITY>(1) << bit))
1614+
{
1615+
DWORD processorNumber = group * 64 + bit;
1616+
processorCoreType.push_back(std::pair<DWORD, bool>(processorNumber, isECore));
1617+
}
1618+
}
1619+
}
1620+
}
1621+
ptr += info->Size;
1622+
}
1623+
1624+
if (maxEfficiencyClass == 0)
1625+
{
1626+
// All cores are P cores
1627+
mask_little.disable_all();
1628+
mask_big = mask_all;
1629+
}
15971630
else
1598-
mask_big.enable(i);
1631+
{
1632+
for (int i = 0; i < g_cpucount; i++)
1633+
{
1634+
bool isECore = false;
1635+
for (int j = 0; j < processorCoreType.size(); j++)
1636+
{
1637+
std::pair<DWORD, bool> p = processorCoreType[j];
1638+
if (p.first == i)
1639+
{
1640+
isECore = p.second;
1641+
break;
1642+
}
1643+
}
1644+
// fprintf(stderr, "processor %d is %s\n", i, isECore ? "E" : "P");
1645+
1646+
if (isECore)
1647+
{
1648+
mask_little.enable(i);
1649+
}
1650+
else
1651+
{
1652+
mask_big.enable(i);
1653+
}
1654+
}
1655+
}
1656+
}
1657+
else
1658+
#endif
1659+
{
1660+
// get max freq mhz for all cores
1661+
int max_freq_mhz_min = INT_MAX;
1662+
int max_freq_mhz_max = 0;
1663+
std::vector<int> cpu_max_freq_mhz = get_max_freq_mhz();
1664+
for (int i = 0; i < g_cpucount; i++)
1665+
{
1666+
int max_freq_mhz = cpu_max_freq_mhz[i];
1667+
1668+
// NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);
1669+
1670+
if (max_freq_mhz > max_freq_mhz_max)
1671+
max_freq_mhz_max = max_freq_mhz;
1672+
if (max_freq_mhz < max_freq_mhz_min)
1673+
max_freq_mhz_min = max_freq_mhz;
1674+
}
1675+
1676+
int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2;
1677+
if (max_freq_mhz_medium == max_freq_mhz_max)
1678+
{
1679+
mask_little.disable_all();
1680+
mask_big = mask_all;
1681+
return;
1682+
}
1683+
1684+
ncnn::CpuSet smt_cpu_mask = get_smt_cpu_mask();
1685+
1686+
for (int i = 0; i < g_cpucount; i++)
1687+
{
1688+
if (smt_cpu_mask.is_enabled(i))
1689+
{
1690+
// always treat smt core as big core
1691+
mask_big.enable(i);
1692+
continue;
1693+
}
1694+
1695+
if (cpu_max_freq_mhz[i] < max_freq_mhz_medium)
1696+
mask_little.enable(i);
1697+
else
1698+
mask_big.enable(i);
1699+
}
15991700
}
16001701
#elif defined __ANDROID__ || defined __linux__
16011702
int max_freq_khz_min = INT_MAX;

tests/test_cpu.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,39 @@ static int test_cpu_powersave()
117117

118118
#else
119119

120+
// test_cpu_info must have exactly one definition in every configuration.
// The real check is only built for Windows with an SDK target >= Win7;
// every other case (non-Windows, or Windows with an older SDK target) gets
// the no-op stub, so the symbol is never left undefined.
#if defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_WIN7

// Prints the total, big and little cpu counts reported by ncnn and checks
// that they are consistent with each other.
// Returns 0 when the counts agree, -1 otherwise.
static int test_cpu_info()
{
    int cpucount = ncnn::get_cpu_count();
    int bigcpucount = ncnn::get_big_cpu_count();
    int littlecpucount = ncnn::get_little_cpu_count();

    fprintf(stderr, "cpucount = %d\n", cpucount);
    fprintf(stderr, "bigcpucount = %d\n", bigcpucount);
    fprintf(stderr, "littlecpucount = %d\n", littlecpucount);

    // big + little must add up to the total, and neither may exceed it
    // (together these also rule out a negative count on either side)
    if ((cpucount != bigcpucount + littlecpucount) || (bigcpucount > cpucount) || (littlecpucount > cpucount))
    {
        fprintf(stderr, "The number of big and little cpus must be less than or equal to the total number of cpus\n");
        return -1;
    }

    return 0;
}

#else

// Nothing to verify in this configuration; always reports success.
static int test_cpu_info()
{
    return 0;
}

#endif
152+
125153
static int test_cpu_omp()
126154
{
127155
return 0;

0 commit comments

Comments
 (0)