@@ -1557,45 +1557,146 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
15571557 }
15581558
15591559#if defined _WIN32
1560- // get max freq mhz for all cores
1561- int max_freq_mhz_min = INT_MAX;
1562- int max_freq_mhz_max = 0 ;
1563- std::vector<int > cpu_max_freq_mhz = get_max_freq_mhz ();
1564- for (int i = 0 ; i < g_cpucount; i++)
1565- {
1566- int max_freq_mhz = cpu_max_freq_mhz[i];
1567-
1568- // NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);
1569-
1570- if (max_freq_mhz > max_freq_mhz_max)
1571- max_freq_mhz_max = max_freq_mhz;
1572- if (max_freq_mhz < max_freq_mhz_min)
1573- max_freq_mhz_min = max_freq_mhz;
1574- }
1560+ // Check SDK >= Win7
1561+ #if _WIN32_WINNT >= _WIN32_WINNT_WIN7 // win7
15751562
1576- int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2 ;
1577- if (max_freq_mhz_medium == max_freq_mhz_max)
1563+ // Load GetLogicalProcessorInformationEx
1564+ HMODULE kernel32 = LoadLibrary (TEXT (" kernel32.dll" ));
1565+ if (!kernel32)
15781566 {
1579- mask_little.disable_all ();
1580- mask_big = mask_all;
1567+ NCNN_LOGE (" LoadLibrary kernel32.dll failed" );
15811568 return ;
15821569 }
15831570
1584- ncnn::CpuSet smt_cpu_mask = get_smt_cpu_mask ();
1571+ typedef BOOL (WINAPI * LPFN_GLPIE)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
1572+ LPFN_GLPIE glpie = (LPFN_GLPIE)GetProcAddress (kernel32, " GetLogicalProcessorInformationEx" );
15851573
1586- for ( int i = 0 ; i < g_cpucount; i++ )
1574+ if (glpie != NULL )
15871575 {
1588- if (smt_cpu_mask.is_enabled (i))
1576+ DWORD bufferSize = 0 ;
1577+ glpie (RelationProcessorCore, nullptr , &bufferSize);
1578+ std::vector<BYTE> buffer (bufferSize);
1579+ if (!GetLogicalProcessorInformationEx (RelationProcessorCore,
1580+ (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(buffer.data ()), &bufferSize))
15891581 {
1590- // always treat smt core as big core
1591- mask_big.enable (i);
1592- continue ;
1582+ NCNN_LOGE (" GetLogicalProcessorInformationEx failed" );
1583+ return ;
15931584 }
15941585
1595- if (cpu_max_freq_mhz[i] < max_freq_mhz_medium)
1596- mask_little.enable (i);
1586+ // A map from processor number to whether it is an E core
1587+ std::vector<std::pair<DWORD, bool > > processorCoreType;
1588+ BYTE maxEfficiencyClass = 0 ; // In a system without E cores, all cores EfficiencyClass is 0
1589+
1590+ BYTE* ptr = buffer.data ();
1591+ while (ptr < buffer.data () + bufferSize)
1592+ {
1593+ SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)ptr;
1594+ if (info->Relationship == RelationProcessorCore)
1595+ {
1596+ // MinGW and some older MSVC headers do not declare EfficiencyClass in PROCESSOR_RELATIONSHIP,
1597+ // so the field cannot be accessed by name without redefining PROCESSOR_RELATIONSHIP.
1598+ // ncnn needs to support C++98, so C++11 features cannot be used here.
1599+ // Hence this ugly workaround: read the byte at offset 1 of the Processor member directly.
1600+
1601+ BYTE efficiencyClass = ((BYTE*)&info->Processor )[1 ];
1602+
1603+ bool isECore = (efficiencyClass == 0 );
1604+ maxEfficiencyClass = (std::max)(maxEfficiencyClass, efficiencyClass);
1605+
1606+ for (WORD g = 0 ; g < info->Processor .GroupCount ; ++g)
1607+ {
1608+ const GROUP_AFFINITY& ga = info->Processor .GroupMask [g];
1609+ KAFFINITY mask = ga.Mask ;
1610+ WORD group = ga.Group ;
1611+ for (int bit = 0 ; bit < 64 ; ++bit)
1612+ { // for each bit in the mask
1613+ if (mask & (static_cast <KAFFINITY>(1 ) << bit))
1614+ {
1615+ DWORD processorNumber = group * 64 + bit;
1616+ processorCoreType.push_back (std::pair<DWORD, bool >(processorNumber, isECore));
1617+ }
1618+ }
1619+ }
1620+ }
1621+ ptr += info->Size ;
1622+ }
1623+
1624+ if (maxEfficiencyClass == 0 )
1625+ {
1626+ // All cores are P cores
1627+ mask_little.disable_all ();
1628+ mask_big = mask_all;
1629+ }
15971630 else
1598- mask_big.enable (i);
1631+ {
1632+ for (int i = 0 ; i < g_cpucount; i++)
1633+ {
1634+ bool isECore = false ;
1635+ for (int j = 0 ; j < processorCoreType.size (); j++)
1636+ {
1637+ std::pair<DWORD, bool > p = processorCoreType[j];
1638+ if (p.first == i)
1639+ {
1640+ isECore = p.second ;
1641+ break ;
1642+ }
1643+ }
1644+ // fprintf(stderr, "processor %d is %s\n", i, isECore ? "E" : "P");
1645+
1646+ if (isECore)
1647+ {
1648+ mask_little.enable (i);
1649+ }
1650+ else
1651+ {
1652+ mask_big.enable (i);
1653+ }
1654+ }
1655+ }
1656+ }
1657+ else
1658+ #endif
1659+ {
1660+ // get max freq mhz for all cores
1661+ int max_freq_mhz_min = INT_MAX;
1662+ int max_freq_mhz_max = 0 ;
1663+ std::vector<int > cpu_max_freq_mhz = get_max_freq_mhz ();
1664+ for (int i = 0 ; i < g_cpucount; i++)
1665+ {
1666+ int max_freq_mhz = cpu_max_freq_mhz[i];
1667+
1668+ // NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);
1669+
1670+ if (max_freq_mhz > max_freq_mhz_max)
1671+ max_freq_mhz_max = max_freq_mhz;
1672+ if (max_freq_mhz < max_freq_mhz_min)
1673+ max_freq_mhz_min = max_freq_mhz;
1674+ }
1675+
1676+ int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2 ;
1677+ if (max_freq_mhz_medium == max_freq_mhz_max)
1678+ {
1679+ mask_little.disable_all ();
1680+ mask_big = mask_all;
1681+ return ;
1682+ }
1683+
1684+ ncnn::CpuSet smt_cpu_mask = get_smt_cpu_mask ();
1685+
1686+ for (int i = 0 ; i < g_cpucount; i++)
1687+ {
1688+ if (smt_cpu_mask.is_enabled (i))
1689+ {
1690+ // always treat smt core as big core
1691+ mask_big.enable (i);
1692+ continue ;
1693+ }
1694+
1695+ if (cpu_max_freq_mhz[i] < max_freq_mhz_medium)
1696+ mask_little.enable (i);
1697+ else
1698+ mask_big.enable (i);
1699+ }
15991700 }
16001701#elif defined __ANDROID__ || defined __linux__
16011702 int max_freq_khz_min = INT_MAX;
0 commit comments