@@ -1132,6 +1132,12 @@ bool PCM::discoverSystemTopology()
11321132 uint32 corePlusSMTMaskWidth = 0 ;
11331133 uint32 coreMaskWidth = 0 ;
11341134
1135+ struct domain
1136+ {
1137+ TopologyEntry::DomainTypeID type = TopologyEntry::DomainTypeID::InvalidDomainTypeID;
1138+ unsigned levelShift = 0 , nextLevelShift = 0 , width = 0 ;
1139+ };
1140+ std::unordered_map<int , domain> topologyDomainMap;
11351141 {
11361142 TemporalThreadAffinity aff0 (0 );
11371143 do
@@ -1159,10 +1165,6 @@ bool PCM::discoverSystemTopology()
11591165 subleaf++;
11601166 } while (1 );
11611167
1162- struct domain
1163- {
1164- unsigned type = 0 , levelShift = 0 , nextLevelShift = 0 , width = 0 ;
1165- };
11661168 std::vector<domain> topologyDomains;
11671169 if (max_cpuid >= 0x1F )
11681170 {
@@ -1171,7 +1173,7 @@ bool PCM::discoverSystemTopology()
11711173 {
11721174 pcm_cpuid (0x1F , subleaf, cpuid_args);
11731175 domain d;
1174- d.type = extract_bits_ui (cpuid_args.reg .ecx , 8 , 15 );
1176+ d.type = (TopologyEntry::DomainTypeID) extract_bits_ui (cpuid_args.reg .ecx , 8 , 15 );
11751177 if (d.type == TopologyEntry::DomainTypeID::InvalidDomainTypeID)
11761178 {
11771179 break ;
@@ -1192,16 +1194,18 @@ bool PCM::discoverSystemTopology()
11921194 d.width = d.nextLevelShift - d.levelShift ;
11931195 topologyDomains.push_back (d);
11941196 }
1195- #if 0
11961197 for (size_t l = 0 ; l < topologyDomains.size (); ++l)
11971198 {
1198- std::cerr << "Topology level " << l <<
1199- " type " << topologyDomains[l].type <<
1200- " width " << topologyDomains[l].width <<
1201- " levelShift " << topologyDomains[l].levelShift <<
1202- " nextLevelShift " << topologyDomains[l].nextLevelShift << "\n";
1203- }
1199+ topologyDomainMap[topologyDomains[l].type ] = topologyDomains[l];
1200+ #if 0
1201+ std::cerr << "Topology level: " << l <<
1202+ " type: " << topologyDomains[l].type <<
1203+ " (" << TopologyEntry::getDomainTypeStr(topologyDomains[l].type) << ")" <<
1204+ " width: " << topologyDomains[l].width <<
1205+ " levelShift: " << topologyDomains[l].levelShift <<
1206+ " nextLevelShift: " << topologyDomains[l].nextLevelShift << "\n";
12041207#endif
1208+ }
12051209 }
12061210 }
12071211
@@ -1242,19 +1246,51 @@ bool PCM::discoverSystemTopology()
12421246#endif
12431247
12441248#ifndef __APPLE__
1245- auto populateEntry = [&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry & entry)
1249+ auto populateEntry = [&topologyDomainMap,& smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
12461250 {
1247- PCM_CPUID_INFO cpuid_args;
1251+ auto getAPICID = [&](const uint32 leaf)
1252+ {
1253+ PCM_CPUID_INFO cpuid_args;
12481254#if defined(__FreeBSD__) || defined(__DragonFly__)
1249- pcm_cpuid_bsd (0xb , cpuid_args, entry.os_id );
1255+ pcm_cpuid_bsd (leaf , cpuid_args, entry.os_id );
12501256#else
1251- pcm_cpuid (0xb , 0x0 , cpuid_args);
1257+ pcm_cpuid (leaf , 0x0 , cpuid_args);
12521258#endif
1253- const int apic_id = cpuid_args.array [3 ];
1254- entry.thread_id = smtMaskWidth ? extract_bits_ui (apic_id, 0 , smtMaskWidth - 1 ) : 0 ;
1255- entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui (apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1 ) : 0 ;
1256- entry.socket = extract_bits_ui (apic_id, smtMaskWidth + coreMaskWidth, 31 );
1257- entry.tile_id = extract_bits_ui (apic_id, l2CacheMaskShift, 31 );
1259+ return cpuid_args.array [3 ];
1260+ };
1261+ if (topologyDomainMap.size ())
1262+ {
1263+ auto getID = [&topologyDomainMap](const int apic_id, const TopologyEntry::DomainTypeID t)
1264+ {
1265+ const auto di = topologyDomainMap.find (t);
1266+ if (di != topologyDomainMap.end ())
1267+ {
1268+ const auto & d = di->second ;
1269+ return extract_bits_ui (apic_id, d.levelShift , d.nextLevelShift - 1 );
1270+ }
1271+ return 0U ;
1272+ };
1273+ entry.tile_id = extract_bits_ui (getAPICID (0xb ), l2CacheMaskShift, 31 );
1274+ const int apic_id = getAPICID (0x1F );
1275+ entry.thread_id = getID (apic_id, TopologyEntry::DomainTypeID::LogicalProcessorDomain);
1276+ entry.core_id = getID (apic_id, TopologyEntry::DomainTypeID::CoreDomain);
1277+ entry.module_id = getID (apic_id, TopologyEntry::DomainTypeID::ModuleDomain);
1278+ if (entry.tile_id == 0 )
1279+ {
1280+ entry.tile_id = getID (apic_id, TopologyEntry::DomainTypeID::TileDomain);
1281+ }
1282+ entry.die_id = getID (apic_id, TopologyEntry::DomainTypeID::DieDomain);
1283+ entry.die_grp_id = getID (apic_id, TopologyEntry::DomainTypeID::DieGrpDomain);
1284+ entry.socket = getID (apic_id, TopologyEntry::DomainTypeID::SocketPackageDomain);
1285+ }
1286+ else
1287+ {
1288+ const int apic_id = getAPICID (0xb );
1289+ entry.thread_id = smtMaskWidth ? extract_bits_ui (apic_id, 0 , smtMaskWidth - 1 ) : 0 ;
1290+ entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui (apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1 ) : 0 ;
1291+ entry.socket = extract_bits_ui (apic_id, smtMaskWidth + coreMaskWidth, 31 );
1292+ entry.tile_id = extract_bits_ui (apic_id, l2CacheMaskShift, 31 );
1293+ }
12581294 };
12591295#endif
12601296
@@ -2873,7 +2909,7 @@ PCM::PCM() :
28732909 if (safe_getenv (" PCM_PRINT_TOPOLOGY" ) == " 1" )
28742910#endif
28752911 {
2876- printDetailedSystemTopology ();
2912+ printDetailedSystemTopology (1 );
28772913 }
28782914
28792915 initEnergyMonitoring ();
@@ -2905,23 +2941,29 @@ PCM::PCM() :
29052941#endif
29062942}
29072943
2908- void PCM::printDetailedSystemTopology ()
2944+ void PCM::printDetailedSystemTopology (const int detailLevel )
29092945{
29102946 // produce debug output similar to Intel MPI cpuinfo
29112947 if (true )
29122948 {
29132949 std::cerr << " \n ===== Processor topology =====\n " ;
2914- std::cerr << " OS_Processor Thread_Id Core_Id Tile_Id Package_Id Core_Type Native_CPU_Model\n " ;
2950+ std::cerr << " OS_Processor Thread_Id Core_Id " ;
2951+ if (detailLevel > 0 ) std::cerr << " Module_Id " ;
2952+ std::cerr << " Tile_Id " ;
2953+ if (detailLevel > 0 ) std::cerr << " Die_Id Die_Group_Id " ;
2954+ std::cerr << " Package_Id Core_Type Native_CPU_Model\n " ;
29152955 std::map<uint32, std::vector<uint32> > os_id_by_core, os_id_by_tile, core_id_by_socket;
29162956 size_t counter = 0 ;
29172957 for (auto it = topology.begin (); it != topology.end (); ++it)
29182958 {
29192959 std::cerr << std::left << std::setfill (' ' )
29202960 << std::setw (16 ) << ((it->os_id >= 0 ) ? it->os_id : counter)
29212961 << std::setw (16 ) << it->thread_id
2922- << std::setw (16 ) << it->core_id
2923- << std::setw (16 ) << it->tile_id
2924- << std::setw (16 ) << it->socket
2962+ << std::setw (16 ) << it->core_id ;
2963+ if (detailLevel > 0 ) std::cerr << std::setw (16 ) << it->module_id ;
2964+ std::cerr << std::setw (16 ) << it->tile_id ;
2965+ if (detailLevel > 0 ) std::cerr << std::setw (16 ) << it->die_id << std::setw (16 ) << it->die_grp_id ;
2966+ std::cerr << std::setw (16 ) << it->socket
29252967 << std::setw (16 ) << it->getCoreTypeStr ()
29262968 << std::setw (16 ) << it->native_cpu_model
29272969 << " \n " ;
0 commit comments