Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code to periodically track IProfiler overhead #20898

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 118 additions & 7 deletions runtime/compiler/runtime/IProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,12 @@ TR_IProfiler::TR_IProfiler(J9JITConfig *jitConfig)
{
_isIProfilingEnabled = false;
}
#if defined(TRACK_SAMPLE_SOURCES)
if (_isIProfilingEnabled)
{
_trackProfilingActivity = feGetEnv("TR_TrackIProfilerActivity") != NULL;
}
#endif
}
}

Expand Down Expand Up @@ -3982,14 +3988,16 @@ TR_IProfiler::postIprofilingBufferToWorkingQueue(J9VMThread * vmThread, const U_
// Method executed by the java thread when jitHookBytecodeProfiling() is called
bool TR_IProfiler::processProfilingBuffer(J9VMThread *vmThread, const U_8* dataStart, UDATA size)
{
if (_numOutstandingBuffers >= TR::Options::_iprofilerNumOutstandingBuffers ||
if ((_numOutstandingBuffers >= TR::Options::_iprofilerNumOutstandingBuffers ||
_compInfo->getPersistentInfo()->getLoadFactor() >= 1) // More active threads than CPUs
#if defined(TRACK_SAMPLE_SOURCES)
&& !_trackProfilingActivity // If tracking is enabled we want to parse the buffer on the dedicated IProfiler thread
#endif
)
{
if (100*_numRequestsSkipped >= (uint64_t)TR::Options::_iprofilerBufferMaxPercentageToDiscard * _numRequests)
{
// too many skipped requests; let the java thread handle this one
//records = parseBuffer(vmThread, cursor, size, iProfiler);
//setProfilingBufferCursor(vmThread, (U_8*)cursor);
return false; // delegate the processing to the java thread
}
else // skip this request altogether
Expand All @@ -4004,8 +4012,6 @@ bool TR_IProfiler::processProfilingBuffer(J9VMThread *vmThread, const U_8* dataS
{
// If posting fails, we should let the app thread process the buffer
return false;
//_numRequestsSkipped++;
//setProfilingBufferCursor(vmThread, (U_8*)dataStart);
}
}
return true;
Expand Down Expand Up @@ -4144,6 +4150,55 @@ static inline uint16_t readU16(U_8 *_offset)
static inline uint16_t readU16(U_8 *_offset) { return (*(U_16*)(_offset)); }
#endif /* !FIXUP_UNALIGNED */

#if defined(TRACK_SAMPLE_SOURCES)
void
TR_IProfiler::TopFiveMethods::processElement(const std::pair<const J9ROMMethod*, uint32_t> & newElem)
{
if (_sortedList.size() == 5)
{
if (_sortedList.back().second < newElem.second)
{
// Take the last element out (smallest) to make room for the new one
_sortedList.pop_back();
}
}
if (_sortedList.size() < 5)
{
// Insert new element
bool inserted = false;
for (auto it = _sortedList.begin(); it != _sortedList.end(); ++it)
{
if (it->second < newElem.second) // compare number of samples
{
_sortedList.insert(it, newElem);
inserted = true;
break;
}
} // end for
if (!inserted)
{
_sortedList.push_back(newElem);
}
}
} // processElement

void
TR_IProfiler::TopFiveMethods::print(const std::map<const J9ROMMethod *, std::string> &methodNames)
{
TR_VerboseLog::vlogAcquire();
TR_VerboseLog::writeLine(TR_Vlog_IPROFILER, "Top five methods in terms of samples in the last interval:");
for (auto it = _sortedList.cbegin(); it != _sortedList.cend(); ++it)
{
const J9ROMMethod *romMethod = it->first;
uint32_t numSamples = it->second;
auto methodEntryIter = methodNames.find(romMethod);
const std::string methodName = (methodEntryIter != methodNames.end()) ? methodEntryIter->second : "";
TR_VerboseLog::writeLine(TR_Vlog_IPROFILER, "%lu samples in method %p %s", (unsigned long)numSamples, romMethod, methodName.c_str());
}
TR_VerboseLog::vlogRelease();
}

#endif //#if defined(TRACK_SAMPLE_SOURCES)

//------------------------------ parseBuffer -------------------------
// Parses a buffer from the VM and populates a hash table
Expand Down Expand Up @@ -4414,7 +4469,39 @@ UDATA TR_IProfiler::parseBuffer(J9VMThread * vmThread, const U_8* dataStart, UDA

if (addSample && !verboseReparse)
{
profilingSample ((uintptr_t)pc, (uintptr_t)data, true);
TR_IPBytecodeHashTableEntry *entry = profilingSample ((uintptr_t)pc, (uintptr_t)data, true);
#if defined(TRACK_SAMPLE_SOURCES)
if (_trackProfilingActivity && entry && vmThread == _iprofilerThread) // only IProfiler thread is allowed to execute this)
{
// If this sample does not yet have the romMethod set, find it and set it
if (entry->getROMMethod() == NULL)
{
J9ROMClass *romClass = NULL;
J9ROMMethod *romMethod = findROMMethodFromPC(vmThread, (uintptr_t)pc, romClass);
entry->setROMMethod(romMethod);
// Determine the full method name and memorize it
if (romMethod && romClass)
{
auto methodIt = _methodNames.find(romMethod);
if (methodIt == _methodNames.end())
{
J9UTF8* className = J9ROMCLASS_CLASSNAME(romClass);
J9UTF8* name = J9ROMMETHOD_NAME(romMethod);
J9UTF8* signature = J9ROMMETHOD_SIGNATURE(romMethod);
std::string fullMethodName(utf8Data(className), J9UTF8_LENGTH(className));
size_t len = J9UTF8_LENGTH(className) + J9UTF8_LENGTH(name) + J9UTF8_LENGTH(signature) + 2;
fullMethodName.reserve(len);
fullMethodName.push_back('.');
fullMethodName.append(utf8Data(name), J9UTF8_LENGTH(name));
fullMethodName.append(utf8Data(signature), J9UTF8_LENGTH(signature));
_methodNames.insert({romMethod, fullMethodName});
}
}
}
// Search the hashtable for my method and increment the number of samples
_samplesPerMethod[entry->getROMMethod()]++;
}
#endif
records++;
}
}
Expand All @@ -4426,7 +4513,31 @@ UDATA TR_IProfiler::parseBuffer(J9VMThread * vmThread, const U_8* dataStart, UDA
return 0;
}
_iprofilerNumRecords += records;

#if defined(TRACK_SAMPLE_SOURCES)
// If enough time has passed since the last operation
// determine the top 5 romMethods with the most number of samples and
// print their names. Then clear _samplesPerMethod and start all over.
static uint64_t lastPrintTime = 0;
static uint64_t lastNumIPsamples = 0;
if (_trackProfilingActivity && vmThread == _iprofilerThread) // only IProfiler thread is allowed to execute this)
{
uint64_t crtTime = _compInfo->getPersistentInfo()->getElapsedTime();
if (crtTime - lastPrintTime >= 5000) // every 5 seconds or so
{
TopFiveMethods topFive;
for (auto it = _samplesPerMethod.begin(); it != _samplesPerMethod.end(); ++it)
topFive.processElement(*it);
TR_VerboseLog::writeLineLocked(TR_Vlog_IPROFILER, "t=%lu IP samples in the last interval = %lu Total CPU spent in IP thread = %u ms",
(unsigned long)crtTime, (unsigned long)(_iprofilerNumRecords - lastNumIPsamples),
(unsigned)(j9thread_get_self_cpu_time(j9thread_self())/1000000));
topFive.print(_methodNames);
// Prepare for next interval
_samplesPerMethod.clear();
lastPrintTime = crtTime;
lastNumIPsamples = _iprofilerNumRecords;
}
}
#endif
return records;
}

Expand Down
38 changes: 36 additions & 2 deletions runtime/compiler/runtime/IProfiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@
* Interpreter Profiling Framework" by Ian Gartley, Marius Pirvu, Vijay
* Sundaresan and Nikola Grecvski, published in *Code Generation and Optimization (CGO)*, 2013.
*/
#define TRACK_SAMPLE_SOURCES
#if defined(TRACK_SAMPLE_SOURCES)
#include <list>
#include <string>
#include <map>
#endif // defined(TRACK_SAMPLE_SOURCES)

#include "j9.h"
#include "j9cfg.h"
Expand Down Expand Up @@ -196,9 +202,18 @@ class TR_IPBytecodeHashTableEntry
void * operator new (size_t size, void * placement) {return placement;}
void operator delete(void *p, void *) {}

TR_IPBytecodeHashTableEntry(uintptr_t pc) : _next(NULL), _pc(pc), _lastSeenClassUnloadID(-1), _entryFlags(0), _persistFlags(IPBC_ENTRY_CAN_PERSIST_FLAG) {}
TR_IPBytecodeHashTableEntry(uintptr_t pc) : _next(NULL), _pc(pc), _lastSeenClassUnloadID(-1), _entryFlags(0), _persistFlags(IPBC_ENTRY_CAN_PERSIST_FLAG)
{
#if defined(TRACK_SAMPLE_SOURCES)
_romMethod = NULL;
#endif
}
virtual ~TR_IPBytecodeHashTableEntry() {}
uintptr_t getPC() const { return _pc; }
#if defined(TRACK_SAMPLE_SOURCES)
J9ROMMethod *getROMMethod() const { return _romMethod; }
void setROMMethod(J9ROMMethod *method) { _romMethod = method; }
#endif // TRACK_SAMPLE_SOURCES
TR_IPBytecodeHashTableEntry * getNext() const { return _next; }
void setNext(TR_IPBytecodeHashTableEntry *n) { _next = n; }
int32_t getLastSeenClassUnloadID() const { return _lastSeenClassUnloadID; }
Expand Down Expand Up @@ -243,6 +258,9 @@ class TR_IPBytecodeHashTableEntry
protected:
TR_IPBytecodeHashTableEntry *_next;
uintptr_t _pc;
#if defined(TRACK_SAMPLE_SOURCES)
J9ROMMethod *_romMethod; // romMethod containing the _pc
#endif // TRACK_SAMPLE_SOURCES
int32_t _lastSeenClassUnloadID;

enum TR_PersistenceFlags
Expand Down Expand Up @@ -818,6 +836,22 @@ class TR_IProfiler : public TR_ExternalProfiler

volatile TR_IprofilerThreadLifetimeStates _iprofilerThreadLifetimeState;

#if defined(TRACK_SAMPLE_SOURCES)
bool _trackProfilingActivity;
// Maps ROMMethods to number of samples. Uses malloc underneath
std::map<const J9ROMMethod *, uint32_t> _samplesPerMethod;
// Maps ROMMethods to their names; this avoids any class unloading concerns
std::map<const J9ROMMethod *, std::string> _methodNames;
class TopFiveMethods
{
public:
void processElement(const std::pair<const J9ROMMethod*, uint32_t> & newElem);
void print(const std::map<const J9ROMMethod *, std::string> &names);
private:
std::list<std::pair<const J9ROMMethod*, uint32_t> > _sortedList;
}; // class TopFiveMethods
#endif

public:
static int32_t _STATS_noProfilingInfo;
static int32_t _STATS_doesNotWantToGiveProfilingInfo;
Expand Down Expand Up @@ -852,7 +886,7 @@ class TR_IProfiler : public TR_ExternalProfiler

static int32_t _STATS_IPEntryRead;
static int32_t _STATS_IPEntryChoosePersistent;
};
}; // TR_IProfiler

void printIprofilerStats(TR::Options *options, J9JITConfig * jitConfig, TR_IProfiler *iProfiler, const char *event);
void turnOffInterpreterProfiling(J9JITConfig *jitConfig);
Expand Down