diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /tools/profiler/core | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | uxp-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz |
Add m-esr52 at 52.6.0
Diffstat (limited to 'tools/profiler/core')
32 files changed, 9103 insertions, 0 deletions
diff --git a/tools/profiler/core/EHABIStackWalk.cpp b/tools/profiler/core/EHABIStackWalk.cpp new file mode 100644 index 0000000000..76068cdea6 --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.cpp @@ -0,0 +1,678 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI, as described in: + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf + * + * This handles only the ARM-defined "personality routines" (chapter + * 9), and don't track the value of FP registers, because profiling + * needs only chain of PC/SP values. + * + * Because the exception handling info may not be accurate for all + * possible places where an async signal could occur (e.g., in a + * prologue or epilogue), this bounds-checks all stack accesses. + * + * This file uses "struct" for structures in the exception tables and + * "class" otherwise. We should avoid violating the C++11 + * standard-layout rules in the former. + */ + +#include "EHABIStackWalk.h" + +#include "shared-libraries.h" +#include "platform.h" + +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/EndianUtils.h" + +#include <algorithm> +#include <elf.h> +#include <stdint.h> +#include <vector> +#include <string> + +#ifndef PT_ARM_EXIDX +#define PT_ARM_EXIDX 0x70000001 +#endif + +// Bug 1082817: ICS B2G has a buggy linker that doesn't always ensure +// that the EXIDX is sorted by address, as the spec requires. 
So in +// that case we build and sort an array of pointers into the index, +// and binary-search that; otherwise, we search the index in place +// (avoiding the time and space overhead of the indirection). +#if defined(ANDROID_VERSION) && ANDROID_VERSION < 16 +#define HAVE_UNSORTED_EXIDX +#endif + +namespace mozilla { + +struct PRel31 { + uint32_t mBits; + bool topBit() const { return mBits & 0x80000000; } + uint32_t value() const { return mBits & 0x7fffffff; } + int32_t offset() const { return (static_cast<int32_t>(mBits) << 1) >> 1; } + const void *compute() const { + return reinterpret_cast<const char *>(this) + offset(); + } +private: + PRel31(const PRel31 &copied) = delete; + PRel31() = delete; +}; + +struct EHEntry { + PRel31 startPC; + PRel31 exidx; +private: + EHEntry(const EHEntry &copied) = delete; + EHEntry() = delete; +}; + +class EHState { + // Note that any core register can be used as a "frame pointer" to + // influence the unwinding process, so this must track all of them. + uint32_t mRegs[16]; +public: + bool unwind(const EHEntry *aEntry, const void *stackBase); + uint32_t &operator[](int i) { return mRegs[i]; } + const uint32_t &operator[](int i) const { return mRegs[i]; } + EHState(const mcontext_t &); +}; + +enum { + R_SP = 13, + R_LR = 14, + R_PC = 15 +}; + +#ifdef HAVE_UNSORTED_EXIDX +class EHEntryHandle { + const EHEntry *mValue; +public: + EHEntryHandle(const EHEntry *aEntry) : mValue(aEntry) { } + const EHEntry *value() const { return mValue; } +}; + +bool operator<(const EHEntryHandle &lhs, const EHEntryHandle &rhs) { + return lhs.value()->startPC.compute() < rhs.value()->startPC.compute(); +} +#endif + +class EHTable { + uint32_t mStartPC; + uint32_t mEndPC; + uint32_t mLoadOffset; +#ifdef HAVE_UNSORTED_EXIDX + // In principle we should be able to binary-search the index section in + // place, but the ICS toolchain's linker is noncompliant and produces + // indices that aren't entirely sorted (e.g., libc). 
So we have this: + std::vector<EHEntryHandle> mEntries; + typedef std::vector<EHEntryHandle>::const_iterator EntryIterator; + EntryIterator entriesBegin() const { return mEntries.begin(); } + EntryIterator entriesEnd() const { return mEntries.end(); } + static const EHEntry* entryGet(EntryIterator aEntry) { + return aEntry->value(); + } +#else + typedef const EHEntry *EntryIterator; + EntryIterator mEntriesBegin, mEntriesEnd; + EntryIterator entriesBegin() const { return mEntriesBegin; } + EntryIterator entriesEnd() const { return mEntriesEnd; } + static const EHEntry* entryGet(EntryIterator aEntry) { return aEntry; } +#endif + std::string mName; +public: + EHTable(const void *aELF, size_t aSize, const std::string &aName); + const EHEntry *lookup(uint32_t aPC) const; + bool isValid() const { return entriesEnd() != entriesBegin(); } + const std::string &name() const { return mName; } + uint32_t startPC() const { return mStartPC; } + uint32_t endPC() const { return mEndPC; } + uint32_t loadOffset() const { return mLoadOffset; } +}; + +class EHAddrSpace { + std::vector<uint32_t> mStarts; + std::vector<EHTable> mTables; + static mozilla::Atomic<const EHAddrSpace*> sCurrent; +public: + explicit EHAddrSpace(const std::vector<EHTable>& aTables); + const EHTable *lookup(uint32_t aPC) const; + static void Update(); + static const EHAddrSpace *Get(); +}; + + +void EHABIStackWalkInit() +{ + EHAddrSpace::Update(); +} + +size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase, + void **aSPs, void **aPCs, const size_t aNumFrames) +{ + const EHAddrSpace *space = EHAddrSpace::Get(); + EHState state(aContext); + size_t count = 0; + + while (count < aNumFrames) { + uint32_t pc = state[R_PC], sp = state[R_SP]; + aPCs[count] = reinterpret_cast<void *>(pc); + aSPs[count] = reinterpret_cast<void *>(sp); + count++; + + if (!space) + break; + // TODO: cache these lookups. 
Binary-searching libxul is + // expensive (possibly more expensive than doing the actual + // unwind), and even a small cache should help. + const EHTable *table = space->lookup(pc); + if (!table) + break; + const EHEntry *entry = table->lookup(pc); + if (!entry) + break; + if (!state.unwind(entry, stackBase)) + break; + } + + return count; +} + + +class EHInterp { +public: + // Note that stackLimit is exclusive and stackBase is inclusive + // (i.e, stackLimit < SP <= stackBase), following the convention + // set by the AAPCS spec. + EHInterp(EHState &aState, const EHEntry *aEntry, + uint32_t aStackLimit, uint32_t aStackBase) + : mState(aState), + mStackLimit(aStackLimit), + mStackBase(aStackBase), + mNextWord(0), + mWordsLeft(0), + mFailed(false) + { + const PRel31 &exidx = aEntry->exidx; + uint32_t firstWord; + + if (exidx.mBits == 1) { // EXIDX_CANTUNWIND + mFailed = true; + return; + } + if (exidx.topBit()) { + firstWord = exidx.mBits; + } else { + mNextWord = reinterpret_cast<const uint32_t *>(exidx.compute()); + firstWord = *mNextWord++; + } + + switch (firstWord >> 24) { + case 0x80: // short + mWord = firstWord << 8; + mBytesLeft = 3; + break; + case 0x81: case 0x82: // long; catch descriptor size ignored + mWord = firstWord << 16; + mBytesLeft = 2; + mWordsLeft = (firstWord >> 16) & 0xff; + break; + default: + // unknown personality + mFailed = true; + } + } + + bool unwind(); + +private: + // TODO: GCC has been observed not CSEing repeated reads of + // mState[R_SP] with writes to mFailed between them, suggesting that + // it hasn't determined that they can't alias and is thus missing + // optimization opportunities. So, we may want to flatten EHState + // into this class; this may also make the code simpler. 
+ EHState &mState; + uint32_t mStackLimit; + uint32_t mStackBase; + const uint32_t *mNextWord; + uint32_t mWord; + uint8_t mWordsLeft; + uint8_t mBytesLeft; + bool mFailed; + + enum { + I_ADDSP = 0x00, // 0sxxxxxx (subtract if s) + M_ADDSP = 0x80, + I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set) + M_POPMASK = 0xf0, + I_MOVSP = 0x90, // 1001nnnn + M_MOVSP = 0xf0, + I_POPN = 0xa0, // 1010lnnn + M_POPN = 0xf0, + I_FINISH = 0xb0, // 10110000 + I_POPLO = 0xb1, // 10110001 0000iiii (if any i set) + I_ADDSPBIG = 0xb2, // 10110010 uleb128 + I_POPFDX = 0xb3, // 10110011 sssscccc + I_POPFDX8 = 0xb8, // 10111nnn + M_POPFDX8 = 0xf8, + // "Intel Wireless MMX" extensions omitted. + I_POPFDD = 0xc8, // 1100100h sssscccc + M_POPFDD = 0xfe, + I_POPFDD8 = 0xd0, // 11010nnn + M_POPFDD8 = 0xf8 + }; + + uint8_t next() { + if (mBytesLeft == 0) { + if (mWordsLeft == 0) { + return I_FINISH; + } + mWordsLeft--; + mWord = *mNextWord++; + mBytesLeft = 4; + } + mBytesLeft--; + mWord = (mWord << 8) | (mWord >> 24); // rotate + return mWord; + } + + uint32_t &vSP() { return mState[R_SP]; } + uint32_t *ptrSP() { return reinterpret_cast<uint32_t *>(vSP()); } + + void checkStackBase() { if (vSP() > mStackBase) mFailed = true; } + void checkStackLimit() { if (vSP() <= mStackLimit) mFailed = true; } + void checkStackAlign() { if ((vSP() & 3) != 0) mFailed = true; } + void checkStack() { + checkStackBase(); + checkStackLimit(); + checkStackAlign(); + } + + void popRange(uint8_t first, uint8_t last, uint16_t mask) { + bool hasSP = false; + uint32_t tmpSP; + if (mask == 0) + mFailed = true; + for (uint8_t r = first; r <= last; ++r) { + if (mask & 1) { + if (r == R_SP) { + hasSP = true; + tmpSP = *ptrSP(); + } else + mState[r] = *ptrSP(); + vSP() += 4; + checkStackBase(); + if (mFailed) + return; + } + mask >>= 1; + } + if (hasSP) { + vSP() = tmpSP; + checkStack(); + } + } +}; + + +bool EHState::unwind(const EHEntry *aEntry, const void *stackBasePtr) { + // The unwinding program cannot set SP to 
less than the initial value. + uint32_t stackLimit = mRegs[R_SP] - 4; + uint32_t stackBase = reinterpret_cast<uint32_t>(stackBasePtr); + EHInterp interp(*this, aEntry, stackLimit, stackBase); + return interp.unwind(); +} + +bool EHInterp::unwind() { + mState[R_PC] = 0; + checkStack(); + while (!mFailed) { + uint8_t insn = next(); +#if DEBUG_EHABI_UNWIND + LOGF("unwind insn = %02x", (unsigned)insn); +#endif + // Try to put the common cases first. + + // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4 + // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4 + if ((insn & M_ADDSP) == I_ADDSP) { + uint32_t offset = ((insn & 0x3f) << 2) + 4; + if (insn & 0x40) { + vSP() -= offset; + checkStackLimit(); + } else { + vSP() += offset; + checkStackBase(); + } + continue; + } + + // 10100nnn: Pop r4-r[4+nnn] + // 10101nnn: Pop r4-r[4+nnn], r14 + if ((insn & M_POPN) == I_POPN) { + uint8_t n = (insn & 0x07) + 1; + bool lr = insn & 0x08; + uint32_t *ptr = ptrSP(); + vSP() += (n + (lr ? 1 : 0)) * 4; + checkStackBase(); + for (uint8_t r = 4; r < 4 + n; ++r) + mState[r] = *ptr++; + if (lr) + mState[R_LR] = *ptr++; + continue; + } + + // 1011000: Finish + if (insn == I_FINISH) { + if (mState[R_PC] == 0) { + mState[R_PC] = mState[R_LR]; + // Non-standard change (bug 916106): Prevent the caller from + // re-using LR. Since the caller is by definition not a leaf + // routine, it will have to restore LR from somewhere to + // return to its own caller, so we can safely zero it here. + // This makes a difference only if an error in unwinding + // (e.g., caused by starting from within a prologue/epilogue) + // causes us to load a pointer to a leaf routine as LR; if we + // don't do something, we'll go into an infinite loop of + // "returning" to that same function. 
+ mState[R_LR] = 0; + } + return true; + } + + // 1001nnnn: Set vsp = r[nnnn] + if ((insn & M_MOVSP) == I_MOVSP) { + vSP() = mState[insn & 0x0f]; + checkStack(); + continue; + } + + // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD) + // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD) + if ((insn & M_POPFDD) == I_POPFDD) { + uint8_t n = (next() & 0x0f) + 1; + // Note: if the 16+ssss+cccc > 31, the encoding is reserved. + // As the space is currently unused, we don't try to check. + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD) + if ((insn & M_POPFDD8) == I_POPFDD8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2) + if (insn == I_ADDSPBIG) { + uint32_t acc = 0; + uint8_t shift = 0; + uint8_t byte; + do { + if (shift >= 32) + return false; + byte = next(); + acc |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + uint32_t offset = 0x204 + (acc << 2); + // The calculations above could have overflowed. 
+ // But the one we care about is this: + if (vSP() + offset < vSP()) + mFailed = true; + vSP() += offset; + // ...so that this is the only other check needed: + checkStackBase(); + continue; + } + + // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4} + if ((insn & M_POPMASK) == I_POPMASK) { + popRange(4, 15, ((insn & 0x0f) << 8) | next()); + continue; + } + + // 1011001 0000iiii (i not all 0): Pop under mask {r3-r0} + if (insn == I_POPLO) { + popRange(0, 3, next() & 0x0f); + continue; + } + + // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX) + if (insn == I_POPFDX) { + uint8_t n = (next() & 0x0f) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX) + if ((insn & M_POPFDX8) == I_POPFDX8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // unhandled instruction +#ifdef DEBUG_EHABI_UNWIND + LOGF("Unhandled EHABI instruction 0x%02x", insn); +#endif + mFailed = true; + } + return false; +} + + +bool operator<(const EHTable &lhs, const EHTable &rhs) { + return lhs.startPC() < rhs.startPC(); +} + +// Async signal unsafe. 
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables) + : mTables(aTables) +{ + std::sort(mTables.begin(), mTables.end()); + DebugOnly<uint32_t> lastEnd = 0; + for (std::vector<EHTable>::iterator i = mTables.begin(); + i != mTables.end(); ++i) { + MOZ_ASSERT(i->startPC() >= lastEnd); + mStarts.push_back(i->startPC()); + lastEnd = i->endPC(); + } +} + +const EHTable *EHAddrSpace::lookup(uint32_t aPC) const { + ptrdiff_t i = (std::upper_bound(mStarts.begin(), mStarts.end(), aPC) + - mStarts.begin()) - 1; + + if (i < 0 || aPC >= mTables[i].endPC()) + return 0; + return &mTables[i]; +} + + +const EHEntry *EHTable::lookup(uint32_t aPC) const { + MOZ_ASSERT(aPC >= mStartPC); + if (aPC >= mEndPC) + return nullptr; + + EntryIterator begin = entriesBegin(); + EntryIterator end = entriesEnd(); + MOZ_ASSERT(begin < end); + if (aPC < reinterpret_cast<uint32_t>(entryGet(begin)->startPC.compute())) + return nullptr; + + while (end - begin > 1) { +#ifdef EHABI_UNWIND_MORE_ASSERTS + if (entryGet(end - 1)->startPC.compute() + < entryGet(begin)->startPC.compute()) { + MOZ_CRASH("unsorted exidx"); + } +#endif + EntryIterator mid = begin + (end - begin) / 2; + if (aPC < reinterpret_cast<uint32_t>(entryGet(mid)->startPC.compute())) + end = mid; + else + begin = mid; + } + return entryGet(begin); +} + + +#if MOZ_LITTLE_ENDIAN +static const unsigned char hostEndian = ELFDATA2LSB; +#elif MOZ_BIG_ENDIAN +static const unsigned char hostEndian = ELFDATA2MSB; +#else +#error "No endian?" +#endif + +// Async signal unsafe: std::vector::reserve, std::string copy ctor. 
+EHTable::EHTable(const void *aELF, size_t aSize, const std::string &aName) + : mStartPC(~0), // largest uint32_t + mEndPC(0), +#ifndef HAVE_UNSORTED_EXIDX + mEntriesBegin(nullptr), + mEntriesEnd(nullptr), +#endif + mName(aName) +{ + const uint32_t base = reinterpret_cast<uint32_t>(aELF); + + if (aSize < sizeof(Elf32_Ehdr)) + return; + + const Elf32_Ehdr &file = *(reinterpret_cast<Elf32_Ehdr *>(base)); + if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 || + file.e_ident[EI_CLASS] != ELFCLASS32 || + file.e_ident[EI_DATA] != hostEndian || + file.e_ident[EI_VERSION] != EV_CURRENT || + file.e_ident[EI_OSABI] != ELFOSABI_SYSV || +#ifdef EI_ABIVERSION + file.e_ident[EI_ABIVERSION] != 0 || +#endif + file.e_machine != EM_ARM || + file.e_version != EV_CURRENT) + // e_flags? + return; + + MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize); + const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0; + for (unsigned i = 0; i < file.e_phnum; ++i) { + const Elf32_Phdr &phdr = + *(reinterpret_cast<Elf32_Phdr *>(base + file.e_phoff + + i * file.e_phentsize)); + if (phdr.p_type == PT_ARM_EXIDX) { + exidxHdr = &phdr; + } else if (phdr.p_type == PT_LOAD) { + if (phdr.p_offset == 0) { + zeroHdr = &phdr; + } + if (phdr.p_flags & PF_X) { + mStartPC = std::min(mStartPC, phdr.p_vaddr); + mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz); + } + } + } + if (!exidxHdr) + return; + if (!zeroHdr) + return; + mLoadOffset = base - zeroHdr->p_vaddr; + mStartPC += mLoadOffset; + mEndPC += mLoadOffset; + + // Create a sorted index of the index to work around linker bugs. 
+ const EHEntry *startTable = + reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr); + const EHEntry *endTable = + reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr + + exidxHdr->p_memsz); +#ifdef HAVE_UNSORTED_EXIDX + mEntries.reserve(endTable - startTable); + for (const EHEntry *i = startTable; i < endTable; ++i) + mEntries.push_back(i); + std::sort(mEntries.begin(), mEntries.end()); +#else + mEntriesBegin = startTable; + mEntriesEnd = endTable; +#endif +} + + +mozilla::Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr); + +// Async signal safe; can fail if Update() hasn't returned yet. +const EHAddrSpace *EHAddrSpace::Get() { + return sCurrent; +} + +// Collect unwinding information from loaded objects. Calls after the +// first have no effect. Async signal unsafe. +void EHAddrSpace::Update() { + const EHAddrSpace *space = sCurrent; + if (space) + return; + + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + std::vector<EHTable> tables; + + for (size_t i = 0; i < info.GetSize(); ++i) { + const SharedLibrary &lib = info.GetEntry(i); + if (lib.GetOffset() != 0) + // TODO: if it has a name, and we haven't seen a mapping of + // offset 0 for that file, try opening it and reading the + // headers instead. The only thing I've seen so far that's + // linked so as to need that treatment is the dynamic linker + // itself. 
+ continue; + EHTable tab(reinterpret_cast<const void *>(lib.GetStart()), + lib.GetEnd() - lib.GetStart(), lib.GetName()); + if (tab.isValid()) + tables.push_back(tab); + } + space = new EHAddrSpace(tables); + + if (!sCurrent.compareExchange(nullptr, space)) { + delete space; + space = sCurrent; + } +} + + +EHState::EHState(const mcontext_t &context) { +#ifdef linux + mRegs[0] = context.arm_r0; + mRegs[1] = context.arm_r1; + mRegs[2] = context.arm_r2; + mRegs[3] = context.arm_r3; + mRegs[4] = context.arm_r4; + mRegs[5] = context.arm_r5; + mRegs[6] = context.arm_r6; + mRegs[7] = context.arm_r7; + mRegs[8] = context.arm_r8; + mRegs[9] = context.arm_r9; + mRegs[10] = context.arm_r10; + mRegs[11] = context.arm_fp; + mRegs[12] = context.arm_ip; + mRegs[13] = context.arm_sp; + mRegs[14] = context.arm_lr; + mRegs[15] = context.arm_pc; +#else +# error "Unhandled OS for ARM EHABI unwinding" +#endif +} + +} // namespace mozilla + diff --git a/tools/profiler/core/EHABIStackWalk.h b/tools/profiler/core/EHABIStackWalk.h new file mode 100644 index 0000000000..5529d9511f --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.h @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI; see the comment at the top of + * the .cpp file for details. 
+ */ + +#ifndef mozilla_EHABIStackWalk_h__ +#define mozilla_EHABIStackWalk_h__ + +#include <stddef.h> +#include <ucontext.h> + +namespace mozilla { + +void EHABIStackWalkInit(); + +size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase, + void **aSPs, void **aPCs, size_t aNumFrames); + +} + +#endif diff --git a/tools/profiler/core/GeckoSampler.cpp b/tools/profiler/core/GeckoSampler.cpp new file mode 100644 index 0000000000..f4249a7a5e --- /dev/null +++ b/tools/profiler/core/GeckoSampler.cpp @@ -0,0 +1,1306 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <algorithm> +#include <string> +#include <stdio.h> +#include <fstream> +#include <sstream> +#include "GeckoProfiler.h" +#ifndef SPS_STANDALONE +#include "SaveProfileTask.h" +#include "nsThreadUtils.h" +#include "prenv.h" +#include "prtime.h" +#include "nsXULAppAPI.h" +#endif +#include "ProfileEntry.h" +#include "SyncProfile.h" +#include "platform.h" +#include "shared-libraries.h" +#include "mozilla/StackWalk.h" +#include "GeckoSampler.h" + +// JSON +#include "ProfileJSONWriter.h" + +#ifndef SPS_STANDALONE +// Meta +#include "nsXPCOM.h" +#include "nsXPCOMCID.h" +#include "nsIHttpProtocolHandler.h" +#include "nsServiceManagerUtils.h" +#include "nsIXULRuntime.h" +#include "nsIXULAppInfo.h" +#include "nsDirectoryServiceUtils.h" +#include "nsDirectoryServiceDefs.h" +#include "nsIObserverService.h" +#include "mozilla/Services.h" +#include "PlatformMacros.h" +#include "nsTArray.h" + +#include "mozilla/ProfileGatherer.h" +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + #include "FennecJNIWrappers.h" +#endif + +#ifndef SPS_STANDALONE +// JS +#include "jsfriendapi.h" +#include "js/ProfilingFrameIterator.h" +#endif + +#if defined(MOZ_PROFILING) 
&& (defined(XP_MACOSX) || defined(XP_WIN)) + #define USE_NS_STACKWALK +#endif + +#if defined(XP_WIN) +typedef CONTEXT tickcontext_t; +#elif defined(LINUX) +#include <ucontext.h> +typedef ucontext_t tickcontext_t; +#endif + +#if defined(LINUX) || defined(XP_MACOSX) +#include <sys/types.h> +pid_t gettid(); +#endif + +#if defined(__arm__) && defined(ANDROID) + // Should also work on ARM Linux, but not tested there yet. + #define USE_EHABI_STACKWALK +#endif +#ifdef USE_EHABI_STACKWALK + #include "EHABIStackWalk.h" +#endif + +#ifndef SPS_STANDALONE +#if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" +#endif +#endif + +using std::string; +using namespace mozilla; + +#ifndef MAXPATHLEN + #ifdef PATH_MAX + #define MAXPATHLEN PATH_MAX + #elif defined(MAX_PATH) + #define MAXPATHLEN MAX_PATH + #elif defined(_MAX_PATH) + #define MAXPATHLEN _MAX_PATH + #elif defined(CCHMAXPATH) + #define MAXPATHLEN CCHMAXPATH + #else + #define MAXPATHLEN 1024 + #endif +#endif + +#ifdef MOZ_VALGRIND +# include <valgrind/memcheck.h> +#else +# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len) ((void)0) +#endif + + +/////////////////////////////////////////////////////////////////////// +// BEGIN SaveProfileTask et al + +static void +AddSharedLibraryInfoToStream(std::ostream& aStream, const SharedLibrary& aLib) +{ + aStream << "{"; + aStream << "\"start\":" << aLib.GetStart(); + aStream << ",\"end\":" << aLib.GetEnd(); + aStream << ",\"offset\":" << aLib.GetOffset(); + aStream << ",\"name\":\"" << aLib.GetName() << "\""; + const std::string &breakpadId = aLib.GetBreakpadId(); + aStream << ",\"breakpadId\":\"" << breakpadId << "\""; +#ifdef XP_WIN + // FIXME: remove this XP_WIN code when the profiler plugin has switched to + // using breakpadId. 
+ std::string pdbSignature = breakpadId.substr(0, 32); + std::string pdbAgeStr = breakpadId.substr(32, breakpadId.size() - 1); + + std::stringstream stream; + stream << pdbAgeStr; + + unsigned pdbAge; + stream << std::hex; + stream >> pdbAge; + +#ifdef DEBUG + std::ostringstream oStream; + oStream << pdbSignature << std::hex << std::uppercase << pdbAge; + MOZ_ASSERT(breakpadId == oStream.str()); +#endif + + aStream << ",\"pdbSignature\":\"" << pdbSignature << "\""; + aStream << ",\"pdbAge\":" << pdbAge; + aStream << ",\"pdbName\":\"" << aLib.GetName() << "\""; +#endif + aStream << "}"; +} + +std::string +GetSharedLibraryInfoStringInternal() +{ + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + if (info.GetSize() == 0) + return "[]"; + + std::ostringstream os; + os << "["; + AddSharedLibraryInfoToStream(os, info.GetEntry(0)); + + for (size_t i = 1; i < info.GetSize(); i++) { + os << ","; + AddSharedLibraryInfoToStream(os, info.GetEntry(i)); + } + + os << "]"; + return os.str(); +} + +static bool +hasFeature(const char** aFeatures, uint32_t aFeatureCount, const char* aFeature) { + for(size_t i = 0; i < aFeatureCount; i++) { + if (strcmp(aFeatures[i], aFeature) == 0) + return true; + } + return false; +} + +GeckoSampler::GeckoSampler(double aInterval, int aEntrySize, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) + : Sampler(aInterval, true, aEntrySize) + , mPrimaryThreadProfile(nullptr) + , mBuffer(new ProfileBuffer(aEntrySize)) + , mSaveRequested(false) +#if defined(XP_WIN) + , mIntelPowerGadget(nullptr) +#endif +{ + mUseStackWalk = hasFeature(aFeatures, aFeatureCount, "stackwalk"); + + mProfileJS = hasFeature(aFeatures, aFeatureCount, "js"); + mProfileGPU = hasFeature(aFeatures, aFeatureCount, "gpu"); + mProfilePower = hasFeature(aFeatures, aFeatureCount, "power"); + // Users sometimes ask to filter by a list of threads but forget to request + // profiling non main threads. 
Let's make it implificit if we have a filter + mProfileThreads = hasFeature(aFeatures, aFeatureCount, "threads") || aFilterCount > 0; + mAddLeafAddresses = hasFeature(aFeatures, aFeatureCount, "leaf"); + mPrivacyMode = hasFeature(aFeatures, aFeatureCount, "privacy"); + mAddMainThreadIO = hasFeature(aFeatures, aFeatureCount, "mainthreadio"); + mProfileMemory = hasFeature(aFeatures, aFeatureCount, "memory"); + mTaskTracer = hasFeature(aFeatures, aFeatureCount, "tasktracer"); + mLayersDump = hasFeature(aFeatures, aFeatureCount, "layersdump"); + mDisplayListDump = hasFeature(aFeatures, aFeatureCount, "displaylistdump"); + mProfileRestyle = hasFeature(aFeatures, aFeatureCount, "restyle"); + +#if defined(XP_WIN) + if (mProfilePower) { + mIntelPowerGadget = new IntelPowerGadget(); + mProfilePower = mIntelPowerGadget->Init(); + } +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + mProfileJava = mozilla::jni::IsFennec() && + hasFeature(aFeatures, aFeatureCount, "java"); +#else + mProfileJava = false; +#endif + + // Deep copy aThreadNameFilters + MOZ_ALWAYS_TRUE(mThreadNameFilters.resize(aFilterCount)); + for (uint32_t i = 0; i < aFilterCount; ++i) { + mThreadNameFilters[i] = aThreadNameFilters[i]; + } + + // Deep copy aFeatures + MOZ_ALWAYS_TRUE(mFeatures.resize(aFeatureCount)); + for (uint32_t i = 0; i < aFeatureCount; ++i) { + mFeatures[i] = aFeatures[i]; + } + + bool ignore; + sStartTime = mozilla::TimeStamp::ProcessCreation(ignore); + + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + // Create ThreadProfile for each registered thread + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + + RegisterThread(info); + } + + SetActiveSampler(this); + } + +#ifdef MOZ_TASK_TRACER + if (mTaskTracer) { + mozilla::tasktracer::StartLogging(); + } +#endif + + mGatherer = new mozilla::ProfileGatherer(this); +} + +GeckoSampler::~GeckoSampler() +{ + if (IsActive()) + Stop(); + + 
SetActiveSampler(nullptr); + + // Destroy ThreadProfile for all threads + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + ThreadProfile* profile = info->Profile(); + if (profile) { + delete profile; + info->SetProfile(nullptr); + } + // We've stopped profiling. We no longer need to retain + // information for an old thread. + if (info->IsPendingDelete()) { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + i--; + } + } + } +#if defined(XP_WIN) + delete mIntelPowerGadget; +#endif + + // Cancel any in-flight async profile gatherering + // requests + mGatherer->Cancel(); +} + +void GeckoSampler::HandleSaveRequest() +{ + if (!mSaveRequested) + return; + mSaveRequested = false; + +#ifndef SPS_STANDALONE + // TODO: Use use the ipc/chromium Tasks here to support processes + // without XPCOM. + nsCOMPtr<nsIRunnable> runnable = new SaveProfileTask(); + NS_DispatchToMainThread(runnable); +#endif +} + +void GeckoSampler::DeleteExpiredMarkers() +{ + mBuffer->deleteExpiredStoredMarkers(); +} + +void GeckoSampler::StreamTaskTracer(SpliceableJSONWriter& aWriter) +{ +#ifdef MOZ_TASK_TRACER + aWriter.StartArrayProperty("data"); + UniquePtr<nsTArray<nsCString>> data = mozilla::tasktracer::GetLoggedData(sStartTime); + for (uint32_t i = 0; i < data->Length(); ++i) { + aWriter.StringElement((data->ElementAt(i)).get()); + } + aWriter.EndArray(); + + aWriter.StartArrayProperty("threads"); + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + for (size_t i = 0; i < sRegisteredThreads->size(); i++) { + // Thread meta data + ThreadInfo* info = sRegisteredThreads->at(i); + aWriter.StartObjectElement(); + if (XRE_GetProcessType() == GeckoProcessType_Plugin) { + // TODO Add the proper plugin name + aWriter.StringProperty("name", "Plugin"); + } else { + aWriter.StringProperty("name", info->Name()); + } + aWriter.IntProperty("tid", 
static_cast<int>(info->ThreadId())); + aWriter.EndObject(); + } + aWriter.EndArray(); + + aWriter.DoubleProperty("start", static_cast<double>(mozilla::tasktracer::GetStartTime())); +#endif +} + + +void GeckoSampler::StreamMetaJSCustomObject(SpliceableJSONWriter& aWriter) +{ + aWriter.IntProperty("version", 3); + aWriter.DoubleProperty("interval", interval()); + aWriter.IntProperty("stackwalk", mUseStackWalk); + +#ifndef SPS_STANDALONE + mozilla::TimeDuration delta = mozilla::TimeStamp::Now() - sStartTime; + aWriter.DoubleProperty("startTime", static_cast<double>(PR_Now()/1000.0 - delta.ToMilliseconds())); + + aWriter.IntProperty("processType", XRE_GetProcessType()); + + nsresult res; + nsCOMPtr<nsIHttpProtocolHandler> http = do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res); + if (!NS_FAILED(res)) { + nsAutoCString string; + + res = http->GetPlatform(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("platform", string.Data()); + + res = http->GetOscpu(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("oscpu", string.Data()); + + res = http->GetMisc(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("misc", string.Data()); + } + + nsCOMPtr<nsIXULRuntime> runtime = do_GetService("@mozilla.org/xre/runtime;1"); + if (runtime) { + nsAutoCString string; + + res = runtime->GetXPCOMABI(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("abi", string.Data()); + + res = runtime->GetWidgetToolkit(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("toolkit", string.Data()); + } + + nsCOMPtr<nsIXULAppInfo> appInfo = do_GetService("@mozilla.org/xre/app-info;1"); + if (appInfo) { + nsAutoCString string; + + res = appInfo->GetName(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("product", string.Data()); + } +#endif +} + +void GeckoSampler::ToStreamAsJSON(std::ostream& stream, double aSinceTime) +{ + SpliceableJSONWriter b(mozilla::MakeUnique<OStreamJSONWriteFunc>(stream)); + StreamJSON(b, aSinceTime); +} + +#ifndef 
SPS_STANDALONE +JSObject* GeckoSampler::ToJSObject(JSContext *aCx, double aSinceTime) +{ + JS::RootedValue val(aCx); + { + UniquePtr<char[]> buf = ToJSON(aSinceTime); + NS_ConvertUTF8toUTF16 js_string(nsDependentCString(buf.get())); + MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx, static_cast<const char16_t*>(js_string.get()), + js_string.Length(), &val)); + } + return &val.toObject(); +} + +void GeckoSampler::GetGatherer(nsISupports** aRetVal) +{ + if (!aRetVal || NS_WARN_IF(!mGatherer)) { + return; + } + NS_ADDREF(*aRetVal = mGatherer); +} +#endif + +UniquePtr<char[]> GeckoSampler::ToJSON(double aSinceTime) +{ + SpliceableChunkedJSONWriter b; + StreamJSON(b, aSinceTime); + return b.WriteFunc()->CopyData(); +} + +void GeckoSampler::ToJSObjectAsync(double aSinceTime, + mozilla::dom::Promise* aPromise) +{ + if (NS_WARN_IF(!mGatherer)) { + return; + } + + mGatherer->Start(aSinceTime, aPromise); +} + +struct SubprocessClosure { + explicit SubprocessClosure(SpliceableJSONWriter* aWriter) + : mWriter(aWriter) + {} + + SpliceableJSONWriter* mWriter; +}; + +void SubProcessCallback(const char* aProfile, void* aClosure) +{ + // Called by the observer to get their profile data included + // as a sub profile + SubprocessClosure* closure = (SubprocessClosure*)aClosure; + + // Add the string profile into the profile + closure->mWriter->StringElement(aProfile); +} + + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +static +void BuildJavaThreadJSObject(SpliceableJSONWriter& aWriter) +{ + aWriter.StringProperty("name", "Java Main Thread"); + + aWriter.StartArrayProperty("samples"); + + // for each sample + for (int sampleId = 0; true; sampleId++) { + bool firstRun = true; + // for each frame + for (int frameId = 0; true; frameId++) { + jni::String::LocalRef frameName = + java::GeckoJavaSampler::GetFrameName(0, sampleId, frameId); + // when we run out of frames, we stop looping + if (!frameName) { + // if we found at least one frame, we have objects to close + if (!firstRun) { + 
aWriter.EndArray(); + aWriter.EndObject(); + } + break; + } + // the first time around, open the sample object and frames array + if (firstRun) { + firstRun = false; + + double sampleTime = + java::GeckoJavaSampler::GetSampleTime(0, sampleId); + + aWriter.StartObjectElement(); + aWriter.DoubleProperty("time", sampleTime); + + aWriter.StartArrayProperty("frames"); + } + // add a frame to the sample + aWriter.StartObjectElement(); + aWriter.StringProperty("location", + frameName->ToCString().BeginReading()); + aWriter.EndObject(); + } + // if we found no frames for this sample, we are done + if (firstRun) { + break; + } + } + + aWriter.EndArray(); +} +#endif + +void GeckoSampler::StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime) +{ + aWriter.Start(SpliceableJSONWriter::SingleLineStyle); + { + // Put shared library info + aWriter.StringProperty("libs", GetSharedLibraryInfoStringInternal().c_str()); + + // Put meta data + aWriter.StartObjectProperty("meta"); + StreamMetaJSCustomObject(aWriter); + aWriter.EndObject(); + + // Data of TaskTracer doesn't belong in the circular buffer. + if (TaskTracer()) { + aWriter.StartObjectProperty("tasktracer"); + StreamTaskTracer(aWriter); + aWriter.EndObject(); + } + + // Lists the samples for each ThreadProfile + aWriter.StartArrayProperty("threads"); + { + SetPaused(true); + + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (size_t i = 0; i < sRegisteredThreads->size(); i++) { + // Thread not being profiled, skip it + if (!sRegisteredThreads->at(i)->Profile()) + continue; + + // Note that we intentionally include ThreadProfile which + // have been marked for pending delete. 
+ + ::MutexAutoLock lock(sRegisteredThreads->at(i)->Profile()->GetMutex()); + + sRegisteredThreads->at(i)->Profile()->StreamJSON(aWriter, aSinceTime); + } + } + +#ifndef SPS_STANDALONE + if (Sampler::CanNotifyObservers()) { + // Send an event asking any subprocesses (plugins) to + // give us their information + SubprocessClosure closure(&aWriter); + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) { + RefPtr<ProfileSaveEvent> pse = new ProfileSaveEvent(SubProcessCallback, &closure); + os->NotifyObservers(pse, "profiler-subprocess", nullptr); + } + } + + #if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + if (ProfileJava()) { + java::GeckoJavaSampler::Pause(); + + aWriter.Start(); + { + BuildJavaThreadJSObject(aWriter); + } + aWriter.End(); + + java::GeckoJavaSampler::Unpause(); + } + #endif +#endif + + SetPaused(false); + } + aWriter.EndArray(); + } + aWriter.End(); +} + +// With sampling paused, flushes the samples and markers of every profiled, +// non-pending-delete thread whose pseudo-stack context is aContext. +// The body is compiled out in SPS_STANDALONE builds. +void GeckoSampler::FlushOnJSShutdown(JSContext* aContext) +{ +#ifndef SPS_STANDALONE + SetPaused(true); + + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (size_t i = 0; i < sRegisteredThreads->size(); i++) { + // Thread not being profiled, skip it. + if (!sRegisteredThreads->at(i)->Profile() || + sRegisteredThreads->at(i)->IsPendingDelete()) { + continue; + } + + // Thread not profiling the context that's going away, skip it. 
+ if (sRegisteredThreads->at(i)->Profile()->GetPseudoStack()->mContext != aContext) { + continue; + } + + ::MutexAutoLock lock(sRegisteredThreads->at(i)->Profile()->GetMutex()); + sRegisteredThreads->at(i)->Profile()->FlushSamplesAndMarkers(); + } + } + + SetPaused(false); +#endif +} + +void PseudoStack::flushSamplerOnJSShutdown() +{ +#ifndef SPS_STANDALONE + MOZ_ASSERT(mContext); + GeckoSampler* t = tlsTicker.get(); + if (t) { + t->FlushOnJSShutdown(mContext); + } +#endif +} + +// END SaveProfileTask et al +//////////////////////////////////////////////////////////////////////// + +static +void addDynamicTag(ThreadProfile &aProfile, char aTagName, const char *aStr) +{ + aProfile.addTag(ProfileEntry(aTagName, "")); + // Add one to store the null termination + size_t strLen = strlen(aStr) + 1; + for (size_t j = 0; j < strLen;) { + // Store as many characters in the void* as the platform allows + char text[sizeof(void*)]; + size_t len = sizeof(void*)/sizeof(char); + if (j+len >= strLen) { + len = strLen - j; + } + memcpy(text, &aStr[j], len); + j += sizeof(void*)/sizeof(char); + // Cast to *((void**) to pass the text data to a void* + aProfile.addTag(ProfileEntry('d', *((void**)(&text[0])))); + } +} + +static +void addPseudoEntry(volatile StackEntry &entry, ThreadProfile &aProfile, + PseudoStack *stack, void *lastpc) +{ + // Pseudo-frames with the BEGIN_PSEUDO_JS flag are just annotations + // and should not be recorded in the profile. 
+ if (entry.hasFlag(StackEntry::BEGIN_PSEUDO_JS)) + return; + + int lineno = -1; + + // First entry has tagName 's' (start) + // Check for magic pointer bit 1 to indicate copy + const char* sampleLabel = entry.label(); + if (entry.isCopyLabel()) { + // Store the string using 1 or more 'd' (dynamic) tags + // that will happen to the preceding tag + + addDynamicTag(aProfile, 'c', sampleLabel); +#ifndef SPS_STANDALONE + if (entry.isJs()) { + JSScript* script = entry.script(); + if (script) { + if (!entry.pc()) { + // The JIT only allows the top-most entry to have a nullptr pc + MOZ_ASSERT(&entry == &stack->mStack[stack->stackSize() - 1]); + // If stack-walking was disabled, then that's just unfortunate + if (lastpc) { + jsbytecode *jspc = js::ProfilingGetPC(stack->mContext, script, + lastpc); + if (jspc) { + lineno = JS_PCToLineNumber(script, jspc); + } + } + } else { + lineno = JS_PCToLineNumber(script, entry.pc()); + } + } + } else { + lineno = entry.line(); + } +#endif + } else { + aProfile.addTag(ProfileEntry('c', sampleLabel)); + + // XXX: Bug 1010578. Don't assume a CPP entry and try to get the + // line for js entries as well. 
+ if (entry.isCpp()) { + lineno = entry.line(); + } + } + + if (lineno != -1) { + aProfile.addTag(ProfileEntry('n', lineno)); + } + + uint32_t category = entry.category(); + MOZ_ASSERT(!(category & StackEntry::IS_CPP_ENTRY)); + MOZ_ASSERT(!(category & StackEntry::FRAME_LABEL_COPY)); + + if (category) { + aProfile.addTag(ProfileEntry('y', (int)category)); + } +} + +struct NativeStack +{ + void** pc_array; + void** sp_array; + size_t size; + size_t count; +}; + +mozilla::Atomic<bool> WALKING_JS_STACK(false); + +struct AutoWalkJSStack { + bool walkAllowed; + + AutoWalkJSStack() : walkAllowed(false) { + walkAllowed = WALKING_JS_STACK.compareExchange(false, true); + } + + ~AutoWalkJSStack() { + if (walkAllowed) + WALKING_JS_STACK = false; + } +}; + +static +void mergeStacksIntoProfile(ThreadProfile& aProfile, TickSample* aSample, NativeStack& aNativeStack) +{ + PseudoStack* pseudoStack = aProfile.GetPseudoStack(); + volatile StackEntry *pseudoFrames = pseudoStack->mStack; + uint32_t pseudoCount = pseudoStack->stackSize(); + + // Make a copy of the JS stack into a JSFrame array. This is necessary since, + // like the native stack, the JS stack is iterated youngest-to-oldest and we + // need to iterate oldest-to-youngest when adding entries to aProfile. + + // Synchronous sampling reports an invalid buffer generation to + // ProfilingFrameIterator to avoid incorrectly resetting the generation of + // sampled JIT entries inside the JS engine. See note below concerning 'J' + // entries. + uint32_t startBufferGen; + if (aSample->isSamplingCurrentThread) { + startBufferGen = UINT32_MAX; + } else { + startBufferGen = aProfile.bufferGeneration(); + } + uint32_t jsCount = 0; +#ifndef SPS_STANDALONE + JS::ProfilingFrameIterator::Frame jsFrames[1000]; + // Only walk jit stack if profiling frame iterator is turned on. 
+ if (pseudoStack->mContext && JS::IsProfilingEnabledForContext(pseudoStack->mContext)) { + AutoWalkJSStack autoWalkJSStack; + const uint32_t maxFrames = mozilla::ArrayLength(jsFrames); + + if (aSample && autoWalkJSStack.walkAllowed) { + JS::ProfilingFrameIterator::RegisterState registerState; + registerState.pc = aSample->pc; + registerState.sp = aSample->sp; +#ifdef ENABLE_ARM_LR_SAVING + registerState.lr = aSample->lr; +#endif + + JS::ProfilingFrameIterator jsIter(pseudoStack->mContext, + registerState, + startBufferGen); + for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) { + // See note below regarding 'J' entries. + if (aSample->isSamplingCurrentThread || jsIter.isWasm()) { + uint32_t extracted = jsIter.extractStack(jsFrames, jsCount, maxFrames); + jsCount += extracted; + if (jsCount == maxFrames) + break; + } else { + mozilla::Maybe<JS::ProfilingFrameIterator::Frame> frame = + jsIter.getPhysicalFrameWithoutLabel(); + if (frame.isSome()) + jsFrames[jsCount++] = mozilla::Move(frame.ref()); + } + } + } + } +#endif + + // Start the sample with a root entry. + aProfile.addTag(ProfileEntry('s', "(root)")); + + // While the pseudo-stack array is ordered oldest-to-youngest, the JS and + // native arrays are ordered youngest-to-oldest. We must add frames to + // aProfile oldest-to-youngest. Thus, iterate over the pseudo-stack forwards + // and JS and native arrays backwards. Note: this means the terminating + // condition jsIndex and nativeIndex is being < 0. + uint32_t pseudoIndex = 0; + int32_t jsIndex = jsCount - 1; + int32_t nativeIndex = aNativeStack.count - 1; + + uint8_t *lastPseudoCppStackAddr = nullptr; + + // Iterate as long as there is at least one frame remaining. + while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) { + // There are 1 to 3 frames available. Find and add the oldest. 
+ + uint8_t *pseudoStackAddr = nullptr; + uint8_t *jsStackAddr = nullptr; + uint8_t *nativeStackAddr = nullptr; + + if (pseudoIndex != pseudoCount) { + volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex]; + + if (pseudoFrame.isCpp()) + lastPseudoCppStackAddr = (uint8_t *) pseudoFrame.stackAddress(); + +#ifndef SPS_STANDALONE + // Skip any pseudo-stack JS frames which are marked isOSR + // Pseudostack frames are marked isOSR when the JS interpreter + // enters a jit frame on a loop edge (via on-stack-replacement, + // or OSR). To avoid both the pseudoframe and jit frame being + // recorded (and showing up twice), the interpreter marks the + // interpreter pseudostack entry with the OSR flag to ensure that + // it doesn't get counted. + if (pseudoFrame.isJs() && pseudoFrame.isOSR()) { + pseudoIndex++; + continue; + } +#endif + + MOZ_ASSERT(lastPseudoCppStackAddr); + pseudoStackAddr = lastPseudoCppStackAddr; + } + +#ifndef SPS_STANDALONE + if (jsIndex >= 0) + jsStackAddr = (uint8_t *) jsFrames[jsIndex].stackAddress; +#endif + + if (nativeIndex >= 0) + nativeStackAddr = (uint8_t *) aNativeStack.sp_array[nativeIndex]; + + // If there's a native stack entry which has the same SP as a + // pseudo stack entry, pretend we didn't see the native stack + // entry. Ditto for a native stack entry which has the same SP as + // a JS stack entry. In effect this means pseudo or JS entries + // trump conflicting native entries. + if (nativeStackAddr && (pseudoStackAddr == nativeStackAddr || jsStackAddr == nativeStackAddr)) { + nativeStackAddr = nullptr; + nativeIndex--; + MOZ_ASSERT(pseudoStackAddr || jsStackAddr); + } + + // Sanity checks. 
+ MOZ_ASSERT_IF(pseudoStackAddr, pseudoStackAddr != jsStackAddr && + pseudoStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != pseudoStackAddr && + jsStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != pseudoStackAddr && + nativeStackAddr != jsStackAddr); + + // Check to see if pseudoStack frame is top-most. + if (pseudoStackAddr > jsStackAddr && pseudoStackAddr > nativeStackAddr) { + MOZ_ASSERT(pseudoIndex < pseudoCount); + volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex]; + addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr); + pseudoIndex++; + continue; + } + +#ifndef SPS_STANDALONE + // Check to see if JS jit stack frame is top-most + if (jsStackAddr > nativeStackAddr) { + MOZ_ASSERT(jsIndex >= 0); + const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex]; + + // Stringifying non-wasm JIT frames is delayed until streaming + // time. To re-lookup the entry in the JitcodeGlobalTable, we need to + // store the JIT code address ('J') in the circular buffer. + // + // Note that we cannot do this when we are synchronously sampling the + // current thread; that is, when called from profiler_get_backtrace. The + // captured backtrace is usually externally stored for an indeterminate + // amount of time, such as in nsRefreshDriver. Problematically, the + // stored backtrace may be alive across a GC during which the profiler + // itself is disabled. In that case, the JS engine is free to discard + // its JIT code. This means that if we inserted such 'J' entries into + // the buffer, nsRefreshDriver would now be holding on to a backtrace + // with stale JIT code return addresses. 
+ if (aSample->isSamplingCurrentThread || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) { + addDynamicTag(aProfile, 'c', jsFrame.label.get()); + } else { + MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline); + aProfile.addTag(ProfileEntry('J', jsFrames[jsIndex].returnAddress)); + } + + jsIndex--; + continue; + } +#endif + + // If we reach here, there must be a native stack entry and it must be the + // greatest entry. + if (nativeStackAddr) { + MOZ_ASSERT(nativeIndex >= 0); + aProfile + .addTag(ProfileEntry('l', (void*)aNativeStack.pc_array[nativeIndex])); + } + if (nativeIndex >= 0) { + nativeIndex--; + } + } + +#ifndef SPS_STANDALONE + // Update the JS context with the current profile sample buffer generation. + // + // Do not do this for synchronous sampling, which create their own + // ProfileBuffers. + if (!aSample->isSamplingCurrentThread && pseudoStack->mContext) { + MOZ_ASSERT(aProfile.bufferGeneration() >= startBufferGen); + uint32_t lapCount = aProfile.bufferGeneration() - startBufferGen; + JS::UpdateJSContextProfilerSampleBufferGen(pseudoStack->mContext, + aProfile.bufferGeneration(), + lapCount); + } +#endif +} + +#ifdef USE_NS_STACKWALK +static +void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, + void* aClosure) +{ + NativeStack* nativeStack = static_cast<NativeStack*>(aClosure); + MOZ_ASSERT(nativeStack->count < nativeStack->size); + nativeStack->sp_array[nativeStack->count] = aSP; + nativeStack->pc_array[nativeStack->count] = aPC; + nativeStack->count++; +} + +void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) +{ + void* pc_array[1000]; + void* sp_array[1000]; + NativeStack nativeStack = { + pc_array, + sp_array, + mozilla::ArrayLength(pc_array), + 0 + }; + + // Start with the current function. 
We use 0 as the frame number here because + // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N. + // This is a bit weird but it doesn't matter because StackWalkCallback() + // doesn't use the frame number argument. + StackWalkCallback(/* frameNumber */ 0, aSample->pc, aSample->sp, &nativeStack); + + uint32_t maxFrames = uint32_t(nativeStack.size - nativeStack.count); + // Win x64 doesn't support disabling frame pointer emission, so we need + // to fall back to using StackWalk64, which is slower. +#if defined(XP_MACOSX) || (defined(XP_WIN) && !defined(V8_HOST_ARCH_X64)) + void *stackEnd = aSample->threadProfile->GetStackTop(); + bool rv = true; + if (aSample->fp >= aSample->sp && aSample->fp <= stackEnd) + rv = FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, + maxFrames, &nativeStack, + reinterpret_cast<void**>(aSample->fp), stackEnd); +#else + void *platformData = nullptr; + + uintptr_t thread = GetThreadHandle(aSample->threadProfile->GetPlatformData()); + MOZ_ASSERT(thread); + bool rv = MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, + &nativeStack, thread, platformData); +#endif + if (rv) + mergeStacksIntoProfile(aProfile, aSample, nativeStack); +} +#endif + + +#ifdef USE_EHABI_STACKWALK +void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) +{ + void *pc_array[1000]; + void *sp_array[1000]; + NativeStack nativeStack = { + pc_array, + sp_array, + mozilla::ArrayLength(pc_array), + 0 + }; + + const mcontext_t *mcontext = &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext; + mcontext_t savedContext; + PseudoStack *pseudoStack = aProfile.GetPseudoStack(); + + nativeStack.count = 0; + // The pseudostack contains an "EnterJIT" frame whenever we enter + // JIT code with profiling enabled; the stack pointer value points + // to the saved registers. We use this to resume unwinding + // after encountering JIT code. 
+ for (uint32_t i = pseudoStack->stackSize(); i > 0; --i) { + // The pseudostack grows towards higher indices, so we iterate + // backwards (from callee to caller). + volatile StackEntry &entry = pseudoStack->mStack[i - 1]; + if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) { + // Found JIT entry frame. Unwind up to that point (i.e., force + // the stack walk to stop before the block of saved registers; + // note that it yields nondecreasing stack pointers), then restore + // the saved state. + uint32_t *vSP = reinterpret_cast<uint32_t*>(entry.stackAddress()); + + nativeStack.count += EHABIStackWalk(*mcontext, + /* stackBase = */ vSP, + sp_array + nativeStack.count, + pc_array + nativeStack.count, + nativeStack.size - nativeStack.count); + + memset(&savedContext, 0, sizeof(savedContext)); + // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp + savedContext.arm_r4 = *vSP++; + savedContext.arm_r5 = *vSP++; + savedContext.arm_r6 = *vSP++; + savedContext.arm_r7 = *vSP++; + savedContext.arm_r8 = *vSP++; + savedContext.arm_r9 = *vSP++; + savedContext.arm_r10 = *vSP++; + savedContext.arm_fp = *vSP++; + savedContext.arm_lr = *vSP++; + savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP); + savedContext.arm_pc = savedContext.arm_lr; + mcontext = &savedContext; + } + } + + // Now unwind whatever's left (starting from either the last EnterJIT + // frame or, if no EnterJIT was found, the original registers). 
+ nativeStack.count += EHABIStackWalk(*mcontext, + aProfile.GetStackTop(), + sp_array + nativeStack.count, + pc_array + nativeStack.count, + nativeStack.size - nativeStack.count); + + mergeStacksIntoProfile(aProfile, aSample, nativeStack); +} +#endif + + +#ifdef USE_LUL_STACKWALK +void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) +{ + const mcontext_t* mc + = &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext; + + lul::UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); + +# if defined(SPS_PLAT_amd64_linux) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); +# elif defined(SPS_PLAT_arm_android) + startRegs.r15 = lul::TaggedUWord(mc->arm_pc); + startRegs.r14 = lul::TaggedUWord(mc->arm_lr); + startRegs.r13 = lul::TaggedUWord(mc->arm_sp); + startRegs.r12 = lul::TaggedUWord(mc->arm_ip); + startRegs.r11 = lul::TaggedUWord(mc->arm_fp); + startRegs.r7 = lul::TaggedUWord(mc->arm_r7); +# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); +# else +# error "Unknown plat" +# endif + + /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not + going past the stack's registered top point. Do some basic + sanity checks too. This assumes that the TaggedUWord holding + the stack pointer value is valid, but it should be, since it + was constructed that way in the code just above. 
*/ + + lul::StackImage stackImg; + + { +# if defined(SPS_PLAT_amd64_linux) + uintptr_t rEDZONE_SIZE = 128; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(SPS_PLAT_arm_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; +# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# else +# error "Unknown plat" +# endif + uintptr_t end = reinterpret_cast<uintptr_t>(aProfile.GetStackTop()); + uintptr_t ws = sizeof(void*); + start &= ~(ws-1); + end &= ~(ws-1); + uintptr_t nToCopy = 0; + if (start < end) { + nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + stackImg.mLen = nToCopy; + stackImg.mStartAvma = start; + if (nToCopy > 0) { + memcpy(&stackImg.mContents[0], (void*)start, nToCopy); + (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy); + } + } + + // The maximum number of frames that LUL will produce. Setting it + // too high gives a risk of it wasting a lot of time looping on + // corrupted stacks. + const int MAX_NATIVE_FRAMES = 256; + + size_t scannedFramesAllowed = 0; + + uintptr_t framePCs[MAX_NATIVE_FRAMES]; + uintptr_t frameSPs[MAX_NATIVE_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t scannedFramesAcquired = 0; + sLUL->Unwind( &framePCs[0], &frameSPs[0], + &framesUsed, &scannedFramesAcquired, + framesAvail, scannedFramesAllowed, + &startRegs, &stackImg ); + + NativeStack nativeStack = { + reinterpret_cast<void**>(framePCs), + reinterpret_cast<void**>(frameSPs), + mozilla::ArrayLength(framePCs), + 0 + }; + + nativeStack.count = framesUsed; + + mergeStacksIntoProfile(aProfile, aSample, nativeStack); + + // Update stats in the LUL stats object. Unfortunately this requires + // three global memory operations. 
+ sLUL->mStats.mContext += 1; + sLUL->mStats.mCFI += framesUsed - 1 - scannedFramesAcquired; + sLUL->mStats.mScanned += scannedFramesAcquired; +} +#endif + + +static +void doSampleStackTrace(ThreadProfile &aProfile, TickSample *aSample, bool aAddLeafAddresses) +{ + NativeStack nativeStack = { nullptr, nullptr, 0, 0 }; + mergeStacksIntoProfile(aProfile, aSample, nativeStack); + +#ifdef ENABLE_SPS_LEAF_DATA + if (aSample && aAddLeafAddresses) { + aProfile.addTag(ProfileEntry('l', (void*)aSample->pc)); +#ifdef ENABLE_ARM_LR_SAVING + aProfile.addTag(ProfileEntry('L', (void*)aSample->lr)); +#endif + } +#endif +} + +void GeckoSampler::Tick(TickSample* sample) +{ + // Don't allow for ticks to happen within other ticks. + InplaceTick(sample); +} + +void GeckoSampler::InplaceTick(TickSample* sample) +{ + ThreadProfile& currThreadProfile = *sample->threadProfile; + + currThreadProfile.addTag(ProfileEntry('T', currThreadProfile.ThreadId())); + + // NOTE(review): `sample` has already been dereferenced above, so the + // null checks on it in this function cannot guard against a null sample. + if (sample) { + mozilla::TimeDuration delta = sample->timestamp - sStartTime; + currThreadProfile.addTag(ProfileEntry('t', delta.ToMilliseconds())); + } + + PseudoStack* stack = currThreadProfile.GetPseudoStack(); + +#if defined(USE_NS_STACKWALK) || defined(USE_EHABI_STACKWALK) || \ + defined(USE_LUL_STACKWALK) + if (mUseStackWalk) { + doNativeBacktrace(currThreadProfile, sample); + } else { + doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses); + } +#else + doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses); +#endif + + // Don't process the PseudoStack's markers if we're + // synchronously sampling the current thread. 
+ if (!sample->isSamplingCurrentThread) { + ProfilerMarkerLinkedList* pendingMarkersList = stack->getPendingMarkers(); + while (pendingMarkersList && pendingMarkersList->peek()) { + ProfilerMarker* marker = pendingMarkersList->popHead(); + currThreadProfile.addStoredMarker(marker); + currThreadProfile.addTag(ProfileEntry('m', marker)); + } + } + +#ifndef SPS_STANDALONE + if (sample && currThreadProfile.GetThreadResponsiveness()->HasData()) { + mozilla::TimeDuration delta = currThreadProfile.GetThreadResponsiveness()->GetUnresponsiveDuration(sample->timestamp); + currThreadProfile.addTag(ProfileEntry('r', delta.ToMilliseconds())); + } +#endif + + // rssMemory is equal to 0 when we are not recording. + if (sample && sample->rssMemory != 0) { + currThreadProfile.addTag(ProfileEntry('R', static_cast<double>(sample->rssMemory))); + } + + // ussMemory is equal to 0 when we are not recording. + if (sample && sample->ussMemory != 0) { + currThreadProfile.addTag(ProfileEntry('U', static_cast<double>(sample->ussMemory))); + } + +#if defined(XP_WIN) + if (mProfilePower) { + mIntelPowerGadget->TakeSample(); + currThreadProfile.addTag(ProfileEntry('p', static_cast<double>(mIntelPowerGadget->GetTotalPackagePowerInWatts()))); + } +#endif + + if (sLastFrameNumber != sFrameNumber) { + currThreadProfile.addTag(ProfileEntry('f', sFrameNumber)); + sLastFrameNumber = sFrameNumber; + } +} + +namespace { + +SyncProfile* NewSyncProfile() +{ + PseudoStack* stack = tlsPseudoStack.get(); + if (!stack) { + MOZ_ASSERT(stack); + return nullptr; + } + Thread::tid_t tid = Thread::GetCurrentId(); + + ThreadInfo* info = new ThreadInfo("SyncProfile", tid, false, stack, nullptr); + SyncProfile* profile = new SyncProfile(info, GET_BACKTRACE_DEFAULT_ENTRY); + return profile; +} + +} // namespace + +SyncProfile* GeckoSampler::GetBacktrace() +{ + SyncProfile* profile = NewSyncProfile(); + + TickSample sample; + sample.threadProfile = profile; + +#if defined(HAVE_NATIVE_UNWIND) || 
defined(USE_LUL_STACKWALK) +#if defined(XP_WIN) || defined(LINUX) + tickcontext_t context; + sample.PopulateContext(&context); +#elif defined(XP_MACOSX) + sample.PopulateContext(nullptr); +#endif +#endif + + sample.isSamplingCurrentThread = true; + sample.timestamp = mozilla::TimeStamp::Now(); + + profile->BeginUnwind(); + Tick(&sample); + profile->EndUnwind(); + + return profile; +} + +void +GeckoSampler::GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration) +{ + *aCurrentPosition = mBuffer->mWritePos; + *aTotalSize = mBuffer->mEntrySize; + *aGeneration = mBuffer->mGeneration; +} diff --git a/tools/profiler/core/GeckoSampler.h b/tools/profiler/core/GeckoSampler.h new file mode 100644 index 0000000000..da1fdfe437 --- /dev/null +++ b/tools/profiler/core/GeckoSampler.h @@ -0,0 +1,181 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef GeckoSampler_h +#define GeckoSampler_h + +#include "platform.h" +#include "ProfileEntry.h" +#include "mozilla/Vector.h" +#include "ThreadProfile.h" +#include "ThreadInfo.h" +#ifndef SPS_STANDALONE +#include "IntelPowerGadget.h" +#endif +#ifdef MOZ_TASK_TRACER +#include "GeckoTaskTracer.h" +#endif + +#include <algorithm> + +namespace mozilla { +class ProfileGatherer; +} // namespace mozilla + +typedef mozilla::Vector<std::string> ThreadNameFilterList; +typedef mozilla::Vector<std::string> FeatureList; + +static bool +threadSelected(ThreadInfo* aInfo, const ThreadNameFilterList &aThreadNameFilters) { + if (aThreadNameFilters.empty()) { + return true; + } + + std::string name = aInfo->Name(); + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + for (uint32_t i = 0; i < aThreadNameFilters.length(); ++i) { + std::string filter = aThreadNameFilters[i]; + std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower); + + // Crude, non UTF-8 compatible, case insensitive substring search + if (name.find(filter) != std::string::npos) { + return true; + } + } + + return false; +} + +extern mozilla::TimeStamp sLastTracerEvent; +extern int sFrameNumber; +extern int sLastFrameNumber; + +class GeckoSampler: public Sampler { + public: + GeckoSampler(double aInterval, int aEntrySize, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount); + ~GeckoSampler(); + + void RegisterThread(ThreadInfo* aInfo) { + if (!aInfo->IsMainThread() && !mProfileThreads) { + return; + } + + if (!threadSelected(aInfo, mThreadNameFilters)) { + return; + } + + ThreadProfile* profile = new ThreadProfile(aInfo, mBuffer); + aInfo->SetProfile(profile); + } + + // Called within a signal. This function must be reentrant + virtual void Tick(TickSample* sample) override; + + // Immediately captures the calling thread's call stack and returns it. 
+ virtual SyncProfile* GetBacktrace() override; + + // Called within a signal. This function must be reentrant + virtual void RequestSave() override + { + mSaveRequested = true; +#ifdef MOZ_TASK_TRACER + if (mTaskTracer) { + mozilla::tasktracer::StopLogging(); + } +#endif + } + + virtual void HandleSaveRequest() override; + virtual void DeleteExpiredMarkers() override; + + ThreadProfile* GetPrimaryThreadProfile() + { + if (!mPrimaryThreadProfile) { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->IsMainThread() && !info->IsPendingDelete()) { + mPrimaryThreadProfile = info->Profile(); + break; + } + } + } + + return mPrimaryThreadProfile; + } + + void ToStreamAsJSON(std::ostream& stream, double aSinceTime = 0); +#ifndef SPS_STANDALONE + virtual JSObject *ToJSObject(JSContext *aCx, double aSinceTime = 0); + void GetGatherer(nsISupports** aRetVal); +#endif + mozilla::UniquePtr<char[]> ToJSON(double aSinceTime = 0); + virtual void ToJSObjectAsync(double aSinceTime = 0, mozilla::dom::Promise* aPromise = 0); + void StreamMetaJSCustomObject(SpliceableJSONWriter& aWriter); + void StreamTaskTracer(SpliceableJSONWriter& aWriter); + void FlushOnJSShutdown(JSContext* aContext); + bool ProfileJS() const { return mProfileJS; } + bool ProfileJava() const { return mProfileJava; } + bool ProfileGPU() const { return mProfileGPU; } + bool ProfilePower() const { return mProfilePower; } + bool ProfileThreads() const override { return mProfileThreads; } + bool InPrivacyMode() const { return mPrivacyMode; } + bool AddMainThreadIO() const { return mAddMainThreadIO; } + bool ProfileMemory() const { return mProfileMemory; } + bool TaskTracer() const { return mTaskTracer; } + bool LayersDump() const { return mLayersDump; } + bool DisplayListDump() const { return mDisplayListDump; } + bool ProfileRestyle() const { return mProfileRestyle; } + const ThreadNameFilterList& 
ThreadNameFilters() { return mThreadNameFilters; } + const FeatureList& Features() { return mFeatures; } + + void GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration); + +protected: + // Called within a signal. This function must be reentrant + virtual void InplaceTick(TickSample* sample); + + // Not implemented on platforms which do not support backtracing + void doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample); + + void StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime); + + // This represent the application's main thread (SAMPLER_INIT) + ThreadProfile* mPrimaryThreadProfile; + RefPtr<ProfileBuffer> mBuffer; + bool mSaveRequested; + bool mAddLeafAddresses; + bool mUseStackWalk; + bool mProfileJS; + bool mProfileGPU; + bool mProfileThreads; + bool mProfileJava; + bool mProfilePower; + bool mLayersDump; + bool mDisplayListDump; + bool mProfileRestyle; + + // Keep the thread filter to check against new thread that + // are started while profiling + ThreadNameFilterList mThreadNameFilters; + FeatureList mFeatures; + bool mPrivacyMode; + bool mAddMainThreadIO; + bool mProfileMemory; + bool mTaskTracer; +#if defined(XP_WIN) + IntelPowerGadget* mIntelPowerGadget; +#endif + +private: + RefPtr<mozilla::ProfileGatherer> mGatherer; +}; + +#endif + diff --git a/tools/profiler/core/IntelPowerGadget.cpp b/tools/profiler/core/IntelPowerGadget.cpp new file mode 100644 index 0000000000..fe267b80f3 --- /dev/null +++ b/tools/profiler/core/IntelPowerGadget.cpp @@ -0,0 +1,310 @@ +/*
+ * Copyright 2013, Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Author: Joe Olivas <joseph.k.olivas@intel.com>
+ */
+
+#include "nsDebug.h"
+#include "nsString.h"
+#include "IntelPowerGadget.h"
+#include "prenv.h"
+
+// Builds a gadget with no library loaded: the library handle and every entry
+// point start out null, and every MSR index starts at -1 (the "not found"
+// value tested by the power getters) until Init() fills them in.
+//
+// The initializers are listed in the order the members are declared in
+// IntelPowerGadget.h, which is the order they are actually initialized in;
+// keeping the two in sync avoids -Wreorder warnings.
+IntelPowerGadget::IntelPowerGadget()
+  : libpowergadget(nullptr)
+  , Initialize(nullptr)
+  , GetNumNodes(nullptr)
+  , GetNumMsrs(nullptr)
+  , GetMsrName(nullptr)
+  , GetMsrFunc(nullptr)
+  , ReadMSR(nullptr)
+  , WriteMSR(nullptr)
+  , GetIAFrequency(nullptr)
+  , GetTDP(nullptr)
+  , GetMaxTemperature(nullptr)
+  , GetThresholds(nullptr)
+  , GetTemperature(nullptr)
+  , ReadSample(nullptr)
+  , GetSysTime(nullptr)
+  , GetRDTSC(nullptr)
+  , GetTimeInterval(nullptr)
+  , GetBaseFrequency(nullptr)
+  , GetPowerData(nullptr)
+  , StartLog(nullptr)
+  , StopLog(nullptr)
+  , packageMSR(-1)
+  , cpuMSR(-1)
+  , freqMSR(-1)
+  , tempMSR(-1)
+{
+}
+
+// Loads the PowerGadget energy library from the directory named by the
+// IPG_Dir environment variable, resolves its entry points, initializes it and
+// records which MSR indices carry frequency, package power, CPU power and
+// temperature.
+//
+// Returns true when the library was loaded and its IntelEnergyLibInitialize
+// entry point was found and called; returns false otherwise.
+//
+// NOTE(review): the status returned by Initialize() is not checked -- confirm
+// whether a failed library initialization should make Init() return false.
+bool
+IntelPowerGadget::Init()
+{
+  const char* path = PR_GetEnv("IPG_Dir");
+  if (path && *path) {
+    nsCString ipg_library;
+    ipg_library.Append(path);
+    ipg_library.Append('/');
+    ipg_library.AppendLiteral(PG_LIBRARY_NAME);
+    libpowergadget = PR_LoadLibrary(ipg_library.get());
+  }
+
+  if (libpowergadget) {
+    // Any symbol the library does not export is left null; callers of the
+    // individual entry points are expected to null-check before use.
+    Initialize = reinterpret_cast<IPGInitialize>(
+      PR_FindFunctionSymbol(libpowergadget, "IntelEnergyLibInitialize"));
+    GetNumNodes = reinterpret_cast<IPGGetNumNodes>(
+      PR_FindFunctionSymbol(libpowergadget, "GetNumNodes"));
+    GetMsrName = reinterpret_cast<IPGGetMsrName>(
+      PR_FindFunctionSymbol(libpowergadget, "GetMsrName"));
+    GetMsrFunc = reinterpret_cast<IPGGetMsrFunc>(
+      PR_FindFunctionSymbol(libpowergadget, "GetMsrFunc"));
+    ReadMSR = reinterpret_cast<IPGReadMSR>(
+      PR_FindFunctionSymbol(libpowergadget, "ReadMSR"));
+    WriteMSR = reinterpret_cast<IPGWriteMSR>(
+      PR_FindFunctionSymbol(libpowergadget, "WriteMSR"));
+    GetIAFrequency = reinterpret_cast<IPGGetIAFrequency>(
+      PR_FindFunctionSymbol(libpowergadget, "GetIAFrequency"));
+    GetTDP = reinterpret_cast<IPGGetTDP>(
+      PR_FindFunctionSymbol(libpowergadget, "GetTDP"));
+    GetMaxTemperature = reinterpret_cast<IPGGetMaxTemperature>(
+      PR_FindFunctionSymbol(libpowergadget, "GetMaxTemperature"));
+    GetThresholds = reinterpret_cast<IPGGetThresholds>(
+      PR_FindFunctionSymbol(libpowergadget, "GetThresholds"));
+    GetTemperature = reinterpret_cast<IPGGetTemperature>(
+      PR_FindFunctionSymbol(libpowergadget, "GetTemperature"));
+    ReadSample = reinterpret_cast<IPGReadSample>(
+      PR_FindFunctionSymbol(libpowergadget, "ReadSample"));
+    GetSysTime = reinterpret_cast<IPGGetSysTime>(
+      PR_FindFunctionSymbol(libpowergadget, "GetSysTime"));
+    GetRDTSC = reinterpret_cast<IPGGetRDTSC>(
+      PR_FindFunctionSymbol(libpowergadget, "GetRDTSC"));
+    GetTimeInterval = reinterpret_cast<IPGGetTimeInterval>(
+      PR_FindFunctionSymbol(libpowergadget, "GetTimeInterval"));
+    GetBaseFrequency = reinterpret_cast<IPGGetBaseFrequency>(
+      PR_FindFunctionSymbol(libpowergadget, "GetBaseFrequency"));
+    GetPowerData = reinterpret_cast<IPGGetPowerData>(
+      PR_FindFunctionSymbol(libpowergadget, "GetPowerData"));
+    StartLog = reinterpret_cast<IPGStartLog>(
+      PR_FindFunctionSymbol(libpowergadget, "StartLog"));
+    StopLog = reinterpret_cast<IPGStopLog>(
+      PR_FindFunctionSymbol(libpowergadget, "StopLog"));
+    GetNumMsrs = reinterpret_cast<IPGGetNumMsrs>(
+      PR_FindFunctionSymbol(libpowergadget, "GetNumMsrs"));
+  }
+
+  if (!Initialize) {
+    // Library missing, or it does not export the initialization entry point.
+    return false;
+  }
+
+  Initialize();
+
+  // Classify the MSRs only when both lookup entry points were resolved;
+  // calling through a null function pointer would crash. When they are
+  // missing the MSR indices stay at -1 and the power getters report 0.0.
+  if (GetMsrName && GetMsrFunc) {
+    const int msrCount = GetNumberMsrs();
+    wchar_t name[1024] = {0};
+    for (int i = 0; i < msrCount; ++i) {
+      // Start from an empty name so that an MSR whose name lookup writes
+      // nothing cannot be matched against the previous MSR's name.
+      name[0] = L'\0';
+      GetMsrName(i, name);
+      int func = 0;
+      GetMsrFunc(i, &func);
+      if (wcscmp(name, L"CPU Frequency") == 0 && func == 0) {
+        // MSR for frequency
+        this->freqMSR = i;
+      } else if (wcscmp(name, L"Processor") == 0 && func == 1) {
+        // MSR for Package
+        this->packageMSR = i;
+      } else if (wcscmp(name, L"IA") == 0 && func == 1) {
+        // MSR for CPU
+        this->cpuMSR = i;
+      } else if (wcscmp(name, L"Package") == 0 && func == 2) {
+        // MSR for Temperature
+        this->tempMSR = i;
+      }
+    }
+  }
+
+  // Grab one sample at startup for a diff
+  TakeSample();
+  return true;
+}
+
+// Unloads the PowerGadget library if one was loaded and clears the library
+// handle together with every cached entry point. Does nothing when no
+// library handle is held.
+IntelPowerGadget::~IntelPowerGadget()
+{
+  if (!libpowergadget) {
+    return;
+  }
+
+  NS_WARNING("Unloading PowerGadget library!\n");
+  PR_UnloadLibrary(libpowergadget);
+  libpowergadget = nullptr;
+
+  // The entry points pointed into the library that was just unloaded.
+  Initialize = nullptr;
+  GetNumNodes = nullptr;
+  GetNumMsrs = nullptr;
+  GetMsrName = nullptr;
+  GetMsrFunc = nullptr;
+  ReadMSR = nullptr;
+  WriteMSR = nullptr;
+  GetIAFrequency = nullptr;
+  GetTDP = nullptr;
+  GetMaxTemperature = nullptr;
+  GetThresholds = nullptr;
+  GetTemperature = nullptr;
+  ReadSample = nullptr;
+  GetSysTime = nullptr;
+  GetRDTSC = nullptr;
+  GetTimeInterval = nullptr;
+  GetBaseFrequency = nullptr;
+  GetPowerData = nullptr;
+  StartLog = nullptr;
+  StopLog = nullptr;
+}
+
+// Returns the node count written by the library's GetNumNodes entry point,
+// or 0 when that entry point was not resolved.
+int
+IntelPowerGadget::GetNumberNodes()
+{
+  int nodes = 0;
+  if (GetNumNodes) {
+    // The status code is not used: |nodes| keeps its zero default unless the
+    // library overwrites it.
+    GetNumNodes(&nodes);
+  }
+  return nodes;
+}
+
+// Returns the MSR count written by the library's GetNumMsrs entry point, or
+// 0 when that entry point was not resolved.
+int
+IntelPowerGadget::GetNumberMsrs()
+{
+  int msrs = 0;
+  if (GetNumMsrs) {
+    // The status code is not used: |msrs| keeps its zero default unless the
+    // library overwrites it.
+    GetNumMsrs(&msrs);
+  }
+  return msrs;
+}
+
+// Returns the frequency the library's GetIAFrequency entry point reports for
+// |node|, or 0 when that entry point was not resolved.
+// NOTE(review): presumably in MHz, going by the freqInMHz parameter name in
+// the IPGGetIAFrequency typedef -- confirm against the library docs.
+int
+IntelPowerGadget::GetCPUFrequency(int node)
+{
+  int frequency = 0;
+  if (GetIAFrequency) {
+    // Status code unused; |frequency| stays 0 unless the library writes it.
+    GetIAFrequency(node, &frequency);
+  }
+  return frequency;
+}
+
+// Returns the TDP the library's GetTDP entry point reports for |node|, or
+// 0.0 when that entry point was not resolved.
+double
+IntelPowerGadget::GetTdp(int node)
+{
+  double tdp = 0.0;
+  if (GetTDP) {
+    // Status code unused; |tdp| stays 0.0 unless the library writes it.
+    GetTDP(node, &tdp);
+  }
+  return tdp;
+}
+
+// Returns the maximum temperature the library's GetMaxTemperature entry
+// point reports for |node|, or 0 when that entry point was not resolved.
+int
+IntelPowerGadget::GetMaxTemp(int node)
+{
+  int maxTemperatureC = 0;
+  if (GetMaxTemperature) {
+    // Status code unused; the default of 0 is returned unless the library
+    // writes a value.
+    GetMaxTemperature(node, &maxTemperatureC);
+  }
+  return maxTemperatureC;
+}
+
+// Returns the temperature the library's GetTemperature entry point reports
+// for |node|, or 0 when that entry point was not resolved.
+int
+IntelPowerGadget::GetTemp(int node)
+{
+  int temperatureC = 0;
+  if (GetTemperature) {
+    // Status code unused; the default of 0 is returned unless the library
+    // writes a value.
+    GetTemperature(node, &temperatureC);
+  }
+  return temperatureC;
+}
+
+// Asks the library to read a new sample and passes its status code straight
+// back to the caller; returns 0 when the ReadSample entry point was not
+// resolved.
+int
+IntelPowerGadget::TakeSample()
+{
+  if (!ReadSample) {
+    return 0;
+  }
+  return ReadSample();
+}
+
+// Returns the value written by the library's GetRDTSC entry point, or 0 when
+// that entry point was not resolved.
+uint64_t
+IntelPowerGadget::GetRdtsc()
+{
+  uint64_t rdtsc = 0;
+  if (GetRDTSC) {
+    // Status code unused; |rdtsc| stays 0 unless the library writes it.
+    GetRDTSC(&rdtsc);
+  }
+  return rdtsc;
+}
+
+// Returns the value written by the library's GetTimeInterval entry point, or
+// 0.0 when that entry point was not resolved.
+double
+IntelPowerGadget::GetInterval()
+{
+  double interval = 0.0;
+  if (GetTimeInterval) {
+    // Status code unused; |interval| stays 0.0 unless the library writes it.
+    GetTimeInterval(&interval);
+  }
+  return interval;
+}
+
+// Returns the base frequency the library's GetBaseFrequency entry point
+// reports for |node|, or 0.0 when that entry point was not resolved.
+double
+IntelPowerGadget::GetCPUBaseFrequency(int node)
+{
+  double freq = 0.0;
+  if (GetBaseFrequency) {
+    // Status code unused; |freq| stays 0.0 unless the library writes it.
+    GetBaseFrequency(node, &freq);
+  }
+  return freq;
+}
+
+// Sums GetPackagePowerInWatts() over every node index below
+// GetNumberNodes(); yields 0.0 when no nodes are reported.
+double
+IntelPowerGadget::GetTotalPackagePowerInWatts()
+{
+  const int nodeCount = GetNumberNodes();
+  double sum = 0.0;
+  int node = 0;
+  while (node < nodeCount) {
+    sum += GetPackagePowerInWatts(node);
+    ++node;
+  }
+  return sum;
+}
+
+// Queries the library's GetPowerData entry point for |node| using the MSR
+// index recorded in packageMSR and returns the first value it produced.
+// Returns 0.0 when the entry point was not resolved or no package MSR was
+// found (packageMSR == -1).
+// NOTE(review): the first result is presumably the power in watts and the
+// library is assumed to write at most three values -- confirm both against
+// the library docs.
+double
+IntelPowerGadget::GetPackagePowerInWatts(int node)
+{
+  int numResult = 0;
+  double result[] = {0.0, 0.0, 0.0};
+  if (GetPowerData && packageMSR != -1) {
+    // Status code unused; result[0] stays 0.0 unless the library writes it.
+    GetPowerData(node, packageMSR, result, &numResult);
+  }
+  return result[0];
+}
+
+// Sums GetCPUPowerInWatts() over every node index below GetNumberNodes();
+// yields 0.0 when no nodes are reported.
+double
+IntelPowerGadget::GetTotalCPUPowerInWatts()
+{
+  const int nodeCount = GetNumberNodes();
+  double sum = 0.0;
+  int node = 0;
+  while (node < nodeCount) {
+    sum += GetCPUPowerInWatts(node);
+    ++node;
+  }
+  return sum;
+}
+
+// Queries the library's GetPowerData entry point for |node| using the MSR
+// index recorded in cpuMSR and returns the first value it produced.
+// Returns 0.0 when the entry point was not resolved or no CPU MSR was found
+// (cpuMSR == -1).
+// NOTE(review): the first result is presumably the power in watts and the
+// library is assumed to write at most three values -- confirm both against
+// the library docs.
+double
+IntelPowerGadget::GetCPUPowerInWatts(int node)
+{
+  int numResult = 0;
+  double result[] = {0.0, 0.0, 0.0};
+  if (GetPowerData && cpuMSR != -1) {
+    // Status code unused; result[0] stays 0.0 unless the library writes it.
+    GetPowerData(node, cpuMSR, result, &numResult);
+  }
+  return result[0];
+}
+
+// Sums GetGPUPowerInWatts() over every node index below GetNumberNodes();
+// yields 0.0 when no nodes are reported.
+double
+IntelPowerGadget::GetTotalGPUPowerInWatts()
+{
+  const int nodeCount = GetNumberNodes();
+  double sum = 0.0;
+  int node = 0;
+  while (node < nodeCount) {
+    sum += GetGPUPowerInWatts(node);
+    ++node;
+  }
+  return sum;
+}
+
+// Always reports 0.0: this implementation performs no GPU power query and
+// ignores |node|.
+double
+IntelPowerGadget::GetGPUPowerInWatts(int node)
+{
+  const double unavailable = 0.0;
+  return unavailable;
+}
+
diff --git a/tools/profiler/core/IntelPowerGadget.h b/tools/profiler/core/IntelPowerGadget.h new file mode 100644 index 0000000000..4a24215b62 --- /dev/null +++ b/tools/profiler/core/IntelPowerGadget.h @@ -0,0 +1,150 @@ +/* + * Copyright 2013, Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: Joe Olivas <joseph.k.olivas@intel.com> + */ + +#ifndef profiler_IntelPowerGadget_h +#define profiler_IntelPowerGadget_h + +#ifdef _MSC_VER +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include <stdint.h> +#endif +#include "prlink.h" + +typedef int (*IPGInitialize) (); +typedef int (*IPGGetNumNodes) (int *nNodes); +typedef int (*IPGGetNumMsrs) (int *nMsr); +typedef int (*IPGGetMsrName) (int iMsr, wchar_t *szName); +typedef int (*IPGGetMsrFunc) (int iMsr, int *pFuncID); +typedef int (*IPGReadMSR) (int iNode, unsigned int address, uint64_t *value); +typedef int (*IPGWriteMSR) (int iNode, unsigned int address, uint64_t value); +typedef int (*IPGGetIAFrequency) (int iNode, int *freqInMHz); +typedef int (*IPGGetTDP) (int iNode, double *TDP); +typedef int (*IPGGetMaxTemperature) (int iNode, int *degreeC); +typedef int (*IPGGetThresholds) (int iNode, int *degree1C, int *degree2C); +typedef int (*IPGGetTemperature) (int iNode, int *degreeC); +typedef int (*IPGReadSample) (); +typedef int (*IPGGetSysTime) (void *pSysTime); +typedef int (*IPGGetRDTSC) (uint64_t 
*pTSC); +typedef int (*IPGGetTimeInterval) (double *pOffset); +typedef int (*IPGGetBaseFrequency) (int iNode, double *pBaseFrequency); +typedef int (*IPGGetPowerData) (int iNode, int iMSR, double *pResult, int *nResult); +typedef int (*IPGStartLog) (wchar_t *szFileName); +typedef int (*IPGStopLog) (); + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +#define PG_LIBRARY_NAME "EnergyLib64" +#else +#define PG_LIBRARY_NAME "EnergyLib32" +#endif + + +class IntelPowerGadget +{ +public: + + IntelPowerGadget(); + ~IntelPowerGadget(); + + // Fails if initialization is incomplete + bool Init(); + + // Returns the number of packages on the system + int GetNumberNodes(); + + // Returns the number of MSRs being tracked + int GetNumberMsrs(); + + // Given a node, returns the temperature + int GetCPUFrequency(int); + + // Returns the TDP of the given node + double GetTdp(int); + + // Returns the maximum temperature for the given node + int GetMaxTemp(int); + + // Returns the current temperature in degrees C + // of the given node + int GetTemp(int); + + // Takes a sample of data. Must be called before + // any current data is retrieved. + int TakeSample(); + + // Gets the timestamp of the most recent sample + uint64_t GetRdtsc(); + + // returns number of seconds between the last + // two samples + double GetInterval(); + + // Returns the base frequency for the given node + double GetCPUBaseFrequency(int node); + + // Returns the combined package power for all + // packages on the system for the last sample. + double GetTotalPackagePowerInWatts(); + double GetPackagePowerInWatts(int node); + + // Returns the combined CPU power for all + // packages on the system for the last sample. + // If the reading is not available, returns 0.0 + double GetTotalCPUPowerInWatts(); + double GetCPUPowerInWatts(int node); + + // Returns the combined GPU power for all + // packages on the system for the last sample. 
+ // If the reading is not available, returns 0.0 + double GetTotalGPUPowerInWatts(); + double GetGPUPowerInWatts(int node); + +private: + + PRLibrary *libpowergadget; + IPGInitialize Initialize; + IPGGetNumNodes GetNumNodes; + IPGGetNumMsrs GetNumMsrs; + IPGGetMsrName GetMsrName; + IPGGetMsrFunc GetMsrFunc; + IPGReadMSR ReadMSR; + IPGWriteMSR WriteMSR; + IPGGetIAFrequency GetIAFrequency; + IPGGetTDP GetTDP; + IPGGetMaxTemperature GetMaxTemperature; + IPGGetThresholds GetThresholds; + IPGGetTemperature GetTemperature; + IPGReadSample ReadSample; + IPGGetSysTime GetSysTime; + IPGGetRDTSC GetRDTSC; + IPGGetTimeInterval GetTimeInterval; + IPGGetBaseFrequency GetBaseFrequency; + IPGGetPowerData GetPowerData; + IPGStartLog StartLog; + IPGStopLog StopLog; + + int packageMSR; + int cpuMSR; + int freqMSR; + int tempMSR; +}; + +#endif // profiler_IntelPowerGadget_h diff --git a/tools/profiler/core/PlatformMacros.h b/tools/profiler/core/PlatformMacros.h new file mode 100644 index 0000000000..9a544a42e3 --- /dev/null +++ b/tools/profiler/core/PlatformMacros.h @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef SPS_PLATFORM_MACROS_H +#define SPS_PLATFORM_MACROS_H + +/* Define platform selection macros in a consistent way. Don't add + anything else to this file, so it can remain freestanding. The + primary factorisation is on (ARCH,OS) pairs ("PLATforms") but ARCH_ + and OS_ macros are defined too, since they are sometimes + convenient. 
*/ + +#undef SPS_PLAT_arm_android +#undef SPS_PLAT_amd64_linux +#undef SPS_PLAT_x86_linux +#undef SPS_PLAT_amd64_darwin +#undef SPS_PLAT_x86_darwin +#undef SPS_PLAT_x86_windows +#undef SPS_PLAT_amd64_windows + +#undef SPS_ARCH_arm +#undef SPS_ARCH_x86 +#undef SPS_ARCH_amd64 + +#undef SPS_OS_android +#undef SPS_OS_linux +#undef SPS_OS_darwin +#undef SPS_OS_windows + +#if defined(__linux__) && defined(__x86_64__) +# define SPS_PLAT_amd64_linux 1 +# define SPS_ARCH_amd64 1 +# define SPS_OS_linux 1 + +#elif defined(__ANDROID__) && defined(__arm__) +# define SPS_PLAT_arm_android 1 +# define SPS_ARCH_arm 1 +# define SPS_OS_android 1 + +#elif defined(__ANDROID__) && defined(__i386__) +# define SPS_PLAT_x86_android 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_android 1 + +#elif defined(__linux__) && defined(__i386__) +# define SPS_PLAT_x86_linux 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_linux 1 + +#elif defined(__APPLE__) && defined(__x86_64__) +# define SPS_PLAT_amd64_darwin 1 +# define SPS_ARCH_amd64 1 +# define SPS_OS_darwin 1 + +#elif defined(__APPLE__) && defined(__i386__) +# define SPS_PLAT_x86_darwin 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_darwin 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_IX86) || defined(__i386__)) +# define SPS_PLAT_x86_windows 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_windows 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_X64) || defined(__x86_64__)) +# define SPS_PLAT_amd64_windows 1 +# define SPS_ARCH_amd64 1 +# define SPS_OS_windows 1 + +#else +# error "Unsupported platform" +#endif + +#endif /* ndef SPS_PLATFORM_MACROS_H */ diff --git a/tools/profiler/core/ProfileBuffer.cpp b/tools/profiler/core/ProfileBuffer.cpp new file mode 100644 index 0000000000..a4b91d8fc8 --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.cpp @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public 
+ * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfileBuffer.h" + +ProfileBuffer::ProfileBuffer(int aEntrySize) + : mEntries(MakeUnique<ProfileEntry[]>(aEntrySize)) + , mWritePos(0) + , mReadPos(0) + , mEntrySize(aEntrySize) + , mGeneration(0) +{ +} + +ProfileBuffer::~ProfileBuffer() +{ + while (mStoredMarkers.peek()) { + delete mStoredMarkers.popHead(); + } +} + +// Called from signal, call only reentrant functions +void ProfileBuffer::addTag(const ProfileEntry& aTag) +{ + mEntries[mWritePos++] = aTag; + if (mWritePos == mEntrySize) { + // Wrapping around may result in things referenced in the buffer (e.g., + // JIT code addresses and markers) being incorrectly collected. + MOZ_ASSERT(mGeneration != UINT32_MAX); + mGeneration++; + mWritePos = 0; + } + if (mWritePos == mReadPos) { + // Keep one slot open. + mEntries[mReadPos] = ProfileEntry(); + mReadPos = (mReadPos + 1) % mEntrySize; + } +} + +void ProfileBuffer::addStoredMarker(ProfilerMarker *aStoredMarker) { + aStoredMarker->SetGeneration(mGeneration); + mStoredMarkers.insert(aStoredMarker); +} + +void ProfileBuffer::deleteExpiredStoredMarkers() { + // Delete markers of samples that have been overwritten due to circular + // buffer wraparound. 
+ uint32_t generation = mGeneration; + while (mStoredMarkers.peek() && + mStoredMarkers.peek()->HasExpired(generation)) { + delete mStoredMarkers.popHead(); + } +} + +void ProfileBuffer::reset() { + mGeneration += 2; + mReadPos = mWritePos = 0; +} + +#define DYNAMIC_MAX_STRING 8192 + +char* ProfileBuffer::processDynamicTag(int readPos, + int* tagsConsumed, char* tagBuff) +{ + int readAheadPos = (readPos + 1) % mEntrySize; + int tagBuffPos = 0; + + // Read the string stored in mTagData until the null character is seen + bool seenNullByte = false; + while (readAheadPos != mWritePos && !seenNullByte) { + (*tagsConsumed)++; + ProfileEntry readAheadEntry = mEntries[readAheadPos]; + for (size_t pos = 0; pos < sizeof(void*); pos++) { + tagBuff[tagBuffPos] = readAheadEntry.mTagChars[pos]; + if (tagBuff[tagBuffPos] == '\0' || tagBuffPos == DYNAMIC_MAX_STRING-2) { + seenNullByte = true; + break; + } + tagBuffPos++; + } + if (!seenNullByte) + readAheadPos = (readAheadPos + 1) % mEntrySize; + } + return tagBuff; +} + + diff --git a/tools/profiler/core/ProfileBuffer.h b/tools/profiler/core/ProfileBuffer.h new file mode 100644 index 0000000000..7d90fe3851 --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.h @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MOZ_PROFILE_BUFFER_H +#define MOZ_PROFILE_BUFFER_H + +#include "ProfileEntry.h" +#include "platform.h" +#include "ProfileJSONWriter.h" +#include "mozilla/RefPtr.h" +#include "mozilla/RefCounted.h" + +class ProfileBuffer : public mozilla::RefCounted<ProfileBuffer> { +public: + MOZ_DECLARE_REFCOUNTED_VIRTUAL_TYPENAME(ProfileBuffer) + + explicit ProfileBuffer(int aEntrySize); + + virtual ~ProfileBuffer(); + + void addTag(const ProfileEntry& aTag); + void StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId, double aSinceTime, + JSContext* cx, UniqueStacks& aUniqueStacks); + void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId, double aSinceTime, + UniqueStacks& aUniqueStacks); + void DuplicateLastSample(int aThreadId); + + void addStoredMarker(ProfilerMarker* aStoredMarker); + + // The following two methods are not signal safe! They delete markers. + void deleteExpiredStoredMarkers(); + void reset(); + +protected: + char* processDynamicTag(int readPos, int* tagsConsumed, char* tagBuff); + int FindLastSampleOfThread(int aThreadId); + +public: + // Circular buffer 'Keep One Slot Open' implementation for simplicity + mozilla::UniquePtr<ProfileEntry[]> mEntries; + + // Points to the next entry we will write to, which is also the one at which + // we need to stop reading. + int mWritePos; + + // Points to the entry at which we can start reading. + int mReadPos; + + // The number of entries in our buffer. + int mEntrySize; + + // How many times mWritePos has wrapped around. + uint32_t mGeneration; + + // Markers that marker entries in the buffer might refer to. 
+ ProfilerMarkerLinkedList mStoredMarkers; +}; + +#endif diff --git a/tools/profiler/core/ProfileEntry.cpp b/tools/profiler/core/ProfileEntry.cpp new file mode 100644 index 0000000000..22d53a6f30 --- /dev/null +++ b/tools/profiler/core/ProfileEntry.cpp @@ -0,0 +1,881 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ostream> +#include "platform.h" +#include "mozilla/HashFunctions.h" + +#ifndef SPS_STANDALONE +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" + +// JS +#include "jsapi.h" +#include "jsfriendapi.h" +#include "js/TrackedOptimizationInfo.h" +#endif + +// Self +#include "ProfileEntry.h" + +using mozilla::MakeUnique; +using mozilla::UniquePtr; +using mozilla::Maybe; +using mozilla::Some; +using mozilla::Nothing; +using mozilla::JSONWriter; + + +//////////////////////////////////////////////////////////////////////// +// BEGIN ProfileEntry + +ProfileEntry::ProfileEntry() + : mTagData(nullptr) + , mTagName(0) +{ } + +// aTagData must not need release (i.e. 
be a string from the text segment) +ProfileEntry::ProfileEntry(char aTagName, const char *aTagData) + : mTagData(aTagData) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, ProfilerMarker *aTagMarker) + : mTagMarker(aTagMarker) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, void *aTagPtr) + : mTagPtr(aTagPtr) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, double aTagDouble) + : mTagDouble(aTagDouble) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, uintptr_t aTagOffset) + : mTagOffset(aTagOffset) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, Address aTagAddress) + : mTagAddress(aTagAddress) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, int aTagInt) + : mTagInt(aTagInt) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, char aTagChar) + : mTagChar(aTagChar) + , mTagName(aTagName) +{ } + +bool ProfileEntry::is_ent_hint(char hintChar) { + return mTagName == 'h' && mTagChar == hintChar; +} + +bool ProfileEntry::is_ent_hint() { + return mTagName == 'h'; +} + +bool ProfileEntry::is_ent(char tagChar) { + return mTagName == tagChar; +} + +void* ProfileEntry::get_tagPtr() { + // No consistency checking. Oh well. 
+ return mTagPtr; +} + +// END ProfileEntry +//////////////////////////////////////////////////////////////////////// + +class JSONSchemaWriter +{ + JSONWriter& mWriter; + uint32_t mIndex; + +public: + explicit JSONSchemaWriter(JSONWriter& aWriter) + : mWriter(aWriter) + , mIndex(0) + { + aWriter.StartObjectProperty("schema"); + } + + void WriteField(const char* aName) { + mWriter.IntProperty(aName, mIndex++); + } + + ~JSONSchemaWriter() { + mWriter.EndObject(); + } +}; + +#ifndef SPS_STANDALONE +class StreamOptimizationTypeInfoOp : public JS::ForEachTrackedOptimizationTypeInfoOp +{ + JSONWriter& mWriter; + UniqueJSONStrings& mUniqueStrings; + bool mStartedTypeList; + +public: + StreamOptimizationTypeInfoOp(JSONWriter& aWriter, UniqueJSONStrings& aUniqueStrings) + : mWriter(aWriter) + , mUniqueStrings(aUniqueStrings) + , mStartedTypeList(false) + { } + + void readType(const char* keyedBy, const char* name, + const char* location, Maybe<unsigned> lineno) override { + if (!mStartedTypeList) { + mStartedTypeList = true; + mWriter.StartObjectElement(); + mWriter.StartArrayProperty("typeset"); + } + + mWriter.StartObjectElement(); + { + mUniqueStrings.WriteProperty(mWriter, "keyedBy", keyedBy); + if (name) { + mUniqueStrings.WriteProperty(mWriter, "name", name); + } + if (location) { + mUniqueStrings.WriteProperty(mWriter, "location", location); + } + if (lineno.isSome()) { + mWriter.IntProperty("line", *lineno); + } + } + mWriter.EndObject(); + } + + void operator()(JS::TrackedTypeSite site, const char* mirType) override { + if (mStartedTypeList) { + mWriter.EndArray(); + mStartedTypeList = false; + } else { + mWriter.StartObjectElement(); + } + + { + mUniqueStrings.WriteProperty(mWriter, "site", JS::TrackedTypeSiteString(site)); + mUniqueStrings.WriteProperty(mWriter, "mirType", mirType); + } + mWriter.EndObject(); + } +}; + +// As mentioned in ProfileEntry.h, the JSON format contains many arrays whose +// elements are laid out according to various schemas to help +// 
de-duplication. This RAII class helps write these arrays by keeping track of +// the last non-null element written and adding the appropriate number of null +// elements when writing new non-null elements. It also automatically opens and +// closes an array element on the given JSON writer. +// +// Example usage: +// +// // Define the schema of elements in this type of array: [FOO, BAR, BAZ] +// enum Schema : uint32_t { +// FOO = 0, +// BAR = 1, +// BAZ = 2 +// }; +// +// AutoArraySchemaWriter writer(someJsonWriter, someUniqueStrings); +// if (shouldWriteFoo) { +// writer.IntElement(FOO, getFoo()); +// } +// ... etc ... +class MOZ_RAII AutoArraySchemaWriter +{ + friend class AutoObjectWriter; + + SpliceableJSONWriter& mJSONWriter; + UniqueJSONStrings* mStrings; + uint32_t mNextFreeIndex; + +public: + AutoArraySchemaWriter(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aStrings) + : mJSONWriter(aWriter) + , mStrings(&aStrings) + , mNextFreeIndex(0) + { + mJSONWriter.StartArrayElement(); + } + + // If you don't have access to a UniqueStrings, you had better not try and + // write a string element down the line! 
+ explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter) + : mJSONWriter(aWriter) + , mStrings(nullptr) + , mNextFreeIndex(0) + { + mJSONWriter.StartArrayElement(); + } + + ~AutoArraySchemaWriter() { + mJSONWriter.EndArray(); + } + + void FillUpTo(uint32_t aIndex) { + MOZ_ASSERT(aIndex >= mNextFreeIndex); + mJSONWriter.NullElements(aIndex - mNextFreeIndex); + mNextFreeIndex = aIndex + 1; + } + + void IntElement(uint32_t aIndex, uint32_t aValue) { + FillUpTo(aIndex); + mJSONWriter.IntElement(aValue); + } + + void DoubleElement(uint32_t aIndex, double aValue) { + FillUpTo(aIndex); + mJSONWriter.DoubleElement(aValue); + } + + void StringElement(uint32_t aIndex, const char* aValue) { + MOZ_RELEASE_ASSERT(mStrings); + FillUpTo(aIndex); + mStrings->WriteElement(mJSONWriter, aValue); + } +}; + +class StreamOptimizationAttemptsOp : public JS::ForEachTrackedOptimizationAttemptOp +{ + SpliceableJSONWriter& mWriter; + UniqueJSONStrings& mUniqueStrings; + +public: + StreamOptimizationAttemptsOp(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aUniqueStrings) + : mWriter(aWriter), + mUniqueStrings(aUniqueStrings) + { } + + void operator()(JS::TrackedStrategy strategy, JS::TrackedOutcome outcome) override { + enum Schema : uint32_t { + STRATEGY = 0, + OUTCOME = 1 + }; + + AutoArraySchemaWriter writer(mWriter, mUniqueStrings); + writer.StringElement(STRATEGY, JS::TrackedStrategyString(strategy)); + writer.StringElement(OUTCOME, JS::TrackedOutcomeString(outcome)); + } +}; + +class StreamJSFramesOp : public JS::ForEachProfiledFrameOp +{ + void* mReturnAddress; + UniqueStacks::Stack& mStack; + unsigned mDepth; + +public: + StreamJSFramesOp(void* aReturnAddr, UniqueStacks::Stack& aStack) + : mReturnAddress(aReturnAddr) + , mStack(aStack) + , mDepth(0) + { } + + unsigned depth() const { + MOZ_ASSERT(mDepth > 0); + return mDepth; + } + + void operator()(const JS::ForEachProfiledFrameOp::FrameHandle& aFrameHandle) override { + UniqueStacks::OnStackFrameKey 
frameKey(mReturnAddress, mDepth, aFrameHandle); + mStack.AppendFrame(frameKey); + mDepth++; + } +}; +#endif + +uint32_t UniqueJSONStrings::GetOrAddIndex(const char* aStr) +{ + uint32_t index; + StringKey key(aStr); + + auto it = mStringToIndexMap.find(key); + + if (it != mStringToIndexMap.end()) { + return it->second; + } + index = mStringToIndexMap.size(); + mStringToIndexMap[key] = index; + mStringTableWriter.StringElement(aStr); + return index; +} + +bool UniqueStacks::FrameKey::operator==(const FrameKey& aOther) const +{ + return mLocation == aOther.mLocation && + mLine == aOther.mLine && + mCategory == aOther.mCategory && + mJITAddress == aOther.mJITAddress && + mJITDepth == aOther.mJITDepth; +} + +bool UniqueStacks::StackKey::operator==(const StackKey& aOther) const +{ + MOZ_ASSERT_IF(mPrefix == aOther.mPrefix, mPrefixHash == aOther.mPrefixHash); + return mPrefix == aOther.mPrefix && mFrame == aOther.mFrame; +} + +UniqueStacks::Stack::Stack(UniqueStacks& aUniqueStacks, const OnStackFrameKey& aRoot) + : mUniqueStacks(aUniqueStacks) + , mStack(aUniqueStacks.GetOrAddFrameIndex(aRoot)) +{ +} + +void UniqueStacks::Stack::AppendFrame(const OnStackFrameKey& aFrame) +{ + // Compute the prefix hash and index before mutating mStack. 
+ uint32_t prefixHash = mStack.Hash(); + uint32_t prefix = mUniqueStacks.GetOrAddStackIndex(mStack); + mStack.UpdateHash(prefixHash, prefix, mUniqueStacks.GetOrAddFrameIndex(aFrame)); +} + +uint32_t UniqueStacks::Stack::GetOrAddIndex() const +{ + return mUniqueStacks.GetOrAddStackIndex(mStack); +} + +uint32_t UniqueStacks::FrameKey::Hash() const +{ + uint32_t hash = 0; + if (!mLocation.IsEmpty()) { +#ifdef SPS_STANDALONE + hash = mozilla::HashString(mLocation.c_str()); +#else + hash = mozilla::HashString(mLocation.get()); +#endif + } + if (mLine.isSome()) { + hash = mozilla::AddToHash(hash, *mLine); + } + if (mCategory.isSome()) { + hash = mozilla::AddToHash(hash, *mCategory); + } + if (mJITAddress.isSome()) { + hash = mozilla::AddToHash(hash, *mJITAddress); + if (mJITDepth.isSome()) { + hash = mozilla::AddToHash(hash, *mJITDepth); + } + } + return hash; +} + +uint32_t UniqueStacks::StackKey::Hash() const +{ + if (mPrefix.isNothing()) { + return mozilla::HashGeneric(mFrame); + } + return mozilla::AddToHash(*mPrefixHash, mFrame); +} + +UniqueStacks::Stack UniqueStacks::BeginStack(const OnStackFrameKey& aRoot) +{ + return Stack(*this, aRoot); +} + +UniqueStacks::UniqueStacks(JSContext* aContext) + : mContext(aContext) + , mFrameCount(0) +{ + mFrameTableWriter.StartBareList(); + mStackTableWriter.StartBareList(); +} + +#ifdef SPS_STANDALONE +uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) +{ + uint32_t index; + auto it = mStackToIndexMap.find(aStack); + + if (it != mStackToIndexMap.end()) { + return it->second; + } + + index = mStackToIndexMap.size(); + mStackToIndexMap[aStack] = index; + StreamStack(aStack); + return index; +} +#else +uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) +{ + uint32_t index; + if (mStackToIndexMap.Get(aStack, &index)) { + MOZ_ASSERT(index < mStackToIndexMap.Count()); + return index; + } + + index = mStackToIndexMap.Count(); + mStackToIndexMap.Put(aStack, index); + StreamStack(aStack); + return index; +} 
+#endif + +#ifdef SPS_STANDALONE +uint32_t UniqueStacks::GetOrAddFrameIndex(const OnStackFrameKey& aFrame) +{ + uint32_t index; + auto it = mFrameToIndexMap.find(aFrame); + if (it != mFrameToIndexMap.end()) { + MOZ_ASSERT(it->second < mFrameCount); + return it->second; + } + + // A manual count is used instead of mFrameToIndexMap.Count() due to + // forwarding of canonical JIT frames above. + index = mFrameCount++; + mFrameToIndexMap[aFrame] = index; + StreamFrame(aFrame); + return index; +} +#else +uint32_t UniqueStacks::GetOrAddFrameIndex(const OnStackFrameKey& aFrame) +{ + uint32_t index; + if (mFrameToIndexMap.Get(aFrame, &index)) { + MOZ_ASSERT(index < mFrameCount); + return index; + } + + // If aFrame isn't canonical, forward it to the canonical frame's index. + if (aFrame.mJITFrameHandle) { + void* canonicalAddr = aFrame.mJITFrameHandle->canonicalAddress(); + if (canonicalAddr != *aFrame.mJITAddress) { + OnStackFrameKey canonicalKey(canonicalAddr, *aFrame.mJITDepth, *aFrame.mJITFrameHandle); + uint32_t canonicalIndex = GetOrAddFrameIndex(canonicalKey); + mFrameToIndexMap.Put(aFrame, canonicalIndex); + return canonicalIndex; + } + } + + // A manual count is used instead of mFrameToIndexMap.Count() due to + // forwarding of canonical JIT frames above. 
+ index = mFrameCount++; + mFrameToIndexMap.Put(aFrame, index); + StreamFrame(aFrame); + return index; +} +#endif + +uint32_t UniqueStacks::LookupJITFrameDepth(void* aAddr) +{ + uint32_t depth; + + auto it = mJITFrameDepthMap.find(aAddr); + if (it != mJITFrameDepthMap.end()) { + depth = it->second; + MOZ_ASSERT(depth > 0); + return depth; + } + return 0; +} + +void UniqueStacks::AddJITFrameDepth(void* aAddr, unsigned depth) +{ + mJITFrameDepthMap[aAddr] = depth; +} + +void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) +{ + mFrameTableWriter.EndBareList(); + aWriter.TakeAndSplice(mFrameTableWriter.WriteFunc()); +} + +void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) +{ + mStackTableWriter.EndBareList(); + aWriter.TakeAndSplice(mStackTableWriter.WriteFunc()); +} + +void UniqueStacks::StreamStack(const StackKey& aStack) +{ + enum Schema : uint32_t { + PREFIX = 0, + FRAME = 1 + }; + + AutoArraySchemaWriter writer(mStackTableWriter, mUniqueStrings); + if (aStack.mPrefix.isSome()) { + writer.IntElement(PREFIX, *aStack.mPrefix); + } + writer.IntElement(FRAME, aStack.mFrame); +} + +void UniqueStacks::StreamFrame(const OnStackFrameKey& aFrame) +{ + enum Schema : uint32_t { + LOCATION = 0, + IMPLEMENTATION = 1, + OPTIMIZATIONS = 2, + LINE = 3, + CATEGORY = 4 + }; + + AutoArraySchemaWriter writer(mFrameTableWriter, mUniqueStrings); + +#ifndef SPS_STANDALONE + if (!aFrame.mJITFrameHandle) { +#else + { +#endif +#ifdef SPS_STANDALONE + writer.StringElement(LOCATION, aFrame.mLocation.c_str()); +#else + writer.StringElement(LOCATION, aFrame.mLocation.get()); +#endif + if (aFrame.mLine.isSome()) { + writer.IntElement(LINE, *aFrame.mLine); + } + if (aFrame.mCategory.isSome()) { + writer.IntElement(CATEGORY, *aFrame.mCategory); + } + } +#ifndef SPS_STANDALONE + else { + const JS::ForEachProfiledFrameOp::FrameHandle& jitFrame = *aFrame.mJITFrameHandle; + + writer.StringElement(LOCATION, jitFrame.label()); + + 
JS::ProfilingFrameIterator::FrameKind frameKind = jitFrame.frameKind(); + MOZ_ASSERT(frameKind == JS::ProfilingFrameIterator::Frame_Ion || + frameKind == JS::ProfilingFrameIterator::Frame_Baseline); + writer.StringElement(IMPLEMENTATION, + frameKind == JS::ProfilingFrameIterator::Frame_Ion + ? "ion" + : "baseline"); + + if (jitFrame.hasTrackedOptimizations()) { + writer.FillUpTo(OPTIMIZATIONS); + mFrameTableWriter.StartObjectElement(); + { + mFrameTableWriter.StartArrayProperty("types"); + { + StreamOptimizationTypeInfoOp typeInfoOp(mFrameTableWriter, mUniqueStrings); + jitFrame.forEachOptimizationTypeInfo(typeInfoOp); + } + mFrameTableWriter.EndArray(); + + JS::Rooted<JSScript*> script(mContext); + jsbytecode* pc; + mFrameTableWriter.StartObjectProperty("attempts"); + { + { + JSONSchemaWriter schema(mFrameTableWriter); + schema.WriteField("strategy"); + schema.WriteField("outcome"); + } + + mFrameTableWriter.StartArrayProperty("data"); + { + StreamOptimizationAttemptsOp attemptOp(mFrameTableWriter, mUniqueStrings); + jitFrame.forEachOptimizationAttempt(attemptOp, script.address(), &pc); + } + mFrameTableWriter.EndArray(); + } + mFrameTableWriter.EndObject(); + + if (JSAtom* name = js::GetPropertyNameFromPC(script, pc)) { + char buf[512]; + JS_PutEscapedFlatString(buf, mozilla::ArrayLength(buf), js::AtomToFlatString(name), 0); + mUniqueStrings.WriteProperty(mFrameTableWriter, "propertyName", buf); + } + + unsigned line, column; + line = JS_PCToLineNumber(script, pc, &column); + mFrameTableWriter.IntProperty("line", line); + mFrameTableWriter.IntProperty("column", column); + } + mFrameTableWriter.EndObject(); + } + } +#endif +} + +struct ProfileSample +{ + uint32_t mStack; + Maybe<double> mTime; + Maybe<double> mResponsiveness; + Maybe<double> mRSS; + Maybe<double> mUSS; + Maybe<int> mFrameNumber; + Maybe<double> mPower; +}; + +static void WriteSample(SpliceableJSONWriter& aWriter, ProfileSample& aSample) +{ + enum Schema : uint32_t { + STACK = 0, + TIME = 1, + 
RESPONSIVENESS = 2, + RSS = 3, + USS = 4, + FRAME_NUMBER = 5, + POWER = 6 + }; + + AutoArraySchemaWriter writer(aWriter); + + writer.IntElement(STACK, aSample.mStack); + + if (aSample.mTime.isSome()) { + writer.DoubleElement(TIME, *aSample.mTime); + } + + if (aSample.mResponsiveness.isSome()) { + writer.DoubleElement(RESPONSIVENESS, *aSample.mResponsiveness); + } + + if (aSample.mRSS.isSome()) { + writer.DoubleElement(RSS, *aSample.mRSS); + } + + if (aSample.mUSS.isSome()) { + writer.DoubleElement(USS, *aSample.mUSS); + } + + if (aSample.mFrameNumber.isSome()) { + writer.IntElement(FRAME_NUMBER, *aSample.mFrameNumber); + } + + if (aSample.mPower.isSome()) { + writer.DoubleElement(POWER, *aSample.mPower); + } +} + +void ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId, + double aSinceTime, JSContext* aContext, + UniqueStacks& aUniqueStacks) +{ + Maybe<ProfileSample> sample; + int readPos = mReadPos; + int currentThreadID = -1; + Maybe<double> currentTime; + UniquePtr<char[]> tagBuff = MakeUnique<char[]>(DYNAMIC_MAX_STRING); + + while (readPos != mWritePos) { + ProfileEntry entry = mEntries[readPos]; + if (entry.mTagName == 'T') { + currentThreadID = entry.mTagInt; + currentTime.reset(); + int readAheadPos = (readPos + 1) % mEntrySize; + if (readAheadPos != mWritePos) { + ProfileEntry readAheadEntry = mEntries[readAheadPos]; + if (readAheadEntry.mTagName == 't') { + currentTime = Some(readAheadEntry.mTagDouble); + } + } + } + if (currentThreadID == aThreadId && (currentTime.isNothing() || *currentTime >= aSinceTime)) { + switch (entry.mTagName) { + case 'r': + if (sample.isSome()) { + sample->mResponsiveness = Some(entry.mTagDouble); + } + break; + case 'p': + if (sample.isSome()) { + sample->mPower = Some(entry.mTagDouble); + } + break; + case 'R': + if (sample.isSome()) { + sample->mRSS = Some(entry.mTagDouble); + } + break; + case 'U': + if (sample.isSome()) { + sample->mUSS = Some(entry.mTagDouble); + } + break; + case 'f': + if 
(sample.isSome()) { + sample->mFrameNumber = Some(entry.mTagInt); + } + break; + case 's': + { + // end the previous sample if there was one + if (sample.isSome()) { + WriteSample(aWriter, *sample); + sample.reset(); + } + // begin the next sample + sample.emplace(); + sample->mTime = currentTime; + + // Seek forward through the entire sample, looking for frames + // this is an easier approach to reason about than adding more + // control variables and cases to the loop that goes through the buffer once + + UniqueStacks::Stack stack = + aUniqueStacks.BeginStack(UniqueStacks::OnStackFrameKey("(root)")); + + int framePos = (readPos + 1) % mEntrySize; + ProfileEntry frame = mEntries[framePos]; + while (framePos != mWritePos && frame.mTagName != 's' && frame.mTagName != 'T') { + int incBy = 1; + frame = mEntries[framePos]; + + // Read ahead to the next tag, if it's a 'd' tag process it now + const char* tagStringData = frame.mTagData; + int readAheadPos = (framePos + 1) % mEntrySize; + // Make sure the string is always null terminated if it fills up + // DYNAMIC_MAX_STRING-2 + tagBuff[DYNAMIC_MAX_STRING-1] = '\0'; + + if (readAheadPos != mWritePos && mEntries[readAheadPos].mTagName == 'd') { + tagStringData = processDynamicTag(framePos, &incBy, tagBuff.get()); + } + + // Write one frame. It can have either + // 1. only location - 'l' containing a memory address + // 2. location and line number - 'c' followed by 'd's, + // an optional 'n' and an optional 'y' + // 3. a JIT return address - 'j' containing native code address + if (frame.mTagName == 'l') { + // Bug 753041 + // We need a double cast here to tell GCC that we don't want to sign + // extend 32-bit addresses starting with 0xFXXXXXX. 
+ unsigned long long pc = (unsigned long long)(uintptr_t)frame.mTagPtr; + snprintf(tagBuff.get(), DYNAMIC_MAX_STRING, "%#llx", pc); + stack.AppendFrame(UniqueStacks::OnStackFrameKey(tagBuff.get())); + } else if (frame.mTagName == 'c') { + UniqueStacks::OnStackFrameKey frameKey(tagStringData); + readAheadPos = (framePos + incBy) % mEntrySize; + if (readAheadPos != mWritePos && + mEntries[readAheadPos].mTagName == 'n') { + frameKey.mLine = Some((unsigned) mEntries[readAheadPos].mTagInt); + incBy++; + } + readAheadPos = (framePos + incBy) % mEntrySize; + if (readAheadPos != mWritePos && + mEntries[readAheadPos].mTagName == 'y') { + frameKey.mCategory = Some((unsigned) mEntries[readAheadPos].mTagInt); + incBy++; + } + stack.AppendFrame(frameKey); +#ifndef SPS_STANDALONE + } else if (frame.mTagName == 'J') { + // A JIT frame may expand to multiple frames due to inlining. + void* pc = frame.mTagPtr; + unsigned depth = aUniqueStacks.LookupJITFrameDepth(pc); + if (depth == 0) { + StreamJSFramesOp framesOp(pc, stack); + JS::ForEachProfiledFrame(aContext, pc, framesOp); + aUniqueStacks.AddJITFrameDepth(pc, framesOp.depth()); + } else { + for (unsigned i = 0; i < depth; i++) { + UniqueStacks::OnStackFrameKey inlineFrameKey(pc, i); + stack.AppendFrame(inlineFrameKey); + } + } +#endif + } + framePos = (framePos + incBy) % mEntrySize; + } + + sample->mStack = stack.GetOrAddIndex(); + break; + } + } + } + readPos = (readPos + 1) % mEntrySize; + } + if (sample.isSome()) { + WriteSample(aWriter, *sample); + } +} + +void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks) +{ + int readPos = mReadPos; + int currentThreadID = -1; + while (readPos != mWritePos) { + ProfileEntry entry = mEntries[readPos]; + if (entry.mTagName == 'T') { + currentThreadID = entry.mTagInt; + } else if (currentThreadID == aThreadId && entry.mTagName == 'm') { + const ProfilerMarker* marker = entry.getMarker(); + if 
(marker->GetTime() >= aSinceTime) { + entry.getMarker()->StreamJSON(aWriter, aUniqueStacks); + } + } + readPos = (readPos + 1) % mEntrySize; + } +} + +int ProfileBuffer::FindLastSampleOfThread(int aThreadId) +{ + // We search backwards from mWritePos-1 to mReadPos. + // Adding mEntrySize makes the result of the modulus positive. + for (int readPos = (mWritePos + mEntrySize - 1) % mEntrySize; + readPos != (mReadPos + mEntrySize - 1) % mEntrySize; + readPos = (readPos + mEntrySize - 1) % mEntrySize) { + ProfileEntry entry = mEntries[readPos]; + if (entry.mTagName == 'T' && entry.mTagInt == aThreadId) { + return readPos; + } + } + + return -1; +} + +void ProfileBuffer::DuplicateLastSample(int aThreadId) +{ + int lastSampleStartPos = FindLastSampleOfThread(aThreadId); + if (lastSampleStartPos == -1) { + return; + } + + MOZ_ASSERT(mEntries[lastSampleStartPos].mTagName == 'T'); + + addTag(mEntries[lastSampleStartPos]); + + // Go through the whole entry and duplicate it, until we find the next one. + for (int readPos = (lastSampleStartPos + 1) % mEntrySize; + readPos != mWritePos; + readPos = (readPos + 1) % mEntrySize) { + switch (mEntries[readPos].mTagName) { + case 'T': + // We're done. 
+ return; + case 't': + // Copy with new time + addTag(ProfileEntry('t', (mozilla::TimeStamp::Now() - sStartTime).ToMilliseconds())); + break; + case 'm': + // Don't copy markers + break; + // Copy anything else we don't know about + // L, B, S, c, s, d, l, f, h, r, t, p + default: + addTag(mEntries[readPos]); + break; + } + } +} + +// END ProfileBuffer +//////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////// +// BEGIN ThreadProfile + +// END ThreadProfile +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/ProfileEntry.h b/tools/profiler/core/ProfileEntry.h new file mode 100644 index 0000000000..b82a2f2718 --- /dev/null +++ b/tools/profiler/core/ProfileEntry.h @@ -0,0 +1,407 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_ENTRY_H +#define MOZ_PROFILE_ENTRY_H + +#include <ostream> +#include "GeckoProfiler.h" +#include "platform.h" +#include "ProfileJSONWriter.h" +#include "ProfilerBacktrace.h" +#include "mozilla/RefPtr.h" +#include <string> +#include <map> +#ifndef SPS_STANDALONE +#include "js/ProfilingFrameIterator.h" +#include "js/TrackedOptimizationInfo.h" +#include "nsHashKeys.h" +#include "nsDataHashtable.h" +#endif +#include "mozilla/Maybe.h" +#include "mozilla/Vector.h" +#ifndef SPS_STANDALONE +#include "gtest/MozGtestFriend.h" +#else +#define FRIEND_TEST(a, b) // TODO Support standalone gtest +#endif +#include "mozilla/HashFunctions.h" +#include "mozilla/UniquePtr.h" + +class ThreadProfile; + +// NB: Packing this structure has been shown to cause SIGBUS issues on ARM. 
+#ifndef __arm__ +#pragma pack(push, 1) +#endif + +class ProfileEntry +{ +public: + ProfileEntry(); + + // aTagData must not need release (i.e. be a string from the text segment) + ProfileEntry(char aTagName, const char *aTagData); + ProfileEntry(char aTagName, void *aTagPtr); + ProfileEntry(char aTagName, ProfilerMarker *aTagMarker); + ProfileEntry(char aTagName, double aTagDouble); + ProfileEntry(char aTagName, uintptr_t aTagOffset); + ProfileEntry(char aTagName, Address aTagAddress); + ProfileEntry(char aTagName, int aTagLine); + ProfileEntry(char aTagName, char aTagChar); + bool is_ent_hint(char hintChar); + bool is_ent_hint(); + bool is_ent(char tagName); + void* get_tagPtr(); + const ProfilerMarker* getMarker() { + MOZ_ASSERT(mTagName == 'm'); + return mTagMarker; + } + + char getTagName() const { return mTagName; } + +private: + FRIEND_TEST(ThreadProfile, InsertOneTag); + FRIEND_TEST(ThreadProfile, InsertOneTagWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertTagsNoWrap); + FRIEND_TEST(ThreadProfile, InsertTagsWrap); + FRIEND_TEST(ThreadProfile, MemoryMeasure); + friend class ProfileBuffer; + union { + const char* mTagData; + char mTagChars[sizeof(void*)]; + void* mTagPtr; + ProfilerMarker* mTagMarker; + double mTagDouble; + Address mTagAddress; + uintptr_t mTagOffset; + int mTagInt; + char mTagChar; + }; + char mTagName; +}; + +#ifndef __arm__ +#pragma pack(pop) +#endif + +class UniqueJSONStrings +{ +public: + UniqueJSONStrings() { + mStringTableWriter.StartBareList(); + } + + void SpliceStringTableElements(SpliceableJSONWriter& aWriter) { + aWriter.TakeAndSplice(mStringTableWriter.WriteFunc()); + } + + void WriteProperty(mozilla::JSONWriter& aWriter, const char* aName, const char* aStr) { + aWriter.IntProperty(aName, GetOrAddIndex(aStr)); + } + + void WriteElement(mozilla::JSONWriter& aWriter, const char* aStr) { + aWriter.IntElement(GetOrAddIndex(aStr)); + } + + uint32_t GetOrAddIndex(const char* aStr); + + struct StringKey { + + explicit 
StringKey(const char* aStr) + : mStr(strdup(aStr)) + { + mHash = mozilla::HashString(mStr); + } + + StringKey(const StringKey& aOther) + : mStr(strdup(aOther.mStr)) + { + mHash = aOther.mHash; + } + + ~StringKey() { + free(mStr); + } + + uint32_t Hash() const; + bool operator==(const StringKey& aOther) const { + return strcmp(mStr, aOther.mStr) == 0; + } + bool operator<(const StringKey& aOther) const { + return mHash < aOther.mHash; + } + + private: + uint32_t mHash; + char* mStr; + }; +private: + SpliceableChunkedJSONWriter mStringTableWriter; + std::map<StringKey, uint32_t> mStringToIndexMap; +}; + +class UniqueStacks +{ +public: + struct FrameKey { +#ifdef SPS_STANDALONE + std::string mLocation; +#else + // This cannot be a std::string, as it is not memmove compatible, which + // is used by nsHashTable + nsCString mLocation; +#endif + mozilla::Maybe<unsigned> mLine; + mozilla::Maybe<unsigned> mCategory; + mozilla::Maybe<void*> mJITAddress; + mozilla::Maybe<uint32_t> mJITDepth; + + explicit FrameKey(const char* aLocation) + : mLocation(aLocation) + { + mHash = Hash(); + } + + FrameKey(const FrameKey& aToCopy) + : mLocation(aToCopy.mLocation) + , mLine(aToCopy.mLine) + , mCategory(aToCopy.mCategory) + , mJITAddress(aToCopy.mJITAddress) + , mJITDepth(aToCopy.mJITDepth) + { + mHash = Hash(); + } + + FrameKey(void* aJITAddress, uint32_t aJITDepth) + : mJITAddress(mozilla::Some(aJITAddress)) + , mJITDepth(mozilla::Some(aJITDepth)) + { + mHash = Hash(); + } + + uint32_t Hash() const; + bool operator==(const FrameKey& aOther) const; + bool operator<(const FrameKey& aOther) const { + return mHash < aOther.mHash; + } + + private: + uint32_t mHash; + }; + + // A FrameKey that holds a scoped reference to a JIT FrameHandle. 
+ struct MOZ_STACK_CLASS OnStackFrameKey : public FrameKey { + explicit OnStackFrameKey(const char* aLocation) + : FrameKey(aLocation) +#ifndef SPS_STANDALONE + , mJITFrameHandle(nullptr) +#endif + { } + + OnStackFrameKey(const OnStackFrameKey& aToCopy) + : FrameKey(aToCopy) +#ifndef SPS_STANDALONE + , mJITFrameHandle(aToCopy.mJITFrameHandle) +#endif + { } + +#ifndef SPS_STANDALONE + const JS::ForEachProfiledFrameOp::FrameHandle* mJITFrameHandle; + + OnStackFrameKey(void* aJITAddress, unsigned aJITDepth) + : FrameKey(aJITAddress, aJITDepth) + , mJITFrameHandle(nullptr) + { } + + OnStackFrameKey(void* aJITAddress, unsigned aJITDepth, + const JS::ForEachProfiledFrameOp::FrameHandle& aJITFrameHandle) + : FrameKey(aJITAddress, aJITDepth) + , mJITFrameHandle(&aJITFrameHandle) + { } +#endif + }; + + struct StackKey { + mozilla::Maybe<uint32_t> mPrefixHash; + mozilla::Maybe<uint32_t> mPrefix; + uint32_t mFrame; + + explicit StackKey(uint32_t aFrame) + : mFrame(aFrame) + { + mHash = Hash(); + } + + uint32_t Hash() const; + bool operator==(const StackKey& aOther) const; + bool operator<(const StackKey& aOther) const { + return mHash < aOther.mHash; + } + + void UpdateHash(uint32_t aPrefixHash, uint32_t aPrefix, uint32_t aFrame) { + mPrefixHash = mozilla::Some(aPrefixHash); + mPrefix = mozilla::Some(aPrefix); + mFrame = aFrame; + mHash = Hash(); + } + + private: + uint32_t mHash; + }; + + class Stack { + public: + Stack(UniqueStacks& aUniqueStacks, const OnStackFrameKey& aRoot); + + void AppendFrame(const OnStackFrameKey& aFrame); + uint32_t GetOrAddIndex() const; + + private: + UniqueStacks& mUniqueStacks; + StackKey mStack; + }; + + explicit UniqueStacks(JSContext* aContext); + + Stack BeginStack(const OnStackFrameKey& aRoot); + uint32_t LookupJITFrameDepth(void* aAddr); + void AddJITFrameDepth(void* aAddr, unsigned depth); + void SpliceFrameTableElements(SpliceableJSONWriter& aWriter); + void SpliceStackTableElements(SpliceableJSONWriter& aWriter); + +private: + uint32_t 
GetOrAddFrameIndex(const OnStackFrameKey& aFrame); + uint32_t GetOrAddStackIndex(const StackKey& aStack); + void StreamFrame(const OnStackFrameKey& aFrame); + void StreamStack(const StackKey& aStack); + +public: + UniqueJSONStrings mUniqueStrings; + +private: + JSContext* mContext; + + // To avoid incurring JitcodeGlobalTable lookup costs for every JIT frame, + // we cache the depth of frames keyed by JIT code address. If an address a + // maps to a depth d, then frames keyed by a for depths 0 to d are + // guaranteed to be in mFrameToIndexMap. + std::map<void*, uint32_t> mJITFrameDepthMap; + + uint32_t mFrameCount; + SpliceableChunkedJSONWriter mFrameTableWriter; +#ifdef SPS_STANDALNOE + std::map<FrameKey, uint32_t> mFrameToIndexMap; +#else + nsDataHashtable<nsGenericHashKey<FrameKey>, uint32_t> mFrameToIndexMap; +#endif + + SpliceableChunkedJSONWriter mStackTableWriter; + + // This sucks but this is really performance critical, nsDataHashtable is way faster + // than map/unordered_map but nsDataHashtable is tied to xpcom so we ifdef + // until we can find a better solution. +#ifdef SPS_STANDALONE + std::map<StackKey, uint32_t> mStackToIndexMap; +#else + nsDataHashtable<nsGenericHashKey<StackKey>, uint32_t> mStackToIndexMap; +#endif +}; + +// +// ThreadProfile JSON Format +// ------------------------- +// +// The profile contains much duplicate information. The output JSON of the +// profile attempts to deduplicate strings, frames, and stack prefixes, to cut +// down on size and to increase JSON streaming speed. Deduplicated values are +// streamed as indices into their respective tables. +// +// Further, arrays of objects with the same set of properties (e.g., samples, +// frames) are output as arrays according to a schema instead of an object +// with property names. A property that is not present is represented in the +// array as null or undefined. 
+// +// The format of the thread profile JSON is shown by the following example +// with 1 sample and 1 marker: +// +// { +// "name": "Foo", +// "tid": 42, +// "samples": +// { +// "schema": +// { +// "stack": 0, /* index into stackTable */ +// "time": 1, /* number */ +// "responsiveness": 2, /* number */ +// "rss": 3, /* number */ +// "uss": 4, /* number */ +// "frameNumber": 5, /* number */ +// "power": 6 /* number */ +// }, +// "data": +// [ +// [ 1, 0.0, 0.0 ] /* { stack: 1, time: 0.0, responsiveness: 0.0 } */ +// ] +// }, +// +// "markers": +// { +// "schema": +// { +// "name": 0, /* index into stringTable */ +// "time": 1, /* number */ +// "data": 2 /* arbitrary JSON */ +// }, +// "data": +// [ +// [ 3, 0.1 ] /* { name: 'example marker', time: 0.1 } */ +// ] +// }, +// +// "stackTable": +// { +// "schema": +// { +// "prefix": 0, /* index into stackTable */ +// "frame": 1 /* index into frameTable */ +// }, +// "data": +// [ +// [ null, 0 ], /* (root) */ +// [ 0, 1 ] /* (root) > foo.js */ +// ] +// }, +// +// "frameTable": +// { +// "schema": +// { +// "location": 0, /* index into stringTable */ +// "implementation": 1, /* index into stringTable */ +// "optimizations": 2, /* arbitrary JSON */ +// "line": 3, /* number */ +// "category": 4 /* number */ +// }, +// "data": +// [ +// [ 0 ], /* { location: '(root)' } */ +// [ 1, 2 ] /* { location: 'foo.js', implementation: 'baseline' } */ +// ] +// }, +// +// "stringTable": +// [ +// "(root)", +// "foo.js", +// "baseline", +// "example marker" +// ] +// } +// + +#endif /* ndef MOZ_PROFILE_ENTRY_H */ diff --git a/tools/profiler/core/ProfileJSONWriter.cpp b/tools/profiler/core/ProfileJSONWriter.cpp new file mode 100644 index 0000000000..65a9425a32 --- /dev/null +++ b/tools/profiler/core/ProfileJSONWriter.cpp @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/HashFunctions.h" + +#include "ProfileJSONWriter.h" + +void +ChunkedJSONWriteFunc::Write(const char* aStr) +{ + MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd); + MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back()); + MOZ_ASSERT(*mChunkPtr == '\0'); + + size_t len = strlen(aStr); + + // Most strings to be written are small, but subprocess profiles (e.g., + // from the content process in e10s) may be huge. If the string is larger + // than a chunk, allocate its own chunk. + char* newPtr; + if (len >= kChunkSize) { + AllocChunk(len + 1); + newPtr = mChunkPtr + len; + } else { + newPtr = mChunkPtr + len; + if (newPtr >= mChunkEnd) { + AllocChunk(kChunkSize); + newPtr = mChunkPtr + len; + } + } + + memcpy(mChunkPtr, aStr, len); + *newPtr = '\0'; + mChunkPtr = newPtr; + mChunkLengths.back() += len; +} + +mozilla::UniquePtr<char[]> +ChunkedJSONWriteFunc::CopyData() const +{ + MOZ_ASSERT(mChunkLengths.length() == mChunkList.length()); + size_t totalLen = 1; + for (size_t i = 0; i < mChunkLengths.length(); i++) { + MOZ_ASSERT(strlen(mChunkList[i].get()) == mChunkLengths[i]); + totalLen += mChunkLengths[i]; + } + mozilla::UniquePtr<char[]> c = mozilla::MakeUnique<char[]>(totalLen); + char* ptr = c.get(); + for (size_t i = 0; i < mChunkList.length(); i++) { + size_t len = mChunkLengths[i]; + memcpy(ptr, mChunkList[i].get(), len); + ptr += len; + } + *ptr = '\0'; + return c; +} + +void +ChunkedJSONWriteFunc::Take(ChunkedJSONWriteFunc&& aOther) +{ + for (size_t i = 0; i < aOther.mChunkList.length(); i++) { + MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[i])); + MOZ_ALWAYS_TRUE(mChunkList.append(mozilla::Move(aOther.mChunkList[i]))); + } + mChunkPtr = mChunkList.back().get() + mChunkLengths.back(); + mChunkEnd = mChunkPtr; + aOther.mChunkPtr = nullptr; + aOther.mChunkEnd = nullptr; + 
aOther.mChunkList.clear(); + aOther.mChunkLengths.clear(); +} + +void +ChunkedJSONWriteFunc::AllocChunk(size_t aChunkSize) +{ + MOZ_ASSERT(mChunkLengths.length() == mChunkList.length()); + mozilla::UniquePtr<char[]> newChunk = mozilla::MakeUnique<char[]>(aChunkSize); + mChunkPtr = newChunk.get(); + mChunkEnd = mChunkPtr + aChunkSize; + *mChunkPtr = '\0'; + MOZ_ALWAYS_TRUE(mChunkLengths.append(0)); + MOZ_ALWAYS_TRUE(mChunkList.append(mozilla::Move(newChunk))); +} + +void +SpliceableJSONWriter::TakeAndSplice(ChunkedJSONWriteFunc* aFunc) +{ + Separator(); + for (size_t i = 0; i < aFunc->mChunkList.length(); i++) { + WriteFunc()->Write(aFunc->mChunkList[i].get()); + } + aFunc->mChunkPtr = nullptr; + aFunc->mChunkEnd = nullptr; + aFunc->mChunkList.clear(); + aFunc->mChunkLengths.clear(); + mNeedComma[mDepth] = true; +} + +void +SpliceableJSONWriter::Splice(const char* aStr) +{ + Separator(); + WriteFunc()->Write(aStr); + mNeedComma[mDepth] = true; +} + +void +SpliceableChunkedJSONWriter::TakeAndSplice(ChunkedJSONWriteFunc* aFunc) +{ + Separator(); + WriteFunc()->Take(mozilla::Move(*aFunc)); + mNeedComma[mDepth] = true; +} diff --git a/tools/profiler/core/ProfileJSONWriter.h b/tools/profiler/core/ProfileJSONWriter.h new file mode 100644 index 0000000000..d9e2115f9a --- /dev/null +++ b/tools/profiler/core/ProfileJSONWriter.h @@ -0,0 +1,126 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILEJSONWRITER_H +#define PROFILEJSONWRITER_H + +#include <ostream> +#include <string> +#include <string.h> + +#include "mozilla/JSONWriter.h" +#include "mozilla/UniquePtr.h" + +class SpliceableChunkedJSONWriter; + +// On average, profile JSONs are large enough such that we want to avoid +// reallocating its buffer when expanding. 
Additionally, the contents of the +// profile are not accessed until the profile is entirely written. For these +// reasons we use a chunked writer that keeps an array of chunks, which is +// concatenated together after writing is finished. +class ChunkedJSONWriteFunc : public mozilla::JSONWriteFunc +{ +public: + friend class SpliceableJSONWriter; + + ChunkedJSONWriteFunc() { + AllocChunk(kChunkSize); + } + + bool IsEmpty() const { + MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd && + mChunkList.length() == 0 && + mChunkLengths.length() == 0); + return !mChunkPtr; + } + + void Write(const char* aStr) override; + mozilla::UniquePtr<char[]> CopyData() const; + void Take(ChunkedJSONWriteFunc&& aOther); + +private: + void AllocChunk(size_t aChunkSize); + + static const size_t kChunkSize = 4096 * 512; + + // Pointer for writing inside the current chunk. + // + // The current chunk is always at the back of mChunkList, i.e., + // mChunkList.back() <= mChunkPtr <= mChunkEnd. + char* mChunkPtr; + + // Pointer to the end of the current chunk. + // + // The current chunk is always at the back of mChunkList, i.e., + // mChunkEnd >= mChunkList.back() + mChunkLengths.back(). + char* mChunkEnd; + + // List of chunks and their lengths. + // + // For all i, the length of the string in mChunkList[i] is + // mChunkLengths[i]. 
+ mozilla::Vector<mozilla::UniquePtr<char[]>> mChunkList; + mozilla::Vector<size_t> mChunkLengths; +}; + +struct OStreamJSONWriteFunc : public mozilla::JSONWriteFunc +{ + explicit OStreamJSONWriteFunc(std::ostream& aStream) + : mStream(aStream) + { } + + void Write(const char* aStr) override { + mStream << aStr; + } + + std::ostream& mStream; +}; + +class SpliceableJSONWriter : public mozilla::JSONWriter +{ +public: + explicit SpliceableJSONWriter(mozilla::UniquePtr<mozilla::JSONWriteFunc> aWriter) + : JSONWriter(mozilla::Move(aWriter)) + { } + + void StartBareList(CollectionStyle aStyle = SingleLineStyle) { + StartCollection(nullptr, "", aStyle); + } + + void EndBareList() { + EndCollection(""); + } + + void NullElements(uint32_t aCount) { + for (uint32_t i = 0; i < aCount; i++) { + NullElement(); + } + } + + void Splice(const ChunkedJSONWriteFunc* aFunc); + void Splice(const char* aStr); + + // Takes the chunks from aFunc and write them. If move is not possible + // (e.g., using OStreamJSONWriteFunc), aFunc's chunks are copied and its + // storage cleared. + virtual void TakeAndSplice(ChunkedJSONWriteFunc* aFunc); +}; + +class SpliceableChunkedJSONWriter : public SpliceableJSONWriter +{ +public: + explicit SpliceableChunkedJSONWriter() + : SpliceableJSONWriter(mozilla::MakeUnique<ChunkedJSONWriteFunc>()) + { } + + ChunkedJSONWriteFunc* WriteFunc() const { + return static_cast<ChunkedJSONWriteFunc*>(JSONWriter::WriteFunc()); + } + + // Adopts the chunks from aFunc without copying. 
+ virtual void TakeAndSplice(ChunkedJSONWriteFunc* aFunc) override; +}; + +#endif // PROFILEJSONWRITER_H diff --git a/tools/profiler/core/ProfilerBacktrace.cpp b/tools/profiler/core/ProfilerBacktrace.cpp new file mode 100644 index 0000000000..7302dd64c2 --- /dev/null +++ b/tools/profiler/core/ProfilerBacktrace.cpp @@ -0,0 +1,33 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfilerBacktrace.h" + +#include "ProfileJSONWriter.h" +#include "SyncProfile.h" + +ProfilerBacktrace::ProfilerBacktrace(SyncProfile* aProfile) + : mProfile(aProfile) +{ + MOZ_COUNT_CTOR(ProfilerBacktrace); + MOZ_ASSERT(aProfile); +} + +ProfilerBacktrace::~ProfilerBacktrace() +{ + MOZ_COUNT_DTOR(ProfilerBacktrace); + if (mProfile->ShouldDestroy()) { + delete mProfile; + } +} + +void +ProfilerBacktrace::StreamJSON(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + ::MutexAutoLock lock(mProfile->GetMutex()); + mProfile->StreamJSON(aWriter, aUniqueStacks); +} diff --git a/tools/profiler/core/ProfilerMarkers.cpp b/tools/profiler/core/ProfilerMarkers.cpp new file mode 100644 index 0000000000..3cb47de487 --- /dev/null +++ b/tools/profiler/core/ProfilerMarkers.cpp @@ -0,0 +1,210 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "GeckoProfiler.h" +#include "ProfilerBacktrace.h" +#include "ProfilerMarkers.h" +#include "SyncProfile.h" +#ifndef SPS_STANDALONE +#include "gfxASurface.h" +#include "Layers.h" +#include "mozilla/Sprintf.h" +#endif + +ProfilerMarkerPayload::ProfilerMarkerPayload(ProfilerBacktrace* aStack) + : mStack(aStack) +{} + +ProfilerMarkerPayload::ProfilerMarkerPayload(const mozilla::TimeStamp& aStartTime, + const mozilla::TimeStamp& aEndTime, + ProfilerBacktrace* aStack) + : mStartTime(aStartTime) + , mEndTime(aEndTime) + , mStack(aStack) +{} + +ProfilerMarkerPayload::~ProfilerMarkerPayload() +{ + profiler_free_backtrace(mStack); +} + +void +ProfilerMarkerPayload::streamCommonProps(const char* aMarkerType, + SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + MOZ_ASSERT(aMarkerType); + aWriter.StringProperty("type", aMarkerType); + if (!mStartTime.IsNull()) { + aWriter.DoubleProperty("startTime", profiler_time(mStartTime)); + } + if (!mEndTime.IsNull()) { + aWriter.DoubleProperty("endTime", profiler_time(mEndTime)); + } + if (mStack) { + aWriter.StartObjectProperty("stack"); + { + mStack->StreamJSON(aWriter, aUniqueStacks); + } + aWriter.EndObject(); + } +} + +ProfilerMarkerTracing::ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData) + : mCategory(aCategory) + , mMetaData(aMetaData) +{ + if (aMetaData == TRACING_EVENT_BACKTRACE) { + SetStack(profiler_get_backtrace()); + } +} + +ProfilerMarkerTracing::ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData, + ProfilerBacktrace* aCause) + : mCategory(aCategory) + , mMetaData(aMetaData) +{ + if (aCause) { + SetStack(aCause); + } +} + +void +ProfilerMarkerTracing::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + streamCommonProps("tracing", aWriter, aUniqueStacks); + + if (GetCategory()) { + aWriter.StringProperty("category", GetCategory()); + } + if (GetMetaData() != TRACING_DEFAULT) { + if (GetMetaData() == TRACING_INTERVAL_START) 
{ + aWriter.StringProperty("interval", "start"); + } else if (GetMetaData() == TRACING_INTERVAL_END) { + aWriter.StringProperty("interval", "end"); + } + } +} + +#ifndef SPS_STANDALONE +GPUMarkerPayload::GPUMarkerPayload( + const mozilla::TimeStamp& aCpuTimeStart, + const mozilla::TimeStamp& aCpuTimeEnd, + uint64_t aGpuTimeStart, + uint64_t aGpuTimeEnd) + + : ProfilerMarkerPayload(aCpuTimeStart, aCpuTimeEnd) + , mCpuTimeStart(aCpuTimeStart) + , mCpuTimeEnd(aCpuTimeEnd) + , mGpuTimeStart(aGpuTimeStart) + , mGpuTimeEnd(aGpuTimeEnd) +{ } + +void +GPUMarkerPayload::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + streamCommonProps("gpu_timer_query", aWriter, aUniqueStacks); + + aWriter.DoubleProperty("cpustart", profiler_time(mCpuTimeStart)); + aWriter.DoubleProperty("cpuend", profiler_time(mCpuTimeEnd)); + aWriter.IntProperty("gpustart", (int)mGpuTimeStart); + aWriter.IntProperty("gpuend", (int)mGpuTimeEnd); +} + +ProfilerMarkerImagePayload::ProfilerMarkerImagePayload(gfxASurface *aImg) + : mImg(aImg) +{ } + +void +ProfilerMarkerImagePayload::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + streamCommonProps("innerHTML", aWriter, aUniqueStacks); + // TODO: Finish me + //aWriter.NameValue("innerHTML", "<img src=''/>"); +} + +IOMarkerPayload::IOMarkerPayload(const char* aSource, + const char* aFilename, + const mozilla::TimeStamp& aStartTime, + const mozilla::TimeStamp& aEndTime, + ProfilerBacktrace* aStack) + : ProfilerMarkerPayload(aStartTime, aEndTime, aStack), + mSource(aSource) +{ + mFilename = aFilename ? 
strdup(aFilename) : nullptr; + MOZ_ASSERT(aSource); +} + +IOMarkerPayload::~IOMarkerPayload(){ + free(mFilename); +} + +void +IOMarkerPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + streamCommonProps("io", aWriter, aUniqueStacks); + aWriter.StringProperty("source", mSource); + if (mFilename != nullptr) { + aWriter.StringProperty("filename", mFilename); + } +} + +void +ProfilerJSEventMarker(const char *event) +{ + PROFILER_MARKER(event); +} + +LayerTranslationPayload::LayerTranslationPayload(mozilla::layers::Layer* aLayer, + mozilla::gfx::Point aPoint) + : ProfilerMarkerPayload(mozilla::TimeStamp::Now(), mozilla::TimeStamp::Now(), nullptr) + , mLayer(aLayer) + , mPoint(aPoint) +{ +} + +void +LayerTranslationPayload::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + const size_t bufferSize = 32; + char buffer[bufferSize]; + SprintfLiteral(buffer, "%p", mLayer); + + aWriter.StringProperty("layer", buffer); + aWriter.IntProperty("x", mPoint.x); + aWriter.IntProperty("y", mPoint.y); + aWriter.StringProperty("category", "LayerTranslation"); +} + +TouchDataPayload::TouchDataPayload(const mozilla::ScreenIntPoint& aPoint) + : ProfilerMarkerPayload(mozilla::TimeStamp::Now(), mozilla::TimeStamp::Now(), nullptr) +{ + mPoint = aPoint; +} + +void +TouchDataPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + aWriter.IntProperty("x", mPoint.x); + aWriter.IntProperty("y", mPoint.y); +} + +VsyncPayload::VsyncPayload(mozilla::TimeStamp aVsyncTimestamp) + : ProfilerMarkerPayload(aVsyncTimestamp, aVsyncTimestamp, nullptr) + , mVsyncTimestamp(aVsyncTimestamp) +{ +} + +void +VsyncPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + aWriter.DoubleProperty("vsync", profiler_time(mVsyncTimestamp)); + aWriter.StringProperty("category", "VsyncTimestamp"); +} +#endif diff --git a/tools/profiler/core/StackTop.cpp b/tools/profiler/core/StackTop.cpp new 
file mode 100644 index 0000000000..1f7944e5e9 --- /dev/null +++ b/tools/profiler/core/StackTop.cpp @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef XP_MACOSX +#include <mach/task.h> +#include <mach/thread_act.h> +#include <pthread.h> +#elif XP_WIN +#include <windows.h> +#endif + +#include "StackTop.h" + +void *GetStackTop(void *guess) { +#if defined(XP_MACOSX) + pthread_t thread = pthread_self(); + return pthread_get_stackaddr_np(thread); +#elif defined(XP_WIN) +#if defined(_MSC_VER) && defined(_M_IX86) + // offset 0x18 from the FS segment register gives a pointer to + // the thread information block for the current thread + NT_TIB* pTib; + __asm { + MOV EAX, FS:[18h] + MOV pTib, EAX + } + return static_cast<void*>(pTib->StackBase); +#elif defined(__GNUC__) && defined(i386) + // offset 0x18 from the FS segment register gives a pointer to + // the thread information block for the current thread + NT_TIB* pTib; + asm ( "movl %%fs:0x18, %0\n" + : "=r" (pTib) + ); + return static_cast<void*>(pTib->StackBase); +#elif defined(_M_X64) || defined(__x86_64) + PNT_TIB64 pTib = reinterpret_cast<PNT_TIB64>(NtCurrentTeb()); + return reinterpret_cast<void*>(pTib->StackBase); +#else +#error Need a way to get the stack bounds on this platform (Windows) +#endif +#else + return guess; +#endif +} diff --git a/tools/profiler/core/StackTop.h b/tools/profiler/core/StackTop.h new file mode 100644 index 0000000000..a933d10b4e --- /dev/null +++ b/tools/profiler/core/StackTop.h @@ -0,0 +1,10 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + 
* License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_STACK_TOP_H +#define MOZ_STACK_TOP_H +void *GetStackTop(void *guess); +#endif diff --git a/tools/profiler/core/SyncProfile.cpp b/tools/profiler/core/SyncProfile.cpp new file mode 100644 index 0000000000..4c4742f348 --- /dev/null +++ b/tools/profiler/core/SyncProfile.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SyncProfile.h" + +SyncProfile::SyncProfile(ThreadInfo* aInfo, int aEntrySize) + : ThreadProfile(aInfo, new ProfileBuffer(aEntrySize)) + , mOwnerState(REFERENCED) +{ + MOZ_COUNT_CTOR(SyncProfile); +} + +SyncProfile::~SyncProfile() +{ + MOZ_COUNT_DTOR(SyncProfile); + + // SyncProfile owns the ThreadInfo; see NewSyncProfile. + ThreadInfo* info = GetThreadInfo(); + delete info; +} + +bool +SyncProfile::ShouldDestroy() +{ + ::MutexAutoLock lock(GetMutex()); + if (mOwnerState == OWNED) { + mOwnerState = OWNER_DESTROYING; + return true; + } + mOwnerState = ORPHANED; + return false; +} + +void +SyncProfile::EndUnwind() +{ + if (mOwnerState != ORPHANED) { + mOwnerState = OWNED; + } + // Save mOwnerState before we release the mutex + OwnerState ownerState = mOwnerState; + ThreadProfile::EndUnwind(); + if (ownerState == ORPHANED) { + delete this; + } +} + +// SyncProfiles' stacks are deduplicated in the context of the containing +// profile in which the backtrace is as a marker payload. 
+void +SyncProfile::StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + ThreadProfile::StreamSamplesAndMarkers(aWriter, /* aSinceTime = */ 0, aUniqueStacks); +} diff --git a/tools/profiler/core/SyncProfile.h b/tools/profiler/core/SyncProfile.h new file mode 100644 index 0000000000..58f6b0d818 --- /dev/null +++ b/tools/profiler/core/SyncProfile.h @@ -0,0 +1,43 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __SYNCPROFILE_H +#define __SYNCPROFILE_H + +#include "ProfileEntry.h" +#include "ThreadProfile.h" + +class SyncProfile : public ThreadProfile +{ +public: + SyncProfile(ThreadInfo* aInfo, int aEntrySize); + ~SyncProfile(); + + // SyncProfiles' stacks are deduplicated in the context of the containing + // profile in which the backtrace is as a marker payload. 
+ void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks); + + virtual void EndUnwind(); + virtual SyncProfile* AsSyncProfile() { return this; } + +private: + friend class ProfilerBacktrace; + + enum OwnerState + { + REFERENCED, // ProfilerBacktrace has a pointer to this but doesn't own + OWNED, // ProfilerBacktrace is responsible for destroying this + OWNER_DESTROYING, // ProfilerBacktrace owns this and is destroying + ORPHANED // No owner, we must destroy ourselves + }; + + bool ShouldDestroy(); + + OwnerState mOwnerState; +}; + +#endif // __SYNCPROFILE_H + diff --git a/tools/profiler/core/ThreadInfo.cpp b/tools/profiler/core/ThreadInfo.cpp new file mode 100644 index 0000000000..0e25d23306 --- /dev/null +++ b/tools/profiler/core/ThreadInfo.cpp @@ -0,0 +1,73 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ThreadInfo.h" +#include "ThreadProfile.h" + +#include "mozilla/DebugOnly.h" + +ThreadInfo::ThreadInfo(const char* aName, int aThreadId, + bool aIsMainThread, PseudoStack* aPseudoStack, + void* aStackTop) + : mName(strdup(aName)) + , mThreadId(aThreadId) + , mIsMainThread(aIsMainThread) + , mPseudoStack(aPseudoStack) + , mPlatformData(Sampler::AllocPlatformData(aThreadId)) + , mProfile(nullptr) + , mStackTop(aStackTop) + , mPendingDelete(false) +{ + MOZ_COUNT_CTOR(ThreadInfo); +#ifndef SPS_STANDALONE + mThread = NS_GetCurrentThread(); +#endif + + // We don't have to guess on mac +#ifdef XP_MACOSX + pthread_t self = pthread_self(); + mStackTop = pthread_get_stackaddr_np(self); +#endif +} + +ThreadInfo::~ThreadInfo() { + MOZ_COUNT_DTOR(ThreadInfo); + free(mName); + + if (mProfile) + delete mProfile; + + Sampler::FreePlatformData(mPlatformData); +} + +void +ThreadInfo::SetPendingDelete() +{ + mPendingDelete = true; + // We don't own the pseudostack so disconnect it. + mPseudoStack = nullptr; + if (mProfile) { + mProfile->SetPendingDelete(); + } +} + +bool +ThreadInfo::CanInvokeJS() const +{ +#ifdef SPS_STANDALONE + return false; +#else + nsIThread* thread = GetThread(); + if (!thread) { + MOZ_ASSERT(IsMainThread()); + return true; + } + bool result; + mozilla::DebugOnly<nsresult> rv = thread->GetCanInvokeJS(&result); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + return result; +#endif +} diff --git a/tools/profiler/core/ThreadInfo.h b/tools/profiler/core/ThreadInfo.h new file mode 100644 index 0000000000..1cb4e5dc8a --- /dev/null +++ b/tools/profiler/core/ThreadInfo.h @@ -0,0 +1,66 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MOZ_THREAD_INFO_H +#define MOZ_THREAD_INFO_H + +#include "platform.h" + +class ThreadInfo { + public: + ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack, void* aStackTop); + + virtual ~ThreadInfo(); + + const char* Name() const { return mName; } + int ThreadId() const { return mThreadId; } + + bool IsMainThread() const { return mIsMainThread; } + PseudoStack* Stack() const { return mPseudoStack; } + + void SetProfile(ThreadProfile* aProfile) { mProfile = aProfile; } + ThreadProfile* Profile() const { return mProfile; } + + PlatformData* GetPlatformData() const { return mPlatformData; } + void* StackTop() const { return mStackTop; } + + virtual void SetPendingDelete(); + bool IsPendingDelete() const { return mPendingDelete; } + +#ifndef SPS_STANDALONE + /** + * May be null for the main thread if the profiler was started during startup + */ + nsIThread* GetThread() const { return mThread.get(); } + +#endif + + bool CanInvokeJS() const; + + private: + char* mName; + int mThreadId; + const bool mIsMainThread; + PseudoStack* mPseudoStack; + PlatformData* mPlatformData; + ThreadProfile* mProfile; + void* mStackTop; +#ifndef SPS_STANDALONE + nsCOMPtr<nsIThread> mThread; +#endif + bool mPendingDelete; +}; + +// Just like ThreadInfo, but owns a reference to the PseudoStack. 
+class StackOwningThreadInfo : public ThreadInfo { + public: + StackOwningThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack, void* aStackTop); + virtual ~StackOwningThreadInfo(); + + virtual void SetPendingDelete(); +}; + +#endif diff --git a/tools/profiler/core/ThreadProfile.cpp b/tools/profiler/core/ThreadProfile.cpp new file mode 100644 index 0000000000..7452a7ee8d --- /dev/null +++ b/tools/profiler/core/ThreadProfile.cpp @@ -0,0 +1,260 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +ThreadProfile::ThreadProfile(ThreadInfo* aInfo, ProfileBuffer* aBuffer) + : mThreadInfo(aInfo) + , mBuffer(aBuffer) + , mPseudoStack(aInfo->Stack()) + , mMutex(OS::CreateMutex("ThreadProfile::mMutex")) + , mThreadId(int(aInfo->ThreadId())) + , mIsMainThread(aInfo->IsMainThread()) + , mPlatformData(aInfo->GetPlatformData()) + , mStackTop(aInfo->StackTop()) +#ifndef SPS_STANDALONE + , mRespInfo(this) +#endif +#ifdef XP_LINUX + , mRssMemory(0) + , mUssMemory(0) +#endif +{ + MOZ_COUNT_CTOR(ThreadProfile); + MOZ_ASSERT(aBuffer); + + // I don't know if we can assert this. But we should warn. 
+ MOZ_ASSERT(aInfo->ThreadId() >= 0, "native thread ID is < 0"); + MOZ_ASSERT(aInfo->ThreadId() <= INT32_MAX, "native thread ID is > INT32_MAX"); +} + +ThreadProfile::~ThreadProfile() +{ + MOZ_COUNT_DTOR(ThreadProfile); +} + +void ThreadProfile::addTag(const ProfileEntry& aTag) +{ + mBuffer->addTag(aTag); +} + +void ThreadProfile::addStoredMarker(ProfilerMarker *aStoredMarker) { + mBuffer->addStoredMarker(aStoredMarker); +} + +void ThreadProfile::StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime) +{ + // mUniqueStacks may already be emplaced from FlushSamplesAndMarkers. + if (!mUniqueStacks.isSome()) { +#ifndef SPS_STANDALONE + mUniqueStacks.emplace(mPseudoStack->mContext); +#else + mUniqueStacks.emplace(nullptr); +#endif + } + + aWriter.Start(SpliceableJSONWriter::SingleLineStyle); + { + StreamSamplesAndMarkers(aWriter, aSinceTime, *mUniqueStacks); + + aWriter.StartObjectProperty("stackTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("prefix"); + schema.WriteField("frame"); + } + + aWriter.StartArrayProperty("data"); + { + mUniqueStacks->SpliceStackTableElements(aWriter); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("frameTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("location"); + schema.WriteField("implementation"); + schema.WriteField("optimizations"); + schema.WriteField("line"); + schema.WriteField("category"); + } + + aWriter.StartArrayProperty("data"); + { + mUniqueStacks->SpliceFrameTableElements(aWriter); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartArrayProperty("stringTable"); + { + mUniqueStacks->mUniqueStrings.SpliceStringTableElements(aWriter); + } + aWriter.EndArray(); + } + aWriter.End(); + + mUniqueStacks.reset(); +} + +void ThreadProfile::StreamSamplesAndMarkers(SpliceableJSONWriter& aWriter, double aSinceTime, + UniqueStacks& aUniqueStacks) +{ +#ifndef SPS_STANDALONE + // Thread meta data + if (XRE_GetProcessType() == 
GeckoProcessType_Plugin) { + // TODO Add the proper plugin name + aWriter.StringProperty("name", "Plugin"); + } else if (XRE_GetProcessType() == GeckoProcessType_Content) { + // This isn't going to really help once we have multiple content + // processes, but it'll do for now. + aWriter.StringProperty("name", "Content"); + } else { + aWriter.StringProperty("name", Name()); + } +#else + aWriter.StringProperty("name", Name()); +#endif + + aWriter.IntProperty("tid", static_cast<int>(mThreadId)); + + aWriter.StartObjectProperty("samples"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("stack"); + schema.WriteField("time"); + schema.WriteField("responsiveness"); + schema.WriteField("rss"); + schema.WriteField("uss"); + schema.WriteField("frameNumber"); + schema.WriteField("power"); + } + + aWriter.StartArrayProperty("data"); + { + if (mSavedStreamedSamples) { + // We would only have saved streamed samples during shutdown + // streaming, which cares about dumping the entire buffer, and thus + // should have passed in 0 for aSinceTime. 
+ MOZ_ASSERT(aSinceTime == 0); + aWriter.Splice(mSavedStreamedSamples.get()); + mSavedStreamedSamples.reset(); + } + mBuffer->StreamSamplesToJSON(aWriter, mThreadId, aSinceTime, +#ifndef SPS_STANDALONE + mPseudoStack->mContext, +#else + nullptr, +#endif + aUniqueStacks); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("markers"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("name"); + schema.WriteField("time"); + schema.WriteField("data"); + } + + aWriter.StartArrayProperty("data"); + { + if (mSavedStreamedMarkers) { + MOZ_ASSERT(aSinceTime == 0); + aWriter.Splice(mSavedStreamedMarkers.get()); + mSavedStreamedMarkers.reset(); + } + mBuffer->StreamMarkersToJSON(aWriter, mThreadId, aSinceTime, aUniqueStacks); + } + aWriter.EndArray(); + } + aWriter.EndObject(); +} + +void ThreadProfile::FlushSamplesAndMarkers() +{ + // This function is used to serialize the current buffer just before + // JSContext destruction. + MOZ_ASSERT(mPseudoStack->mContext); + + // Unlike StreamJSObject, do not surround the samples in brackets by calling + // aWriter.{Start,End}BareList. The result string will be a comma-separated + // list of JSON object literals that will prepended by StreamJSObject into + // an existing array. + // + // Note that the UniqueStacks instance is persisted so that the frame-index + // mapping is stable across JS shutdown. 
+#ifndef SPS_STANDALONE + mUniqueStacks.emplace(mPseudoStack->mContext); +#else + mUniqueStacks.emplace(nullptr); +#endif + + { + SpliceableChunkedJSONWriter b; + b.StartBareList(); + { + mBuffer->StreamSamplesToJSON(b, mThreadId, /* aSinceTime = */ 0, +#ifndef SPS_STANDALONE + mPseudoStack->mContext, +#else + nullptr, +#endif + *mUniqueStacks); + } + b.EndBareList(); + mSavedStreamedSamples = b.WriteFunc()->CopyData(); + } + + { + SpliceableChunkedJSONWriter b; + b.StartBareList(); + { + mBuffer->StreamMarkersToJSON(b, mThreadId, /* aSinceTime = */ 0, *mUniqueStacks); + } + b.EndBareList(); + mSavedStreamedMarkers = b.WriteFunc()->CopyData(); + } + + // Reset the buffer. Attempting to symbolicate JS samples after mContext has + // gone away will crash. + mBuffer->reset(); +} + +PseudoStack* ThreadProfile::GetPseudoStack() +{ + return mPseudoStack; +} + +void ThreadProfile::BeginUnwind() +{ + mMutex->Lock(); +} + +void ThreadProfile::EndUnwind() +{ + mMutex->Unlock(); +} + +::Mutex& ThreadProfile::GetMutex() +{ + return *mMutex.get(); +} + +void ThreadProfile::DuplicateLastSample() +{ + mBuffer->DuplicateLastSample(mThreadId); +} + diff --git a/tools/profiler/core/ThreadProfile.h b/tools/profiler/core/ThreadProfile.h new file mode 100644 index 0000000000..ca2bbfe7ab --- /dev/null +++ b/tools/profiler/core/ThreadProfile.h @@ -0,0 +1,107 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MOZ_THREAD_PROFILE_H +#define MOZ_THREAD_PROFILE_H + +#include "ProfileBuffer.h" +#include "ThreadInfo.h" + +class ThreadProfile +{ +public: + ThreadProfile(ThreadInfo* aThreadInfo, ProfileBuffer* aBuffer); + virtual ~ThreadProfile(); + void addTag(const ProfileEntry& aTag); + + /** + * Track a marker which has been inserted into the ThreadProfile. + * This marker can safely be deleted once the generation has + * expired. + */ + void addStoredMarker(ProfilerMarker *aStoredMarker); + PseudoStack* GetPseudoStack(); + ::Mutex& GetMutex(); + void StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime = 0); + + /** + * Call this method when the JS entries inside the buffer are about to + * become invalid, i.e., just before JS shutdown. + */ + void FlushSamplesAndMarkers(); + + void BeginUnwind(); + virtual void EndUnwind(); + virtual SyncProfile* AsSyncProfile() { return nullptr; } + + bool IsMainThread() const { return mIsMainThread; } + const char* Name() const { return mThreadInfo->Name(); } + int ThreadId() const { return mThreadId; } + + PlatformData* GetPlatformData() const { return mPlatformData; } + void* GetStackTop() const { return mStackTop; } + void DuplicateLastSample(); + + ThreadInfo* GetThreadInfo() const { return mThreadInfo; } +#ifndef SPS_STANDALONE + ThreadResponsiveness* GetThreadResponsiveness() { return &mRespInfo; } +#endif + + bool CanInvokeJS() const { return mThreadInfo->CanInvokeJS(); } + + void SetPendingDelete() + { + mPseudoStack = nullptr; + mPlatformData = nullptr; + } + + uint32_t bufferGeneration() const { + return mBuffer->mGeneration; + } + +protected: + void StreamSamplesAndMarkers(SpliceableJSONWriter& aWriter, double aSinceTime, + UniqueStacks& aUniqueStacks); + +private: + FRIEND_TEST(ThreadProfile, InsertOneTag); + FRIEND_TEST(ThreadProfile, InsertOneTagWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertTagsNoWrap); + FRIEND_TEST(ThreadProfile, InsertTagsWrap); + FRIEND_TEST(ThreadProfile, MemoryMeasure); + 
ThreadInfo* mThreadInfo; + + const RefPtr<ProfileBuffer> mBuffer; + + // JS frames in the buffer may require a live JSRuntime to stream (e.g., + // stringifying JIT frames). In the case of JSRuntime destruction, + // FlushSamplesAndMarkers should be called to save them. These are spliced + // into the final stream. + mozilla::UniquePtr<char[]> mSavedStreamedSamples; + mozilla::UniquePtr<char[]> mSavedStreamedMarkers; + mozilla::Maybe<UniqueStacks> mUniqueStacks; + + PseudoStack* mPseudoStack; + mozilla::UniquePtr<Mutex> mMutex; + int mThreadId; + bool mIsMainThread; + PlatformData* mPlatformData; // Platform specific data. + void* const mStackTop; +#ifndef SPS_STANDALONE + ThreadResponsiveness mRespInfo; +#endif + + // Only Linux is using a signal sender, instead of stopping the thread, so we + // need some space to store the data which cannot be collected in the signal + // handler code. +#ifdef XP_LINUX +public: + int64_t mRssMemory; + int64_t mUssMemory; +#endif +}; + +#endif diff --git a/tools/profiler/core/platform-linux.cc b/tools/profiler/core/platform-linux.cc new file mode 100644 index 0000000000..160873c9dc --- /dev/null +++ b/tools/profiler/core/platform-linux.cc @@ -0,0 +1,715 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +/* +# vim: sw=2 +*/ +#include <stdio.h> +#include <math.h> + +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/prctl.h> // set name +#include <stdlib.h> +#include <sched.h> +#ifdef ANDROID +#include <android/log.h> +#else +#define __android_log_print(a, ...) +#endif +#include <ucontext.h> +// Ubuntu Dapper requires memory pages to be marked as +// executable. Otherwise, OS raises an exception when executing code +// in that page. 
+#include <sys/types.h> // mmap & munmap +#include <sys/mman.h> // mmap & munmap +#include <sys/stat.h> // open +#include <fcntl.h> // open +#include <unistd.h> // sysconf +#include <semaphore.h> +#ifdef __GLIBC__ +#include <execinfo.h> // backtrace, backtrace_symbols +#endif // def __GLIBC__ +#include <strings.h> // index +#include <errno.h> +#include <stdarg.h> +#include "prenv.h" +#include "platform.h" +#include "GeckoProfiler.h" +#include "mozilla/Mutex.h" +#include "mozilla/Atomics.h" +#include "mozilla/LinuxSignal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/DebugOnly.h" +#include "ProfileEntry.h" +#include "nsThreadUtils.h" +#include "GeckoSampler.h" +#include "ThreadResponsiveness.h" + +#if defined(__ARM_EABI__) && defined(ANDROID) + // Should also work on other Android and ARM Linux, but not tested there yet. +# define USE_EHABI_STACKWALK +# include "EHABIStackWalk.h" +#elif defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" +#endif + +// Memory profile +#include "nsMemoryReporterManager.h" + +#include <string.h> +#include <list> + +#define SIGNAL_SAVE_PROFILE SIGUSR2 + +using namespace mozilla; + +#if defined(USE_LUL_STACKWALK) +// A singleton instance of the library. It is initialised at first +// use. Currently only the main thread can call Sampler::Start, so +// there is no need for a mechanism to ensure that it is only +// created once in a multi-thread-use situation. +lul::LUL* sLUL = nullptr; + +// This is the sLUL initialization routine. +static void sLUL_initialization_routine(void) +{ + MOZ_ASSERT(!sLUL); + MOZ_ASSERT(gettid() == getpid()); /* "this is the main thread" */ + sLUL = new lul::LUL(logging_sink_for_LUL); + // Read all the unwind info currently available. 
+ read_procmaps(sLUL); +} +#endif + +/* static */ Thread::tid_t +Thread::GetCurrentId() +{ + return gettid(); +} + +#if !defined(ANDROID) +// Keep track of when any of our threads calls fork(), so we can +// temporarily disable signal delivery during the fork() call. Not +// doing so appears to cause a kind of race, in which signals keep +// getting delivered to the thread doing fork(), which keeps causing +// it to fail and be restarted; hence forward progress is delayed a +// great deal. A side effect of this is to permanently disable +// sampling in the child process. See bug 837390. + +// Unfortunately this is only doable on non-Android, since Bionic +// doesn't have pthread_atfork. + +// This records the current state at the time we paused it. +static bool was_paused = false; + +// In the parent, just before the fork, record the pausedness state, +// and then pause. +static void paf_prepare(void) { + if (Sampler::GetActiveSampler()) { + was_paused = Sampler::GetActiveSampler()->IsPaused(); + Sampler::GetActiveSampler()->SetPaused(true); + } else { + was_paused = false; + } +} + +// In the parent, just after the fork, return pausedness to the +// pre-fork state. +static void paf_parent(void) { + if (Sampler::GetActiveSampler()) + Sampler::GetActiveSampler()->SetPaused(was_paused); +} + +// Set up the fork handlers. 
+static void* setup_atfork() { + pthread_atfork(paf_prepare, paf_parent, NULL); + return NULL; +} +#endif /* !defined(ANDROID) */ + +struct SamplerRegistry { + static void AddActiveSampler(Sampler *sampler) { + ASSERT(!SamplerRegistry::sampler); + SamplerRegistry::sampler = sampler; + } + static void RemoveActiveSampler(Sampler *sampler) { + SamplerRegistry::sampler = NULL; + } + static Sampler *sampler; +}; + +Sampler *SamplerRegistry::sampler = NULL; + +static mozilla::Atomic<ThreadProfile*> sCurrentThreadProfile; +static sem_t sSignalHandlingDone; + +static void ProfilerSaveSignalHandler(int signal, siginfo_t* info, void* context) { + Sampler::GetActiveSampler()->RequestSave(); +} + +static void SetSampleContext(TickSample* sample, void* context) +{ + // Extracting the sample from the context is extremely machine dependent. + ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context); + mcontext_t& mcontext = ucontext->uc_mcontext; +#if V8_HOST_ARCH_IA32 + sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]); + sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]); + sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]); +#elif V8_HOST_ARCH_X64 + sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]); + sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]); + sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]); +#elif V8_HOST_ARCH_ARM +// An undefined macro evaluates to 0, so this applies to Android's Bionic also. 
+#if !defined(ANDROID) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) + sample->pc = reinterpret_cast<Address>(mcontext.gregs[R15]); + sample->sp = reinterpret_cast<Address>(mcontext.gregs[R13]); + sample->fp = reinterpret_cast<Address>(mcontext.gregs[R11]); +#ifdef ENABLE_ARM_LR_SAVING + sample->lr = reinterpret_cast<Address>(mcontext.gregs[R14]); +#endif +#else + sample->pc = reinterpret_cast<Address>(mcontext.arm_pc); + sample->sp = reinterpret_cast<Address>(mcontext.arm_sp); + sample->fp = reinterpret_cast<Address>(mcontext.arm_fp); +#ifdef ENABLE_ARM_LR_SAVING + sample->lr = reinterpret_cast<Address>(mcontext.arm_lr); +#endif +#endif +#elif V8_HOST_ARCH_MIPS + // Implement this on MIPS. + UNIMPLEMENTED(); +#endif +} + +#ifdef ANDROID +#define V8_HOST_ARCH_ARM 1 +#define SYS_gettid __NR_gettid +#define SYS_tgkill __NR_tgkill +#else +#define V8_HOST_ARCH_X64 1 +#endif + +namespace { + +void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { + // Avoid TSan warning about clobbering errno. + int savedErrno = errno; + + if (!Sampler::GetActiveSampler()) { + sem_post(&sSignalHandlingDone); + errno = savedErrno; + return; + } + + TickSample sample_obj; + TickSample* sample = &sample_obj; + sample->context = context; + + // If profiling, we extract the current pc and sp. 
+ if (Sampler::GetActiveSampler()->IsProfiling()) { + SetSampleContext(sample, context); + } + sample->threadProfile = sCurrentThreadProfile; + sample->timestamp = mozilla::TimeStamp::Now(); + sample->rssMemory = sample->threadProfile->mRssMemory; + sample->ussMemory = sample->threadProfile->mUssMemory; + + Sampler::GetActiveSampler()->Tick(sample); + + sCurrentThreadProfile = NULL; + sem_post(&sSignalHandlingDone); + errno = savedErrno; +} + +} // namespace + +static void ProfilerSignalThread(ThreadProfile *profile, + bool isFirstProfiledThread) +{ + if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { + profile->mRssMemory = nsMemoryReporterManager::ResidentFast(); + profile->mUssMemory = nsMemoryReporterManager::ResidentUnique(); + } else { + profile->mRssMemory = 0; + profile->mUssMemory = 0; + } +} + +int tgkill(pid_t tgid, pid_t tid, int signalno) { + return syscall(SYS_tgkill, tgid, tid, signalno); +} + +class PlatformData { + public: + PlatformData() + { + MOZ_COUNT_CTOR(PlatformData); + } + + ~PlatformData() + { + MOZ_COUNT_DTOR(PlatformData); + } +}; + +/* static */ PlatformData* +Sampler::AllocPlatformData(int aThreadId) +{ + return new PlatformData; +} + +/* static */ void +Sampler::FreePlatformData(PlatformData* aData) +{ + delete aData; +} + +static void* SignalSender(void* arg) { + // Taken from platform_thread_posix.cc + prctl(PR_SET_NAME, "SamplerThread", 0, 0, 0); + + int vm_tgid_ = getpid(); + DebugOnly<int> my_tid = gettid(); + + unsigned int nSignalsSent = 0; + + TimeDuration lastSleepOverhead = 0; + TimeStamp sampleStart = TimeStamp::Now(); + while (SamplerRegistry::sampler->IsActive()) { + + SamplerRegistry::sampler->HandleSaveRequest(); + SamplerRegistry::sampler->DeleteExpiredMarkers(); + + if (!SamplerRegistry::sampler->IsPaused()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = + SamplerRegistry::sampler->GetRegisteredThreads(); + + bool isFirstProfiledThread = 
true; + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + + // This will be null if we're not interested in profiling this thread. + if (!info->Profile() || info->IsPendingDelete()) + continue; + + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); + if (sleeping == PseudoStack::SLEEPING_AGAIN) { + info->Profile()->DuplicateLastSample(); + continue; + } + + info->Profile()->GetThreadResponsiveness()->Update(); + + // We use sCurrentThreadProfile the ThreadProfile for the + // thread we're profiling to the signal handler + sCurrentThreadProfile = info->Profile(); + + int threadId = info->ThreadId(); + MOZ_ASSERT(threadId != my_tid); + + // Profile from the signal sender for information which is not signal + // safe, and will have low variation between the emission of the signal + // and the signal handler catch. + ProfilerSignalThread(sCurrentThreadProfile, isFirstProfiledThread); + + // Profile from the signal handler for information which is signal safe + // and needs to be precise too, such as the stack of the interrupted + // thread. + if (tgkill(vm_tgid_, threadId, SIGPROF) != 0) { + printf_stderr("profiler failed to signal tid=%d\n", threadId); +#ifdef DEBUG + abort(); +#else + continue; +#endif + } + + // Wait for the signal handler to run before moving on to the next one + sem_wait(&sSignalHandlingDone); + isFirstProfiledThread = false; + + // The LUL unwind object accumulates frame statistics. + // Periodically we should poke it to give it a chance to print + // those statistics. This involves doing I/O (fprintf, + // __android_log_print, etc) and so can't safely be done from + // the unwinder threads, which is why it is done here. 
+ if ((++nSignalsSent & 0xF) == 0) { +# if defined(USE_LUL_STACKWALK) + sLUL->MaybeShowStats(); +# endif + } + } + } + + TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(SamplerRegistry::sampler->interval() * 1000); + TimeStamp beforeSleep = TimeStamp::Now(); + TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; + double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds()); + OS::SleepMicro(sleepTime); + sampleStart = TimeStamp::Now(); + lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); + } + return 0; +} + +Sampler::Sampler(double interval, bool profiling, int entrySize) + : interval_(interval), + profiling_(profiling), + paused_(false), + active_(false), + entrySize_(entrySize) { + MOZ_COUNT_CTOR(Sampler); +} + +Sampler::~Sampler() { + MOZ_COUNT_DTOR(Sampler); + ASSERT(!signal_sender_launched_); +} + + +void Sampler::Start() { + LOG("Sampler started"); + +#if defined(USE_EHABI_STACKWALK) + mozilla::EHABIStackWalkInit(); +#elif defined(USE_LUL_STACKWALK) + // NOTE: this isn't thread-safe. But we expect Sampler::Start to be + // called only from the main thread, so this is OK in general. + if (!sLUL) { + sLUL_initialization_routine(); + } +#endif + + SamplerRegistry::AddActiveSampler(this); + + // Initialize signal handler communication + sCurrentThreadProfile = NULL; + if (sem_init(&sSignalHandlingDone, /* pshared: */ 0, /* value: */ 0) != 0) { + LOG("Error initializing semaphore"); + return; + } + + // Request profiling signals. 
+ LOG("Request signal"); + struct sigaction sa; + sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(ProfilerSignalHandler); + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &old_sigprof_signal_handler_) != 0) { + LOG("Error installing signal"); + return; + } + + // Request save profile signals + struct sigaction sa2; + sa2.sa_sigaction = ProfilerSaveSignalHandler; + sigemptyset(&sa2.sa_mask); + sa2.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGNAL_SAVE_PROFILE, &sa2, &old_sigsave_signal_handler_) != 0) { + LOG("Error installing start signal"); + return; + } + LOG("Signal installed"); + signal_handler_installed_ = true; + +#if defined(USE_LUL_STACKWALK) + // Switch into unwind mode. After this point, we can't add or + // remove any unwind info to/from this LUL instance. The only thing + // we can do with it is Unwind() calls. + sLUL->EnableUnwinding(); + + // Has a test been requested? + if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) { + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, sLUL); + } +#endif + + // Start a thread that sends SIGPROF signal to VM thread. + // Sending the signal ourselves instead of relying on itimer provides + // much better accuracy. + SetActive(true); + if (pthread_create( + &signal_sender_thread_, NULL, SignalSender, NULL) == 0) { + signal_sender_launched_ = true; + } + LOG("Profiler thread started"); +} + + +void Sampler::Stop() { + SetActive(false); + + // Wait for signal sender termination (it will exit after setting + // active_ to false). 
+ if (signal_sender_launched_) { + pthread_join(signal_sender_thread_, NULL); + signal_sender_launched_ = false; + } + + SamplerRegistry::RemoveActiveSampler(this); + + // Restore old signal handler + if (signal_handler_installed_) { + sigaction(SIGNAL_SAVE_PROFILE, &old_sigsave_signal_handler_, 0); + sigaction(SIGPROF, &old_sigprof_signal_handler_, 0); + signal_handler_installed_ = false; + } +} + +bool Sampler::RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop) +{ + if (!Sampler::sRegisteredThreadsMutex) + return false; + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + // Thread already registered. This means the first unregister will be + // too early. + ASSERT(false); + return false; + } + } + + set_tls_stack_top(stackTop); + + ThreadInfo* info = new StackOwningThreadInfo(aName, id, + aIsMainThread, aPseudoStack, stackTop); + + if (sActiveSampler) { + sActiveSampler->RegisterThread(info); + } + + sRegisteredThreads->push_back(info); + + return true; +} + +void Sampler::UnregisterCurrentThread() +{ + if (!Sampler::sRegisteredThreadsMutex) + return; + + tlsStackTop.set(nullptr); + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + if (profiler_is_active()) { + // We still want to show the results of this thread if you + // save the profile shortly after a thread is terminated. + // For now we will defer the delete to profile stop. 
+ info->SetPendingDelete(); + break; + } else { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + break; + } + } + } +} + +#ifdef ANDROID +static struct sigaction old_sigstart_signal_handler; +const int SIGSTART = SIGUSR2; + +static void freeArray(const char** array, int size) { + for (int i = 0; i < size; i++) { + free((void*) array[i]); + } +} + +static uint32_t readCSVArray(char* csvList, const char** buffer) { + uint32_t count; + char* savePtr; + int newlinePos = strlen(csvList) - 1; + if (csvList[newlinePos] == '\n') { + csvList[newlinePos] = '\0'; + } + + char* item = strtok_r(csvList, ",", &savePtr); + for (count = 0; item; item = strtok_r(NULL, ",", &savePtr)) { + int length = strlen(item) + 1; // Include \0 + char* newBuf = (char*) malloc(sizeof(char) * length); + buffer[count] = newBuf; + strncpy(newBuf, item, length); + count++; + } + + return count; +} + +// Currently support only the env variables +// reported in read_profiler_env +static void ReadProfilerVars(const char* fileName, const char** features, + uint32_t* featureCount, const char** threadNames, uint32_t* threadCount) { + FILE* file = fopen(fileName, "r"); + const int bufferSize = 1024; + char line[bufferSize]; + char* feature; + char* value; + char* savePtr; + + if (file) { + while (fgets(line, bufferSize, file) != NULL) { + feature = strtok_r(line, "=", &savePtr); + value = strtok_r(NULL, "", &savePtr); + + if (strncmp(feature, PROFILER_INTERVAL, bufferSize) == 0) { + set_profiler_interval(value); + } else if (strncmp(feature, PROFILER_ENTRIES, bufferSize) == 0) { + set_profiler_entries(value); + } else if (strncmp(feature, PROFILER_STACK, bufferSize) == 0) { + set_profiler_scan(value); + } else if (strncmp(feature, PROFILER_FEATURES, bufferSize) == 0) { + *featureCount = readCSVArray(value, features); + } else if (strncmp(feature, "threads", bufferSize) == 0) { + *threadCount = readCSVArray(value, threadNames); + } + } + + fclose(file); + } +} + +static void 
DoStartTask() { + uint32_t featureCount = 0; + uint32_t threadCount = 0; + + // Just allocate 10 features for now + // FIXME: these don't really point to const chars* + // So we free them later, but we don't want to change the const char** + // declaration in profiler_start. Annoying but ok for now. + const char* threadNames[10]; + const char* features[10]; + const char* profilerConfigFile = "/data/local/tmp/profiler.options"; + + ReadProfilerVars(profilerConfigFile, features, &featureCount, threadNames, &threadCount); + MOZ_ASSERT(featureCount < 10); + MOZ_ASSERT(threadCount < 10); + + profiler_start(PROFILE_DEFAULT_ENTRY, 1, + features, featureCount, + threadNames, threadCount); + + freeArray(threadNames, threadCount); + freeArray(features, featureCount); +} + +static void StartSignalHandler(int signal, siginfo_t* info, void* context) { + class StartTask : public Runnable { + public: + NS_IMETHOD Run() override { + DoStartTask(); + return NS_OK; + } + }; + // XXX: technically NS_DispatchToMainThread is NOT async signal safe. We risk + // nasty things like deadlocks, but the probability is very low and we + // typically only do this once so it tends to be ok. See bug 909403. + NS_DispatchToMainThread(new StartTask()); +} + +void OS::Startup() +{ + LOG("Registering start signal"); + struct sigaction sa; + sa.sa_sigaction = StartSignalHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGSTART, &sa, &old_sigstart_signal_handler) != 0) { + LOG("Error installing signal"); + } +} + +#else + +void OS::Startup() { + // Set up the fork handlers. 
+ setup_atfork(); +} + +#endif + + + +void TickSample::PopulateContext(void* aContext) +{ + MOZ_ASSERT(aContext); + ucontext_t* pContext = reinterpret_cast<ucontext_t*>(aContext); + if (!getcontext(pContext)) { + context = pContext; + SetSampleContext(this, aContext); + } +} + +void OS::SleepMicro(int microseconds) +{ + if (MOZ_UNLIKELY(microseconds >= 1000000)) { + // Use usleep for larger intervals, because the nanosleep + // code below only supports intervals < 1 second. + MOZ_ALWAYS_TRUE(!::usleep(microseconds)); + return; + } + + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = microseconds * 1000UL; + + int rv = ::nanosleep(&ts, &ts); + + while (rv != 0 && errno == EINTR) { + // Keep waiting in case of interrupt. + // nanosleep puts the remaining time back into ts. + rv = ::nanosleep(&ts, &ts); + } + + MOZ_ASSERT(!rv, "nanosleep call failed"); +} diff --git a/tools/profiler/core/platform-macos.cc b/tools/profiler/core/platform-macos.cc new file mode 100644 index 0000000000..9a98d1a268 --- /dev/null +++ b/tools/profiler/core/platform-macos.cc @@ -0,0 +1,469 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <dlfcn.h> +#include <unistd.h> +#include <sys/mman.h> +#include <mach/mach_init.h> +#include <mach-o/dyld.h> +#include <mach-o/getsect.h> + +#include <AvailabilityMacros.h> + +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <libkern/OSAtomic.h> +#include <mach/mach.h> +#include <mach/semaphore.h> +#include <mach/task.h> +#include <mach/vm_statistics.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <math.h> + +#ifndef SPS_STANDALONE +#include "ThreadResponsiveness.h" +#include "nsThreadUtils.h" + +// Memory profile +#include "nsMemoryReporterManager.h" +#endif + +#include "platform.h" +#include "GeckoSampler.h" +#include "mozilla/TimeStamp.h" + +using mozilla::TimeStamp; +using mozilla::TimeDuration; + +// this port is based off of v8 svn revision 9837 + +// XXX: this is a very stubbed out implementation +// that only supports a single Sampler +struct SamplerRegistry { + static void AddActiveSampler(Sampler *sampler) { + ASSERT(!SamplerRegistry::sampler); + SamplerRegistry::sampler = sampler; + } + static void RemoveActiveSampler(Sampler *sampler) { + SamplerRegistry::sampler = NULL; + } + static Sampler *sampler; +}; + +Sampler *SamplerRegistry::sampler = NULL; + +#ifdef DEBUG +// 0 is never a valid thread id on MacOSX since a pthread_t is a pointer. +static const pthread_t kNoThread = (pthread_t) 0; +#endif + +void OS::Startup() { +} + +void OS::Sleep(int milliseconds) { + usleep(1000 * milliseconds); +} + +void OS::SleepMicro(int microseconds) { + usleep(microseconds); +} + +Thread::Thread(const char* name) + : stack_size_(0) { + set_name(name); +} + + +Thread::~Thread() { +} + + +static void SetThreadName(const char* name) { + // pthread_setname_np is only available in 10.6 or later, so test + // for it at runtime. 
+ int (*dynamic_pthread_setname_np)(const char*); + *reinterpret_cast<void**>(&dynamic_pthread_setname_np) = + dlsym(RTLD_DEFAULT, "pthread_setname_np"); + if (!dynamic_pthread_setname_np) + return; + + // Mac OS X does not expose the length limit of the name, so hardcode it. + static const int kMaxNameLength = 63; + USE(kMaxNameLength); + ASSERT(Thread::kMaxThreadNameLength <= kMaxNameLength); + dynamic_pthread_setname_np(name); +} + + +static void* ThreadEntry(void* arg) { + Thread* thread = reinterpret_cast<Thread*>(arg); + + thread->thread_ = pthread_self(); + SetThreadName(thread->name()); + ASSERT(thread->thread_ != kNoThread); + thread->Run(); + return NULL; +} + + +void Thread::set_name(const char* name) { + strncpy(name_, name, sizeof(name_)); + name_[sizeof(name_) - 1] = '\0'; +} + + +void Thread::Start() { + pthread_attr_t* attr_ptr = NULL; + pthread_attr_t attr; + if (stack_size_ > 0) { + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, static_cast<size_t>(stack_size_)); + attr_ptr = &attr; + } + pthread_create(&thread_, attr_ptr, ThreadEntry, this); + ASSERT(thread_ != kNoThread); +} + +void Thread::Join() { + pthread_join(thread_, NULL); +} + +class PlatformData { + public: + PlatformData() : profiled_thread_(mach_thread_self()) + { + profiled_pthread_ = pthread_from_mach_thread_np(profiled_thread_); + } + + ~PlatformData() { + // Deallocate Mach port for thread. + mach_port_deallocate(mach_task_self(), profiled_thread_); + } + + thread_act_t profiled_thread() { return profiled_thread_; } + pthread_t profiled_pthread() { return profiled_pthread_; } + + private: + // Note: for profiled_thread_ Mach primitives are used instead of PThread's + // because the latter doesn't provide thread manipulation primitives required. + // For details, consult "Mac OS X Internals" book, Section 7.3. 
+ thread_act_t profiled_thread_; + // we also store the pthread because Mach threads have no concept of stack + // and we want to be able to get the stack size when we need to unwind the + // stack using frame pointers. + pthread_t profiled_pthread_; +}; + +/* static */ PlatformData* +Sampler::AllocPlatformData(int aThreadId) +{ + return new PlatformData; +} + +/* static */ void +Sampler::FreePlatformData(PlatformData* aData) +{ + delete aData; +} + +class SamplerThread : public Thread { + public: + explicit SamplerThread(double interval) + : Thread("SamplerThread") + , intervalMicro_(floor(interval * 1000 + 0.5)) + { + if (intervalMicro_ <= 0) { + intervalMicro_ = 1; + } + } + + static void AddActiveSampler(Sampler* sampler) { + SamplerRegistry::AddActiveSampler(sampler); + if (instance_ == NULL) { + instance_ = new SamplerThread(sampler->interval()); + instance_->Start(); + } + } + + static void RemoveActiveSampler(Sampler* sampler) { + instance_->Join(); + //XXX: unlike v8 we need to remove the active sampler after doing the Join + // because we drop the sampler immediately + SamplerRegistry::RemoveActiveSampler(sampler); + delete instance_; + instance_ = NULL; + } + + // Implement Thread::Run(). + virtual void Run() { + TimeDuration lastSleepOverhead = 0; + TimeStamp sampleStart = TimeStamp::Now(); + while (SamplerRegistry::sampler->IsActive()) { + SamplerRegistry::sampler->DeleteExpiredMarkers(); + if (!SamplerRegistry::sampler->IsPaused()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = + SamplerRegistry::sampler->GetRegisteredThreads(); + bool isFirstProfiledThread = true; + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + + // This will be null if we're not interested in profiling this thread. 
+ if (!info->Profile() || info->IsPendingDelete()) + continue; + + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); + if (sleeping == PseudoStack::SLEEPING_AGAIN) { + info->Profile()->DuplicateLastSample(); + continue; + } + +#ifndef SPS_STANDALONE + info->Profile()->GetThreadResponsiveness()->Update(); +#endif + + ThreadProfile* thread_profile = info->Profile(); + + SampleContext(SamplerRegistry::sampler, thread_profile, + isFirstProfiledThread); + isFirstProfiledThread = false; + } + } + + TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(intervalMicro_); + TimeStamp beforeSleep = TimeStamp::Now(); + TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; + double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds()); + OS::SleepMicro(sleepTime); + sampleStart = TimeStamp::Now(); + lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); + } + } + + void SampleContext(Sampler* sampler, ThreadProfile* thread_profile, + bool isFirstProfiledThread) + { + thread_act_t profiled_thread = + thread_profile->GetPlatformData()->profiled_thread(); + + TickSample sample_obj; + TickSample* sample = &sample_obj; + + // Unique Set Size is not supported on Mac. + sample->ussMemory = 0; + sample->rssMemory = 0; + +#ifndef SPS_STANDALONE + if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { + sample->rssMemory = nsMemoryReporterManager::ResidentFast(); + } +#endif + + // We're using thread_suspend on OS X because pthread_kill (which is what + // we're using on Linux) has less consistent performance and causes + // strange crashes, see bug 1166778 and bug 1166808. 
+ + if (KERN_SUCCESS != thread_suspend(profiled_thread)) return; + +#if V8_HOST_ARCH_X64 + thread_state_flavor_t flavor = x86_THREAD_STATE64; + x86_thread_state64_t state; + mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; +#if __DARWIN_UNIX03 +#define REGISTER_FIELD(name) __r ## name +#else +#define REGISTER_FIELD(name) r ## name +#endif // __DARWIN_UNIX03 +#elif V8_HOST_ARCH_IA32 + thread_state_flavor_t flavor = i386_THREAD_STATE; + i386_thread_state_t state; + mach_msg_type_number_t count = i386_THREAD_STATE_COUNT; +#if __DARWIN_UNIX03 +#define REGISTER_FIELD(name) __e ## name +#else +#define REGISTER_FIELD(name) e ## name +#endif // __DARWIN_UNIX03 +#else +#error Unsupported Mac OS X host architecture. +#endif // V8_HOST_ARCH + + if (thread_get_state(profiled_thread, + flavor, + reinterpret_cast<natural_t*>(&state), + &count) == KERN_SUCCESS) { + sample->pc = reinterpret_cast<Address>(state.REGISTER_FIELD(ip)); + sample->sp = reinterpret_cast<Address>(state.REGISTER_FIELD(sp)); + sample->fp = reinterpret_cast<Address>(state.REGISTER_FIELD(bp)); + sample->timestamp = mozilla::TimeStamp::Now(); + sample->threadProfile = thread_profile; + + sampler->Tick(sample); + } + thread_resume(profiled_thread); + } + + int intervalMicro_; + //RuntimeProfilerRateLimiter rate_limiter_; + + static SamplerThread* instance_; + + DISALLOW_COPY_AND_ASSIGN(SamplerThread); +}; + +#undef REGISTER_FIELD + +SamplerThread* SamplerThread::instance_ = NULL; + +Sampler::Sampler(double interval, bool profiling, int entrySize) + : // isolate_(isolate), + interval_(interval), + profiling_(profiling), + paused_(false), + active_(false), + entrySize_(entrySize) /*, + samples_taken_(0)*/ { +} + + +Sampler::~Sampler() { + ASSERT(!IsActive()); +} + + +void Sampler::Start() { + ASSERT(!IsActive()); + SetActive(true); + SamplerThread::AddActiveSampler(this); +} + + +void Sampler::Stop() { + ASSERT(IsActive()); + SetActive(false); + SamplerThread::RemoveActiveSampler(this); +} + +pthread_t 
+Sampler::GetProfiledThread(PlatformData* aData) +{ + return aData->profiled_pthread(); +} + +#include <sys/syscall.h> +pid_t gettid() +{ + return (pid_t) syscall(SYS_thread_selfid); +} + +/* static */ Thread::tid_t +Thread::GetCurrentId() +{ + return gettid(); +} + +bool Sampler::RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop) +{ + if (!Sampler::sRegisteredThreadsMutex) + return false; + + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + // Thread already registered. This means the first unregister will be + // too early. + ASSERT(false); + return false; + } + } + + set_tls_stack_top(stackTop); + + ThreadInfo* info = new StackOwningThreadInfo(aName, id, + aIsMainThread, aPseudoStack, stackTop); + + if (sActiveSampler) { + sActiveSampler->RegisterThread(info); + } + + sRegisteredThreads->push_back(info); + + return true; +} + +void Sampler::UnregisterCurrentThread() +{ + if (!Sampler::sRegisteredThreadsMutex) + return; + + tlsStackTop.set(nullptr); + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + if (profiler_is_active()) { + // We still want to show the results of this thread if you + // save the profile shortly after a thread is terminated. + // For now we will defer the delete to profile stop. 
+ info->SetPendingDelete(); + break; + } else { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + break; + } + } + } +} + +void TickSample::PopulateContext(void* aContext) +{ + // Note that this asm changes if PopulateContext's parameter list is altered +#if defined(SPS_PLAT_amd64_darwin) + asm ( + // Compute caller's %rsp by adding to %rbp: + // 8 bytes for previous %rbp, 8 bytes for return address + "leaq 0x10(%%rbp), %0\n\t" + // Dereference %rbp to get previous %rbp + "movq (%%rbp), %1\n\t" + : + "=r"(sp), + "=r"(fp) + ); +#elif defined(SPS_PLAT_x86_darwin) + asm ( + // Compute caller's %esp by adding to %ebp: + // 4 bytes for aContext + 4 bytes for return address + + // 4 bytes for previous %ebp + "leal 0xc(%%ebp), %0\n\t" + // Dereference %ebp to get previous %ebp + "movl (%%ebp), %1\n\t" + : + "=r"(sp), + "=r"(fp) + ); +#else +# error "Unsupported architecture" +#endif + pc = reinterpret_cast<Address>(__builtin_extract_return_addr( + __builtin_return_address(0))); +} + diff --git a/tools/profiler/core/platform-win32.cc b/tools/profiler/core/platform-win32.cc new file mode 100644 index 0000000000..74b311f285 --- /dev/null +++ b/tools/profiler/core/platform-win32.cc @@ -0,0 +1,431 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. 
nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#include <windows.h> +#include <mmsystem.h> +#include <process.h> +#include "platform.h" +#include "GeckoSampler.h" +#include "ThreadResponsiveness.h" +#include "ProfileEntry.h" + +// Memory profile +#include "nsMemoryReporterManager.h" + +#include "mozilla/StackWalk_windows.h" + + +class PlatformData { + public: + // Get a handle to the calling thread. This is the thread that we are + // going to profile. We need to make a copy of the handle because we are + // going to use it in the sampler thread. Using GetThreadHandle() will + // not work in this case. We're using OpenThread because DuplicateHandle + // for some reason doesn't work in Chrome's sandbox. 
+ PlatformData(int aThreadId) : profiled_thread_(OpenThread(THREAD_GET_CONTEXT | + THREAD_SUSPEND_RESUME | + THREAD_QUERY_INFORMATION, + false, + aThreadId)) {} + + ~PlatformData() { + if (profiled_thread_ != NULL) { + CloseHandle(profiled_thread_); + profiled_thread_ = NULL; + } + } + + HANDLE profiled_thread() { return profiled_thread_; } + + private: + HANDLE profiled_thread_; +}; + +/* static */ PlatformData* +Sampler::AllocPlatformData(int aThreadId) +{ + return new PlatformData(aThreadId); +} + +/* static */ void +Sampler::FreePlatformData(PlatformData* aData) +{ + delete aData; +} + +uintptr_t +Sampler::GetThreadHandle(PlatformData* aData) +{ + return (uintptr_t) aData->profiled_thread(); +} + +class SamplerThread : public Thread { + public: + SamplerThread(double interval, Sampler* sampler) + : Thread("SamplerThread") + , sampler_(sampler) + , interval_(interval) + { + interval_ = floor(interval + 0.5); + if (interval_ <= 0) { + interval_ = 1; + } + } + + static void StartSampler(Sampler* sampler) { + if (instance_ == NULL) { + instance_ = new SamplerThread(sampler->interval(), sampler); + instance_->Start(); + } else { + ASSERT(instance_->interval_ == sampler->interval()); + } + } + + static void StopSampler() { + instance_->Join(); + delete instance_; + instance_ = NULL; + } + + // Implement Thread::Run(). + virtual void Run() { + + // By default we'll not adjust the timer resolution which tends to be around + // 16ms. However, if the requested interval is sufficiently low we'll try to + // adjust the resolution to match. 
+ if (interval_ < 10) + ::timeBeginPeriod(interval_); + + while (sampler_->IsActive()) { + sampler_->DeleteExpiredMarkers(); + + if (!sampler_->IsPaused()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = + sampler_->GetRegisteredThreads(); + bool isFirstProfiledThread = true; + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + + // This will be null if we're not interested in profiling this thread. + if (!info->Profile() || info->IsPendingDelete()) + continue; + + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); + if (sleeping == PseudoStack::SLEEPING_AGAIN) { + info->Profile()->DuplicateLastSample(); + continue; + } + + info->Profile()->GetThreadResponsiveness()->Update(); + + ThreadProfile* thread_profile = info->Profile(); + + SampleContext(sampler_, thread_profile, isFirstProfiledThread); + isFirstProfiledThread = false; + } + } + OS::Sleep(interval_); + } + + // disable any timer resolution changes we've made + if (interval_ < 10) + ::timeEndPeriod(interval_); + } + + void SampleContext(Sampler* sampler, ThreadProfile* thread_profile, + bool isFirstProfiledThread) + { + uintptr_t thread = Sampler::GetThreadHandle( + thread_profile->GetPlatformData()); + HANDLE profiled_thread = reinterpret_cast<HANDLE>(thread); + if (profiled_thread == NULL) + return; + + // Context used for sampling the register state of the profiled thread. + CONTEXT context; + memset(&context, 0, sizeof(context)); + + TickSample sample_obj; + TickSample* sample = &sample_obj; + + // Grab the timestamp before pausing the thread, to avoid deadlocks. + sample->timestamp = mozilla::TimeStamp::Now(); + sample->threadProfile = thread_profile; + + if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { + sample->rssMemory = nsMemoryReporterManager::ResidentFast(); + } else { + sample->rssMemory = 0; + } + + // Unique Set Size is not supported on Windows. 
+ sample->ussMemory = 0; + + static const DWORD kSuspendFailed = static_cast<DWORD>(-1); + if (SuspendThread(profiled_thread) == kSuspendFailed) + return; + + // SuspendThread is asynchronous, so the thread may still be running. + // Call GetThreadContext first to ensure the thread is really suspended. + // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743. + + // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in + // RtlVirtualUnwind (see bug 1120126) so we set all the flags. +#if V8_HOST_ARCH_X64 + context.ContextFlags = CONTEXT_FULL; +#else + context.ContextFlags = CONTEXT_CONTROL; +#endif + if (!GetThreadContext(profiled_thread, &context)) { + ResumeThread(profiled_thread); + return; + } + + // Threads that may invoke JS require extra attention. Since, on windows, + // the jits also need to modify the same dynamic function table that we need + // to get a stack trace, we have to be wary of that to avoid deadlock. + // + // When embedded in Gecko, for threads that aren't the main thread, + // CanInvokeJS consults an unlocked value in the nsIThread, so we must + // consult this after suspending the profiled thread to avoid racing + // against a value change. + if (thread_profile->CanInvokeJS()) { + if (!TryAcquireStackWalkWorkaroundLock()) { + ResumeThread(profiled_thread); + return; + } + + // It is safe to immediately drop the lock. We only need to contend with + // the case in which the profiled thread held needed system resources. + // If the profiled thread had held those resources, the trylock would have + // failed. Anyone else who grabs those resources will continue to make + // progress, since those threads are not suspended. Because of this, + // we cannot deadlock with them, and should let them run as they please. 
+ ReleaseStackWalkWorkaroundLock(); + } + +#if V8_HOST_ARCH_X64 + sample->pc = reinterpret_cast<Address>(context.Rip); + sample->sp = reinterpret_cast<Address>(context.Rsp); + sample->fp = reinterpret_cast<Address>(context.Rbp); +#else + sample->pc = reinterpret_cast<Address>(context.Eip); + sample->sp = reinterpret_cast<Address>(context.Esp); + sample->fp = reinterpret_cast<Address>(context.Ebp); +#endif + + sample->context = &context; + sampler->Tick(sample); + + ResumeThread(profiled_thread); + } + + Sampler* sampler_; + int interval_; // units: ms + + // Protects the process wide state below. + static SamplerThread* instance_; + + DISALLOW_COPY_AND_ASSIGN(SamplerThread); +}; + +SamplerThread* SamplerThread::instance_ = NULL; + + +Sampler::Sampler(double interval, bool profiling, int entrySize) + : interval_(interval), + profiling_(profiling), + paused_(false), + active_(false), + entrySize_(entrySize) { +} + +Sampler::~Sampler() { + ASSERT(!IsActive()); +} + +void Sampler::Start() { + ASSERT(!IsActive()); + SetActive(true); + SamplerThread::StartSampler(this); +} + +void Sampler::Stop() { + ASSERT(IsActive()); + SetActive(false); + SamplerThread::StopSampler(); +} + + +static const HANDLE kNoThread = INVALID_HANDLE_VALUE; + +static unsigned int __stdcall ThreadEntry(void* arg) { + Thread* thread = reinterpret_cast<Thread*>(arg); + thread->Run(); + return 0; +} + +// Initialize a Win32 thread object. The thread has an invalid thread +// handle until it is started. +Thread::Thread(const char* name) + : stack_size_(0) { + thread_ = kNoThread; + set_name(name); +} + +void Thread::set_name(const char* name) { + strncpy(name_, name, sizeof(name_)); + name_[sizeof(name_) - 1] = '\0'; +} + +// Close our own handle for the thread. +Thread::~Thread() { + if (thread_ != kNoThread) CloseHandle(thread_); +} + +// Create a new thread. 
It is important to use _beginthreadex() instead of +// the Win32 function CreateThread(), because the CreateThread() does not +// initialize thread specific structures in the C runtime library. +void Thread::Start() { + thread_ = reinterpret_cast<HANDLE>( + _beginthreadex(NULL, + static_cast<unsigned>(stack_size_), + ThreadEntry, + this, + 0, + (unsigned int*) &thread_id_)); +} + +// Wait for thread to terminate. +void Thread::Join() { + if (thread_id_ != GetCurrentId()) { + WaitForSingleObject(thread_, INFINITE); + } +} + +/* static */ Thread::tid_t +Thread::GetCurrentId() +{ + return GetCurrentThreadId(); +} + +void OS::Startup() { +} + +void OS::Sleep(int milliseconds) { + ::Sleep(milliseconds); +} + +bool Sampler::RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop) +{ + if (!Sampler::sRegisteredThreadsMutex) + return false; + + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = GetCurrentThreadId(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + // Thread already registered. This means the first unregister will be + // too early. 
+ ASSERT(false); + return false; + } + } + + set_tls_stack_top(stackTop); + + ThreadInfo* info = new StackOwningThreadInfo(aName, id, + aIsMainThread, aPseudoStack, stackTop); + + if (sActiveSampler) { + sActiveSampler->RegisterThread(info); + } + + sRegisteredThreads->push_back(info); + + return true; +} + +void Sampler::UnregisterCurrentThread() +{ + if (!Sampler::sRegisteredThreadsMutex) + return; + + tlsStackTop.set(nullptr); + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = GetCurrentThreadId(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + if (profiler_is_active()) { + // We still want to show the results of this thread if you + // save the profile shortly after a thread is terminated. + // For now we will defer the delete to profile stop. + info->SetPendingDelete(); + break; + } else { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + break; + } + } + } +} + +void TickSample::PopulateContext(void* aContext) +{ + MOZ_ASSERT(aContext); + CONTEXT* pContext = reinterpret_cast<CONTEXT*>(aContext); + context = pContext; + RtlCaptureContext(pContext); + +#if defined(SPS_PLAT_amd64_windows) + + pc = reinterpret_cast<Address>(pContext->Rip); + sp = reinterpret_cast<Address>(pContext->Rsp); + fp = reinterpret_cast<Address>(pContext->Rbp); + +#elif defined(SPS_PLAT_x86_windows) + + pc = reinterpret_cast<Address>(pContext->Eip); + sp = reinterpret_cast<Address>(pContext->Esp); + fp = reinterpret_cast<Address>(pContext->Ebp); + +#endif +} + diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp new file mode 100644 index 0000000000..0d3cb16482 --- /dev/null +++ b/tools/profiler/core/platform.cpp @@ -0,0 +1,1266 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ostream> +#include <fstream> +#include <sstream> +#include <errno.h> + +#include "platform.h" +#include "PlatformMacros.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/UniquePtr.h" +#include "GeckoProfiler.h" +#ifndef SPS_STANDALONE +#include "ProfilerIOInterposeObserver.h" +#include "mozilla/StaticPtr.h" +#endif +#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/Sprintf.h" +#include "PseudoStack.h" +#include "GeckoSampler.h" +#ifndef SPS_STANDALONE +#include "nsIObserverService.h" +#include "nsDirectoryServiceUtils.h" +#include "nsDirectoryServiceDefs.h" +#include "nsXULAppAPI.h" +#include "nsProfilerStartParams.h" +#include "mozilla/Services.h" +#include "nsThreadUtils.h" +#endif +#include "ProfilerMarkers.h" + +#ifdef MOZ_TASK_TRACER +#include "GeckoTaskTracer.h" +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + #include "FennecJNIWrappers.h" +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +#include "FennecJNINatives.h" +#endif + +#ifndef SPS_STANDALONE +#if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" +#endif +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +class GeckoJavaSampler : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> +{ +private: + GeckoJavaSampler(); + +public: + static double GetProfilerTime() { + if (!profiler_is_active()) { + return 0.0; + } + return profiler_time(); + }; +}; +#endif + +MOZ_THREAD_LOCAL(PseudoStack *) tlsPseudoStack; +MOZ_THREAD_LOCAL(GeckoSampler *) tlsTicker; +MOZ_THREAD_LOCAL(void *) tlsStackTop; +// We need to track whether we've been initialized otherwise +// we end up using tlsStack without initializing it. 
+// Because tlsStack is totally opaque to us we can't reuse +// it as the flag itself. +bool stack_key_initialized; + +mozilla::TimeStamp sLastTracerEvent; // is raced on +mozilla::TimeStamp sStartTime; +int sFrameNumber = 0; +int sLastFrameNumber = 0; +int sInitCount = 0; // Each init must have a matched shutdown. +static bool sIsProfiling = false; // is raced on +static bool sIsGPUProfiling = false; // is raced on +static bool sIsLayersDump = false; // is raced on +static bool sIsDisplayListDump = false; // is raced on +static bool sIsRestyleProfiling = false; // is raced on + +// Environment variables to control the profiler +const char* PROFILER_HELP = "MOZ_PROFILER_HELP"; +const char* PROFILER_INTERVAL = "MOZ_PROFILER_INTERVAL"; +const char* PROFILER_ENTRIES = "MOZ_PROFILER_ENTRIES"; +const char* PROFILER_STACK = "MOZ_PROFILER_STACK_SCAN"; +const char* PROFILER_FEATURES = "MOZ_PROFILING_FEATURES"; + +/* we don't need to worry about overflow because we only treat the + * case of them being the same as special. i.e. we only run into + * a problem if 2^32 events happen between samples that we need + * to know are associated with different events */ + +// Values harvested from env vars, that control the profiler. +static int sUnwindInterval; /* in milliseconds */ +static int sUnwindStackScan; /* max # of dubious frames allowed */ +static int sProfileEntries; /* how many entries do we store? */ + +std::vector<ThreadInfo*>* Sampler::sRegisteredThreads = nullptr; +mozilla::UniquePtr< ::Mutex> Sampler::sRegisteredThreadsMutex; + +GeckoSampler* Sampler::sActiveSampler; + +#ifndef SPS_STANDALONE +static mozilla::StaticAutoPtr<mozilla::ProfilerIOInterposeObserver> + sInterposeObserver; +#endif + +// The name that identifies the gecko thread for calls to +// profiler_register_thread. 
+static const char * gGeckoThreadName = "GeckoMain"; + +void Sampler::Startup() { + sRegisteredThreads = new std::vector<ThreadInfo*>(); + sRegisteredThreadsMutex = OS::CreateMutex("sRegisteredThreads mutex"); + + // We could create the sLUL object and read unwind info into it at + // this point. That would match the lifetime implied by destruction + // of it in Sampler::Shutdown just below. However, that gives a big + // delay on startup, even if no profiling is actually to be done. + // So, instead, sLUL is created on demand at the first call to + // Sampler::Start. +} + +void Sampler::Shutdown() { + while (sRegisteredThreads->size() > 0) { + delete sRegisteredThreads->back(); + sRegisteredThreads->pop_back(); + } + + sRegisteredThreadsMutex = nullptr; + delete sRegisteredThreads; + + // UnregisterThread can be called after shutdown in XPCShell. Thus + // we need to point to null to ignore such a call after shutdown. + sRegisteredThreadsMutex = nullptr; + sRegisteredThreads = nullptr; + +#if defined(USE_LUL_STACKWALK) + // Delete the sLUL object, if it actually got created. 
+ if (sLUL) { + delete sLUL; + sLUL = nullptr; + } +#endif +} + +StackOwningThreadInfo::StackOwningThreadInfo(const char* aName, int aThreadId, + bool aIsMainThread, + PseudoStack* aPseudoStack, + void* aStackTop) + : ThreadInfo(aName, aThreadId, aIsMainThread, aPseudoStack, aStackTop) +{ + aPseudoStack->ref(); +} + +StackOwningThreadInfo::~StackOwningThreadInfo() +{ + PseudoStack* stack = Stack(); + if (stack) { + stack->deref(); + } +} + +void +StackOwningThreadInfo::SetPendingDelete() +{ + PseudoStack* stack = Stack(); + if (stack) { + stack->deref(); + } + ThreadInfo::SetPendingDelete(); +} + +ProfilerMarker::ProfilerMarker(const char* aMarkerName, + ProfilerMarkerPayload* aPayload, + double aTime) + : mMarkerName(strdup(aMarkerName)) + , mPayload(aPayload) + , mTime(aTime) +{ +} + +ProfilerMarker::~ProfilerMarker() { + free(mMarkerName); + delete mPayload; +} + +void +ProfilerMarker::SetGeneration(uint32_t aGenID) { + mGenID = aGenID; +} + +double +ProfilerMarker::GetTime() const { + return mTime; +} + +void ProfilerMarker::StreamJSON(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) const +{ + // Schema: + // [name, time, data] + + aWriter.StartArrayElement(); + { + aUniqueStacks.mUniqueStrings.WriteElement(aWriter, GetMarkerName()); + aWriter.DoubleElement(mTime); + // TODO: Store the callsite for this marker if available: + // if have location data + // b.NameValue(marker, "location", ...); + if (mPayload) { + aWriter.StartObjectElement(); + { + mPayload->StreamPayload(aWriter, aUniqueStacks); + } + aWriter.EndObject(); + } + } + aWriter.EndArray(); +} + +/* Has MOZ_PROFILER_VERBOSE been set? */ + +// Verbosity control for the profiler. The aim is to check env var +// MOZ_PROFILER_VERBOSE only once. However, we may need to temporarily +// override that so as to print the profiler's help message. That's +// what moz_profiler_set_verbosity is for. 
+ +enum class ProfilerVerbosity : int8_t { UNCHECKED, NOTVERBOSE, VERBOSE }; + +// Raced on, potentially +static ProfilerVerbosity profiler_verbosity = ProfilerVerbosity::UNCHECKED; + +// Returns true when verbose logging is enabled. While the cached state is +// UNCHECKED, the MOZ_PROFILER_VERBOSE env var is consulted and the answer is +// stored in profiler_verbosity. +bool moz_profiler_verbose() +{ + if (profiler_verbosity == ProfilerVerbosity::UNCHECKED) { + if (getenv("MOZ_PROFILER_VERBOSE") != nullptr) + profiler_verbosity = ProfilerVerbosity::VERBOSE; + else + profiler_verbosity = ProfilerVerbosity::NOTVERBOSE; + } + + return profiler_verbosity == ProfilerVerbosity::VERBOSE; +} + +// Overrides the cached verbosity. Only VERBOSE (force on) and UNCHECKED +// (re-read the env var on the next query) are accepted. +void moz_profiler_set_verbosity(ProfilerVerbosity pv) +{ + MOZ_ASSERT(pv == ProfilerVerbosity::UNCHECKED || + pv == ProfilerVerbosity::VERBOSE); + profiler_verbosity = pv; +} + + +// Parses the sampling interval. A null |interval| (variable unset) is accepted +// and leaves sUnwindInterval untouched; otherwise the text must be a complete +// decimal number in [1, 1000]. Returns false when the value is rejected. +bool set_profiler_interval(const char* interval) { + if (interval) { + errno = 0; + char* end = nullptr; + long int n = strtol(interval, &end, 10); + // |end == interval| means no digits were consumed; a non-NUL |*end| means + // trailing garbage such as "12abc", which must not be accepted as 12. + if (errno == 0 && end != interval && *end == '\0' && + n >= 1 && n <= 1000) { + sUnwindInterval = n; + return true; + } + return false; + } + + return true; +} + +// Parses the entry-store size. A null |entries| is accepted and leaves +// sProfileEntries untouched; otherwise the text must be a complete, strictly +// positive decimal number that fits in an int. Returns false when rejected. +bool set_profiler_entries(const char* entries) { + if (entries) { + errno = 0; + char* end = nullptr; + long int n = strtol(entries, &end, 10); + // sProfileEntries is an int: the round-trip comparison refuses values + // that would be truncated where long is wider than int. + if (errno == 0 && end != entries && *end == '\0' && + n > 0 && n == static_cast<int>(n)) { + sProfileEntries = n; + return true; + } + return false; + } + + return true; +} + +// Parses the stack-scan limit. A null |scanCount| is accepted and leaves +// sUnwindStackScan untouched; otherwise the text must be a complete decimal +// number in [0, 100]. Returns false when the value is rejected. +bool set_profiler_scan(const char* scanCount) { + if (scanCount) { + errno = 0; + char* end = nullptr; + long int n = strtol(scanCount, &end, 10); + // Without the |end| checks a non-numeric value parses as 0, which is + // inside the valid range and used to be accepted silently. + if (errno == 0 && end != scanCount && *end == '\0' && + n >= 0 && n <= 100) { + sUnwindStackScan = n; + return true; + } + return false; + } + + return true; +} + +// Reports whether this build defines HAVE_NATIVE_UNWIND. +bool is_native_unwinding_avail() { +# if defined(HAVE_NATIVE_UNWIND) + return true; +#else + return false; +#endif +} + +// Read env vars at startup, so as to set: +// sUnwindInterval, sProfileEntries, sUnwindStackScan. 
+void read_profiler_env_vars() +{ + /* Set defaults */ + sUnwindInterval = 0; /* We'll have to look elsewhere */ + sProfileEntries = 0; + + const char* interval = getenv(PROFILER_INTERVAL); + const char* entries = getenv(PROFILER_ENTRIES); + const char* scanCount = getenv(PROFILER_STACK); + + if (getenv(PROFILER_HELP)) { + // Enable verbose output + moz_profiler_set_verbosity(ProfilerVerbosity::VERBOSE); + profiler_usage(); + // Now force the next enquiry of moz_profiler_verbose to re-query + // env var MOZ_PROFILER_VERBOSE. + moz_profiler_set_verbosity(ProfilerVerbosity::UNCHECKED); + } + + if (!set_profiler_interval(interval) || + !set_profiler_entries(entries) || + !set_profiler_scan(scanCount)) { + profiler_usage(); + } else { + LOG( "SPS:"); + LOGF("SPS: Sampling interval = %d ms (zero means \"platform default\")", + (int)sUnwindInterval); + LOGF("SPS: Entry store size = %d (zero means \"platform default\")", + (int)sProfileEntries); + LOGF("SPS: UnwindStackScan = %d (max dubious frames per unwind).", + (int)sUnwindStackScan); + LOG( "SPS:"); + } +} + +void profiler_usage() { + LOG( "SPS: "); + LOG( "SPS: Environment variable usage:"); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_HELP"); + LOG( "SPS: If set to any value, prints this message."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_INTERVAL=<number> (milliseconds, 1 to 1000)"); + LOG( "SPS: If unset, platform default is used."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_ENTRIES=<number> (count, minimum of 1)"); + LOG( "SPS: If unset, platform default is used."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_VERBOSE"); + LOG( "SPS: If set to any value, increases verbosity (recommended)."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_STACK_SCAN=<number> (default is zero)"); + LOG( "SPS: The number of dubious (stack-scanned) frames allowed"); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_LUL_TEST"); + LOG( "SPS: If set to any value, runs LUL unit tests at startup of"); + LOG( "SPS: the unwinder thread, and prints a 
short summary of results."); + LOG( "SPS: "); + LOGF("SPS: This platform %s native unwinding.", + is_native_unwinding_avail() ? "supports" : "does not support"); + LOG( "SPS: "); + + /* Re-set defaults */ + sUnwindInterval = 0; /* We'll have to look elsewhere */ + sProfileEntries = 0; + sUnwindStackScan = 0; + + LOG( "SPS:"); + LOGF("SPS: Sampling interval = %d ms (zero means \"platform default\")", + (int)sUnwindInterval); + LOGF("SPS: Entry store size = %d (zero means \"platform default\")", + (int)sProfileEntries); + LOGF("SPS: UnwindStackScan = %d (max dubious frames per unwind).", + (int)sUnwindStackScan); + LOG( "SPS:"); + + return; +} + +void set_tls_stack_top(void* stackTop) +{ + // Round |stackTop| up to the end of the containing page. We may + // as well do this -- there's no danger of a fault, and we might + // get a few more base-of-the-stack frames as a result. This + // assumes that no target has a page size smaller than 4096. + uintptr_t stackTopR = (uintptr_t)stackTop; + if (stackTop) { + stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095; + } + tlsStackTop.set((void*)stackTopR); +} + +bool is_main_thread_name(const char* aName) { + if (!aName) { + return false; + } + return strcmp(aName, gGeckoThreadName) == 0; +} + +#ifndef SPS_STANDALONE +#ifdef HAVE_VA_COPY +#define VARARGS_ASSIGN(foo, bar) VA_COPY(foo,bar) +#elif defined(HAVE_VA_LIST_AS_ARRAY) +#define VARARGS_ASSIGN(foo, bar) foo[0] = bar[0] +#else +#define VARARGS_ASSIGN(foo, bar) (foo) = (bar) +#endif + +void +mozilla_sampler_log(const char *fmt, va_list args) +{ + if (profiler_is_active()) { + // nsAutoCString AppendPrintf would be nicer but + // this is mozilla external code + char buf[2048]; + va_list argsCpy; + VARARGS_ASSIGN(argsCpy, args); + int required = VsprintfLiteral(buf, fmt, argsCpy); + va_end(argsCpy); + + if (required < 0) { + return; // silently drop for now + } else if (required < 2048) { + profiler_tracing("log", buf, TRACING_EVENT); + } else { + char* heapBuf = 
new char[required+1]; + va_list argsCpy; + VARARGS_ASSIGN(argsCpy, args); + vsnprintf(heapBuf, required+1, fmt, argsCpy); + va_end(argsCpy); + // EVENT_BACKTRACE could be used to get a source + // for all log events. This could be a runtime + // flag later. + profiler_tracing("log", heapBuf, TRACING_EVENT); + delete[] heapBuf; + } + } +} +#endif + +//////////////////////////////////////////////////////////////////////// +// BEGIN externally visible functions + +void mozilla_sampler_init(void* stackTop) +{ + sInitCount++; + + if (stack_key_initialized) + return; + +#ifdef MOZ_TASK_TRACER + mozilla::tasktracer::InitTaskTracer(); +#endif + +#ifdef SPS_STANDALONE + mozilla::TimeStamp::Startup(); +#endif + + LOG("BEGIN mozilla_sampler_init"); + if (!tlsPseudoStack.init() || !tlsTicker.init() || !tlsStackTop.init()) { + LOG("Failed to init."); + return; + } + bool ignore; + sStartTime = mozilla::TimeStamp::ProcessCreation(ignore); + + stack_key_initialized = true; + + Sampler::Startup(); + + PseudoStack *stack = PseudoStack::create(); + tlsPseudoStack.set(stack); + + bool isMainThread = true; + Sampler::RegisterCurrentThread(isMainThread ? + gGeckoThreadName : "Application Thread", + stack, isMainThread, stackTop); + + // Read interval settings from MOZ_PROFILER_INTERVAL and stack-scan + // threshhold from MOZ_PROFILER_STACK_SCAN. 
+ read_profiler_env_vars(); + + // platform specific initialization + OS::Startup(); + +#ifndef SPS_STANDALONE + set_stderr_callback(mozilla_sampler_log); +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + if (mozilla::jni::IsFennec()) { + GeckoJavaSampler::Init(); + } +#endif + + // We can't open pref so we use an environment variable + // to know if we should trigger the profiler on startup + // NOTE: Default + const char *val = getenv("MOZ_PROFILER_STARTUP"); + if (!val || !*val) { + return; + } + + const char* features[] = {"js" + , "leaf" + , "threads" +#if defined(XP_WIN) || defined(XP_MACOSX) \ + || (defined(SPS_ARCH_arm) && defined(linux)) \ + || defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) + , "stackwalk" +#endif +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + , "java" +#endif + }; + + const char* threadFilters[] = { "GeckoMain", "Compositor" }; + + profiler_start(PROFILE_DEFAULT_ENTRY, PROFILE_DEFAULT_INTERVAL, + features, MOZ_ARRAY_LENGTH(features), + threadFilters, MOZ_ARRAY_LENGTH(threadFilters)); + LOG("END mozilla_sampler_init"); +} + +void mozilla_sampler_shutdown() +{ + sInitCount--; + + if (sInitCount > 0) + return; + + // Save the profile on shutdown if requested. 
+ GeckoSampler *t = tlsTicker.get(); + if (t) { + const char *val = getenv("MOZ_PROFILER_SHUTDOWN"); + if (val) { + std::ofstream stream; + stream.open(val); + if (stream.is_open()) { + t->ToStreamAsJSON(stream); + stream.close(); + } + } + } + + profiler_stop(); + +#ifndef SPS_STANDALONE + set_stderr_callback(nullptr); +#endif + + Sampler::Shutdown(); + +#ifdef SPS_STANDALONE + mozilla::TimeStamp::Shutdown(); +#endif + + PseudoStack *stack = tlsPseudoStack.get(); + stack->deref(); + tlsPseudoStack.set(nullptr); + +#ifdef MOZ_TASK_TRACER + mozilla::tasktracer::ShutdownTaskTracer(); +#endif +} + +void mozilla_sampler_save() +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return; + } + + t->RequestSave(); + // We're on the main thread already so we don't + // have to wait to handle the save request. + t->HandleSaveRequest(); +} + +mozilla::UniquePtr<char[]> mozilla_sampler_get_profile(double aSinceTime) +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return nullptr; + } + + return t->ToJSON(aSinceTime); +} + +#ifndef SPS_STANDALONE +JSObject *mozilla_sampler_get_profile_data(JSContext *aCx, double aSinceTime) +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return nullptr; + } + + return t->ToJSObject(aCx, aSinceTime); +} + +void mozilla_sampler_get_profile_data_async(double aSinceTime, + mozilla::dom::Promise* aPromise) +{ + GeckoSampler *t = tlsTicker.get(); + if (NS_WARN_IF(!t)) { + return; + } + + t->ToJSObjectAsync(aSinceTime, aPromise); +} + +void mozilla_sampler_get_profiler_start_params(int* aEntrySize, + double* aInterval, + mozilla::Vector<const char*>* aFilters, + mozilla::Vector<const char*>* aFeatures) +{ + if (NS_WARN_IF(!aEntrySize) || NS_WARN_IF(!aInterval) || + NS_WARN_IF(!aFilters) || NS_WARN_IF(!aFeatures)) { + return; + } + + GeckoSampler *t = tlsTicker.get(); + if (NS_WARN_IF(!t)) { + return; + } + + *aEntrySize = t->EntrySize(); + *aInterval = t->interval(); + + const ThreadNameFilterList& threadNameFilterList = 
t->ThreadNameFilters(); + MOZ_ALWAYS_TRUE(aFilters->resize(threadNameFilterList.length())); + for (uint32_t i = 0; i < threadNameFilterList.length(); ++i) { + (*aFilters)[i] = threadNameFilterList[i].c_str(); + } + + const FeatureList& featureList = t->Features(); + MOZ_ALWAYS_TRUE(aFeatures->resize(featureList.length())); + for (size_t i = 0; i < featureList.length(); ++i) { + (*aFeatures)[i] = featureList[i].c_str(); + } +} + +/* Calls GetGatherer(aRetVal) on this thread's sampler (tlsTicker). A null + * |aRetVal| is ignored; |*aRetVal| is set to nullptr when the profiler is not + * active or this thread has no sampler. */ +void mozilla_sampler_get_gatherer(nsISupports** aRetVal) +{ + if (!aRetVal) { + return; + } + + if (NS_WARN_IF(!profiler_is_active())) { + *aRetVal = nullptr; + return; + } + + GeckoSampler *t = tlsTicker.get(); + if (NS_WARN_IF(!t)) { + *aRetVal = nullptr; + return; + } + + t->GetGatherer(aRetVal); +} + +#endif + +/* Opens |aFilename| and passes the stream to ToStreamAsJSON() of this thread's + * sampler. Does nothing when the thread has no sampler; only logs a message + * when the file cannot be opened. */ +void mozilla_sampler_save_profile_to_file(const char* aFilename) +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return; + } + + std::ofstream stream; + stream.open(aFilename); + if (stream.is_open()) { + t->ToStreamAsJSON(stream); + stream.close(); + LOGF("Saved to %s", aFilename); + } else { + LOG("Fail to open profile log file."); + } +} + + +/* Returns a function-local static array of feature-name strings; which + * entries are present depends on the build-time #if conditions below. */ +const char** mozilla_sampler_get_features() +{ + static const char* features[] = { +#if defined(MOZ_PROFILING) && defined(HAVE_NATIVE_UNWIND) + // Walk the C++ stack. + "stackwalk", +#endif +#if defined(ENABLE_SPS_LEAF_DATA) + // Include the C++ leaf node if not stackwalking. DevTools + // profiler doesn't want the native addresses. + "leaf", +#endif +#if !defined(SPS_OS_windows) + // Use a separate thread for walking the stack. + "unwinder", +#endif + "java", + // Only record samples during periods of bad responsiveness + "jank", + // Tell the JS engine to emit pseudostack entries in the + // pro/epilogue. + "js", + // GPU Profiling (may not be supported by the GL) + "gpu", + // Profile the registered secondary threads. + "threads", + // Do not include user-identifiable information + "privacy", + // Dump the layer tree with the textures. 
+ "layersdump", + // Dump the display list with the textures. + "displaylistdump", + // Add main thread I/O to the profile + "mainthreadio", + // Add RSS collection + "memory", +#ifdef MOZ_TASK_TRACER + // Start profiling with feature TaskTracer. + "tasktracer", +#endif +#if defined(XP_WIN) + // Add power collection + "power", +#endif + nullptr + }; + + return features; +} + +void mozilla_sampler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize, + uint32_t *aGeneration) +{ + *aCurrentPosition = 0; + *aTotalSize = 0; + *aGeneration = 0; + + if (!stack_key_initialized) + return; + + GeckoSampler *t = tlsTicker.get(); + if (!t) + return; + + t->GetBufferInfo(aCurrentPosition, aTotalSize, aGeneration); +} + +// Values are only honored on the first start +void mozilla_sampler_start(int aProfileEntries, double aInterval, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) + +{ + LOG("BEGIN mozilla_sampler_start"); + + if (!stack_key_initialized) + profiler_init(nullptr); + + /* If the sampling interval was set using env vars, use that + in preference to anything else. */ + if (sUnwindInterval > 0) + aInterval = sUnwindInterval; + + /* If the entry count was set using env vars, use that, too: */ + if (sProfileEntries > 0) + aProfileEntries = sProfileEntries; + + // Reset the current state if the profiler is running + profiler_stop(); + + GeckoSampler* t; + t = new GeckoSampler(aInterval ? aInterval : PROFILE_DEFAULT_INTERVAL, + aProfileEntries ? 
aProfileEntries : PROFILE_DEFAULT_ENTRY, + aFeatures, aFeatureCount, + aThreadNameFilters, aFilterCount); + + tlsTicker.set(t); + t->Start(); + if (t->ProfileJS() || t->InPrivacyMode()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = t->GetRegisteredThreads(); + + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + if (info->IsPendingDelete()) { + continue; + } + ThreadProfile* thread_profile = info->Profile(); + if (!thread_profile) { + continue; + } + thread_profile->GetPseudoStack()->reinitializeOnResume(); +#ifndef SPS_STANDALONE + if (t->ProfileJS()) { + thread_profile->GetPseudoStack()->enableJSSampling(); + } + if (t->InPrivacyMode()) { + thread_profile->GetPseudoStack()->mPrivacyMode = true; + } +#endif + } + } + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + if (t->ProfileJava()) { + int javaInterval = aInterval; + // Java sampling doesn't accurately keep up with 1ms sampling, so clamp the + // interval handed to the Java sampler to at least 10ms. Only the local + // copy is clamped: aInterval is reported to observers further down and + // must keep the value the native sampler was created with. + if (javaInterval < 10) { + javaInterval = 10; + } + java::GeckoJavaSampler::Start(javaInterval, 1000); + } +#endif + +#ifndef SPS_STANDALONE + if (t->AddMainThreadIO()) { + if (!sInterposeObserver) { + // Lazily create IO interposer observer + sInterposeObserver = new mozilla::ProfilerIOInterposeObserver(); + } + mozilla::IOInterposer::Register(mozilla::IOInterposeObserver::OpAll, + sInterposeObserver); + } +#endif + + sIsProfiling = true; +#ifndef SPS_STANDALONE + sIsGPUProfiling = t->ProfileGPU(); + sIsLayersDump = t->LayersDump(); + sIsDisplayListDump = t->DisplayListDump(); + sIsRestyleProfiling = t->ProfileRestyle(); + + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) { + nsTArray<nsCString> featuresArray; + nsTArray<nsCString> threadNameFiltersArray; + + for (size_t i = 0; i < aFeatureCount; ++i) { + featuresArray.AppendElement(aFeatures[i]); + } + + for (size_t i = 0; i < aFilterCount; ++i) { + 
threadNameFiltersArray.AppendElement(aThreadNameFilters[i]); + } + + nsCOMPtr<nsIProfilerStartParams> params = + new nsProfilerStartParams(aProfileEntries, aInterval, featuresArray, + threadNameFiltersArray); + + os->NotifyObservers(params, "profiler-started", nullptr); + } + } +#endif + + LOG("END mozilla_sampler_start"); +} + +void mozilla_sampler_stop() +{ + LOG("BEGIN mozilla_sampler_stop"); + + if (!stack_key_initialized) + return; + + GeckoSampler *t = tlsTicker.get(); + if (!t) { + LOG("END mozilla_sampler_stop-early"); + return; + } + + bool disableJS = t->ProfileJS(); + + t->Stop(); + delete t; + tlsTicker.set(nullptr); + +#ifndef SPS_STANDALONE + if (disableJS) { + PseudoStack *stack = tlsPseudoStack.get(); + ASSERT(stack != nullptr); + stack->disableJSSampling(); + } + + mozilla::IOInterposer::Unregister(mozilla::IOInterposeObserver::OpAll, + sInterposeObserver); + sInterposeObserver = nullptr; +#endif + + sIsProfiling = false; +#ifndef SPS_STANDALONE + sIsGPUProfiling = false; + sIsLayersDump = false; + sIsDisplayListDump = false; + sIsRestyleProfiling = false; + + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-stopped", nullptr); + } +#endif + + LOG("END mozilla_sampler_stop"); +} + +bool mozilla_sampler_is_paused() { + if (Sampler::GetActiveSampler()) { + return Sampler::GetActiveSampler()->IsPaused(); + } else { + return false; + } +} + +void mozilla_sampler_pause() { + if (Sampler::GetActiveSampler()) { + Sampler::GetActiveSampler()->SetPaused(true); +#ifndef SPS_STANDALONE + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-paused", nullptr); + } +#endif + } +} + +void mozilla_sampler_resume() { + if (Sampler::GetActiveSampler()) { + Sampler::GetActiveSampler()->SetPaused(false); +#ifndef SPS_STANDALONE + if 
(Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-resumed", nullptr); + } +#endif + } +} + +bool mozilla_sampler_feature_active(const char* aName) +{ + if (!profiler_is_active()) { + return false; + } + + if (strcmp(aName, "gpu") == 0) { + return sIsGPUProfiling; + } + + if (strcmp(aName, "layersdump") == 0) { + return sIsLayersDump; + } + + if (strcmp(aName, "displaylistdump") == 0) { + return sIsDisplayListDump; + } + + if (strcmp(aName, "restyle") == 0) { + return sIsRestyleProfiling; + } + + return false; +} + +bool mozilla_sampler_is_active() +{ + return sIsProfiling; +} + +void mozilla_sampler_responsiveness(const mozilla::TimeStamp& aTime) +{ + sLastTracerEvent = aTime; +} + +void mozilla_sampler_frame_number(int frameNumber) +{ + sFrameNumber = frameNumber; +} + +void mozilla_sampler_lock() +{ + profiler_stop(); +#ifndef SPS_STANDALONE + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-locked", nullptr); +#endif +} + +void mozilla_sampler_unlock() +{ +#ifndef SPS_STANDALONE + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-unlocked", nullptr); +#endif +} + +bool mozilla_sampler_register_thread(const char* aName, void* aGuessStackTop) +{ + if (sInitCount == 0) { + return false; + } + +#if defined(MOZ_WIDGET_GONK) && !defined(MOZ_PROFILING) + // The only way to profile secondary threads on b2g + // is to build with profiling OR have the profiler + // running on startup. 
+ if (!profiler_is_active()) { + return false; + } +#endif + + MOZ_ASSERT(tlsPseudoStack.get() == nullptr); + PseudoStack* stack = PseudoStack::create(); + tlsPseudoStack.set(stack); + bool isMainThread = is_main_thread_name(aName); + void* stackTop = GetStackTop(aGuessStackTop); + return Sampler::RegisterCurrentThread(aName, stack, isMainThread, stackTop); +} + +void mozilla_sampler_unregister_thread() +{ + // Don't check sInitCount count here -- we may be unregistering the + // thread after the sampler was shut down. + if (!stack_key_initialized) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (!stack) { + return; + } + stack->deref(); + tlsPseudoStack.set(nullptr); + + Sampler::UnregisterCurrentThread(); +} + +void mozilla_sampler_sleep_start() { + if (sInitCount == 0) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (stack == nullptr) { + return; + } + stack->setSleeping(1); +} + +void mozilla_sampler_sleep_end() { + if (sInitCount == 0) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (stack == nullptr) { + return; + } + stack->setSleeping(0); +} + +bool mozilla_sampler_is_sleeping() { + if (sInitCount == 0) { + return false; + } + PseudoStack *stack = tlsPseudoStack.get(); + if (stack == nullptr) { + return false; + } + return stack->isSleeping(); +} + +double mozilla_sampler_time(const mozilla::TimeStamp& aTime) +{ + mozilla::TimeDuration delta = aTime - sStartTime; + return delta.ToMilliseconds(); +} + +double mozilla_sampler_time() +{ + return mozilla_sampler_time(mozilla::TimeStamp::Now()); +} + +ProfilerBacktrace* mozilla_sampler_get_backtrace() +{ + if (!stack_key_initialized) + return nullptr; + + // Don't capture a stack if we're not profiling + if (!profiler_is_active()) { + return nullptr; + } + + // Don't capture a stack if we don't want to include personal information + if (profiler_in_privacy_mode()) { + return nullptr; + } + + GeckoSampler* t = tlsTicker.get(); + if (!t) { + return 
nullptr; + } + + return new ProfilerBacktrace(t->GetBacktrace()); +} + +void mozilla_sampler_free_backtrace(ProfilerBacktrace* aBacktrace) +{ + delete aBacktrace; +} + +// Fill the output buffer with the following pattern: +// "Lable 1" "\0" "Label 2" "\0" ... "Label N" "\0" "\0" +// TODO: use the unwinder instead of pseudo stack. +void mozilla_sampler_get_backtrace_noalloc(char *output, size_t outputSize) +{ + MOZ_ASSERT(outputSize >= 2); + char *bound = output + outputSize - 2; + output[0] = output[1] = '\0'; + PseudoStack *pseudoStack = tlsPseudoStack.get(); + if (!pseudoStack) { + return; + } + + volatile StackEntry *pseudoFrames = pseudoStack->mStack; + uint32_t pseudoCount = pseudoStack->stackSize(); + + for (uint32_t i = 0; i < pseudoCount; i++) { + size_t len = strlen(pseudoFrames[i].label()); + if (output + len >= bound) + break; + strcpy(output, pseudoFrames[i].label()); + output += len; + *output++ = '\0'; + *output = '\0'; + } +} + +void mozilla_sampler_tracing(const char* aCategory, const char* aInfo, + TracingMetadata aMetaData) +{ + mozilla_sampler_add_marker(aInfo, new ProfilerMarkerTracing(aCategory, aMetaData)); +} + +void mozilla_sampler_tracing(const char* aCategory, const char* aInfo, + ProfilerBacktrace* aCause, + TracingMetadata aMetaData) +{ + mozilla_sampler_add_marker(aInfo, new ProfilerMarkerTracing(aCategory, aMetaData, aCause)); +} + +void mozilla_sampler_add_marker(const char *aMarker, ProfilerMarkerPayload *aPayload) +{ + // Note that aPayload may be allocated by the caller, so we need to make sure + // that we free it at some point. + mozilla::UniquePtr<ProfilerMarkerPayload> payload(aPayload); + + if (!stack_key_initialized) + return; + + // Don't insert a marker if we're not profiling to avoid + // the heap copy (malloc). 
+ if (!profiler_is_active()) { + return; + } + + // Don't add a marker if we don't want to include personal information + if (profiler_in_privacy_mode()) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (!stack) { + return; + } + + mozilla::TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull()) ? + aPayload->GetStartTime() : mozilla::TimeStamp::Now(); + mozilla::TimeDuration delta = origin - sStartTime; + stack->addMarker(aMarker, payload.release(), delta.ToMilliseconds()); +} + +#ifndef SPS_STANDALONE +#include "mozilla/Mutex.h" + +class GeckoMutex : public ::Mutex { + public: + explicit GeckoMutex(const char* aDesc) : + mMutex(aDesc) + {} + + virtual ~GeckoMutex() {} + + virtual int Lock() { + mMutex.Lock(); + return 0; + } + + virtual int Unlock() { + mMutex.Unlock(); + return 0; + } + + private: + mozilla::Mutex mMutex; +}; + +mozilla::UniquePtr< ::Mutex> OS::CreateMutex(const char* aDesc) { + return mozilla::MakeUnique<GeckoMutex>(aDesc); +} + +#else +// Otherwise use c++11 Mutex +#include <mutex> + +class OSXMutex : public ::Mutex { + public: + OSXMutex(const char* aDesc) : + mMutex() + {} + + virtual ~OSXMutex() {} + + virtual int Lock() { + mMutex.lock(); + return 0; + } + + virtual int Unlock() { + mMutex.unlock(); + return 0; + } + + private: + std::mutex mMutex; +}; + +mozilla::UniquePtr< ::Mutex> OS::CreateMutex(const char* aDesc) { + return mozilla::MakeUnique<GeckoMutex>(aDesc); +} + +#endif + +// END externally visible functions +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/platform.h b/tools/profiler/core/platform.h new file mode 100644 index 0000000000..2e736d97cb --- /dev/null +++ b/tools/profiler/core/platform.h @@ -0,0 +1,431 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#ifndef TOOLS_PLATFORM_H_ +#define TOOLS_PLATFORM_H_ + +#ifdef SPS_STANDALONE +#define MOZ_COUNT_CTOR(name) +#define MOZ_COUNT_DTOR(name) +#endif + +#ifdef ANDROID +#include <android/log.h> +#else +#define __android_log_print(a, ...) 
// We need a definition of gettid(). Bionic has always provided a wrapper and
// glibc declares one in <unistd.h> since version 2.30; defining our own
// static version on top of a libc declaration is a compile error, so the
// fallback below is only emitted when the libc is known not to have one.
#if defined(__linux__)
#include <unistd.h>
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 30)
#define SPS_LIBC_HAS_GETTID 1
#endif
#endif
#if !defined(__BIONIC__) && !defined(SPS_LIBC_HAS_GETTID)
#include <sys/syscall.h>
// Returns the kernel thread id of the calling thread via the raw syscall.
static inline pid_t gettid()
{
  return (pid_t) syscall(SYS_gettid);
}
#endif
#endif
// Abstract mutex interface; concrete implementations are created through
// OS::CreateMutex().
class Mutex {
 public:
  virtual ~Mutex() {}

  // Locks the given mutex. If the mutex is currently unlocked, it becomes
  // locked and owned by the calling thread, and the call returns
  // immediately. If the mutex is already locked by another thread, suspends
  // the calling thread until the mutex is unlocked.
  virtual int Lock() = 0;

  // Unlocks the given mutex. The mutex is assumed to be locked and owned by
  // the calling thread on entrance.
  virtual int Unlock() = 0;
};

// Scope guard: locks the mutex on construction and unlocks it on
// destruction.
class MutexAutoLock {
 public:
  explicit MutexAutoLock(::Mutex& aMutex)
    : mMutex(&aMutex)
  {
    mMutex->Lock();
  }

  ~MutexAutoLock() {
    mMutex->Unlock();
  }

  // A copy would call Unlock() a second time for a single Lock(), so the
  // guard is neither copyable nor assignable.
  MutexAutoLock(const MutexAutoLock&) = delete;
  MutexAutoLock& operator=(const MutexAutoLock&) = delete;

 private:
  Mutex* mMutex;
};
+ void Start(); + + void Join(); + + inline const char* name() const { + return name_; + } + + // Abstract method for run handler. + virtual void Run() = 0; + + // The thread name length is limited to 16 based on Linux's implementation of + // prctl(). + static const int kMaxThreadNameLength = 16; + +#ifdef XP_WIN + HANDLE thread_; + typedef DWORD tid_t; + tid_t thread_id_; +#else + typedef ::pid_t tid_t; +#endif +#if defined(XP_MACOSX) + pthread_t thread_; +#endif + + static tid_t GetCurrentId(); + + private: + void set_name(const char *name); + + char name_[kMaxThreadNameLength]; + int stack_size_; + + DISALLOW_COPY_AND_ASSIGN(Thread); +}; + +// ---------------------------------------------------------------------------- +// HAVE_NATIVE_UNWIND +// +// Pseudo backtraces are available on all platforms. Native +// backtraces are available only on selected platforms. Breakpad is +// the only supported native unwinder. HAVE_NATIVE_UNWIND is set at +// build time to indicate whether native unwinding is possible on this +// platform. + +#undef HAVE_NATIVE_UNWIND +#if defined(MOZ_PROFILING) \ + && (defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_arm_android) \ + || (defined(MOZ_WIDGET_ANDROID) && defined(__arm__)) \ + || defined(SPS_PLAT_x86_linux) \ + || defined(SPS_OS_windows) \ + || defined(SPS_OS_darwin)) +# define HAVE_NATIVE_UNWIND +#endif + +/* Some values extracted at startup from environment variables, that + control the behaviour of the breakpad unwinder. 
*/ +extern const char* PROFILER_INTERVAL; +extern const char* PROFILER_ENTRIES; +extern const char* PROFILER_STACK; +extern const char* PROFILER_FEATURES; + +void read_profiler_env_vars(); +void profiler_usage(); + +// Helper methods to expose modifying profiler behavior +bool set_profiler_interval(const char*); +bool set_profiler_entries(const char*); +bool set_profiler_scan(const char*); +bool is_native_unwinding_avail(); + +void set_tls_stack_top(void* stackTop); + +// ---------------------------------------------------------------------------- +// Sampler +// +// A sampler periodically samples the state of the VM and optionally +// (if used for profiling) the program counter and stack pointer for +// the thread that created it. + +struct PseudoStack; +class ThreadProfile; + +// TickSample captures the information collected for each sample. +class TickSample { + public: + TickSample() + : pc(NULL) + , sp(NULL) + , fp(NULL) +#ifdef ENABLE_ARM_LR_SAVING + , lr(NULL) +#endif + , context(NULL) + , isSamplingCurrentThread(false) + , threadProfile(nullptr) + , rssMemory(0) + , ussMemory(0) + {} + + void PopulateContext(void* aContext); + + Address pc; // Instruction pointer. + Address sp; // Stack pointer. + Address fp; // Frame pointer. +#ifdef ENABLE_ARM_LR_SAVING + Address lr; // ARM link register +#endif + void* context; // The context from the signal handler, if available. On + // Win32 this may contain the windows thread context. + bool isSamplingCurrentThread; + ThreadProfile* threadProfile; + mozilla::TimeStamp timestamp; + int64_t rssMemory; + int64_t ussMemory; +}; + +class ThreadInfo; +class PlatformData; +class GeckoSampler; +class SyncProfile; +class Sampler { + public: + // Initialize sampler. + explicit Sampler(double interval, bool profiling, int entrySize); + virtual ~Sampler(); + + double interval() const { return interval_; } + + // This method is called for each sampling period with the current + // program counter. 
+ virtual void Tick(TickSample* sample) = 0; + + // Immediately captures the calling thread's call stack and returns it. + virtual SyncProfile* GetBacktrace() = 0; + + // Request a save from a signal handler + virtual void RequestSave() = 0; + // Process any outstanding request outside a signal handler. + virtual void HandleSaveRequest() = 0; + // Delete markers which are no longer part of the profile due to buffer wraparound. + virtual void DeleteExpiredMarkers() = 0; + + // Start and stop sampler. + void Start(); + void Stop(); + + // Is the sampler used for profiling? + bool IsProfiling() const { return profiling_; } + + // Whether the sampler is running (that is, consumes resources). + bool IsActive() const { return active_; } + + // Low overhead way to stop the sampler from ticking + bool IsPaused() const { return paused_; } + void SetPaused(bool value) { NoBarrier_Store(&paused_, value); } + + virtual bool ProfileThreads() const = 0; + + int EntrySize() { return entrySize_; } + + // We can't new/delete the type safely without defining it + // (-Wdelete-incomplete). Use these Alloc/Free functions instead. 
+ static PlatformData* AllocPlatformData(int aThreadId); + static void FreePlatformData(PlatformData*); + + // If we move the backtracing code into the platform files we won't + // need to have these hacks +#ifdef XP_WIN + // xxxehsan sucky hack :( + static uintptr_t GetThreadHandle(PlatformData*); +#endif +#ifdef XP_MACOSX + static pthread_t GetProfiledThread(PlatformData*); +#endif + + static std::vector<ThreadInfo*> GetRegisteredThreads() { + return *sRegisteredThreads; + } + + static bool RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop); + static void UnregisterCurrentThread(); + + static void Startup(); + // Should only be called on shutdown + static void Shutdown(); + + static GeckoSampler* GetActiveSampler() { return sActiveSampler; } + static void SetActiveSampler(GeckoSampler* sampler) { sActiveSampler = sampler; } + + static mozilla::UniquePtr<Mutex> sRegisteredThreadsMutex; + + static bool CanNotifyObservers() { +#ifdef MOZ_WIDGET_GONK + // We use profile.sh on b2g to manually select threads and options per process. + return false; +#elif defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + // Android ANR reporter uses the profiler off the main thread + return NS_IsMainThread(); +#else + MOZ_ASSERT(NS_IsMainThread()); + return true; +#endif + } + + protected: + static std::vector<ThreadInfo*>* sRegisteredThreads; + static GeckoSampler* sActiveSampler; + + private: + void SetActive(bool value) { NoBarrier_Store(&active_, value); } + + const double interval_; + const bool profiling_; + Atomic32 paused_; + Atomic32 active_; + const int entrySize_; + + // Refactor me! 
+#if defined(SPS_OS_linux) || defined(SPS_OS_android) + bool signal_handler_installed_; + struct sigaction old_sigprof_signal_handler_; + struct sigaction old_sigsave_signal_handler_; + bool signal_sender_launched_; + pthread_t signal_sender_thread_; +#endif +}; + +#endif /* ndef TOOLS_PLATFORM_H_ */ diff --git a/tools/profiler/core/shared-libraries-linux.cc b/tools/profiler/core/shared-libraries-linux.cc new file mode 100644 index 0000000000..24437fb4e7 --- /dev/null +++ b/tools/profiler/core/shared-libraries-linux.cc @@ -0,0 +1,159 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "shared-libraries.h" + +#define PATH_MAX_TOSTRING(x) #x +#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x) +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <fstream> +#include "platform.h" +#include "shared-libraries.h" + +#include "common/linux/file_id.h" +#include <algorithm> + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +// Get the breakpad Id for the binary file pointed by bin_name +static std::string getId(const char *bin_name) +{ + using namespace google_breakpad; + using namespace std; + + PageAllocator allocator; + auto_wasteful_vector<uint8_t, sizeof(MDGUID)> identifier(&allocator); + + FileID file_id(bin_name); + if (file_id.ElfFileIdentifier(identifier)) { + return FileID::ConvertIdentifierToUUIDString(identifier) + "0"; + } + + return ""; +} + +#if !defined(MOZ_WIDGET_GONK) +// TODO fix me with proper include +#include "nsDebug.h" +#ifdef ANDROID +#include "ElfLoader.h" // dl_phdr_info +#else +#include <link.h> // dl_phdr_info +#endif +#include <features.h> +#include <dlfcn.h> +#include <sys/types.h> + +#ifdef ANDROID +extern "C" MOZ_EXPORT __attribute__((weak)) +int dl_iterate_phdr( + int (*callback) (struct dl_phdr_info *info, + size_t 
size, void *data), + void *data); +#endif + +static int +dl_iterate_callback(struct dl_phdr_info *dl_info, size_t size, void *data) +{ + SharedLibraryInfo& info = *reinterpret_cast<SharedLibraryInfo*>(data); + + if (dl_info->dlpi_phnum <= 0) + return 0; + + unsigned long libStart = -1; + unsigned long libEnd = 0; + + for (size_t i = 0; i < dl_info->dlpi_phnum; i++) { + if (dl_info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + unsigned long start = dl_info->dlpi_addr + dl_info->dlpi_phdr[i].p_vaddr; + unsigned long end = start + dl_info->dlpi_phdr[i].p_memsz; + if (start < libStart) + libStart = start; + if (end > libEnd) + libEnd = end; + } + const char *name = dl_info->dlpi_name; + SharedLibrary shlib(libStart, libEnd, 0, getId(name), name); + info.AddSharedLibrary(shlib); + + return 0; +} + +#endif // !MOZ_WIDGET_GONK + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() +{ + SharedLibraryInfo info; + +#if !defined(MOZ_WIDGET_GONK) +#ifdef ANDROID + if (!dl_iterate_phdr) { + // On ARM Android, dl_iterate_phdr is provided by the custom linker. + // So if libxul was loaded by the system linker (e.g. as part of + // xpcshell when running tests), it won't be available and we should + // not call it. 
+ return info; + } +#endif // ANDROID + + dl_iterate_phdr(dl_iterate_callback, &info); +#endif // !MOZ_WIDGET_GONK + +#if defined(ANDROID) || defined(MOZ_WIDGET_GONK) + pid_t pid = getpid(); + char path[PATH_MAX]; + snprintf(path, PATH_MAX, "/proc/%d/maps", pid); + std::ifstream maps(path); + std::string line; + int count = 0; + while (std::getline(maps, line)) { + int ret; + //XXX: needs input sanitizing + unsigned long start; + unsigned long end; + char perm[6] = ""; + unsigned long offset; + char name[PATH_MAX] = ""; + ret = sscanf(line.c_str(), + "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n", + &start, &end, perm, &offset, name); + if (!strchr(perm, 'x')) { + // Ignore non executable entries + continue; + } + if (ret != 5 && ret != 4) { + LOG("Get maps line failed"); + continue; + } +#if defined(ANDROID) && !defined(MOZ_WIDGET_GONK) + // Use proc/pid/maps to get the dalvik-jit section since it has + // no associated phdrs + if (strcmp(name, "/dev/ashmem/dalvik-jit-code-cache") != 0) + continue; +#else + if (strcmp(perm, "r-xp") != 0) { + // Ignore entries that are writable and/or shared. + // At least one graphics driver uses short-lived "rwxs" mappings + // (see bug 926734 comment 5), so just checking for 'x' isn't enough. + continue; + } +#endif + SharedLibrary shlib(start, end, offset, getId(name), name); + info.AddSharedLibrary(shlib); + if (count > 10000) { + LOG("Get maps failed"); + break; + } + count++; + } +#endif // ANDROID || MOZ_WIDGET_GONK + + return info; +} diff --git a/tools/profiler/core/shared-libraries-macos.cc b/tools/profiler/core/shared-libraries-macos.cc new file mode 100644 index 0000000000..e218d2280d --- /dev/null +++ b/tools/profiler/core/shared-libraries-macos.cc @@ -0,0 +1,132 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <AvailabilityMacros.h> +#include <mach-o/loader.h> +#include <mach-o/dyld_images.h> +#include <mach/task_info.h> +#include <mach/task.h> +#include <mach/mach_init.h> +#include <mach/mach_traps.h> +#include <string.h> +#include <stdlib.h> +#include <vector> +#include <sstream> + +#include "shared-libraries.h" + +#ifndef MAC_OS_X_VERSION_10_6 +#define MAC_OS_X_VERSION_10_6 1060 +#endif + +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6 +// borrowed from Breakpad +// Fallback declarations for TASK_DYLD_INFO and friends, introduced in +// <mach/task_info.h> in the Mac OS X 10.6 SDK. +#define TASK_DYLD_INFO 17 +struct task_dyld_info { + mach_vm_address_t all_image_info_addr; + mach_vm_size_t all_image_info_size; + }; +typedef struct task_dyld_info task_dyld_info_data_t; +typedef struct task_dyld_info *task_dyld_info_t; +#define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t)) + +#endif + +// Architecture specific abstraction. +#ifdef __i386__ +typedef mach_header platform_mach_header; +typedef segment_command mach_segment_command_type; +#define MACHO_MAGIC_NUMBER MH_MAGIC +#define CMD_SEGMENT LC_SEGMENT +#define seg_size uint32_t +#else +typedef mach_header_64 platform_mach_header; +typedef segment_command_64 mach_segment_command_type; +#define MACHO_MAGIC_NUMBER MH_MAGIC_64 +#define CMD_SEGMENT LC_SEGMENT_64 +#define seg_size uint64_t +#endif + +static +void addSharedLibrary(const platform_mach_header* header, char *name, SharedLibraryInfo &info) { + const struct load_command *cmd = + reinterpret_cast<const struct load_command *>(header + 1); + + seg_size size = 0; + unsigned long long start = reinterpret_cast<unsigned long long>(header); + // Find the cmd segment in the macho image. It will contain the offset we care about. 
+ const uint8_t *uuid_bytes = nullptr; + for (unsigned int i = 0; + cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0); + ++i) { + if (cmd->cmd == CMD_SEGMENT) { + const mach_segment_command_type *seg = + reinterpret_cast<const mach_segment_command_type *>(cmd); + + if (!strcmp(seg->segname, "__TEXT")) { + size = seg->vmsize; + } + } else if (cmd->cmd == LC_UUID) { + const uuid_command *ucmd = reinterpret_cast<const uuid_command *>(cmd); + uuid_bytes = ucmd->uuid; + } + + cmd = reinterpret_cast<const struct load_command *> + (reinterpret_cast<const char *>(cmd) + cmd->cmdsize); + } + + std::stringstream uuid; + uuid << std::hex << std::uppercase; + if (uuid_bytes != nullptr) { + for (int i = 0; i < 16; ++i) { + uuid << ((uuid_bytes[i] & 0xf0) >> 4); + uuid << (uuid_bytes[i] & 0xf); + } + uuid << '0'; + } + + info.AddSharedLibrary(SharedLibrary(start, start + size, 0, uuid.str(), + name)); +} + +// Use dyld to inspect the macho image information. We can build the SharedLibraryEntry structure +// giving us roughtly the same info as /proc/PID/maps in Linux. +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() +{ + SharedLibraryInfo sharedLibraryInfo; + + task_dyld_info_data_t task_dyld_info; + mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; + if (task_info(mach_task_self (), TASK_DYLD_INFO, (task_info_t)&task_dyld_info, + &count) != KERN_SUCCESS) { + return sharedLibraryInfo; + } + + struct dyld_all_image_infos* aii = (struct dyld_all_image_infos*)task_dyld_info.all_image_info_addr; + size_t infoCount = aii->infoArrayCount; + + // Iterate through all dyld images (loaded libraries) to get their names + // and offests. + for (size_t i = 0; i < infoCount; ++i) { + const dyld_image_info *info = &aii->infoArray[i]; + + // If the magic number doesn't match then go no further + // since we're not pointing to where we think we are. 
+ if (info->imageLoadAddress->magic != MACHO_MAGIC_NUMBER) { + continue; + } + + const platform_mach_header* header = + reinterpret_cast<const platform_mach_header*>(info->imageLoadAddress); + + // Add the entry for this image. + addSharedLibrary(header, (char*)info->imageFilePath, sharedLibraryInfo); + + } + return sharedLibraryInfo; +} + diff --git a/tools/profiler/core/shared-libraries-win32.cc b/tools/profiler/core/shared-libraries-win32.cc new file mode 100644 index 0000000000..e2db2579b8 --- /dev/null +++ b/tools/profiler/core/shared-libraries-win32.cc @@ -0,0 +1,137 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <windows.h> +#include <tlhelp32.h> +#include <dbghelp.h> +#include <sstream> + +#include "shared-libraries.h" +#include "nsWindowsHelpers.h" + +#define CV_SIGNATURE 0x53445352 // 'SDSR' + +struct CodeViewRecord70 +{ + uint32_t signature; + GUID pdbSignature; + uint32_t pdbAge; + char pdbFileName[1]; +}; + +static bool GetPdbInfo(uintptr_t aStart, nsID& aSignature, uint32_t& aAge, char** aPdbName) +{ + if (!aStart) { + return false; + } + + PIMAGE_DOS_HEADER dosHeader = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart); + if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) { + return false; + } + + PIMAGE_NT_HEADERS ntHeaders = reinterpret_cast<PIMAGE_NT_HEADERS>( + aStart + dosHeader->e_lfanew); + if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) { + return false; + } + + uint32_t relativeVirtualAddress = + ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress; + if (!relativeVirtualAddress) { + return false; + } + + PIMAGE_DEBUG_DIRECTORY debugDirectory = + reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart + relativeVirtualAddress); + if (!debugDirectory || debugDirectory->Type != 
IMAGE_DEBUG_TYPE_CODEVIEW) { + return false; + } + + CodeViewRecord70 *debugInfo = reinterpret_cast<CodeViewRecord70 *>( + aStart + debugDirectory->AddressOfRawData); + if (!debugInfo || debugInfo->signature != CV_SIGNATURE) { + return false; + } + + aAge = debugInfo->pdbAge; + GUID& pdbSignature = debugInfo->pdbSignature; + aSignature.m0 = pdbSignature.Data1; + aSignature.m1 = pdbSignature.Data2; + aSignature.m2 = pdbSignature.Data3; + memcpy(aSignature.m3, pdbSignature.Data4, sizeof(pdbSignature.Data4)); + + // The PDB file name could be different from module filename, so report both + // e.g. The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb + char * leafName = strrchr(debugInfo->pdbFileName, '\\'); + if (leafName) { + // Only report the file portion of the path + *aPdbName = leafName + 1; + } else { + *aPdbName = debugInfo->pdbFileName; + } + + return true; +} + +static bool IsDashOrBraces(char c) +{ + return c == '-' || c == '{' || c == '}'; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() +{ + SharedLibraryInfo sharedLibraryInfo; + + nsAutoHandle snap(CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId())); + + MODULEENTRY32 module = {0}; + module.dwSize = sizeof(MODULEENTRY32); + if (Module32First(snap, &module)) { + do { + nsID pdbSig; + uint32_t pdbAge; + char *pdbName = NULL; + + // Load the module again to make sure that its handle will remain remain + // valid as we attempt to read the PDB information from it. We load the + // DLL as a datafile so that if the module actually gets unloaded between + // the call to Module32Next and the following LoadLibraryEx, we don't end + // up running the now newly loaded module's DllMain function. If the + // module is already loaded, LoadLibraryEx just increments its refcount. 
+ // + // Note that because of the race condition above, merely loading the DLL + // again is not safe enough, therefore we also need to make sure that we + // can read the memory mapped at the base address before we can safely + // proceed to actually access those pages. + HMODULE handleLock = LoadLibraryEx(module.szExePath, NULL, LOAD_LIBRARY_AS_DATAFILE); + MEMORY_BASIC_INFORMATION vmemInfo = {0}; + if (handleLock && + sizeof(vmemInfo) == VirtualQuery(module.modBaseAddr, &vmemInfo, sizeof(vmemInfo)) && + vmemInfo.State == MEM_COMMIT && + GetPdbInfo((uintptr_t)module.modBaseAddr, pdbSig, pdbAge, &pdbName)) { + std::ostringstream stream; + stream << pdbSig.ToString() << std::hex << pdbAge; + std::string breakpadId = stream.str(); + std::string::iterator end = + std::remove_if(breakpadId.begin(), breakpadId.end(), IsDashOrBraces); + breakpadId.erase(end, breakpadId.end()); + std::transform(breakpadId.begin(), breakpadId.end(), + breakpadId.begin(), toupper); + + SharedLibrary shlib((uintptr_t)module.modBaseAddr, + (uintptr_t)module.modBaseAddr+module.modBaseSize, + 0, // DLLs are always mapped at offset 0 on Windows + breakpadId, + pdbName); + sharedLibraryInfo.AddSharedLibrary(shlib); + } + FreeLibrary(handleLock); // ok to free null handles + } while (Module32Next(snap, &module)); + } + + return sharedLibraryInfo; +} + diff --git a/tools/profiler/core/v8-support.h b/tools/profiler/core/v8-support.h new file mode 100644 index 0000000000..391069dcc7 --- /dev/null +++ b/tools/profiler/core/v8-support.h @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* This contains stubs and infrastructure to support code from v8 */

#ifndef V8_SUPPORT_H_
#define V8_SUPPORT_H_

// int32_t is used below; include its header so this file is self-contained.
#include <stdint.h>

#if defined(_M_X64) || defined(__x86_64__)
#define V8_HOST_ARCH_X64 1
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#define V8_HOST_ARCH_IA32 1
#elif defined(__ARMEL__)
#define V8_HOST_ARCH_ARM 1
#else
#warning Please add support for your architecture in v8-support.h
#endif

typedef int32_t Atomic32;

// Stores `value` into `*ptr` with a plain volatile store, i.e. without any
// memory barrier. This needs nothing architecture specific, so it is
// defined for every host rather than only for the architectures detected
// above.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}


const int kMaxInt = 0x7FFFFFFF;
const int kMinInt = -kMaxInt - 1;

// A macro to disallow the evil copy constructor and operator= functions
// This should be used in the private: declarations for a class.
// The members are explicitly deleted, so a misuse is diagnosed at compile
// time even from inside the class itself.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;      \
  void operator=(const TypeName&) = delete


// The USE(x) template is used to silence C++ compiler warnings
// issued for (yet) unused variables (typically parameters).
template <typename T>
static inline void USE(T) { }

// Empty marker base class.
class Malloced {
};

#endif // V8_SUPPORT_H_