blob: 7b41fee4fd50d5478f5bc1e92c5601d40ca2aaf1 [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <objbase.h>
#include <windows.h>
#include <winternl.h>
#include <cstdlib>
#include <map>
#include <utility>
#include <vector>
#include "base/lazy_instance.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/memory/scoped_ptr.h"
#include "base/profiler/native_stack_sampler.h"
#include "base/profiler/win32_stack_frame_unwinder.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "base/win/pe_image.h"
#include "base/win/scoped_handle.h"
namespace base {
// Stack recording functions --------------------------------------------------
namespace {
// The thread environment block internal type.
struct TEB {
NT_TIB Tib;
// Rest of struct is ignored.
};
// Returns the thread environment block pointer for |thread_handle|.
const TEB* GetThreadEnvironmentBlock(HANDLE thread_handle) {
// Define the internal types we need to invoke NtQueryInformationThread.
enum THREAD_INFORMATION_CLASS { ThreadBasicInformation };
struct CLIENT_ID {
HANDLE UniqueProcess;
HANDLE UniqueThread;
};
struct THREAD_BASIC_INFORMATION {
NTSTATUS ExitStatus;
TEB* Teb;
CLIENT_ID ClientId;
KAFFINITY AffinityMask;
LONG Priority;
LONG BasePriority;
};
using NtQueryInformationThreadFunction =
NTSTATUS (WINAPI*)(HANDLE, THREAD_INFORMATION_CLASS, PVOID, ULONG,
PULONG);
const NtQueryInformationThreadFunction nt_query_information_thread =
reinterpret_cast<NtQueryInformationThreadFunction>(
::GetProcAddress(::GetModuleHandle(L"ntdll.dll"),
"NtQueryInformationThread"));
if (!nt_query_information_thread)
return nullptr;
THREAD_BASIC_INFORMATION basic_info = {0};
NTSTATUS status =
nt_query_information_thread(thread_handle, ThreadBasicInformation,
&basic_info, sizeof(THREAD_BASIC_INFORMATION),
nullptr);
if (status != 0)
return nullptr;
return basic_info.Teb;
}
#if defined(_WIN64)
// If the value at |pointer| points to the original stack, rewrite it to point
// to the corresponding location in the copied stack.
void RewritePointerIfInOriginalStack(uintptr_t top, uintptr_t bottom,
void* stack_copy, const void** pointer) {
const uintptr_t value = reinterpret_cast<uintptr_t>(*pointer);
if (value >= bottom && value < top) {
*pointer = reinterpret_cast<const void*>(
static_cast<unsigned char*>(stack_copy) + (value - bottom));
}
}
#endif
// Rewrites possible pointers to locations within the stack to point to the
// corresponding locations in the copy, and rewrites the non-volatile registers
// in |context| likewise. This is necessary to handle stack frames with dynamic
// stack allocation, where a pointer to the beginning of the dynamic allocation
// area is stored on the stack and/or in a non-volatile register.
//
// Eager rewriting of anything that looks like a pointer to the stack, as done
// in this function, does not adversely affect the stack unwinding. The only
// other values on the stack the unwinding depends on are return addresses,
// which should not point within the stack memory. The rewriting is guaranteed
// to catch all pointers because the stacks are guaranteed by the ABI to be
// sizeof(void*) aligned.
//
// Note: this function must not access memory in the original stack as it may
// have been changed or deallocated by this point. This is why |top| and
// |bottom| are passed as uintptr_t.
void RewritePointersToStackMemory(uintptr_t top, uintptr_t bottom,
CONTEXT* context, void* stack_copy) {
#if defined(_WIN64)
DWORD64 CONTEXT::* const nonvolatile_registers[] = {
&CONTEXT::R12,
&CONTEXT::R13,
&CONTEXT::R14,
&CONTEXT::R15,
&CONTEXT::Rdi,
&CONTEXT::Rsi,
&CONTEXT::Rbx,
&CONTEXT::Rbp,
&CONTEXT::Rsp
};
// Rewrite pointers in the context.
for (size_t i = 0; i < arraysize(nonvolatile_registers); ++i) {
DWORD64* const reg = &(context->*nonvolatile_registers[i]);
RewritePointerIfInOriginalStack(top, bottom, stack_copy,
reinterpret_cast<const void**>(reg));
}
// Rewrite pointers on the stack.
const void** start = reinterpret_cast<const void**>(stack_copy);
const void** end = reinterpret_cast<const void**>(
reinterpret_cast<char*>(stack_copy) + (top - bottom));
for (const void** loc = start; loc < end; ++loc)
RewritePointerIfInOriginalStack(top, bottom, stack_copy, loc);
#endif
}
// Walks the stack represented by |context| from the current frame downwards,
// recording the instruction pointers for each frame in |instruction_pointers|.
int RecordStack(CONTEXT* context, int max_stack_size,
const void* instruction_pointers[],
ScopedModuleHandle modules[]) {
#ifdef _WIN64
Win32StackFrameUnwinder frame_unwinder;
int i = 0;
for (; (i < max_stack_size) && context->Rip; ++i) {
instruction_pointers[i] = reinterpret_cast<const void*>(context->Rip);
if (!frame_unwinder.TryUnwind(context, &modules[i]))
return i;
}
return i;
#else
return 0;
#endif
}
// Gets the unique build ID for a module. Windows build IDs are created by a
// concatenation of a GUID and AGE fields found in the headers of a module. The
// GUID is stored in the first 16 bytes and the AGE is stored in the last 4
// bytes. Returns the empty string if the function fails to get the build ID.
//
// Example:
// dumpbin chrome.exe /headers | find "Format:"
// ... Format: RSDS, {16B2A428-1DED-442E-9A36-FCE8CBD29726}, 10, ...
//
// The resulting buildID string of this instance of chrome.exe is
// "16B2A4281DED442E9A36FCE8CBD2972610".
//
// Note that the AGE field is encoded in decimal, not hex.
std::string GetBuildIDForModule(HMODULE module_handle) {
GUID guid;
DWORD age;
win::PEImage(module_handle).GetDebugId(&guid, &age);
const int kGUIDSize = 39;
std::wstring build_id;
int result =
::StringFromGUID2(guid, WriteInto(&build_id, kGUIDSize), kGUIDSize);
if (result != kGUIDSize)
return std::string();
RemoveChars(build_id, L"{}-", &build_id);
build_id += StringPrintf(L"%d", age);
return WideToUTF8(build_id);
}
// ScopedDisablePriorityBoost -------------------------------------------------
// Disables priority boost on a thread for the lifetime of the object.
class ScopedDisablePriorityBoost {
public:
ScopedDisablePriorityBoost(HANDLE thread_handle);
~ScopedDisablePriorityBoost();
private:
HANDLE thread_handle_;
BOOL got_previous_boost_state_;
BOOL boost_state_was_disabled_;
DISALLOW_COPY_AND_ASSIGN(ScopedDisablePriorityBoost);
};
ScopedDisablePriorityBoost::ScopedDisablePriorityBoost(HANDLE thread_handle)
: thread_handle_(thread_handle),
got_previous_boost_state_(false),
boost_state_was_disabled_(false) {
got_previous_boost_state_ =
::GetThreadPriorityBoost(thread_handle_, &boost_state_was_disabled_);
if (got_previous_boost_state_) {
// Confusingly, TRUE disables priority boost.
::SetThreadPriorityBoost(thread_handle_, TRUE);
}
}
ScopedDisablePriorityBoost::~ScopedDisablePriorityBoost() {
if (got_previous_boost_state_)
::SetThreadPriorityBoost(thread_handle_, boost_state_was_disabled_);
}
// ScopedSuspendThread --------------------------------------------------------
// Suspends a thread for the lifetime of the object.
class ScopedSuspendThread {
public:
ScopedSuspendThread(HANDLE thread_handle);
~ScopedSuspendThread();
bool was_successful() const { return was_successful_; }
private:
HANDLE thread_handle_;
bool was_successful_;
DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
};
ScopedSuspendThread::ScopedSuspendThread(HANDLE thread_handle)
: thread_handle_(thread_handle),
was_successful_(::SuspendThread(thread_handle) != -1) {
}
ScopedSuspendThread::~ScopedSuspendThread() {
if (!was_successful_)
return;
// Disable the priority boost that the thread would otherwise receive on
// resume. We do this to avoid artificially altering the dynamics of the
// executing application any more than we already are by suspending and
// resuming the thread.
//
// Note that this can racily disable a priority boost that otherwise would
// have been given to the thread, if the thread is waiting on other wait
// conditions at the time of SuspendThread and those conditions are satisfied
// before priority boost is reenabled. The measured length of this window is
// ~100us, so this should occur fairly rarely.
ScopedDisablePriorityBoost disable_priority_boost(thread_handle_);
bool resume_thread_succeeded = ::ResumeThread(thread_handle_) != -1;
CHECK(resume_thread_succeeded) << "ResumeThread failed: " << GetLastError();
}
// Suspends the thread with |thread_handle|, copies its stack and resumes the
// thread, then records the stack into |instruction_pointers|. Returns the size
// of the stack.
//
// IMPORTANT NOTE: No allocations from the default heap may occur in the
// ScopedSuspendThread scope, including indirectly via use of DCHECK/CHECK or
// other logging statements. Otherwise this code can deadlock on heap locks in
// the default heap acquired by the target thread before it was suspended. This
// is why we pass instruction pointers as preallocated arrays.
int SuspendThreadAndRecordStack(HANDLE thread_handle,
const void* base_address,
void* stack_copy_buffer,
size_t stack_copy_buffer_size,
int max_stack_size,
const void* instruction_pointers[],
ScopedModuleHandle modules[],
NativeStackSamplerTestDelegate* test_delegate) {
CONTEXT thread_context = {0};
thread_context.ContextFlags = CONTEXT_FULL;
// The stack bounds are saved to uintptr_ts for use outside
// ScopedSuspendThread, as the thread's memory is not safe to dereference
// beyond that point.
const uintptr_t top = reinterpret_cast<uintptr_t>(base_address);
uintptr_t bottom = 0u;
{
ScopedSuspendThread suspend_thread(thread_handle);
if (!suspend_thread.was_successful())
return 0;
if (!::GetThreadContext(thread_handle, &thread_context))
return 0;
#if defined(_WIN64)
bottom = thread_context.Rsp;
#else
bottom = thread_context.Esp;
#endif
if ((top - bottom) > stack_copy_buffer_size)
return 0;
std::memcpy(stack_copy_buffer, reinterpret_cast<const void*>(bottom),
top - bottom);
}
if (test_delegate)
test_delegate->OnPreStackWalk();
RewritePointersToStackMemory(top, bottom, &thread_context, stack_copy_buffer);
return RecordStack(&thread_context, max_stack_size, instruction_pointers,
modules);
}
// NativeStackSamplerWin ------------------------------------------------------
class NativeStackSamplerWin : public NativeStackSampler {
public:
NativeStackSamplerWin(win::ScopedHandle thread_handle,
NativeStackSamplerTestDelegate* test_delegate);
~NativeStackSamplerWin() override;
// StackSamplingProfiler::NativeStackSampler:
void ProfileRecordingStarting(
std::vector<StackSamplingProfiler::Module>* modules) override;
void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
void ProfileRecordingStopped() override;
private:
enum {
// Intended to hold the largest stack used by Chrome. The default Win32
// reserved stack size is 1 MB and Chrome Windows threads currently always
// use the default, but this allows for expansion if it occurs. The size
// beyond the actual stack size consists of unallocated virtual memory pages
// so carries little cost (just a bit of wated address space).
kStackCopyBufferSize = 2 * 1024 * 1024
};
// Attempts to query the module filename, base address, and id for
// |module_handle|, and store them in |module|. Returns true if it succeeded.
static bool GetModuleForHandle(HMODULE module_handle,
StackSamplingProfiler::Module* module);
// Gets the index for the Module corresponding to |module_handle| in
// |modules|, adding it if it's not already present. Returns
// StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
// determined for |module|.
size_t GetModuleIndex(HMODULE module_handle,
std::vector<StackSamplingProfiler::Module>* modules);
// Copies the stack information represented by |instruction_pointers| into
// |sample| and |modules|.
void CopyToSample(const void* const instruction_pointers[],
const ScopedModuleHandle module_handles[],
int stack_depth,
StackSamplingProfiler::Sample* sample,
std::vector<StackSamplingProfiler::Module>* modules);
win::ScopedHandle thread_handle_;
NativeStackSamplerTestDelegate* const test_delegate_;
// The stack base address corresponding to |thread_handle_|.
const void* const thread_stack_base_address_;
// Buffer to use for copies of the stack. We use the same buffer for all the
// samples to avoid the overhead of multiple allocations and frees.
const scoped_ptr<unsigned char[]> stack_copy_buffer_;
// Weak. Points to the modules associated with the profile being recorded
// between ProfileRecordingStarting() and ProfileRecordingStopped().
std::vector<StackSamplingProfiler::Module>* current_modules_;
// Maps a module handle to the corresponding Module's index within
// current_modules_.
std::map<HMODULE, size_t> profile_module_index_;
DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerWin);
};
NativeStackSamplerWin::NativeStackSamplerWin(
win::ScopedHandle thread_handle,
NativeStackSamplerTestDelegate* test_delegate)
: thread_handle_(thread_handle.Take()), test_delegate_(test_delegate),
thread_stack_base_address_(
GetThreadEnvironmentBlock(thread_handle_.Get())->Tib.StackBase),
stack_copy_buffer_(new unsigned char[kStackCopyBufferSize]) {
}
NativeStackSamplerWin::~NativeStackSamplerWin() {
}
void NativeStackSamplerWin::ProfileRecordingStarting(
std::vector<StackSamplingProfiler::Module>* modules) {
current_modules_ = modules;
profile_module_index_.clear();
}
void NativeStackSamplerWin::RecordStackSample(
StackSamplingProfiler::Sample* sample) {
DCHECK(current_modules_);
if (!stack_copy_buffer_)
return;
const int max_stack_size = 64;
const void* instruction_pointers[max_stack_size] = {0};
ScopedModuleHandle modules[max_stack_size];
int stack_depth = SuspendThreadAndRecordStack(thread_handle_.Get(),
thread_stack_base_address_,
stack_copy_buffer_.get(),
kStackCopyBufferSize,
max_stack_size,
instruction_pointers,
modules,
test_delegate_);
CopyToSample(instruction_pointers, modules, stack_depth, sample,
current_modules_);
}
void NativeStackSamplerWin::ProfileRecordingStopped() {
current_modules_ = nullptr;
}
// static
bool NativeStackSamplerWin::GetModuleForHandle(
HMODULE module_handle,
StackSamplingProfiler::Module* module) {
wchar_t module_name[MAX_PATH];
DWORD result_length =
GetModuleFileName(module_handle, module_name, arraysize(module_name));
if (result_length == 0)
return false;
module->filename = base::FilePath(module_name);
module->base_address = reinterpret_cast<uintptr_t>(module_handle);
module->id = GetBuildIDForModule(module_handle);
if (module->id.empty())
return false;
return true;
}
size_t NativeStackSamplerWin::GetModuleIndex(
HMODULE module_handle,
std::vector<StackSamplingProfiler::Module>* modules) {
if (!module_handle)
return StackSamplingProfiler::Frame::kUnknownModuleIndex;
auto loc = profile_module_index_.find(module_handle);
if (loc == profile_module_index_.end()) {
StackSamplingProfiler::Module module;
if (!GetModuleForHandle(module_handle, &module))
return StackSamplingProfiler::Frame::kUnknownModuleIndex;
modules->push_back(module);
loc = profile_module_index_.insert(std::make_pair(
module_handle, modules->size() - 1)).first;
}
return loc->second;
}
void NativeStackSamplerWin::CopyToSample(
const void* const instruction_pointers[],
const ScopedModuleHandle module_handles[],
int stack_depth,
StackSamplingProfiler::Sample* sample,
std::vector<StackSamplingProfiler::Module>* modules) {
sample->clear();
sample->reserve(stack_depth);
for (int i = 0; i < stack_depth; ++i) {
sample->push_back(StackSamplingProfiler::Frame(
reinterpret_cast<uintptr_t>(instruction_pointers[i]),
GetModuleIndex(module_handles[i].Get(), modules)));
}
}
} // namespace
scoped_ptr<NativeStackSampler> NativeStackSampler::Create(
PlatformThreadId thread_id,
NativeStackSamplerTestDelegate* test_delegate) {
#if _WIN64
// Get the thread's handle.
HANDLE thread_handle = ::OpenThread(
THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION,
FALSE,
thread_id);
if (thread_handle) {
return scoped_ptr<NativeStackSampler>(new NativeStackSamplerWin(
win::ScopedHandle(thread_handle),
test_delegate));
}
#endif
return scoped_ptr<NativeStackSampler>();
}
} // namespace base