mirror of
https://github.com/microsoft/WSL.git
synced 2025-12-11 04:35:57 -06:00
Add crash dump collection (#13741)
This commit is contained in:
parent
f0d257f760
commit
911a3aa758
@ -16,7 +16,9 @@ Abstract:
|
|||||||
#include "SocketChannel.h"
|
#include "SocketChannel.h"
|
||||||
#include "message.h"
|
#include "message.h"
|
||||||
#include "localhost.h"
|
#include "localhost.h"
|
||||||
|
#include "common.h"
|
||||||
#include <utmp.h>
|
#include <utmp.h>
|
||||||
|
#include <unistd.h>
|
||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
@ -52,6 +54,8 @@ int Chroot(const char* Target);
|
|||||||
|
|
||||||
extern int g_LogFd;
|
extern int g_LogFd;
|
||||||
|
|
||||||
|
extern void WSLAEnableCrashDumpCollection();
|
||||||
|
|
||||||
struct WSLAState
|
struct WSLAState
|
||||||
{
|
{
|
||||||
std::optional<std::filesystem::path> ModulesMountPoint;
|
std::optional<std::filesystem::path> ModulesMountPoint;
|
||||||
@ -59,6 +63,19 @@ struct WSLAState
|
|||||||
|
|
||||||
static WSLAState g_state;
|
static WSLAState g_state;
|
||||||
|
|
||||||
|
void WSLAEnableCrashDumpCollection()
|
||||||
|
{
|
||||||
|
if (symlink("/wsl-init", "/" LX_INIT_WSL_CAPTURE_CRASH) < 0)
|
||||||
|
{
|
||||||
|
LOG_ERROR("symlink({}, {}) failed {}", "/wsl-init", "/" LX_INIT_WSL_CAPTURE_CRASH, errno);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the first character is a pipe, then the kernel will interpret this path as a command.
|
||||||
|
constexpr auto core_pattern = "|/" LX_INIT_WSL_CAPTURE_CRASH " %t %E %p %s";
|
||||||
|
WriteToFile("/proc/sys/kernel/core_pattern", core_pattern);
|
||||||
|
}
|
||||||
|
|
||||||
void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_GET_DISK& Message, const gsl::span<gsl::byte>& Buffer)
|
void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_GET_DISK& Message, const gsl::span<gsl::byte>& Buffer)
|
||||||
{
|
{
|
||||||
wsl::shared::MessageWriter<WSLA_GET_DISK_RESULT> writer;
|
wsl::shared::MessageWriter<WSLA_GET_DISK_RESULT> writer;
|
||||||
@ -496,6 +513,9 @@ void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_MOUNT& Me
|
|||||||
if (WI_IsFlagSet(Message.Flags, WSLA_MOUNT::Chroot))
|
if (WI_IsFlagSet(Message.Flags, WSLA_MOUNT::Chroot))
|
||||||
{
|
{
|
||||||
THROW_LAST_ERROR_IF(Chroot(target) < 0);
|
THROW_LAST_ERROR_IF(Chroot(target) < 0);
|
||||||
|
|
||||||
|
// Reconfigure crash dump collection after chroot so symlink & core_pattern resolve correctly.
|
||||||
|
WSLAEnableCrashDumpCollection();
|
||||||
}
|
}
|
||||||
|
|
||||||
response.Result = 0;
|
response.Result = 0;
|
||||||
@ -796,6 +816,9 @@ int WSLAEntryPoint(int Argc, char* Argv[])
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enable crash dump collection.
|
||||||
|
WSLAEnableCrashDumpCollection();
|
||||||
|
|
||||||
//
|
//
|
||||||
// Open kmesg for logging and ensure that the file descriptor is not set to one of the standard file descriptors.
|
// Open kmesg for logging and ensure that the file descriptor is not set to one of the standard file descriptors.
|
||||||
//
|
//
|
||||||
|
|||||||
@ -28,8 +28,10 @@ using wsl::windows::service::wsla::WSLAProcess;
|
|||||||
using wsl::windows::service::wsla::WSLAVirtualMachine;
|
using wsl::windows::service::wsla::WSLAVirtualMachine;
|
||||||
|
|
||||||
constexpr auto MAX_VM_CRASH_FILES = 3;
|
constexpr auto MAX_VM_CRASH_FILES = 3;
|
||||||
|
constexpr auto MAX_CRASH_DUMPS = 10;
|
||||||
constexpr auto SAVED_STATE_FILE_EXTENSION = L".vmrs";
|
constexpr auto SAVED_STATE_FILE_EXTENSION = L".vmrs";
|
||||||
constexpr auto SAVED_STATE_FILE_PREFIX = L"saved-state-";
|
constexpr auto SAVED_STATE_FILE_PREFIX = L"saved-state-";
|
||||||
|
constexpr auto RECEIVE_TIMEOUT = 30 * 1000;
|
||||||
|
|
||||||
WSLAVirtualMachine::WSLAVirtualMachine(const VIRTUAL_MACHINE_SETTINGS& Settings, PSID UserSid, WSLAUserSessionImpl* Session) :
|
WSLAVirtualMachine::WSLAVirtualMachine(const VIRTUAL_MACHINE_SETTINGS& Settings, PSID UserSid, WSLAUserSessionImpl* Session) :
|
||||||
m_settings(Settings), m_userSid(UserSid), m_userSession(Session)
|
m_settings(Settings), m_userSid(UserSid), m_userSession(Session)
|
||||||
@ -130,6 +132,11 @@ WSLAVirtualMachine::~WSLAVirtualMachine()
|
|||||||
m_processExitThread.join();
|
m_processExitThread.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_crashDumpCollectionThread.joinable())
|
||||||
|
{
|
||||||
|
m_crashDumpCollectionThread.join();
|
||||||
|
}
|
||||||
|
|
||||||
// Clear the state of all remaining processes now that the VM has exited.
|
// Clear the state of all remaining processes now that the VM has exited.
|
||||||
// The WSLAProcess object reference will be released when the last COM reference is closed.
|
// The WSLAProcess object reference will be released when the last COM reference is closed.
|
||||||
for (auto& e : m_trackedProcesses)
|
for (auto& e : m_trackedProcesses)
|
||||||
@ -313,6 +320,13 @@ void WSLAVirtualMachine::Start()
|
|||||||
|
|
||||||
wsl::windows::common::hcs::StartComputeSystem(m_computeSystem.get(), json.c_str());
|
wsl::windows::common::hcs::StartComputeSystem(m_computeSystem.get(), json.c_str());
|
||||||
|
|
||||||
|
// Create a socket listening for crash dumps.
|
||||||
|
auto crashDumpSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_CRASH_DUMP_PORT);
|
||||||
|
THROW_LAST_ERROR_IF(!crashDumpSocket);
|
||||||
|
|
||||||
|
m_crashDumpCollectionThread =
|
||||||
|
std::thread{[this, socket = std::move(crashDumpSocket)]() mutable { CollectCrashDumps(std::move(socket)); }};
|
||||||
|
|
||||||
// Create a socket listening for connections from mini_init.
|
// Create a socket listening for connections from mini_init.
|
||||||
auto listenSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_INIT_PORT);
|
auto listenSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_INIT_PORT);
|
||||||
auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), m_settings.BootTimeoutMs, m_vmTerminatingEvent.get());
|
auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), m_settings.BootTimeoutMs, m_vmTerminatingEvent.get());
|
||||||
@ -1317,4 +1331,65 @@ void WSLAVirtualMachine::OnProcessReleased(int Pid)
|
|||||||
std::lock_guard lock{m_lock};
|
std::lock_guard lock{m_lock};
|
||||||
|
|
||||||
auto erased = std::erase_if(m_trackedProcesses, [Pid](const auto* e) { return e->GetPid() == Pid; });
|
auto erased = std::erase_if(m_trackedProcesses, [Pid](const auto* e) { return e->GetPid() == Pid; });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Crash dump collection loop, run on m_crashDumpCollectionThread.
//
// Accepts connections on listenSocket until m_vmExitEvent is signaled. For each connection it:
//   1. receives an LX_PROCESS_CRASH header (timestamp, pid, signal, process name in Buffer),
//   2. builds a sanitized "wsl-crash-<timestamp>-<pid>-<process>-<signal>.dmp" name under m_crashDumpFolder,
//   3. while impersonating m_userToken, ensures the folder exists and asks EnforceFileLimit to apply
//      MAX_CRASH_DUMPS to the files this code created earlier,
//   4. creates the dump file, acknowledges the sender with a 0 result and relays the socket into the file.
//
// A failure while handling one connection is logged by CATCH_LOG and the loop moves on to the next one.
void WSLAVirtualMachine::CollectCrashDumps(wil::unique_socket&& listenSocket) const
{
    wsl::windows::common::wslutil::SetThreadDescription(L"CrashDumpCollection");

    while (!m_vmExitEvent.is_signaled())
    {
        try
        {
            auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), INFINITE, m_vmExitEvent.get());

            // Bound how long a receive may block. The timeout goes on the accepted connection, since that is
            // the socket the header is read from below; setting it on the listening socket does not cover
            // the connection that was just accepted.
            THROW_LAST_ERROR_IF(
                setsockopt(socket.get(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char*>(&RECEIVE_TIMEOUT), sizeof(RECEIVE_TIMEOUT)) ==
                SOCKET_ERROR);

            auto channel = wsl::shared::SocketChannel{std::move(socket), "crash_dump", m_vmExitEvent.get()};

            const auto& message = channel.ReceiveMessage<LX_PROCESS_CRASH>();
            const char* process = reinterpret_cast<const char*>(&message.Buffer);

            constexpr auto dumpExtension = ".dmp";
            constexpr auto dumpPrefix = "wsl-crash";

            auto filename = std::format("{}-{}-{}-{}-{}{}", dumpPrefix, message.Timestamp, message.Pid, process, message.Signal, dumpExtension);

            // The process name comes from the guest, so keep only [A-Za-z0-9.-] and replace everything else.
            // std::isalnum requires a value representable as unsigned char, hence the parameter type.
            std::replace_if(
                filename.begin(), filename.end(), [](unsigned char c) { return !std::isalnum(c) && c != '.' && c != '-'; }, '_');

            auto fullPath = m_crashDumpFolder / filename;

            WSL_LOG(
                "WSLALinuxCrash",
                TraceLoggingValue(fullPath.c_str(), "FullPath"),
                TraceLoggingValue(message.Pid, "Pid"),
                TraceLoggingValue(message.Signal, "Signal"),
                TraceLoggingValue(process, "process"));

            auto runAsUser = wil::impersonate_token(m_userToken.get());
            wsl::windows::common::filesystem::EnsureDirectory(m_crashDumpFolder.c_str());

            // Only delete files that:
            // - have the temporary flag set
            // - start with 'wsl-crash'
            // - end in .dmp
            //
            // This logic is here to prevent accidental user file deletion
            auto pred = [&dumpExtension, &dumpPrefix](const auto& e) {
                // INVALID_FILE_ATTRIBUTES has every bit set, so a failed query must be rejected explicitly
                // or it would look like the temporary flag is present.
                const auto attributes = GetFileAttributes(e.path().c_str());
                return attributes != INVALID_FILE_ATTRIBUTES && WI_IsFlagSet(attributes, FILE_ATTRIBUTE_TEMPORARY) &&
                       e.path().has_extension() && e.path().extension() == dumpExtension && e.path().has_filename() &&
                       e.path().filename().string().find(dumpPrefix) == 0;
            };

            wsl::windows::common::wslutil::EnforceFileLimit(m_crashDumpFolder.c_str(), MAX_CRASH_DUMPS, pred);

            // CREATE_NEW: never overwrite an existing file. FILE_ATTRIBUTE_TEMPORARY marks the file so the
            // predicate above recognizes it on later runs.
            wil::unique_hfile file{CreateFileW(fullPath.c_str(), GENERIC_WRITE, 0, nullptr, CREATE_NEW, FILE_ATTRIBUTE_TEMPORARY, nullptr)};
            THROW_LAST_ERROR_IF(!file);

            // Acknowledge the header, then copy whatever the sender writes into the dump file.
            channel.SendResultMessage<std::int32_t>(0);
            wsl::windows::common::relay::InterruptableRelay(reinterpret_cast<HANDLE>(channel.Socket()), file.get(), nullptr);
        }
        CATCH_LOG();
    }
}
|
||||||
@ -93,6 +93,7 @@ private:
|
|||||||
void CreateVmSavedStateFile();
|
void CreateVmSavedStateFile();
|
||||||
void EnforceVmSavedStateFileLimit();
|
void EnforceVmSavedStateFileLimit();
|
||||||
void WriteCrashLog(const std::wstring& crashLog);
|
void WriteCrashLog(const std::wstring& crashLog);
|
||||||
|
void CollectCrashDumps(wil::unique_socket&& listenSocket) const;
|
||||||
|
|
||||||
Microsoft::WRL::ComPtr<WSLAProcess> CreateLinuxProcessImpl(
|
Microsoft::WRL::ComPtr<WSLAProcess> CreateLinuxProcessImpl(
|
||||||
_In_ const WSLA_PROCESS_OPTIONS& Options, int* Errno = nullptr, const TPrepareCommandLine& PrepareCommandLine = [](const auto&) {});
|
_In_ const WSLA_PROCESS_OPTIONS& Options, int* Errno = nullptr, const TPrepareCommandLine& PrepareCommandLine = [](const auto&) {});
|
||||||
@ -110,6 +111,7 @@ private:
|
|||||||
|
|
||||||
VIRTUAL_MACHINE_SETTINGS m_settings;
|
VIRTUAL_MACHINE_SETTINGS m_settings;
|
||||||
std::thread m_processExitThread;
|
std::thread m_processExitThread;
|
||||||
|
std::thread m_crashDumpCollectionThread;
|
||||||
|
|
||||||
GUID m_vmId{};
|
GUID m_vmId{};
|
||||||
std::wstring m_vmIdString;
|
std::wstring m_vmIdString;
|
||||||
|
|||||||
@ -969,4 +969,81 @@ class WSLATests
|
|||||||
VERIFY_ARE_EQUAL(error, -1);
|
VERIFY_ARE_EQUAL(error, -1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that crashing a Linux process inside the VM produces a new, non-empty dump file in the
// host-side "wsla-crashes" folder under the temp directory.
TEST_METHOD(CrashDumpCollection)
{
    WSL2_TEST_ONLY();

    VIRTUAL_MACHINE_SETTINGS settings{};
    settings.CpuCount = 4;
    settings.DisplayName = L"WSLA";
    settings.MemoryMb = 2048;
    settings.BootTimeoutMs = 30 * 1000;
    settings.RootVhd = testVhd.c_str();

    auto session = CreateSession(settings);
    int processId = 0;

    // Cache the existing crash dumps so we can check that a new one is created.
    auto crashDumpsDir = std::filesystem::temp_directory_path() / "wsla-crashes";
    std::set<std::filesystem::path> existingDumps;

    if (std::filesystem::exists(crashDumpsDir))
    {
        existingDumps = {std::filesystem::directory_iterator(crashDumpsDir), std::filesystem::directory_iterator{}};
    }

    // Create a stuck process and crash it.
    {
        WSLAProcessLauncher launcher("/bin/cat", {"/bin/cat"}, {}, ProcessFlags::Stdin | ProcessFlags::Stdout | ProcessFlags::Stderr);

        auto process = launcher.Launch(*session);

        // Get the process id. This is needed to identify the crash dump file.
        VERIFY_SUCCEEDED(process.Get().GetPid(&processId));

        // Send SIGSEGV(11) to crash the process.
        VERIFY_SUCCEEDED(process.Get().Signal(11));

        auto result = process.WaitAndCaptureOutput();
        VERIFY_ARE_EQUAL(result.Code, 11);
        VERIFY_ARE_EQUAL(result.Signalled, true);
        VERIFY_ARE_EQUAL(result.Output[1], "");
        VERIFY_ARE_EQUAL(result.Output[2], "");

        // The process has already exited, so signalling it again must be rejected.
        VERIFY_ARE_EQUAL(process.Get().Signal(9), HRESULT_FROM_WIN32(ERROR_INVALID_STATE));
    }

    // Dump files are named with the format: wsl-crash-<timestamp>-<pid>-<processname>-<signal>.dmp
    // Check if a new file was added in crashDumpsDir matching the pattern and not in existingDumps.
    std::string expectedPattern = std::format("wsl-crash-*-{}-_usr_bin_cat-11.dmp", processId);

    // The dump is written asynchronously, so poll for it. Each attempt throws when no match is found yet
    // (or when the folder does not exist yet, via directory_iterator).
    auto dumpFile = wsl::shared::retry::RetryWithTimeout<std::filesystem::path>(
        [crashDumpsDir, expectedPattern, existingDumps]() {
            for (const auto& entry : std::filesystem::directory_iterator(crashDumpsDir))
            {
                const auto& filePath = entry.path();
                if (existingDumps.find(filePath) == existingDumps.end() &&
                    PathMatchSpecA(filePath.filename().string().c_str(), expectedPattern.c_str()))
                {
                    return filePath;
                }
            }

            throw wil::ResultException(HRESULT_FROM_WIN32(ERROR_NOT_FOUND));
        },
        std::chrono::milliseconds{100},
        std::chrono::seconds{10});

    // Ensure that the dump file is cleaned up after test completion.
    auto cleanup = wil::scope_exit([&] {
        // Best-effort: use the non-throwing overload so a cleanup failure cannot escape the scope-exit
        // handler. Removing a file that is already gone is not reported as an error.
        std::error_code error;
        std::filesystem::remove(dumpFile, error);
    });

    VERIFY_IS_TRUE(std::filesystem::exists(dumpFile));
    VERIFY_IS_TRUE(std::filesystem::file_size(dumpFile) > 0);
}
|
||||||
};
|
};
|
||||||
Loading…
x
Reference in New Issue
Block a user