Add crash dump collection (#13741)

This commit is contained in:
Kevin Vega 2025-11-20 09:21:20 -08:00 committed by GitHub
parent f0d257f760
commit 911a3aa758
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 177 additions and 0 deletions

View File

@ -16,7 +16,9 @@ Abstract:
#include "SocketChannel.h"
#include "message.h"
#include "localhost.h"
#include "common.h"
#include <utmp.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/syscall.h>
@ -52,6 +54,8 @@ int Chroot(const char* Target);
extern int g_LogFd;
extern void WSLAEnableCrashDumpCollection();
struct WSLAState
{
std::optional<std::filesystem::path> ModulesMountPoint;
@ -59,6 +63,19 @@ struct WSLAState
static WSLAState g_state;
// Configures the kernel to pipe core dumps of crashing processes to the WSL init binary.
//
// Creates a symlink named "/" LX_INIT_WSL_CAPTURE_CRASH that points at /wsl-init, then writes a piped
// core_pattern that references that link. Failures to create the link are logged and are not reported
// to the caller.
//
// This function is invoked more than once (at startup and again after a chroot), so a link that is
// already present is not treated as an error; core_pattern is still (re)written in that case.
// NOTE(review): an existing entry is assumed to be the link created by an earlier call - confirm.
void WSLAEnableCrashDumpCollection()
{
    constexpr auto linkTarget = "/wsl-init";
    constexpr auto linkPath = "/" LX_INIT_WSL_CAPTURE_CRASH;

    if (symlink(linkTarget, linkPath) < 0 && errno != EEXIST)
    {
        LOG_ERROR("symlink({}, {}) failed {}", linkTarget, linkPath, errno);
        return;
    }

    // If the first character is a pipe, then the kernel will interpret this path as a command.
    // Per core(5): %t = time of the dump, %E = executable path ('/' replaced by '!'),
    // %p = PID of the dumped process, %s = number of the signal causing the dump.
    constexpr auto core_pattern = "|/" LX_INIT_WSL_CAPTURE_CRASH " %t %E %p %s";
    WriteToFile("/proc/sys/kernel/core_pattern", core_pattern);
}
void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_GET_DISK& Message, const gsl::span<gsl::byte>& Buffer)
{
wsl::shared::MessageWriter<WSLA_GET_DISK_RESULT> writer;
@ -496,6 +513,9 @@ void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_MOUNT& Me
if (WI_IsFlagSet(Message.Flags, WSLA_MOUNT::Chroot))
{
THROW_LAST_ERROR_IF(Chroot(target) < 0);
// Reconfigure crash dump collection after chroot so symlink & core_pattern resolve correctly.
WSLAEnableCrashDumpCollection();
}
response.Result = 0;
@ -796,6 +816,9 @@ int WSLAEntryPoint(int Argc, char* Argv[])
return -1;
}
// Enable crash dump collection.
WSLAEnableCrashDumpCollection();
//
// Open kmsg for logging and ensure that the file descriptor is not set to one of the standard file descriptors.
//

View File

@ -28,8 +28,10 @@ using wsl::windows::service::wsla::WSLAProcess;
using wsl::windows::service::wsla::WSLAVirtualMachine;
// NOTE(review): presumably the number of VM crash files to retain; its use is not visible in this section - confirm.
constexpr auto MAX_VM_CRASH_FILES = 3;
// Limit passed to EnforceFileLimit for the crash dump folder in CollectCrashDumps.
constexpr auto MAX_CRASH_DUMPS = 10;
// NOTE(review): presumably the extension and name prefix of VM saved-state files; their use is not visible here - confirm.
constexpr auto SAVED_STATE_FILE_EXTENSION = L".vmrs";
constexpr auto SAVED_STATE_FILE_PREFIX = L"saved-state-";
// Value handed to setsockopt(SO_RCVTIMEO) in CollectCrashDumps; Winsock interprets it as milliseconds (30 seconds).
constexpr auto RECEIVE_TIMEOUT = 30 * 1000;
WSLAVirtualMachine::WSLAVirtualMachine(const VIRTUAL_MACHINE_SETTINGS& Settings, PSID UserSid, WSLAUserSessionImpl* Session) :
m_settings(Settings), m_userSid(UserSid), m_userSession(Session)
@ -130,6 +132,11 @@ WSLAVirtualMachine::~WSLAVirtualMachine()
m_processExitThread.join();
}
if (m_crashDumpCollectionThread.joinable())
{
m_crashDumpCollectionThread.join();
}
// Clear the state of all remaining processes now that the VM has exited.
// The WSLAProcess object reference will be released when the last COM reference is closed.
for (auto& e : m_trackedProcesses)
@ -313,6 +320,13 @@ void WSLAVirtualMachine::Start()
wsl::windows::common::hcs::StartComputeSystem(m_computeSystem.get(), json.c_str());
// Create a socket listening for crash dumps.
auto crashDumpSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_CRASH_DUMP_PORT);
THROW_LAST_ERROR_IF(!crashDumpSocket);
m_crashDumpCollectionThread =
std::thread{[this, socket = std::move(crashDumpSocket)]() mutable { CollectCrashDumps(std::move(socket)); }};
// Create a socket listening for connections from mini_init.
auto listenSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_INIT_PORT);
auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), m_settings.BootTimeoutMs, m_vmTerminatingEvent.get());
@ -1317,4 +1331,65 @@ void WSLAVirtualMachine::OnProcessReleased(int Pid)
std::lock_guard lock{m_lock};
auto erased = std::erase_if(m_trackedProcesses, [Pid](const auto* e) { return e->GetPid() == Pid; });
}
// Thread routine that receives crash dumps of Linux processes from the VM and stores them on the host.
//
// Loops until m_vmExitEvent is signaled. Each iteration:
//   - accepts one connection on listenSocket,
//   - reads an LX_PROCESS_CRASH message describing the crashed process,
//   - while impersonating m_userToken, trims old dumps and creates a new dump file in m_crashDumpFolder,
//   - sends a result message of 0 and relays the remaining socket data into the file.
// Exceptions thrown while handling a connection are caught and logged (CATCH_LOG), then the loop continues.
//
// listenSocket: listening socket on which crash dump connections are accepted.
void WSLAVirtualMachine::CollectCrashDumps(wil::unique_socket&& listenSocket) const
{
    wsl::windows::common::wslutil::SetThreadDescription(L"CrashDumpCollection");

    while (!m_vmExitEvent.is_signaled())
    {
        try
        {
            auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), INFINITE, m_vmExitEvent.get());

            // The receive timeout belongs on the accepted connection (the socket that is actually read from),
            // not on the listening socket.
            THROW_LAST_ERROR_IF(
                setsockopt(socket.get(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char*>(&RECEIVE_TIMEOUT), sizeof(RECEIVE_TIMEOUT)) ==
                SOCKET_ERROR);

            auto channel = wsl::shared::SocketChannel{std::move(socket), "crash_dump", m_vmExitEvent.get()};

            const auto& message = channel.ReceiveMessage<LX_PROCESS_CRASH>();

            // NOTE(review): assumes message.Buffer holds a NUL-terminated process name - confirm that
            // ReceiveMessage validates this, since the data originates from the guest.
            const char* process = reinterpret_cast<const char*>(&message.Buffer);

            constexpr auto dumpExtension = ".dmp";
            constexpr auto dumpPrefix = "wsl-crash";

            auto filename = std::format("{}-{}-{}-{}-{}{}", dumpPrefix, message.Timestamp, message.Pid, process, message.Signal, dumpExtension);

            // Replace every character other than alphanumerics, '.' and '-' with '_'.
            // The character is taken as unsigned char because std::isalnum is undefined for negative values
            // (for instance UTF-8 bytes in a process name).
            std::replace_if(
                filename.begin(), filename.end(), [](unsigned char e) { return !std::isalnum(e) && e != '.' && e != '-'; }, '_');

            auto fullPath = m_crashDumpFolder / filename;

            WSL_LOG(
                "WSLALinuxCrash",
                TraceLoggingValue(fullPath.c_str(), "FullPath"),
                TraceLoggingValue(message.Pid, "Pid"),
                TraceLoggingValue(message.Signal, "Signal"),
                TraceLoggingValue(process, "process"));

            // File system operations below run as the user until runAsUser goes out of scope.
            auto runAsUser = wil::impersonate_token(m_userToken.get());

            wsl::windows::common::filesystem::EnsureDirectory(m_crashDumpFolder.c_str());

            // Only delete files that:
            // - have the temporary flag set
            // - start with 'wsl-crash'
            // - end in .dmp
            //
            // This logic is here to prevent accidental user file deletion
            auto pred = [&dumpExtension, &dumpPrefix](const auto& e) {
                // INVALID_FILE_ATTRIBUTES has every bit set, so a failed query must be rejected explicitly;
                // otherwise it would look like the temporary flag is present.
                const auto attributes = GetFileAttributes(e.path().c_str());
                return attributes != INVALID_FILE_ATTRIBUTES && WI_IsFlagSet(attributes, FILE_ATTRIBUTE_TEMPORARY) &&
                       e.path().has_extension() && e.path().extension() == dumpExtension && e.path().has_filename() &&
                       e.path().filename().string().starts_with(dumpPrefix);
            };

            wsl::windows::common::wslutil::EnforceFileLimit(m_crashDumpFolder.c_str(), MAX_CRASH_DUMPS, pred);

            // CREATE_NEW fails if the file already exists; the temporary attribute marks the file as one
            // that the predicate above is allowed to delete later.
            wil::unique_hfile file{CreateFileW(fullPath.c_str(), GENERIC_WRITE, 0, nullptr, CREATE_NEW, FILE_ATTRIBUTE_TEMPORARY, nullptr)};
            THROW_LAST_ERROR_IF(!file);

            channel.SendResultMessage<std::int32_t>(0);

            wsl::windows::common::relay::InterruptableRelay(reinterpret_cast<HANDLE>(channel.Socket()), file.get(), nullptr);
        }
        CATCH_LOG();
    }
}

View File

@ -93,6 +93,7 @@ private:
void CreateVmSavedStateFile();
void EnforceVmSavedStateFileLimit();
void WriteCrashLog(const std::wstring& crashLog);
void CollectCrashDumps(wil::unique_socket&& listenSocket) const;
Microsoft::WRL::ComPtr<WSLAProcess> CreateLinuxProcessImpl(
_In_ const WSLA_PROCESS_OPTIONS& Options, int* Errno = nullptr, const TPrepareCommandLine& PrepareCommandLine = [](const auto&) {});
@ -110,6 +111,7 @@ private:
VIRTUAL_MACHINE_SETTINGS m_settings;
std::thread m_processExitThread;
std::thread m_crashDumpCollectionThread;
GUID m_vmId{};
std::wstring m_vmIdString;

View File

@ -969,4 +969,81 @@ class WSLATests
VERIFY_ARE_EQUAL(error, -1);
}
}
// Verifies that crashing a Linux process inside the VM produces a new, non-empty crash dump file
// in the "wsla-crashes" folder under the temp directory.
TEST_METHOD(CrashDumpCollection)
{
    WSL2_TEST_ONLY();

    VIRTUAL_MACHINE_SETTINGS settings{};
    settings.CpuCount = 4;
    settings.DisplayName = L"WSLA";
    settings.MemoryMb = 2048;
    settings.BootTimeoutMs = 30 * 1000;
    settings.RootVhd = testVhd.c_str();

    auto session = CreateSession(settings);
    int processId = 0;

    // Cache the existing crash dumps so we can check that a new one is created.
    auto crashDumpsDir = std::filesystem::temp_directory_path() / "wsla-crashes";
    std::set<std::filesystem::path> existingDumps;

    if (std::filesystem::exists(crashDumpsDir))
    {
        existingDumps = {std::filesystem::directory_iterator(crashDumpsDir), std::filesystem::directory_iterator{}};
    }

    // Create a stuck process and crash it.
    {
        WSLAProcessLauncher launcher("/bin/cat", {"/bin/cat"}, {}, ProcessFlags::Stdin | ProcessFlags::Stdout | ProcessFlags::Stderr);
        auto process = launcher.Launch(*session);

        // Get the process id. This is needed to identify the crash dump file.
        VERIFY_SUCCEEDED(process.Get().GetPid(&processId));

        // Send SIGSEGV (11) to crash the process.
        VERIFY_SUCCEEDED(process.Get().Signal(11));

        auto result = process.WaitAndCaptureOutput();
        VERIFY_ARE_EQUAL(result.Code, 11);
        VERIFY_ARE_EQUAL(result.Signalled, true);
        VERIFY_ARE_EQUAL(result.Output[1], "");
        VERIFY_ARE_EQUAL(result.Output[2], "");

        // The process has already exited, so signalling it again must fail.
        VERIFY_ARE_EQUAL(process.Get().Signal(9), HRESULT_FROM_WIN32(ERROR_INVALID_STATE));
    }

    // Dump files are named with the format: wsl-crash-<timestamp>-<pid>-<processname>-<signal>.dmp
    // Check if a new file was added in crashDumpsDir matching the pattern and not in existingDumps.
    // NOTE(review): the pattern expects the executable to be reported as /usr/bin/cat - presumably
    // /bin resolves to /usr/bin in the test image; confirm.
    std::string expectedPattern = std::format("wsl-crash-*-{}-_usr_bin_cat-11.dmp", processId);

    auto dumpFile = wsl::shared::retry::RetryWithTimeout<std::filesystem::path>(
        [crashDumpsDir, expectedPattern, existingDumps]() {
            for (const auto& entry : std::filesystem::directory_iterator(crashDumpsDir))
            {
                const auto& filePath = entry.path();
                if (existingDumps.find(filePath) == existingDumps.end() &&
                    PathMatchSpecA(filePath.filename().string().c_str(), expectedPattern.c_str()))
                {
                    return filePath;
                }
            }

            throw wil::ResultException(HRESULT_FROM_WIN32(ERROR_NOT_FOUND));
        },
        std::chrono::milliseconds{100},
        std::chrono::seconds{10});

    // Ensure that the dump file is cleaned up after test completion.
    auto cleanup = wil::scope_exit([&] {
        // Best effort: the non-throwing overload is used so that a failure to delete the file (for
        // instance because it is still open elsewhere) cannot throw out of the scope-exit callback.
        // Removing a file that does not exist is not an error with this overload.
        std::error_code removeError;
        std::filesystem::remove(dumpFile, removeError);
    });

    VERIFY_IS_TRUE(std::filesystem::exists(dumpFile));
    VERIFY_IS_TRUE(std::filesystem::file_size(dumpFile) > 0);
}
};