From 911a3aa758f3283ab4b8a720a9f09dbae36daeb0 Mon Sep 17 00:00:00 2001 From: Kevin Vega <40717198+kvega005@users.noreply.github.com> Date: Thu, 20 Nov 2025 09:21:20 -0800 Subject: [PATCH] Add crash dump collection (#13741) --- src/linux/init/WSLAInit.cpp | 23 ++++++ .../wslaservice/exe/WSLAVirtualMachine.cpp | 75 ++++++++++++++++++ .../wslaservice/exe/WSLAVirtualMachine.h | 2 + test/windows/WSLATests.cpp | 77 +++++++++++++++++++ 4 files changed, 177 insertions(+) diff --git a/src/linux/init/WSLAInit.cpp b/src/linux/init/WSLAInit.cpp index c59d45b..56e0558 100644 --- a/src/linux/init/WSLAInit.cpp +++ b/src/linux/init/WSLAInit.cpp @@ -16,7 +16,9 @@ Abstract: #include "SocketChannel.h" #include "message.h" #include "localhost.h" +#include "common.h" #include +#include #include #include #include @@ -52,6 +54,8 @@ int Chroot(const char* Target); extern int g_LogFd; +extern void WSLAEnableCrashDumpCollection(); + struct WSLAState { std::optional ModulesMountPoint; @@ -59,6 +63,19 @@ struct WSLAState static WSLAState g_state; +void WSLAEnableCrashDumpCollection() +{ + if (symlink("/wsl-init", "/" LX_INIT_WSL_CAPTURE_CRASH) < 0) + { + LOG_ERROR("symlink({}, {}) failed {}", "/wsl-init", "/" LX_INIT_WSL_CAPTURE_CRASH, errno); + return; + } + + // If the first character is a pipe, then the kernel will interpret this path as a command. + constexpr auto core_pattern = "|/" LX_INIT_WSL_CAPTURE_CRASH " %t %E %p %s"; + WriteToFile("/proc/sys/kernel/core_pattern", core_pattern); +} + void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_GET_DISK& Message, const gsl::span& Buffer) { wsl::shared::MessageWriter writer; @@ -496,6 +513,9 @@ void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_MOUNT& Me if (WI_IsFlagSet(Message.Flags, WSLA_MOUNT::Chroot)) { THROW_LAST_ERROR_IF(Chroot(target) < 0); + + // Reconfigure crash dump collection after chroot so symlink & core_pattern resolve correctly. + WSLAEnableCrashDumpCollection(); } response.Result = 0; @@ -796,6 +816,9 @@ int WSLAEntryPoint(int Argc, char* Argv[]) return -1; } + // Enable crash dump collection. + WSLAEnableCrashDumpCollection(); + // // Open kmesg for logging and ensure that the file descriptor is not set to one of the standard file descriptors. // diff --git a/src/windows/wslaservice/exe/WSLAVirtualMachine.cpp b/src/windows/wslaservice/exe/WSLAVirtualMachine.cpp index 9bf2451..f5598d2 100644 --- a/src/windows/wslaservice/exe/WSLAVirtualMachine.cpp +++ b/src/windows/wslaservice/exe/WSLAVirtualMachine.cpp @@ -28,8 +28,10 @@ using wsl::windows::service::wsla::WSLAProcess; using wsl::windows::service::wsla::WSLAVirtualMachine; constexpr auto MAX_VM_CRASH_FILES = 3; +constexpr auto MAX_CRASH_DUMPS = 10; constexpr auto SAVED_STATE_FILE_EXTENSION = L".vmrs"; constexpr auto SAVED_STATE_FILE_PREFIX = L"saved-state-"; +constexpr auto RECEIVE_TIMEOUT = 30 * 1000; WSLAVirtualMachine::WSLAVirtualMachine(const VIRTUAL_MACHINE_SETTINGS& Settings, PSID UserSid, WSLAUserSessionImpl* Session) : m_settings(Settings), m_userSid(UserSid), m_userSession(Session) @@ -130,6 +132,11 @@ WSLAVirtualMachine::~WSLAVirtualMachine() m_processExitThread.join(); } + if (m_crashDumpCollectionThread.joinable()) + { + m_crashDumpCollectionThread.join(); + } + // Clear the state of all remaining processes now that the VM has exited. // The WSLAProcess object reference will be released when the last COM reference is closed. for (auto& e : m_trackedProcesses) @@ -313,6 +320,13 @@ void WSLAVirtualMachine::Start() wsl::windows::common::hcs::StartComputeSystem(m_computeSystem.get(), json.c_str()); + // Create a socket listening for crash dumps. + auto crashDumpSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_CRASH_DUMP_PORT); + THROW_LAST_ERROR_IF(!crashDumpSocket); + + m_crashDumpCollectionThread = + std::thread{[this, socket = std::move(crashDumpSocket)]() mutable { CollectCrashDumps(std::move(socket)); }}; + // Create a socket listening for connections from mini_init. auto listenSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_INIT_PORT); auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), m_settings.BootTimeoutMs, m_vmTerminatingEvent.get()); @@ -1317,4 +1331,65 @@ void WSLAVirtualMachine::OnProcessReleased(int Pid) std::lock_guard lock{m_lock}; auto erased = std::erase_if(m_trackedProcesses, [Pid](const auto* e) { return e->GetPid() == Pid; }); +} + +void WSLAVirtualMachine::CollectCrashDumps(wil::unique_socket&& listenSocket) const +{ + wsl::windows::common::wslutil::SetThreadDescription(L"CrashDumpCollection"); + + while (!m_vmExitEvent.is_signaled()) + { + try + { + auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), INFINITE, m_vmExitEvent.get()); + + THROW_LAST_ERROR_IF( + setsockopt(listenSocket.get(), SOL_SOCKET, SO_RCVTIMEO, (const char*)&RECEIVE_TIMEOUT, sizeof(RECEIVE_TIMEOUT)) == SOCKET_ERROR); + + auto channel = wsl::shared::SocketChannel{std::move(socket), "crash_dump", m_vmExitEvent.get()}; + + const auto& message = channel.ReceiveMessage(); + const char* process = reinterpret_cast(&message.Buffer); + + constexpr auto dumpExtension = ".dmp"; + constexpr auto dumpPrefix = "wsl-crash"; + + auto filename = std::format("{}-{}-{}-{}-{}{}", dumpPrefix, message.Timestamp, message.Pid, process, message.Signal, dumpExtension); + + std::replace_if(filename.begin(), filename.end(), [](auto e) { return !std::isalnum(e) && e != '.' && e != '-'; }, '_'); + + auto fullPath = m_crashDumpFolder / filename; + + WSL_LOG( + "WSLALinuxCrash", + TraceLoggingValue(fullPath.c_str(), "FullPath"), + TraceLoggingValue(message.Pid, "Pid"), + TraceLoggingValue(message.Signal, "Signal"), + TraceLoggingValue(process, "process")); + + auto runAsUser = wil::impersonate_token(m_userToken.get()); + wsl::windows::common::filesystem::EnsureDirectory(m_crashDumpFolder.c_str()); + + // Only delete files that: + // - have the temporary flag set + // - start with 'wsl-crash' + // - end in .dmp + // + // This logic is here to prevent accidental user file deletion + auto pred = [&dumpExtension, &dumpPrefix](const auto& e) { + return WI_IsFlagSet(GetFileAttributes(e.path().c_str()), FILE_ATTRIBUTE_TEMPORARY) && e.path().has_extension() && + e.path().extension() == dumpExtension && e.path().has_filename() && + e.path().filename().string().find(dumpPrefix) == 0; + }; + + wsl::windows::common::wslutil::EnforceFileLimit(m_crashDumpFolder.c_str(), MAX_CRASH_DUMPS, pred); + + wil::unique_hfile file{CreateFileW(fullPath.c_str(), GENERIC_WRITE, 0, nullptr, CREATE_NEW, FILE_ATTRIBUTE_TEMPORARY, nullptr)}; + THROW_LAST_ERROR_IF(!file); + + channel.SendResultMessage(0); + wsl::windows::common::relay::InterruptableRelay(reinterpret_cast(channel.Socket()), file.get(), nullptr); + } + CATCH_LOG(); + } } \ No newline at end of file diff --git a/src/windows/wslaservice/exe/WSLAVirtualMachine.h b/src/windows/wslaservice/exe/WSLAVirtualMachine.h index 7fe65e1..b0f01a9 100644 --- a/src/windows/wslaservice/exe/WSLAVirtualMachine.h +++ b/src/windows/wslaservice/exe/WSLAVirtualMachine.h @@ -93,6 +93,7 @@ private: void CreateVmSavedStateFile(); void EnforceVmSavedStateFileLimit(); void WriteCrashLog(const std::wstring& crashLog); + void CollectCrashDumps(wil::unique_socket&& listenSocket) const; Microsoft::WRL::ComPtr CreateLinuxProcessImpl( _In_ const WSLA_PROCESS_OPTIONS& Options, int* Errno = nullptr, const TPrepareCommandLine& PrepareCommandLine = [](const auto&) {}); @@ -110,6 +111,7 @@ private: VIRTUAL_MACHINE_SETTINGS m_settings; std::thread m_processExitThread; + std::thread m_crashDumpCollectionThread; GUID m_vmId{}; std::wstring m_vmIdString; diff --git a/test/windows/WSLATests.cpp b/test/windows/WSLATests.cpp index 3687b70..65c657c 100644 --- a/test/windows/WSLATests.cpp +++ b/test/windows/WSLATests.cpp @@ -969,4 +969,81 @@ class WSLATests VERIFY_ARE_EQUAL(error, -1); } } + + TEST_METHOD(CrashDumpCollection) + { + WSL2_TEST_ONLY(); + + VIRTUAL_MACHINE_SETTINGS settings{}; + settings.CpuCount = 4; + settings.DisplayName = L"WSLA"; + settings.MemoryMb = 2048; + settings.BootTimeoutMs = 30 * 1000; + settings.RootVhd = testVhd.c_str(); + + auto session = CreateSession(settings); + int processId = 0; + + // Cache the existing crash dumps so we can check that a new one is created. + auto crashDumpsDir = std::filesystem::temp_directory_path() / "wsla-crashes"; + std::set existingDumps; + + if (std::filesystem::exists(crashDumpsDir)) + { + existingDumps = {std::filesystem::directory_iterator(crashDumpsDir), std::filesystem::directory_iterator{}}; + } + + // Create a stuck process and crash it. + { + WSLAProcessLauncher launcher("/bin/cat", {"/bin/cat"}, {}, ProcessFlags::Stdin | ProcessFlags::Stdout | ProcessFlags::Stderr); + + auto process = launcher.Launch(*session); + + // Get the process id. This is need to identify the crash dump file. + VERIFY_SUCCEEDED(process.Get().GetPid(&processId)); + + // Send SIGSEV(11) to crash the process. + VERIFY_SUCCEEDED(process.Get().Signal(11)); + + auto result = process.WaitAndCaptureOutput(); + VERIFY_ARE_EQUAL(result.Code, 11); + VERIFY_ARE_EQUAL(result.Signalled, true); + VERIFY_ARE_EQUAL(result.Output[1], ""); + VERIFY_ARE_EQUAL(result.Output[2], ""); + + VERIFY_ARE_EQUAL(process.Get().Signal(9), HRESULT_FROM_WIN32(ERROR_INVALID_STATE)); + } + + // Dumps files are named with the format: wsl-crash----.dmp + // Check if a new file was added in crashDumpsDir matching the pattern and not in existingDumps. + std::string expectedPattern = std::format("wsl-crash-*-{}-_usr_bin_cat-11.dmp", processId); + + auto dumpFile = wsl::shared::retry::RetryWithTimeout( + [crashDumpsDir, expectedPattern, existingDumps]() { + for (const auto& entry : std::filesystem::directory_iterator(crashDumpsDir)) + { + const auto& filePath = entry.path(); + if (existingDumps.find(filePath) == existingDumps.end() && + PathMatchSpecA(filePath.filename().string().c_str(), expectedPattern.c_str())) + { + return filePath; + } + } + + throw wil::ResultException(HRESULT_FROM_WIN32(ERROR_NOT_FOUND)); + }, + std::chrono::milliseconds{100}, + std::chrono::seconds{10}); + + // Ensure that the dump file is cleaned up after test completion. + auto cleanup = wil::scope_exit([&] { + if (std::filesystem::exists(dumpFile)) + { + std::filesystem::remove(dumpFile); + } + }); + + VERIFY_IS_TRUE(std::filesystem::exists(dumpFile)); + VERIFY_IS_TRUE(std::filesystem::file_size(dumpFile) > 0); + } }; \ No newline at end of file