mirror of
https://github.com/microsoft/WSL.git
synced 2025-12-11 04:35:57 -06:00
Add crash dump collection (#13741)
This commit is contained in:
parent
f0d257f760
commit
911a3aa758
@ -16,7 +16,9 @@ Abstract:
|
||||
#include "SocketChannel.h"
|
||||
#include "message.h"
|
||||
#include "localhost.h"
|
||||
#include "common.h"
|
||||
#include <utmp.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/syscall.h>
|
||||
@ -52,6 +54,8 @@ int Chroot(const char* Target);
|
||||
|
||||
extern int g_LogFd;
|
||||
|
||||
extern void WSLAEnableCrashDumpCollection();
|
||||
|
||||
struct WSLAState
|
||||
{
|
||||
std::optional<std::filesystem::path> ModulesMountPoint;
|
||||
@ -59,6 +63,19 @@ struct WSLAState
|
||||
|
||||
static WSLAState g_state;
|
||||
|
||||
void WSLAEnableCrashDumpCollection()
|
||||
{
|
||||
if (symlink("/wsl-init", "/" LX_INIT_WSL_CAPTURE_CRASH) < 0)
|
||||
{
|
||||
LOG_ERROR("symlink({}, {}) failed {}", "/wsl-init", "/" LX_INIT_WSL_CAPTURE_CRASH, errno);
|
||||
return;
|
||||
}
|
||||
|
||||
// If the first character is a pipe, then the kernel will interpret this path as a command.
|
||||
constexpr auto core_pattern = "|/" LX_INIT_WSL_CAPTURE_CRASH " %t %E %p %s";
|
||||
WriteToFile("/proc/sys/kernel/core_pattern", core_pattern);
|
||||
}
|
||||
|
||||
void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_GET_DISK& Message, const gsl::span<gsl::byte>& Buffer)
|
||||
{
|
||||
wsl::shared::MessageWriter<WSLA_GET_DISK_RESULT> writer;
|
||||
@ -496,6 +513,9 @@ void HandleMessageImpl(wsl::shared::SocketChannel& Channel, const WSLA_MOUNT& Me
|
||||
if (WI_IsFlagSet(Message.Flags, WSLA_MOUNT::Chroot))
|
||||
{
|
||||
THROW_LAST_ERROR_IF(Chroot(target) < 0);
|
||||
|
||||
// Reconfigure crash dump collection after chroot so symlink & core_pattern resolve correctly.
|
||||
WSLAEnableCrashDumpCollection();
|
||||
}
|
||||
|
||||
response.Result = 0;
|
||||
@ -796,6 +816,9 @@ int WSLAEntryPoint(int Argc, char* Argv[])
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Enable crash dump collection.
|
||||
WSLAEnableCrashDumpCollection();
|
||||
|
||||
//
|
||||
// Open kmesg for logging and ensure that the file descriptor is not set to one of the standard file descriptors.
|
||||
//
|
||||
|
||||
@ -28,8 +28,10 @@ using wsl::windows::service::wsla::WSLAProcess;
|
||||
using wsl::windows::service::wsla::WSLAVirtualMachine;
|
||||
|
||||
constexpr auto MAX_VM_CRASH_FILES = 3;
|
||||
// File-count limit passed to EnforceFileLimit for the crash dump folder before a new dump is written.
constexpr auto MAX_CRASH_DUMPS = 10;
|
||||
constexpr auto SAVED_STATE_FILE_EXTENSION = L".vmrs";
|
||||
constexpr auto SAVED_STATE_FILE_PREFIX = L"saved-state-";
|
||||
// Value given to setsockopt(SO_RCVTIMEO) by the crash dump collection thread.
// Winsock expresses SO_RCVTIMEO in milliseconds, so this is 30 seconds.
constexpr auto RECEIVE_TIMEOUT = 30 * 1000;
|
||||
|
||||
WSLAVirtualMachine::WSLAVirtualMachine(const VIRTUAL_MACHINE_SETTINGS& Settings, PSID UserSid, WSLAUserSessionImpl* Session) :
|
||||
m_settings(Settings), m_userSid(UserSid), m_userSession(Session)
|
||||
@ -130,6 +132,11 @@ WSLAVirtualMachine::~WSLAVirtualMachine()
|
||||
m_processExitThread.join();
|
||||
}
|
||||
|
||||
if (m_crashDumpCollectionThread.joinable())
|
||||
{
|
||||
m_crashDumpCollectionThread.join();
|
||||
}
|
||||
|
||||
// Clear the state of all remaining processes now that the VM has exited.
|
||||
// The WSLAProcess object reference will be released when the last COM reference is closed.
|
||||
for (auto& e : m_trackedProcesses)
|
||||
@ -313,6 +320,13 @@ void WSLAVirtualMachine::Start()
|
||||
|
||||
wsl::windows::common::hcs::StartComputeSystem(m_computeSystem.get(), json.c_str());
|
||||
|
||||
// Create a socket listening for crash dumps.
|
||||
auto crashDumpSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_CRASH_DUMP_PORT);
|
||||
THROW_LAST_ERROR_IF(!crashDumpSocket);
|
||||
|
||||
m_crashDumpCollectionThread =
|
||||
std::thread{[this, socket = std::move(crashDumpSocket)]() mutable { CollectCrashDumps(std::move(socket)); }};
|
||||
|
||||
// Create a socket listening for connections from mini_init.
|
||||
auto listenSocket = wsl::windows::common::hvsocket::Listen(runtimeId, LX_INIT_UTILITY_VM_INIT_PORT);
|
||||
auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), m_settings.BootTimeoutMs, m_vmTerminatingEvent.get());
|
||||
@ -1317,4 +1331,65 @@ void WSLAVirtualMachine::OnProcessReleased(int Pid)
|
||||
std::lock_guard lock{m_lock};
|
||||
|
||||
auto erased = std::erase_if(m_trackedProcesses, [Pid](const auto* e) { return e->GetPid() == Pid; });
|
||||
}
|
||||
|
||||
// Body of the crash dump collection thread.
//
// Loops until m_vmExitEvent is signaled. Each iteration accepts one connection on listenSocket,
// reads an LX_PROCESS_CRASH header from it, derives a sanitized file name from the header,
// trims old dumps in m_crashDumpFolder down to MAX_CRASH_DUMPS, creates the new dump file while
// impersonating m_userToken, acknowledges the header and relays the rest of the connection
// into the file.
//
// Any exception raised while handling a connection is logged (CATCH_LOG) and the loop continues,
// so one bad dump does not stop collection for the lifetime of the VM.
//
// listenSocket: listening socket for crash dump connections; owned by this thread.
void WSLAVirtualMachine::CollectCrashDumps(wil::unique_socket&& listenSocket) const
{
    wsl::windows::common::wslutil::SetThreadDescription(L"CrashDumpCollection");

    while (!m_vmExitEvent.is_signaled())
    {
        try
        {
            auto socket = wsl::windows::common::hvsocket::Accept(listenSocket.get(), INFINITE, m_vmExitEvent.get());

            // The timeout has to be applied to the accepted connection: that is the socket the
            // crash header is read from. Setting it on the listening socket after the accept
            // leaves the current connection without a timeout.
            THROW_LAST_ERROR_IF(
                setsockopt(socket.get(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<const char*>(&RECEIVE_TIMEOUT), sizeof(RECEIVE_TIMEOUT)) == SOCKET_ERROR);

            auto channel = wsl::shared::SocketChannel{std::move(socket), "crash_dump", m_vmExitEvent.get()};

            const auto& message = channel.ReceiveMessage<LX_PROCESS_CRASH>();

            // NOTE(review): Buffer is consumed as a C string below; assumes the sender null-terminates it - confirm.
            const char* process = reinterpret_cast<const char*>(&message.Buffer);

            constexpr auto dumpExtension = ".dmp";
            constexpr auto dumpPrefix = "wsl-crash";

            // File name layout: wsl-crash-<timestamp>-<pid>-<process>-<signal>.dmp
            auto filename = std::format("{}-{}-{}-{}-{}{}", dumpPrefix, message.Timestamp, message.Pid, process, message.Signal, dumpExtension);

            // The process name comes from the guest, so anything outside [A-Za-z0-9.-] is replaced.
            // std::isalnum must receive a value representable as unsigned char; passing a negative
            // plain char is undefined behaviour.
            std::replace_if(
                filename.begin(), filename.end(), [](unsigned char e) { return !std::isalnum(e) && e != '.' && e != '-'; }, '_');

            auto fullPath = m_crashDumpFolder / filename;

            WSL_LOG(
                "WSLALinuxCrash",
                TraceLoggingValue(fullPath.c_str(), "FullPath"),
                TraceLoggingValue(message.Pid, "Pid"),
                TraceLoggingValue(message.Signal, "Signal"),
                TraceLoggingValue(process, "process"));

            // All file system access below happens with the user's token.
            auto runAsUser = wil::impersonate_token(m_userToken.get());
            wsl::windows::common::filesystem::EnsureDirectory(m_crashDumpFolder.c_str());

            // Only delete files that:
            // - have the temporary flag set
            // - start with 'wsl-crash'
            // - end in .dmp
            //
            // This logic is here to prevent accidental user file deletion
            auto pred = [&dumpExtension, &dumpPrefix](const auto& e) {
                // INVALID_FILE_ATTRIBUTES has every bit set, so a failed query would otherwise
                // look like a file carrying the temporary flag.
                const auto attributes = GetFileAttributes(e.path().c_str());
                return attributes != INVALID_FILE_ATTRIBUTES && WI_IsFlagSet(attributes, FILE_ATTRIBUTE_TEMPORARY) &&
                       e.path().has_extension() && e.path().extension() == dumpExtension && e.path().has_filename() &&
                       e.path().filename().string().find(dumpPrefix) == 0;
            };

            wsl::windows::common::wslutil::EnforceFileLimit(m_crashDumpFolder.c_str(), MAX_CRASH_DUMPS, pred);

            // CREATE_NEW: never overwrite an existing file. The temporary attribute is what marks
            // the file as eligible for the cleanup predicate above.
            wil::unique_hfile file{CreateFileW(fullPath.c_str(), GENERIC_WRITE, 0, nullptr, CREATE_NEW, FILE_ATTRIBUTE_TEMPORARY, nullptr)};
            THROW_LAST_ERROR_IF(!file);

            // Acknowledge the header, then copy everything that arrives on the socket into the file.
            channel.SendResultMessage<std::int32_t>(0);
            wsl::windows::common::relay::InterruptableRelay(reinterpret_cast<HANDLE>(channel.Socket()), file.get(), nullptr);
        }
        CATCH_LOG();
    }
}
|
||||
@ -93,6 +93,7 @@ private:
|
||||
void CreateVmSavedStateFile();
|
||||
void EnforceVmSavedStateFileLimit();
|
||||
void WriteCrashLog(const std::wstring& crashLog);
|
||||
void CollectCrashDumps(wil::unique_socket&& listenSocket) const;
|
||||
|
||||
Microsoft::WRL::ComPtr<WSLAProcess> CreateLinuxProcessImpl(
|
||||
_In_ const WSLA_PROCESS_OPTIONS& Options, int* Errno = nullptr, const TPrepareCommandLine& PrepareCommandLine = [](const auto&) {});
|
||||
@ -110,6 +111,7 @@ private:
|
||||
|
||||
VIRTUAL_MACHINE_SETTINGS m_settings;
|
||||
std::thread m_processExitThread;
|
||||
std::thread m_crashDumpCollectionThread;
|
||||
|
||||
GUID m_vmId{};
|
||||
std::wstring m_vmIdString;
|
||||
|
||||
@ -969,4 +969,81 @@ class WSLATests
|
||||
VERIFY_ARE_EQUAL(error, -1);
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that crashing a process inside the VM produces a new, non-empty dump file in
// %TEMP%\wsla-crashes, and removes that file afterwards.
TEST_METHOD(CrashDumpCollection)
{
    WSL2_TEST_ONLY();

    VIRTUAL_MACHINE_SETTINGS settings{};
    settings.CpuCount = 4;
    settings.DisplayName = L"WSLA";
    settings.MemoryMb = 2048;
    settings.BootTimeoutMs = 30 * 1000;
    settings.RootVhd = testVhd.c_str();

    auto session = CreateSession(settings);
    int processId = 0;

    // Cache the existing crash dumps so we can check that a new one is created.
    auto crashDumpsDir = std::filesystem::temp_directory_path() / "wsla-crashes";
    std::set<std::filesystem::path> existingDumps;

    if (std::filesystem::exists(crashDumpsDir))
    {
        existingDumps = {std::filesystem::directory_iterator(crashDumpsDir), std::filesystem::directory_iterator{}};
    }

    // Create a stuck process and crash it.
    {
        WSLAProcessLauncher launcher("/bin/cat", {"/bin/cat"}, {}, ProcessFlags::Stdin | ProcessFlags::Stdout | ProcessFlags::Stderr);

        auto process = launcher.Launch(*session);

        // Get the process id. This is needed to identify the crash dump file.
        VERIFY_SUCCEEDED(process.Get().GetPid(&processId));

        // Send SIGSEGV (11) to crash the process.
        VERIFY_SUCCEEDED(process.Get().Signal(11));

        auto result = process.WaitAndCaptureOutput();
        VERIFY_ARE_EQUAL(result.Code, 11);
        VERIFY_ARE_EQUAL(result.Signalled, true);
        VERIFY_ARE_EQUAL(result.Output[1], "");
        VERIFY_ARE_EQUAL(result.Output[2], "");

        // The process is gone, so a further signal must be rejected.
        VERIFY_ARE_EQUAL(process.Get().Signal(9), HRESULT_FROM_WIN32(ERROR_INVALID_STATE));
    }

    // Dump files are named with the format: wsl-crash-<timestamp>-<pid>-<processname>-<signal>.dmp
    // Check if a new file was added in crashDumpsDir matching the pattern and not in existingDumps.
    std::string expectedPattern = std::format("wsl-crash-*-{}-_usr_bin_cat-11.dmp", processId);

    // The dump is written asynchronously, so poll for it: every 100ms, for up to 10 seconds.
    auto dumpFile = wsl::shared::retry::RetryWithTimeout<std::filesystem::path>(
        [crashDumpsDir, expectedPattern, existingDumps]() {
            for (const auto& entry : std::filesystem::directory_iterator(crashDumpsDir))
            {
                const auto& filePath = entry.path();
                if (existingDumps.find(filePath) == existingDumps.end() &&
                    PathMatchSpecA(filePath.filename().string().c_str(), expectedPattern.c_str()))
                {
                    return filePath;
                }
            }

            throw wil::ResultException(HRESULT_FROM_WIN32(ERROR_NOT_FOUND));
        },
        std::chrono::milliseconds{100},
        std::chrono::seconds{10});

    // Ensure that the dump file is cleaned up after test completion.
    auto cleanup = wil::scope_exit([&] {
        // Use the non-throwing overload: a file that is already gone is not an error, and an
        // exception must not escape the cleanup lambda.
        std::error_code error;
        std::filesystem::remove(dumpFile, error);
    });

    VERIFY_IS_TRUE(std::filesystem::exists(dumpFile));
    VERIFY_IS_TRUE(std::filesystem::file_size(dumpFile) > 0);
}
|
||||
};
|
||||
Loading…
x
Reference in New Issue
Block a user