Add more ConsoleBench tests (#17441)

This now covers all major Console APIs.
In the future we could add tests that cover VT sequences as well.
This commit is contained in:
Leonard Hecker 2024-06-22 13:30:18 +02:00 committed by GitHub
parent 8511f3d737
commit bb4981caae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 598 additions and 162 deletions

View File

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Application manifest for the benchmark executable (presumably the ConsoleBench.exe.manifest that the project file includes; confirm). -->
<assembly
xmlns="urn:schemas-microsoft-com:asm.v1"
xmlns:asm3="urn:schemas-microsoft-com:asm.v3"
xmlns:cv1="urn:schemas-microsoft-com:compatibility.v1"
xmlns:ws="http://schemas.microsoft.com/SMI/2005/WindowsSettings"
xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings"
xmlns:ws3="http://schemas.microsoft.com/SMI/2019/WindowsSettings"
manifestVersion="1.0">
<!-- Per-process Windows settings: opt into long path support and request UTF-8 as the active code page. -->
<asm3:application>
<windowsSettings>
<ws2:longPathAware>true</ws2:longPathAware>
<ws3:activeCodePage>UTF-8</ws3:activeCodePage>
</windowsSettings>
</asm3:application>
<!-- Declares the supported OS; per Microsoft's application manifest documentation this GUID identifies Windows 10 and later. -->
<cv1:compatibility>
<application>
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
</application>
</cv1:compatibility>
<!-- Side-by-side dependency on version 6 of the Common Controls assembly. -->
<dependency>
<dependentAssembly>
<assemblyIdentity type="win32" name="Microsoft.Windows.Common-Controls" version="6.0.0.0" processorArchitecture="*" publicKeyToken="6595b64144ccf1df" language="*" />
</dependentAssembly>
</dependency>
</assembly>

View File

@ -25,6 +25,9 @@
<ClInclude Include="pch.h" />
<ClInclude Include="utils.h" />
</ItemGroup>
<ItemGroup>
<Manifest Include="ConsoleBench.exe.manifest" />
</ItemGroup>
<ItemDefinitionGroup>
<ClCompile>
<ControlFlowGuard>false</ControlFlowGuard>

View File

@ -49,4 +49,9 @@
<Filter>Source Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Manifest Include="ConsoleBench.exe.manifest">
<Filter>Source Files</Filter>
</Manifest>
</ItemGroup>
</Project>

View File

@ -5,7 +5,7 @@ using namespace mem;
Arena::Arena(size_t bytes)
{
m_alloc = static_cast<uint8_t*>(THROW_IF_NULL_ALLOC(VirtualAlloc(nullptr, bytes, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE)));
m_alloc = static_cast<uint8_t*>(THROW_IF_NULL_ALLOC(VirtualAlloc(nullptr, bytes, MEM_RESERVE, PAGE_READWRITE)));
}
Arena::~Arena()
@ -41,8 +41,18 @@ void* Arena::_push_raw(size_t bytes, size_t alignment)
{
const auto mask = alignment - 1;
const auto pos = (m_pos + mask) & ~mask;
const auto pos_new = pos + bytes;
const auto ptr = m_alloc + pos;
m_pos = pos + bytes;
if (pos_new > m_commit)
{
// Commit in 1MiB chunks and pre-commit 1MiB in advance.
const auto commit_new = (pos_new + 0x1FFFFF) & ~0xFFFFF;
THROW_IF_NULL_ALLOC(VirtualAlloc(m_alloc + m_commit, commit_new - m_commit, MEM_COMMIT, PAGE_READWRITE));
m_commit = commit_new;
}
m_pos = pos_new;
return ptr;
}
@ -76,8 +86,8 @@ ScopedArena::~ScopedArena()
static [[msvc::noinline]] std::array<Arena, 2> thread_arenas_init()
{
return {
Arena{ 64 * 1024 * 1024 },
Arena{ 64 * 1024 * 1024 },
Arena{ 1024 * 1024 * 1024 },
Arena{ 1024 * 1024 * 1024 },
};
}
@ -166,7 +176,9 @@ std::wstring_view mem::format(Arena& arena, const wchar_t* fmt, va_list args)
return {};
}
// Make space for a terminating \0 character.
len++;
const auto buffer = arena.push_uninitialized<wchar_t>(len);
len = _vsnwprintf(buffer, len, fmt, args);

View File

@ -60,6 +60,7 @@ namespace mem
void* _push_uninitialized(size_t bytes, size_t alignment = __STDCPP_DEFAULT_NEW_ALIGNMENT__);
uint8_t* m_alloc = nullptr;
size_t m_commit = 0;
size_t m_pos = 0;
};
@ -96,16 +97,32 @@ namespace mem
}
template<typename T>
std::basic_string_view<T> repeat_string(Arena& arena, std::basic_string_view<T> in, size_t count)
auto repeat(Arena& arena, const T& in, size_t count) -> decltype(auto)
{
const auto len = count * in.size();
const auto buf = arena.push_uninitialized<T>(len);
for (size_t i = 0; i < count; ++i)
if constexpr (is_std_view<T>::value)
{
mem::copy(buf + i * in.size(), in.data(), in.size());
}
const auto data = in.data();
const auto size = in.size();
const auto len = count * size;
const auto buf = arena.push_uninitialized<typename T::value_type>(len);
return { buf, len };
for (size_t i = 0; i < count; ++i)
{
mem::copy(buf + i * size, data, size);
}
return T{ buf, len };
}
else
{
const auto buf = arena.push_uninitialized<T>(count);
for (size_t i = 0; i < count; ++i)
{
memcpy(buf + i, &in, sizeof(T));
}
return std::span{ buf, count };
}
}
}

View File

@ -3,6 +3,7 @@
#include <conmsgl1.h>
#include <winternl.h>
#include <wil/win32_helpers.h>
#include "arena.h"
@ -46,12 +47,27 @@ static void conhostCopyToStringBuffer(USHORT& length, auto& buffer, const wchar_
ConhostHandle spawn_conhost(mem::Arena& arena, const wchar_t* path)
{
const auto pathLen = wcslen(path);
const auto isDLL = pathLen > 4 && wcscmp(&path[pathLen - 4], L".dll") == 0;
const auto scratch = mem::get_scratch_arena(arena);
const auto server = conhostCreateHandle(nullptr, L"\\Device\\ConDrv\\Server", true, false);
auto server = conhostCreateHandle(nullptr, L"\\Device\\ConDrv\\Server", true, false);
auto reference = conhostCreateHandle(server.get(), L"\\Reference", false, true);
{
const auto cmd = format(scratch.arena, LR"("%s" --server 0x%zx)", path, server.get());
const auto selfPath = scratch.arena.push_uninitialized<wchar_t>(64 * 1024);
GetModuleFileNameW(nullptr, selfPath, 64 * 1024);
std::wstring_view cmd;
if (isDLL)
{
cmd = format(scratch.arena, LR"("%s" host %zx "%s")", selfPath, server.get(), path);
}
else
{
cmd = format(scratch.arena, LR"("%s" --server 0x%zx)", path, server.get());
}
uint8_t attrListBuffer[64];
@ -154,6 +170,22 @@ ConhostHandle spawn_conhost(mem::Arena& arena, const wchar_t* path)
};
}
// A continuation of spawn_conhost().
//
// spawn_conhost() re-launches this executable as `"<self>" host <hex server handle> "<dll path>"`
// when the conhost path ends in ".dll". This function detects that command line, loads the DLL,
// resolves its "ConsoleCreateIoThread" export, calls it with the server handle and then ends the
// calling thread via ExitThread(). For any other command line it returns without doing anything.
//
// Throws (via the WIL macros) if the DLL cannot be loaded, the export is missing,
// or the entrypoint returns a failing NTSTATUS.
void check_spawn_conhost_dll(int argc, const wchar_t* argv[])
{
    // Not the "host" sub-command: let wmain() continue as usual.
    if (argc != 4 || wcscmp(argv[1], L"host") != 0)
    {
        return;
    }

    using Entrypoint = NTSTATUS(NTAPI*)(HANDLE);

    // The handle value was formatted as hexadecimal (%zx) by spawn_conhost().
    const auto server = reinterpret_cast<HANDLE>(wcstoull(argv[2], nullptr, 16));
    const auto dllPath = argv[3];

    const auto library = THROW_LAST_ERROR_IF_NULL(LoadLibraryExW(dllPath, nullptr, 0));
    const auto entrypoint = THROW_LAST_ERROR_IF_NULL(reinterpret_cast<Entrypoint>(GetProcAddress(library, "ConsoleCreateIoThread")));
    THROW_IF_NTSTATUS_FAILED(entrypoint(server));

    // NOTE(review): only the calling thread is ended here; presumably the thread created
    // by the DLL keeps the process running - confirm.
    ExitThread(S_OK);
}
HANDLE get_active_connection()
{
// (Not actually) FUN FACT! The handles don't mean anything and the cake is a lie!

View File

@ -16,5 +16,6 @@ struct ConhostHandle
};
ConhostHandle spawn_conhost(mem::Arena& arena, const wchar_t* path);
void check_spawn_conhost_dll(int argc, const wchar_t* argv[]);
HANDLE get_active_connection();
void set_active_connection(HANDLE connection);

View File

@ -7,26 +7,47 @@
#include "conhost.h"
#include "utils.h"
#define ENABLE_TEST_OUTPUT_WRITE 1
#define ENABLE_TEST_OUTPUT_SCROLL 1
#define ENABLE_TEST_OUTPUT_FILL 1
#define ENABLE_TEST_OUTPUT_READ 1
#define ENABLE_TEST_INPUT 1
#define ENABLE_TEST_CLIPBOARD 1
using Measurements = std::span<int32_t>;
using MeasurementsPerBenchmark = std::span<Measurements>;
struct BenchmarkContext
{
HWND hwnd;
HANDLE input;
HANDLE output;
int64_t time_limit;
bool wants_more() const;
void mark_beg();
void mark_end();
size_t rand();
HWND hwnd = nullptr;
HANDLE input = nullptr;
HANDLE output = nullptr;
mem::Arena& arena;
std::string_view utf8_4Ki;
std::string_view utf8_128Ki;
std::wstring_view utf16_4Ki;
std::wstring_view utf16_128Ki;
std::span<WORD> attr_4Ki;
std::span<CHAR_INFO> char_4Ki;
std::span<INPUT_RECORD> input_4Ki;
Measurements m_measurements;
size_t m_measurements_off = 0;
int64_t m_time = 0;
int64_t m_time_limit = 0;
size_t m_rng_state = 0;
};
struct Benchmark
{
const char* title;
void (*exec)(const BenchmarkContext& ctx, Measurements measurements);
void (*exec)(BenchmarkContext& ctx);
};
struct AccumulatedResults
@ -37,158 +58,415 @@ struct AccumulatedResults
MeasurementsPerBenchmark* measurments;
};
// Returns the distance between two performance counter readings (end - beg),
// narrowed to 32 bits.
constexpr int32_t perf_delta(int64_t beg, int64_t end)
{
    const int64_t elapsed = end - beg;
    return static_cast<int32_t>(elapsed);
}
static constexpr COORD s_buffer_size{ 120, 9001 };
static constexpr COORD s_viewport_size{ 120, 30 };
static constexpr Benchmark s_benchmarks[]{
static constexpr Benchmark s_benchmarks[] = {
#if ENABLE_TEST_OUTPUT_WRITE
Benchmark{
.title = "WriteConsoleA 4Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
for (auto& d : measurements)
.exec = [](BenchmarkContext& ctx) {
while (ctx.wants_more())
{
const auto beg = query_perf_counter();
WriteConsoleA(ctx.output, ctx.utf8_4Ki.data(), static_cast<DWORD>(ctx.utf8_4Ki.size()), nullptr, nullptr);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
if (end >= ctx.time_limit)
{
break;
}
ctx.mark_beg();
const auto res = WriteConsoleA(ctx.output, ctx.utf8_4Ki.data(), static_cast<DWORD>(ctx.utf8_4Ki.size()), nullptr, nullptr);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "WriteConsoleW 4Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
for (auto& d : measurements)
.exec = [](BenchmarkContext& ctx) {
while (ctx.wants_more())
{
const auto beg = query_perf_counter();
WriteConsoleW(ctx.output, ctx.utf16_4Ki.data(), static_cast<DWORD>(ctx.utf16_4Ki.size()), nullptr, nullptr);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
if (end >= ctx.time_limit)
{
break;
}
ctx.mark_beg();
const auto res = WriteConsoleW(ctx.output, ctx.utf16_4Ki.data(), static_cast<DWORD>(ctx.utf16_4Ki.size()), nullptr, nullptr);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "WriteConsoleA 128Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
for (auto& d : measurements)
.exec = [](BenchmarkContext& ctx) {
while (ctx.wants_more())
{
const auto beg = query_perf_counter();
WriteConsoleA(ctx.output, ctx.utf8_128Ki.data(), static_cast<DWORD>(ctx.utf8_128Ki.size()), nullptr, nullptr);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
if (end >= ctx.time_limit)
{
break;
}
ctx.mark_beg();
const auto res = WriteConsoleA(ctx.output, ctx.utf8_128Ki.data(), static_cast<DWORD>(ctx.utf8_128Ki.size()), nullptr, nullptr);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "WriteConsoleW 128Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
for (auto& d : measurements)
.exec = [](BenchmarkContext& ctx) {
while (ctx.wants_more())
{
const auto beg = query_perf_counter();
WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
if (end >= ctx.time_limit)
{
break;
}
ctx.mark_beg();
const auto res = WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "Copy to clipboard 4Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
.title = "WriteConsoleOutputAttribute 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
DWORD written;
while (ctx.wants_more())
{
ctx.mark_beg();
const auto res = WriteConsoleOutputAttribute(ctx.output, ctx.attr_4Ki.data(), static_cast<DWORD>(ctx.attr_4Ki.size()), pos, &written);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "WriteConsoleOutputCharacterW 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
DWORD written;
while (ctx.wants_more())
{
ctx.mark_beg();
const auto res = WriteConsoleOutputCharacterW(ctx.output, ctx.utf16_4Ki.data(), static_cast<DWORD>(ctx.utf16_4Ki.size()), pos, &written);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "WriteConsoleOutputW 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
static constexpr COORD size{ 64, 64 };
static constexpr SMALL_RECT rect{ 0, 0, 63, 63 };
while (ctx.wants_more())
{
auto written = rect;
ctx.mark_beg();
const auto res = WriteConsoleOutputW(ctx.output, ctx.char_4Ki.data(), size, pos, &written);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
#endif
#if ENABLE_TEST_OUTPUT_SCROLL
Benchmark{
.title = "ScrollConsoleScreenBufferW 4Ki",
.exec = [](BenchmarkContext& ctx) {
for (int i = 0; i < 10; i++)
{
WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
}
static constexpr CHAR_INFO fill{ L' ', FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED };
static constexpr size_t w = 64;
static constexpr size_t h = 64;
while (ctx.wants_more())
{
auto r = ctx.rand();
const auto srcLeft = (r >> 0) % (s_buffer_size.X - w);
const auto srcTop = (r >> 16) % (s_buffer_size.Y - h);
size_t dstLeft;
size_t dstTop;
do
{
r = ctx.rand();
dstLeft = (r >> 0) % (s_buffer_size.X - w);
dstTop = (r >> 16) % (s_buffer_size.Y - h);
} while (srcLeft == dstLeft && srcTop == dstTop);
const SMALL_RECT scrollRect{
.Left = static_cast<SHORT>(srcLeft),
.Top = static_cast<SHORT>(srcTop),
.Right = static_cast<SHORT>(srcLeft + w - 1),
.Bottom = static_cast<SHORT>(srcTop + h - 1),
};
const COORD destOrigin{
.X = static_cast<SHORT>(dstLeft),
.Y = static_cast<SHORT>(dstTop),
};
ctx.mark_beg();
const auto res = ScrollConsoleScreenBufferW(ctx.output, &scrollRect, nullptr, destOrigin, &fill);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
Benchmark{
.title = "ScrollConsoleScreenBufferW vertical",
.exec = [](BenchmarkContext& ctx) {
for (int i = 0; i < 10; i++)
{
WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
}
static constexpr CHAR_INFO fill{ L' ', FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED };
static constexpr size_t h = (4096 + s_buffer_size.X / 2) / s_buffer_size.X;
while (ctx.wants_more())
{
auto r = ctx.rand();
const auto srcTop = r % (s_buffer_size.Y - h);
size_t dstTop;
do
{
r = ctx.rand();
dstTop = r % (s_buffer_size.Y - h);
} while (srcTop == dstTop);
const SMALL_RECT scrollRect{
.Left = 0,
.Top = static_cast<SHORT>(srcTop),
.Right = s_buffer_size.X - 1,
.Bottom = static_cast<SHORT>(srcTop + h - 1),
};
const COORD destOrigin{
.X = 0,
.Y = static_cast<SHORT>(dstTop),
};
ctx.mark_beg();
const auto res = ScrollConsoleScreenBufferW(ctx.output, &scrollRect, nullptr, destOrigin, &fill);
ctx.mark_end();
debugAssert(res == TRUE);
}
},
},
#endif
#if ENABLE_TEST_OUTPUT_FILL
Benchmark{
.title = "FillConsoleOutputAttribute 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
DWORD written;
while (ctx.wants_more())
{
ctx.mark_beg();
FillConsoleOutputAttribute(ctx.output, FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED, 4096, pos, &written);
ctx.mark_end();
debugAssert(written == 4096);
}
},
},
Benchmark{
.title = "FillConsoleOutputCharacterW 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
DWORD written;
while (ctx.wants_more())
{
ctx.mark_beg();
FillConsoleOutputCharacterW(ctx.output, L'A', 4096, pos, &written);
ctx.mark_end();
debugAssert(written == 4096);
}
},
},
#endif
#if ENABLE_TEST_OUTPUT_READ
Benchmark{
.title = "ReadConsoleOutputAttribute 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
const auto scratch = mem::get_scratch_arena(ctx.arena);
const auto buf = scratch.arena.push_uninitialized<WORD>(4096);
DWORD read;
WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
while (ctx.wants_more())
{
ctx.mark_beg();
ReadConsoleOutputAttribute(ctx.output, buf, 4096, pos, &read);
ctx.mark_end();
debugAssert(read == 4096);
}
},
},
Benchmark{
.title = "ReadConsoleOutputCharacterW 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
const auto scratch = mem::get_scratch_arena(ctx.arena);
const auto buf = scratch.arena.push_uninitialized<wchar_t>(4096);
DWORD read;
WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
while (ctx.wants_more())
{
ctx.mark_beg();
ReadConsoleOutputCharacterW(ctx.output, buf, 4096, pos, &read);
ctx.mark_end();
debugAssert(read == 4096);
}
},
},
Benchmark{
.title = "ReadConsoleOutputW 4Ki",
.exec = [](BenchmarkContext& ctx) {
static constexpr COORD pos{ 0, 0 };
static constexpr COORD size{ 64, 64 };
static constexpr SMALL_RECT rect{ 0, 0, 63, 63 };
const auto scratch = mem::get_scratch_arena(ctx.arena);
const auto buf = scratch.arena.push_uninitialized<CHAR_INFO>(size.X * size.Y);
WriteConsoleW(ctx.output, ctx.utf16_128Ki.data(), static_cast<DWORD>(ctx.utf16_128Ki.size()), nullptr, nullptr);
while (ctx.wants_more())
{
auto read = rect;
ctx.mark_beg();
ReadConsoleOutputW(ctx.output, buf, size, pos, &read);
ctx.mark_end();
debugAssert(read.Right == 63 && read.Bottom == 63);
}
},
},
#endif
#if ENABLE_TEST_INPUT
Benchmark{
.title = "WriteConsoleInputW 4Ki",
.exec = [](BenchmarkContext& ctx) {
DWORD written;
FlushConsoleInputBuffer(ctx.input);
while (ctx.wants_more())
{
ctx.mark_beg();
WriteConsoleInputW(ctx.input, ctx.input_4Ki.data(), static_cast<DWORD>(ctx.input_4Ki.size()), &written);
ctx.mark_end();
debugAssert(written == ctx.input_4Ki.size());
FlushConsoleInputBuffer(ctx.input);
}
},
},
Benchmark{
.title = "ReadConsoleInputW 4Ki",
.exec = [](BenchmarkContext& ctx) {
const auto scratch = mem::get_scratch_arena(ctx.arena);
const auto buf = scratch.arena.push_uninitialized<INPUT_RECORD>(ctx.input_4Ki.size());
DWORD written, read;
FlushConsoleInputBuffer(ctx.input);
while (ctx.wants_more())
{
WriteConsoleInputW(ctx.input, ctx.input_4Ki.data(), static_cast<DWORD>(ctx.input_4Ki.size()), &written);
debugAssert(written == ctx.input_4Ki.size());
ctx.mark_beg();
ReadConsoleInputW(ctx.input, buf, static_cast<DWORD>(ctx.input_4Ki.size()), &read);
ctx.mark_end();
debugAssert(read == ctx.input_4Ki.size());
}
},
},
// Measures ReadConsoleW() consuming the key events of ctx.input_4Ki, which are
// queued with WriteConsoleInputW() right before every timed call.
Benchmark{
    .title = "ReadConsoleW 4Ki",
    .exec = [](BenchmarkContext& ctx) {
        const auto scratch = mem::get_scratch_arena(ctx.arena);
        // The read buffer holds 4 wchar_t per queued input record.
        const auto cap = static_cast<DWORD>(ctx.input_4Ki.size()) * 4;
        const auto buf = scratch.arena.push_uninitialized<wchar_t>(cap);
        DWORD written, read;

        // Start from an empty input queue so that only our records are read back.
        FlushConsoleInputBuffer(ctx.input);

        while (ctx.wants_more())
        {
            // Refilling the input queue is setup work and stays outside of the timed region.
            WriteConsoleInputW(ctx.input, ctx.input_4Ki.data(), static_cast<DWORD>(ctx.input_4Ki.size()), &written);
            debugAssert(written == ctx.input_4Ki.size());

            ctx.mark_beg();
            ReadConsoleW(ctx.input, buf, cap, &read, nullptr);
            ctx.mark_end();
            // Validate only after mark_end(), like every other benchmark does,
            // so that the check is never part of the measurement.
            debugAssert(read == ctx.input_4Ki.size());
        }
    },
},
#endif
#if ENABLE_TEST_CLIPBOARD
Benchmark{
.title = "Clipboard copy 4Ki",
.exec = [](BenchmarkContext& ctx) {
// The length passed to WriteConsoleW is a count of UTF-16 code units,
// so it must be taken from the UTF-16 buffer that is being written.
WriteConsoleW(ctx.output, ctx.utf16_4Ki.data(), static_cast<DWORD>(ctx.utf16_4Ki.size()), nullptr, nullptr);
for (auto& d : measurements)
while (ctx.wants_more())
{
SendMessageW(ctx.hwnd, WM_SYSCOMMAND, 0xFFF5 /* ID_CONSOLE_SELECTALL */, 0);
const auto beg = query_perf_counter();
ctx.mark_beg();
SendMessageW(ctx.hwnd, WM_SYSCOMMAND, 0xFFF0 /* ID_CONSOLE_COPY */, 0);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
if (end >= ctx.time_limit)
{
break;
}
ctx.mark_end();
}
},
},
Benchmark{
.title = "Paste from clipboard 4Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
.title = "Clipboard paste 4Ki",
.exec = [](BenchmarkContext& ctx) {
set_clipboard(ctx.hwnd, ctx.utf16_4Ki);
FlushConsoleInputBuffer(ctx.input);
for (auto& d : measurements)
while (ctx.wants_more())
{
const auto beg = query_perf_counter();
ctx.mark_beg();
SendMessageW(ctx.hwnd, WM_SYSCOMMAND, 0xFFF1 /* ID_CONSOLE_PASTE */, 0);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
ctx.mark_end();
FlushConsoleInputBuffer(ctx.input);
if (end >= ctx.time_limit)
{
break;
}
}
},
},
Benchmark{
.title = "ReadConsoleInputW clipboard 4Ki",
.exec = [](const BenchmarkContext& ctx, Measurements measurements) {
static constexpr DWORD cap = 16 * 1024;
#endif
};
const auto scratch = mem::get_scratch_arena(ctx.arena);
const auto buf = scratch.arena.push_uninitialized<INPUT_RECORD>(cap);
DWORD read;
static constexpr size_t s_benchmarks_count = _countof(s_benchmarks);
static constexpr size_t s_samples_min = 20;
static constexpr size_t s_samples_max = 1000;
set_clipboard(ctx.hwnd, ctx.utf16_4Ki);
FlushConsoleInputBuffer(ctx.input);
// 128 characters and 124 columns.
static constexpr std::string_view s_payload_utf8{ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna alΑΒΓΔΕ" };
// 128 characters and 128 columns.
static constexpr std::wstring_view s_payload_utf16{ L"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.ΑΒΓΔΕ" };
for (auto& d : measurements)
{
SendMessageW(ctx.hwnd, WM_SYSCOMMAND, 0xFFF1 /* ID_CONSOLE_PASTE */, 0);
const auto beg = query_perf_counter();
ReadConsoleInputW(ctx.input, buf, cap, &read);
debugAssert(read >= 1024 && read < cap);
const auto end = query_perf_counter();
d = perf_delta(beg, end);
if (end >= ctx.time_limit)
{
break;
}
}
// Element payloads for the non-text benchmarks. run_benchmarks_for_path() replicates each of them
// 4 * 1024 times via mem::repeat() to build attr_4Ki, char_4Ki and input_4Ki respectively.

// Attribute payload: all three foreground color bits set, no background bits.
static constexpr WORD s_payload_attr = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED;
// Cell payload: the letter 'A' combined with the attribute payload above.
static constexpr CHAR_INFO s_payload_char{
.Char = { .UnicodeChar = L'A' },
.Attributes = s_payload_attr,
};
// Input payload: a single key-down event for 'A' without any modifier state.
static constexpr INPUT_RECORD s_payload_record{
.EventType = KEY_EVENT,
.Event = {
.KeyEvent = {
.bKeyDown = TRUE,
.wRepeatCount = 1,
.wVirtualKeyCode = 'A',
.wVirtualScanCode = 0,
.uChar = 'A',
.dwControlKeyState = 0,
},
},
};
static constexpr size_t s_benchmarks_count = _countof(s_benchmarks);
// Each of these strings is 128 columns.
static constexpr std::string_view payload_utf8{ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labor眠い子猫はマグロ狩りの夢を見る" };
static constexpr std::wstring_view payload_utf16{ L"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labor眠い子猫はマグロ狩りの夢を見る" };
static bool print_warning();
static AccumulatedResults* prepare_results(mem::Arena& arena, std::span<const wchar_t*> paths);
@ -196,6 +474,7 @@ static std::span<Measurements> run_benchmarks_for_path(mem::Arena& arena, const
static void generate_html(mem::Arena& arena, const AccumulatedResults* results);
int wmain(int argc, const wchar_t* argv[])
try
{
if (argc < 2)
{
@ -203,6 +482,8 @@ int wmain(int argc, const wchar_t* argv[])
return 1;
}
check_spawn_conhost_dll(argc, argv);
const auto cp = GetConsoleCP();
const auto output_cp = GetConsoleOutputCP();
const auto restore_cp = wil::scope_exit([&]() {
@ -230,12 +511,29 @@ int wmain(int argc, const wchar_t* argv[])
{
const auto title = results->trace_names[trace_idx];
print_format(scratch.arena, "\r\n# %.*s\r\n", title.size(), title.data());
// I found that waiting between tests fixes weird bugs when launching very old conhost versions.
if (trace_idx != 0)
{
Sleep(5000);
}
results->measurments[trace_idx] = run_benchmarks_for_path(scratch.arena, paths[trace_idx]);
}
generate_html(scratch.arena, results);
return 0;
}
catch (const wil::ResultException& e)
{
printf("Exception: %08x\n", e.GetErrorCode());
return 1;
}
catch (...)
{
printf("Unknown exception\n");
return 1;
}
static bool print_warning()
{
@ -284,7 +582,7 @@ static AccumulatedResults* prepare_results(mem::Arena& arena, std::span<const wc
const auto attr = GetFileAttributesW(path);
if (attr == INVALID_FILE_ATTRIBUTES || (attr & FILE_ATTRIBUTE_DIRECTORY) != 0)
{
print_format(arena, "Invalid path: %s\r\n", path);
print_format(arena, "Invalid path: %S\r\n", path);
return nullptr;
}
}
@ -317,28 +615,19 @@ static AccumulatedResults* prepare_results(mem::Arena& arena, std::span<const wc
return results;
}
static void prepare_conhost(const BenchmarkContext& ctx, HWND parent_hwnd)
static void prepare_conhost(BenchmarkContext& ctx, HWND parent_hwnd)
{
const auto scratch = mem::get_scratch_arena(ctx.arena);
SetForegroundWindow(parent_hwnd);
// Ensure conhost is in a consistent state with identical fonts and window sizes,
// Ensure conhost is in a consistent state with identical fonts and window sizes.
SetConsoleCP(CP_UTF8);
SetConsoleOutputCP(CP_UTF8);
SetConsoleMode(ctx.output, ENABLE_PROCESSED_OUTPUT | ENABLE_WRAP_AT_EOL_OUTPUT | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
{
CONSOLE_SCREEN_BUFFER_INFOEX info{
.cbSize = sizeof(info),
.dwSize = { 120, 9001 },
.wAttributes = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED,
.srWindow = { 0, 0, 119, 29 },
.dwMaximumWindowSize = { 120, 30 },
.wPopupAttributes = FOREGROUND_BLUE | FOREGROUND_RED | BACKGROUND_BLUE | BACKGROUND_GREEN | BACKGROUND_RED | BACKGROUND_INTENSITY,
.ColorTable = { 0x0C0C0C, 0x1F0FC5, 0x0EA113, 0x009CC1, 0xDA3700, 0x981788, 0xDD963A, 0xCCCCCC, 0x767676, 0x5648E7, 0x0CC616, 0xA5F1F9, 0xFF783B, 0x9E00B4, 0xD6D661, 0xF2F2F2 },
};
SetConsoleScreenBufferInfoEx(ctx.output, &info);
}
// The ReadConsoleW test relies on ENABLE_LINE_INPUT not being set.
SetConsoleMode(ctx.input, ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
{
CONSOLE_FONT_INFOEX info{
.cbSize = sizeof(info),
@ -349,12 +638,22 @@ static void prepare_conhost(const BenchmarkContext& ctx, HWND parent_hwnd)
};
SetCurrentConsoleFontEx(ctx.output, FALSE, &info);
}
{
SMALL_RECT info{
.Left = 0,
.Top = 0,
.Right = s_viewport_size.X - 1,
.Bottom = s_viewport_size.Y - 1,
};
SetConsoleScreenBufferSize(ctx.output, s_buffer_size);
SetConsoleWindowInfo(ctx.output, TRUE, &info);
}
// Ensure conhost's backing TextBuffer is fully committed and initialized. There's currently no way
// to un-commit it and so not committing it now would be unfair for the first test that runs.
const auto buf = scratch.arena.push_uninitialized<char>(9001);
memset(buf, '\n', 9001);
WriteFile(ctx.output, buf, 9001, nullptr, nullptr);
const auto buf = scratch.arena.push_uninitialized<char>(s_buffer_size.Y);
memset(buf, '\n', s_buffer_size.Y);
WriteFile(ctx.output, buf, s_buffer_size.Y, nullptr, nullptr);
}
static std::span<Measurements> run_benchmarks_for_path(mem::Arena& arena, const wchar_t* path)
@ -364,7 +663,7 @@ static std::span<Measurements> run_benchmarks_for_path(mem::Arena& arena, const
const auto parent_hwnd = GetConsoleWindow();
const auto freq = query_perf_freq();
const auto handle = spawn_conhost(scratch.arena, path);
auto handle = spawn_conhost(scratch.arena, path);
set_active_connection(handle.connection.get());
const auto print_with_parent_connection = [&](auto&&... args) {
@ -377,45 +676,56 @@ static std::span<Measurements> run_benchmarks_for_path(mem::Arena& arena, const
.hwnd = GetConsoleWindow(),
.input = GetStdHandle(STD_INPUT_HANDLE),
.output = GetStdHandle(STD_OUTPUT_HANDLE),
.arena = scratch.arena,
.utf8_4Ki = mem::repeat_string(scratch.arena, payload_utf8, 4 * 1024 / 128),
.utf8_128Ki = mem::repeat_string(scratch.arena, payload_utf8, 128 * 1024 / 128),
.utf16_4Ki = mem::repeat_string(scratch.arena, payload_utf16, 4 * 1024 / 128),
.utf16_128Ki = mem::repeat_string(scratch.arena, payload_utf16, 128 * 1024 / 128),
.utf8_4Ki = mem::repeat(scratch.arena, s_payload_utf8, 4 * 1024 / s_payload_utf8.size()),
.utf8_128Ki = mem::repeat(scratch.arena, s_payload_utf8, 128 * 1024 / s_payload_utf8.size()),
.utf16_4Ki = mem::repeat(scratch.arena, s_payload_utf16, 4 * 1024 / s_payload_utf16.size()),
.utf16_128Ki = mem::repeat(scratch.arena, s_payload_utf16, 128 * 1024 / s_payload_utf16.size()),
.attr_4Ki = mem::repeat(scratch.arena, s_payload_attr, 4 * 1024),
.char_4Ki = mem::repeat(scratch.arena, s_payload_char, 4 * 1024),
.input_4Ki = mem::repeat(scratch.arena, s_payload_record, 4 * 1024),
.m_measurements = scratch.arena.push_uninitialized_span<int32_t>(4 * 1024 * 1024),
};
prepare_conhost(ctx, parent_hwnd);
Sleep(1000);
const auto results = arena.push_uninitialized_span<Measurements>(s_benchmarks_count);
for (auto& measurements : results)
{
measurements = arena.push_zeroed_span<int32_t>(2048);
}
for (size_t bench_idx = 0; bench_idx < s_benchmarks_count; ++bench_idx)
{
const auto& bench = s_benchmarks[bench_idx];
auto& measurements = results[bench_idx];
print_with_parent_connection("- %s", bench.title);
// Warmup for 0.1s.
// Warmup for 0.1s max.
WriteConsoleW(ctx.output, L"\033c", 2, nullptr, nullptr);
ctx.time_limit = query_perf_counter() + freq / 10;
bench.exec(ctx, measurements);
ctx.m_measurements_off = 0;
ctx.m_time_limit = query_perf_counter() + freq / 10;
bench.exec(ctx);
// Actual run for 1s.
// Actual run for 3s max.
WriteConsoleW(ctx.output, L"\033c", 2, nullptr, nullptr);
ctx.time_limit = query_perf_counter() + freq;
bench.exec(ctx, measurements);
ctx.m_measurements_off = 0;
ctx.m_time_limit = query_perf_counter() + freq * 3;
bench.exec(ctx);
// Trim off trailing 0s that resulted from the time_limit.
size_t len = measurements.size();
for (; len > 0 && measurements[len - 1] == 0; --len)
const auto measurements = arena.push_uninitialized_span<int32_t>(std::min(ctx.m_measurements_off, s_samples_max));
if (ctx.m_measurements_off <= s_samples_max)
{
mem::copy(measurements.data(), ctx.m_measurements.data(), ctx.m_measurements_off);
}
measurements = measurements.subspan(0, len);
else
{
const auto total = ctx.m_measurements_off;
for (size_t i = 0; i < s_samples_max; ++i)
{
measurements[i] = ctx.m_measurements[i * total / s_samples_max];
}
}
results[bench_idx] = measurements;
print_with_parent_connection(", done\r\n");
}
@ -463,7 +773,7 @@ static void generate_html(mem::Arena& arena, const AccumulatedResults* results)
</head>
<body>
<script src="https://cdn.plot.ly/plotly-2.27.0.min.js" charset="utf-8"></script>
<script src="https://cdn.plot.ly/plotly-2.32.0.min.js" charset="utf-8"></script>
<script>
)");
@ -491,8 +801,8 @@ static void generate_html(mem::Arena& arena, const AccumulatedResults* results)
// Console calls have a high tail latency. Whatever the reason is (it's probably scheduling latency),
// it's not particularly interesting at the moment when the median latency is intolerably high anyway.
const auto p25 = measurements[(measurements.size() * 25 + 50) / 100];
const auto p75 = measurements[(measurements.size() * 75 + 50) / 100];
const auto p25 = measurements[(measurements.size() * 250 + 5) / 1000];
const auto p75 = measurements[(measurements.size() * 750 + 5) / 1000];
const auto iqr3 = (p75 - p25) * 3;
const auto outlier_max = p75 + iqr3;
@ -503,7 +813,7 @@ static void generate_html(mem::Arena& arena, const AccumulatedResults* results)
{
}
for (auto it = beg; it < end; ++it)
for (auto it = beg; it != end; ++it)
{
char buffer[32];
// The end pointer is derived from the buffer's actual size so that
// std::to_chars can never be told to write past the array.
const auto res = std::to_chars(&buffer[0], &buffer[0] + sizeof(buffer), *it * sec_per_tick, std::chars_format::scientific, 3);
@ -553,3 +863,33 @@ static void generate_html(mem::Arena& arena, const AccumulatedResults* results)
</html>
)");
}
// Tells a benchmark loop whether it should record another sample.
// At least s_samples_min samples are always requested. Beyond that, sampling continues
// until the measurement buffer is full or the most recent timestamp reaches m_time_limit.
bool BenchmarkContext::wants_more() const
{
    if (m_measurements_off < s_samples_min)
    {
        return true;
    }

    const auto has_capacity = m_measurements_off < m_measurements.size();
    return has_capacity && m_time < m_time_limit;
}
// Starts a timed region by remembering the current performance counter value.
void BenchmarkContext::mark_beg()
{
    const auto now = query_perf_counter();
    m_time = now;
}
// Ends a timed region: appends the ticks elapsed since m_time was last set
// (narrowed to 32 bits) to the measurement buffer and stores the current
// timestamp in m_time, which wants_more() compares against m_time_limit.
void BenchmarkContext::mark_end()
{
    const auto now = query_perf_counter();
    const auto elapsed = now - m_time;

    m_measurements[m_measurements_off] = static_cast<int32_t>(elapsed);
    ++m_measurements_off;

    m_time = now;
}
// Advances the context's pseudo random number generator by one step of the form
// state = state * multiplier + increment, and returns the new state.
size_t BenchmarkContext::rand()
{
    // These constants are the same as used by the PCG family of random number generators.
    // The 32-Bit version is described in https://doi.org/10.1090/S0025-5718-99-00996-5, Table 5.
    // The 64-Bit version is the multiplier as used by Donald Knuth for MMIX and found by C. E. Haynes.
#ifdef _WIN64
    constexpr size_t multiplier = UINT64_C(6364136223846793005);
    constexpr size_t increment = UINT64_C(1442695040888963407);
#else
    constexpr size_t multiplier = UINT32_C(747796405);
    constexpr size_t increment = UINT32_C(2891336453);
#endif

    m_rng_state = m_rng_state * multiplier + increment;
    return m_rng_state;
}

View File

@ -2,7 +2,7 @@
// clang-format off
#ifdef NDEBUG
#define debugAssert(cond) ((void)0)
#define debugAssert(cond) if constexpr (false) { if (!(cond)); }
#else
#define debugAssert(cond) if (!(cond)) __debugbreak()
#endif