mirror of
https://github.com/microsoft/terminal.git
synced 2025-12-10 00:48:23 -06:00
I'm planning to use the `dark2` color palette in the upcoming cooked read rewrite as a debug aid to paint dirty regions. Now that it's going to be used in more than one place I figured it may be time to properly add it to the NOTICE file even if it still won't be shipped with the final product.
2343 lines
97 KiB
C++
2343 lines
97 KiB
C++
// Copyright (c) Microsoft Corporation.
|
|
// Licensed under the MIT license.
|
|
|
|
#include "pch.h"
|
|
#include "BackendD3D.h"
|
|
|
|
#include <til/unicode.h>
|
|
|
|
#include <custom_shader_ps.h>
|
|
#include <custom_shader_vs.h>
|
|
#include <shader_ps.h>
|
|
#include <shader_vs.h>
|
|
|
|
#include "BuiltinGlyphs.h"
|
|
#include "dwrite.h"
|
|
#include "wic.h"
|
|
#include "../../types/inc/ColorFix.hpp"
|
|
|
|
#if ATLAS_DEBUG_SHOW_DIRTY || ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS
|
|
#include <til/colorbrewer.h>
|
|
#endif
|
|
|
|
TIL_FAST_MATH_BEGIN
|
|
|
|
#pragma warning(disable : 4100) // '...': unreferenced formal parameter
|
|
#pragma warning(disable : 26440) // Function '...' can be declared 'noexcept'(f.6).
|
|
// This code packs various data into smaller-than-int types to save both CPU and GPU memory. This warning would force
|
|
// us to add dozens upon dozens of gsl::narrow_cast<>s throughout the file which is more annoying than helpful.
|
|
#pragma warning(disable : 4242) // '=': conversion from '...' to '...', possible loss of data
|
|
#pragma warning(disable : 4244) // 'initializing': conversion from '...' to '...', possible loss of data
|
|
#pragma warning(disable : 4267) // 'argument': conversion from '...' to '...', possible loss of data
|
|
#pragma warning(disable : 4838) // conversion from '...' to '...' requires a narrowing conversion
|
|
#pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1).
|
|
// Disable a bunch of warnings which get in the way of writing performant code.
|
|
#pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23).
|
|
#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4).
|
|
#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...].
|
|
#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
|
|
#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2).
|
|
#pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1).
|
|
|
|
// Zero-initializing large arrays can cost more than the (cheap) functions that use them.
// These two macros bracket a region in which warning 26494 is disabled, and restore
// the previous warning state afterwards.
#define ALLOW_UNINITIALIZED_BEGIN \
    _Pragma("warning(push)")      \
    _Pragma("warning(disable : 26494)")
#define ALLOW_UNINITIALIZED_END _Pragma("warning(pop)")
|
|
|
|
using namespace Microsoft::Console::Render::Atlas;
|
|
|
|
static constexpr D2D1_MATRIX_3X2_F identityTransform{ .m11 = 1, .m22 = 1 };
|
|
static constexpr D2D1_COLOR_F whiteColor{ 1, 1, 1, 1 };
|
|
|
|
// Returns the value reported by QueryPerformanceFrequency(), bit-cast to u64.
static u64 queryPerfFreq() noexcept
{
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency(&frequency);
    return std::bit_cast<u64>(frequency.QuadPart);
}
|
|
|
|
// Returns the value reported by QueryPerformanceCounter(), bit-cast to u64.
static u64 queryPerfCount() noexcept
{
    LARGE_INTEGER ticks;
    QueryPerformanceCounter(&ticks);
    return std::bit_cast<u64>(ticks.QuadPart);
}
|
|
|
|
// Creates the device objects that are set up once per backend instance: the built-in
// vertex/pixel shaders, the input layout, the quad vertex/index buffers, both constant
// buffers and the blend state. Any failing D3D call throws via THROW_IF_FAILED.
// With ATLAS_DEBUG_SHADER_HOT_RELOAD it additionally starts watching the source directory.
BackendD3D::BackendD3D(const RenderingPayload& p)
{
    THROW_IF_FAILED(p.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _vertexShader.addressof()));
    THROW_IF_FAILED(p.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _pixelShader.addressof()));

    // Input layout: slot 0 carries the per-vertex quad corner, slot 1 the per-instance QuadInstance fields.
    {
        static constexpr D3D11_INPUT_ELEMENT_DESC inputElements[]{
            { "SV_Position", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
            { "shadingType", 0, DXGI_FORMAT_R16_UINT, 1, offsetof(QuadInstance, shadingType), D3D11_INPUT_PER_INSTANCE_DATA, 1 },
            { "renditionScale", 0, DXGI_FORMAT_R8G8_UINT, 1, offsetof(QuadInstance, renditionScale), D3D11_INPUT_PER_INSTANCE_DATA, 1 },
            { "position", 0, DXGI_FORMAT_R16G16_SINT, 1, offsetof(QuadInstance, position), D3D11_INPUT_PER_INSTANCE_DATA, 1 },
            { "size", 0, DXGI_FORMAT_R16G16_UINT, 1, offsetof(QuadInstance, size), D3D11_INPUT_PER_INSTANCE_DATA, 1 },
            { "texcoord", 0, DXGI_FORMAT_R16G16_UINT, 1, offsetof(QuadInstance, texcoord), D3D11_INPUT_PER_INSTANCE_DATA, 1 },
            { "color", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 1, offsetof(QuadInstance, color), D3D11_INPUT_PER_INSTANCE_DATA, 1 },
        };
        THROW_IF_FAILED(p.device->CreateInputLayout(&inputElements[0], std::size(inputElements), &shader_vs[0], sizeof(shader_vs), _inputLayout.addressof()));
    }

    // Vertex buffer: the four corners of a unit quad.
    {
        static constexpr f32x2 quadCorners[]{
            { 0, 0 },
            { 1, 0 },
            { 1, 1 },
            { 0, 1 },
        };
        static constexpr D3D11_SUBRESOURCE_DATA contents{ &quadCorners[0] };
        static constexpr D3D11_BUFFER_DESC bufferDesc{
            .ByteWidth = sizeof(quadCorners),
            .Usage = D3D11_USAGE_IMMUTABLE,
            .BindFlags = D3D11_BIND_VERTEX_BUFFER,
        };
        THROW_IF_FAILED(p.device->CreateBuffer(&bufferDesc, &contents, _vertexBuffer.addressof()));
    }

    // Index buffer: two triangles referencing the corners above.
    {
        static constexpr u16 quadIndices[]{
            0, // { 0, 0 }
            1, // { 1, 0 }
            2, // { 1, 1 }
            2, // { 1, 1 }
            3, // { 0, 1 }
            0, // { 0, 0 }
        };
        static constexpr D3D11_SUBRESOURCE_DATA contents{ &quadIndices[0] };
        static constexpr D3D11_BUFFER_DESC bufferDesc{
            .ByteWidth = sizeof(quadIndices),
            .Usage = D3D11_USAGE_IMMUTABLE,
            .BindFlags = D3D11_BIND_INDEX_BUFFER,
        };
        THROW_IF_FAILED(p.device->CreateBuffer(&bufferDesc, &contents, _indexBuffer.addressof()));
    }

    // Constant buffer for the vertex shader (contents are uploaded later).
    {
        static constexpr D3D11_BUFFER_DESC bufferDesc{
            .ByteWidth = sizeof(VSConstBuffer),
            .Usage = D3D11_USAGE_DEFAULT,
            .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
        };
        THROW_IF_FAILED(p.device->CreateBuffer(&bufferDesc, nullptr, _vsConstantBuffer.addressof()));
    }

    // Constant buffer for the pixel shader (contents are uploaded later).
    {
        static constexpr D3D11_BUFFER_DESC bufferDesc{
            .ByteWidth = sizeof(PSConstBuffer),
            .Usage = D3D11_USAGE_DEFAULT,
            .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
        };
        THROW_IF_FAILED(p.device->CreateBuffer(&bufferDesc, nullptr, _psConstantBuffer.addressof()));
    }

    // Blend state.
    {
        // ClearType blending ends with a per-channel lerp between the premultiplied alpha
        // background and the straight alpha foreground, using the 3 RGB weights:
        //   lerp(background, foreground, weights)
        //   = background * (1 - weights) + foreground * weights
        //
        // Dual source color blending could express that directly:
        //   .SrcBlend  = D3D11_BLEND_SRC1_COLOR      (foreground * weights)
        //   .DestBlend = D3D11_BLEND_INV_SRC1_COLOR  (background * (1 - weights))
        //   .BlendOp   = D3D11_BLEND_OP_ADD
        //
        // However, the same blend state must also handle plain "source over" alpha
        // blending (SHADING_TYPE_PASSTHROUGH):
        //   background * (1 - alpha) + foreground
        //
        // Hence .SrcBlend is D3D11_BLEND_ONE instead, which means that the
        // foreground has to be multiplied with the weights by ourselves.
        static constexpr D3D11_BLEND_DESC blendDesc{
            .RenderTarget = { {
                .BlendEnable = TRUE,
                .SrcBlend = D3D11_BLEND_ONE,
                .DestBlend = D3D11_BLEND_INV_SRC1_COLOR,
                .BlendOp = D3D11_BLEND_OP_ADD,
                .SrcBlendAlpha = D3D11_BLEND_ONE,
                .DestBlendAlpha = D3D11_BLEND_INV_SRC1_ALPHA,
                .BlendOpAlpha = D3D11_BLEND_OP_ADD,
                .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
            } },
        };
        THROW_IF_FAILED(p.device->CreateBlendState(&blendDesc, _blendState.addressof()));
    }

#if ATLAS_DEBUG_SHADER_HOT_RELOAD
    _sourceDirectory = std::filesystem::path{ __FILE__ }.parent_path();
    _sourceCodeWatcher = wil::make_folder_change_reader_nothrow(_sourceDirectory.c_str(), false, wil::FolderChangeEvents::FileName | wil::FolderChangeEvents::LastWriteTime, [this](wil::FolderChangeEvent, PCWSTR path) {
        if (!til::ends_with(path, L".hlsl"))
        {
            return;
        }

        // Only schedule a reload (100ms from now) if none is pending yet, i.e. if the stored value is still INT64_MAX.
        auto expected = INT64_MAX;
        const auto reloadAt = std::chrono::steady_clock::now() + std::chrono::milliseconds(100);
        _sourceCodeInvalidationTime.compare_exchange_strong(expected, reloadAt.time_since_epoch().count(), std::memory_order_relaxed);
    });
#endif
}
|
|
|
|
#pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21).
|
|
BackendD3D::~BackendD3D()
|
|
{
|
|
// In case an exception is thrown for some reason between BeginDraw() and EndDraw()
|
|
// we still technically need to call EndDraw() before releasing any resources.
|
|
if (_d2dBeganDrawing)
|
|
{
|
|
#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function '...' which may throw exceptions (f.6).
|
|
LOG_IF_FAILED(_d2dRenderTarget->EndDraw());
|
|
}
|
|
}
|
|
|
|
void BackendD3D::ReleaseResources() noexcept
|
|
{
|
|
_renderTargetView.reset();
|
|
_customRenderTargetView.reset();
|
|
// Ensure _handleSettingsUpdate() is called so that _renderTarget gets recreated.
|
|
_generation = {};
|
|
}
|
|
|
|
// Renders one frame: refreshes settings-dependent state if the generation changed,
// binds the render target, runs the individual draw passes, flushes the accumulated
// quads and finally executes the custom shader, if there is one.
void BackendD3D::Render(RenderingPayload& p)
{
    const auto settingsChanged = _generation != p.s.generation();
    if (settingsChanged)
    {
        _handleSettingsUpdate(p);
    }

    _debugUpdateShaders(p);

    // Present() unbinds the render target, so it needs to be bound again on every frame.
    // With a custom shader we draw into its offscreen target instead of the swap chain.
    const auto target = _customRenderTargetView ? _customRenderTargetView.addressof() : _renderTargetView.addressof();
    p.deviceContext->OMSetRenderTargets(1, target, nullptr);

    // Invalidating the render target helps with spotting invalid quad instances and Present1() bugs.
#if ATLAS_DEBUG_SHOW_DIRTY || ATLAS_DEBUG_DUMP_RENDER_TARGET
    {
        static constexpr f32 clearColor[4]{};
        p.deviceContext->ClearView(_renderTargetView.get(), &clearColor[0], nullptr, 0);
    }
#endif

    _drawBackground(p);
    _drawCursorBackground(p);
    _drawText(p);
    _drawSelection(p);
    _debugShowDirty(p);
    _flushQuads(p);

    if (_customPixelShader)
    {
        _executeCustomShader(p);
    }

    _debugDumpRenderTarget(p);
}
|
|
|
|
bool BackendD3D::RequiresContinuousRedraw() noexcept
|
|
{
|
|
return _requiresContinuousRedraw;
|
|
}
|
|
|
|
void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p)
|
|
{
|
|
if (!_renderTargetView)
|
|
{
|
|
wil::com_ptr<ID3D11Texture2D> buffer;
|
|
THROW_IF_FAILED(p.swapChain.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), reinterpret_cast<void**>(buffer.addressof())));
|
|
THROW_IF_FAILED(p.device->CreateRenderTargetView(buffer.get(), nullptr, _renderTargetView.put()));
|
|
}
|
|
|
|
const auto fontChanged = _fontGeneration != p.s->font.generation();
|
|
const auto miscChanged = _miscGeneration != p.s->misc.generation();
|
|
const auto cellCountChanged = _viewportCellCount != p.s->viewportCellCount;
|
|
|
|
if (fontChanged)
|
|
{
|
|
_updateFontDependents(p);
|
|
}
|
|
if (miscChanged)
|
|
{
|
|
_recreateCustomShader(p);
|
|
}
|
|
if (cellCountChanged)
|
|
{
|
|
_recreateBackgroundColorBitmap(p);
|
|
}
|
|
|
|
// Similar to _renderTargetView above, we might have to recreate the _customRenderTargetView whenever _swapChainManager
|
|
// resets it. We only do it after calling _recreateCustomShader however, since that sets the _customPixelShader.
|
|
if (_customPixelShader && !_customRenderTargetView)
|
|
{
|
|
_recreateCustomRenderTargetView(p);
|
|
}
|
|
|
|
_recreateConstBuffer(p);
|
|
_setupDeviceContextState(p);
|
|
|
|
_generation = p.s.generation();
|
|
_fontGeneration = p.s->font.generation();
|
|
_miscGeneration = p.s->misc.generation();
|
|
_targetSize = p.s->targetSize;
|
|
_viewportCellCount = p.s->viewportCellCount;
|
|
}
|
|
|
|
void BackendD3D::_updateFontDependents(const RenderingPayload& p)
|
|
{
|
|
const auto& font = *p.s->font;
|
|
|
|
// Curlyline is drawn with a desired height relative to the font size. The
|
|
// baseline of curlyline is at the middle of singly underline. When there's
|
|
// limited space to draw a curlyline, we apply a limit on the peak height.
|
|
{
|
|
const int cellHeight = font.cellSize.y;
|
|
const int duTop = font.doubleUnderline[0].position;
|
|
const int duBottom = font.doubleUnderline[1].position;
|
|
const int duHeight = font.doubleUnderline[0].height;
|
|
|
|
// This gives it the same position and height as our double-underline. There's no particular reason for that, apart from
|
|
// it being simple to implement and robust against more peculiar fonts with unusually large/small descenders, etc.
|
|
// We still need to ensure though that it doesn't clip out of the cellHeight at the bottom, which is why `position` has a min().
|
|
const auto height = std::max(3, duBottom + duHeight - duTop);
|
|
const auto position = std::min(duTop, cellHeight - height - duHeight);
|
|
|
|
_curlyLineHalfHeight = height * 0.5f;
|
|
_curlyUnderline.position = gsl::narrow_cast<u16>(position);
|
|
_curlyUnderline.height = gsl::narrow_cast<u16>(height);
|
|
}
|
|
|
|
DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put());
|
|
// Clearing the atlas requires BeginDraw(), which is expensive. Defer this until we need Direct2D anyways.
|
|
_fontChangedResetGlyphAtlas = true;
|
|
_textShadingType = font.antialiasingMode == AntialiasingMode::ClearType ? ShadingType::TextClearType : ShadingType::TextGrayscale;
|
|
|
|
// _ligatureOverhangTriggerLeft/Right are essentially thresholds for a glyph's width at
|
|
// which point we consider it wider than allowed and "this looks like a coding ligature".
|
|
// See _drawTextOverlapSplit for more information about what this does.
|
|
{
|
|
// No ligatures -> No thresholds.
|
|
auto ligaturesDisabled = false;
|
|
for (const auto& feature : font.fontFeatures)
|
|
{
|
|
if (feature.nameTag == DWRITE_FONT_FEATURE_TAG_STANDARD_LIGATURES)
|
|
{
|
|
ligaturesDisabled = !feature.parameter;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (ligaturesDisabled)
|
|
{
|
|
_ligatureOverhangTriggerLeft = til::CoordTypeMin;
|
|
_ligatureOverhangTriggerRight = til::CoordTypeMax;
|
|
}
|
|
else
|
|
{
|
|
const auto halfCellWidth = font.cellSize.x / 2;
|
|
_ligatureOverhangTriggerLeft = -halfCellWidth;
|
|
_ligatureOverhangTriggerRight = font.advanceWidth + halfCellWidth;
|
|
}
|
|
}
|
|
|
|
if (_d2dRenderTarget)
|
|
{
|
|
_d2dRenderTargetUpdateFontSettings(p);
|
|
}
|
|
|
|
_softFontBitmap.reset();
|
|
}
|
|
|
|
void BackendD3D::_d2dRenderTargetUpdateFontSettings(const RenderingPayload& p) const noexcept
|
|
{
|
|
const auto& font = *p.s->font;
|
|
_d2dRenderTarget->SetDpi(font.dpi, font.dpi);
|
|
_d2dRenderTarget->SetTextAntialiasMode(static_cast<D2D1_TEXT_ANTIALIAS_MODE>(font.antialiasingMode));
|
|
}
|
|
|
|
void BackendD3D::_recreateCustomShader(const RenderingPayload& p)
|
|
{
|
|
_customRenderTargetView.reset();
|
|
_customOffscreenTexture.reset();
|
|
_customOffscreenTextureView.reset();
|
|
_customVertexShader.reset();
|
|
_customPixelShader.reset();
|
|
_customShaderConstantBuffer.reset();
|
|
_customShaderSamplerState.reset();
|
|
_customShaderTexture.reset();
|
|
_customShaderTextureView.reset();
|
|
_requiresContinuousRedraw = false;
|
|
|
|
if (!p.s->misc->customPixelShaderPath.empty())
|
|
{
|
|
const char* target = nullptr;
|
|
switch (p.device->GetFeatureLevel())
|
|
{
|
|
case D3D_FEATURE_LEVEL_10_0:
|
|
target = "ps_4_0";
|
|
break;
|
|
case D3D_FEATURE_LEVEL_10_1:
|
|
target = "ps_4_1";
|
|
break;
|
|
default:
|
|
target = "ps_5_0";
|
|
break;
|
|
}
|
|
|
|
static constexpr auto flags =
|
|
D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR
|
|
#ifdef NDEBUG
|
|
| D3DCOMPILE_OPTIMIZATION_LEVEL3;
|
|
#else
|
|
// Only enable strictness and warnings in DEBUG mode
|
|
// as these settings makes it very difficult to develop
|
|
// shaders as windows terminal is not telling the user
|
|
// what's wrong, windows terminal just fails.
|
|
// Keep it in DEBUG mode to catch errors in shaders
|
|
// shipped with windows terminal
|
|
| D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
|
|
#endif
|
|
|
|
wil::com_ptr<ID3DBlob> error;
|
|
wil::com_ptr<ID3DBlob> blob;
|
|
const auto hr = D3DCompileFromFile(
|
|
/* pFileName */ p.s->misc->customPixelShaderPath.c_str(),
|
|
/* pDefines */ nullptr,
|
|
/* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE,
|
|
/* pEntrypoint */ "main",
|
|
/* pTarget */ target,
|
|
/* Flags1 */ flags,
|
|
/* Flags2 */ 0,
|
|
/* ppCode */ blob.addressof(),
|
|
/* ppErrorMsgs */ error.addressof());
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
THROW_IF_FAILED(p.device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, _customPixelShader.addressof()));
|
|
|
|
// Try to determine whether the shader uses the Time variable
|
|
wil::com_ptr<ID3D11ShaderReflection> reflector;
|
|
if (SUCCEEDED_LOG(D3DReflect(blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(reflector.addressof()))))
|
|
{
|
|
// Depending on the version of the d3dcompiler_*.dll, the next two functions either return nullptr
|
|
// on failure or an instance of CInvalidSRConstantBuffer or CInvalidSRVariable respectively,
|
|
// which cause GetDesc() to return E_FAIL. In other words, we have to assume that any failure in the
|
|
// next few lines indicates that the cbuffer is entirely unused (--> _requiresContinuousRedraw=false).
|
|
if (ID3D11ShaderReflectionConstantBuffer* constantBufferReflector = reflector->GetConstantBufferByIndex(0)) // shader buffer
|
|
{
|
|
if (ID3D11ShaderReflectionVariable* variableReflector = constantBufferReflector->GetVariableByIndex(0)) // time
|
|
{
|
|
D3D11_SHADER_VARIABLE_DESC variableDescriptor;
|
|
if (SUCCEEDED(variableReflector->GetDesc(&variableDescriptor)))
|
|
{
|
|
// only if time is used
|
|
_requiresContinuousRedraw = WI_IsFlagSet(variableDescriptor.uFlags, D3D_SVF_USED);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Unless we can determine otherwise, assume this shader requires evaluation every frame
|
|
_requiresContinuousRedraw = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (error)
|
|
{
|
|
LOG_HR_MSG(hr, "%.*hs", static_cast<int>(error->GetBufferSize()), static_cast<char*>(error->GetBufferPointer()));
|
|
}
|
|
else
|
|
{
|
|
LOG_HR(hr);
|
|
}
|
|
if (p.warningCallback)
|
|
{
|
|
p.warningCallback(D2DERR_SHADER_COMPILE_FAILED, p.s->misc->customPixelShaderPath);
|
|
}
|
|
}
|
|
|
|
if (!p.s->misc->customPixelShaderImagePath.empty())
|
|
{
|
|
try
|
|
{
|
|
WIC::LoadTextureFromFile(p.device.get(), p.s->misc->customPixelShaderImagePath.c_str(), _customShaderTexture.addressof(), _customShaderTextureView.addressof());
|
|
}
|
|
catch (...)
|
|
{
|
|
LOG_CAUGHT_EXCEPTION();
|
|
_customPixelShader.reset();
|
|
if (p.warningCallback)
|
|
{
|
|
p.warningCallback(D2DERR_SHADER_COMPILE_FAILED, p.s->misc->customPixelShaderImagePath);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (p.s->misc->useRetroTerminalEffect)
|
|
{
|
|
THROW_IF_FAILED(p.device->CreatePixelShader(&custom_shader_ps[0], sizeof(custom_shader_ps), nullptr, _customPixelShader.put()));
|
|
}
|
|
|
|
if (_customPixelShader)
|
|
{
|
|
THROW_IF_FAILED(p.device->CreateVertexShader(&custom_shader_vs[0], sizeof(custom_shader_vs), nullptr, _customVertexShader.put()));
|
|
|
|
{
|
|
static constexpr D3D11_BUFFER_DESC desc{
|
|
.ByteWidth = sizeof(CustomConstBuffer),
|
|
.Usage = D3D11_USAGE_DYNAMIC,
|
|
.BindFlags = D3D11_BIND_CONSTANT_BUFFER,
|
|
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
|
|
};
|
|
THROW_IF_FAILED(p.device->CreateBuffer(&desc, nullptr, _customShaderConstantBuffer.put()));
|
|
}
|
|
|
|
{
|
|
static constexpr D3D11_SAMPLER_DESC desc{
|
|
.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR,
|
|
.AddressU = D3D11_TEXTURE_ADDRESS_BORDER,
|
|
.AddressV = D3D11_TEXTURE_ADDRESS_BORDER,
|
|
.AddressW = D3D11_TEXTURE_ADDRESS_BORDER,
|
|
.MaxAnisotropy = 1,
|
|
.ComparisonFunc = D3D11_COMPARISON_ALWAYS,
|
|
.MaxLOD = D3D11_FLOAT32_MAX,
|
|
};
|
|
THROW_IF_FAILED(p.device->CreateSamplerState(&desc, _customShaderSamplerState.put()));
|
|
}
|
|
|
|
// Since floats are imprecise we need to constrain the time value into a range that can be accurately represented.
|
|
// Assuming a monitor refresh rate of 1000 Hz, we can still easily represent 1000 seconds accurately (roughly 16 minutes).
|
|
// 10000 seconds would already result in a 50% error. So to avoid this, we use queryPerfCount() modulo _customShaderPerfTickMod.
|
|
// The use of a power of 10 is intentional, because shaders are often periodic and this makes any decimal multiplier up to 3 fractional
|
|
// digits not break the periodicity. For instance, with a wraparound of 1000 seconds sin(1.234*x) is still perfectly periodic.
|
|
const auto freq = queryPerfFreq();
|
|
_customShaderPerfTickMod = freq * 1000;
|
|
_customShaderSecsPerPerfTick = 1.0f / freq;
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_recreateCustomRenderTargetView(const RenderingPayload& p)
|
|
{
|
|
// Avoid memory usage spikes by releasing memory first.
|
|
_customOffscreenTexture.reset();
|
|
_customOffscreenTextureView.reset();
|
|
|
|
const D3D11_TEXTURE2D_DESC desc{
|
|
.Width = p.s->targetSize.x,
|
|
.Height = p.s->targetSize.y,
|
|
.MipLevels = 1,
|
|
.ArraySize = 1,
|
|
.Format = DXGI_FORMAT_B8G8R8A8_UNORM,
|
|
.SampleDesc = { 1, 0 },
|
|
.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET,
|
|
};
|
|
THROW_IF_FAILED(p.device->CreateTexture2D(&desc, nullptr, _customOffscreenTexture.addressof()));
|
|
THROW_IF_FAILED(p.device->CreateShaderResourceView(_customOffscreenTexture.get(), nullptr, _customOffscreenTextureView.addressof()));
|
|
THROW_IF_FAILED(p.device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _customRenderTargetView.addressof()));
|
|
}
|
|
|
|
void BackendD3D::_recreateBackgroundColorBitmap(const RenderingPayload& p)
|
|
{
|
|
// Avoid memory usage spikes by releasing memory first.
|
|
_backgroundBitmap.reset();
|
|
_backgroundBitmapView.reset();
|
|
|
|
const D3D11_TEXTURE2D_DESC desc{
|
|
.Width = p.s->viewportCellCount.x,
|
|
.Height = p.s->viewportCellCount.y,
|
|
.MipLevels = 1,
|
|
.ArraySize = 1,
|
|
.Format = DXGI_FORMAT_R8G8B8A8_UNORM,
|
|
.SampleDesc = { 1, 0 },
|
|
.Usage = D3D11_USAGE_DYNAMIC,
|
|
.BindFlags = D3D11_BIND_SHADER_RESOURCE,
|
|
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
|
|
};
|
|
THROW_IF_FAILED(p.device->CreateTexture2D(&desc, nullptr, _backgroundBitmap.addressof()));
|
|
THROW_IF_FAILED(p.device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof()));
|
|
_backgroundBitmapGeneration = {};
|
|
}
|
|
|
|
void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) const
|
|
{
|
|
{
|
|
VSConstBuffer data{};
|
|
data.positionScale = { 2.0f / p.s->targetSize.x, -2.0f / p.s->targetSize.y };
|
|
p.deviceContext->UpdateSubresource(_vsConstantBuffer.get(), 0, nullptr, &data, 0, 0);
|
|
}
|
|
{
|
|
PSConstBuffer data{};
|
|
data.backgroundColor = colorFromU32Premultiply<f32x4>(p.s->misc->backgroundColor);
|
|
data.backgroundCellSize = { static_cast<f32>(p.s->font->cellSize.x), static_cast<f32>(p.s->font->cellSize.y) };
|
|
data.backgroundCellCount = { static_cast<f32>(p.s->viewportCellCount.x), static_cast<f32>(p.s->viewportCellCount.y) };
|
|
DWrite_GetGammaRatios(_gamma, data.gammaRatios);
|
|
data.enhancedContrast = p.s->font->antialiasingMode == AntialiasingMode::ClearType ? _cleartypeEnhancedContrast : _grayscaleEnhancedContrast;
|
|
data.underlineWidth = p.s->font->underline.height;
|
|
data.doubleUnderlineWidth = p.s->font->doubleUnderline[0].height;
|
|
data.curlyLineHalfHeight = _curlyLineHalfHeight;
|
|
data.shadedGlyphDotSize = std::max(1.0f, std::roundf(std::max(p.s->font->cellSize.x / 16.0f, p.s->font->cellSize.y / 32.0f)));
|
|
p.deviceContext->UpdateSubresource(_psConstantBuffer.get(), 0, nullptr, &data, 0, 0);
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_setupDeviceContextState(const RenderingPayload& p)
|
|
{
|
|
// IA: Input Assembler
|
|
ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() };
|
|
static constexpr UINT strides[]{ sizeof(f32x2), sizeof(QuadInstance) };
|
|
static constexpr UINT offsets[]{ 0, 0 };
|
|
p.deviceContext->IASetIndexBuffer(_indexBuffer.get(), DXGI_FORMAT_R16_UINT, 0);
|
|
p.deviceContext->IASetInputLayout(_inputLayout.get());
|
|
p.deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
p.deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]);
|
|
|
|
// VS: Vertex Shader
|
|
p.deviceContext->VSSetShader(_vertexShader.get(), nullptr, 0);
|
|
p.deviceContext->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof());
|
|
|
|
// RS: Rasterizer Stage
|
|
D3D11_VIEWPORT viewport{};
|
|
viewport.Width = static_cast<f32>(p.s->targetSize.x);
|
|
viewport.Height = static_cast<f32>(p.s->targetSize.y);
|
|
p.deviceContext->RSSetViewports(1, &viewport);
|
|
|
|
// PS: Pixel Shader
|
|
ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() };
|
|
p.deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0);
|
|
p.deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof());
|
|
p.deviceContext->PSSetShaderResources(0, 2, &resources[0]);
|
|
|
|
// OM: Output Merger
|
|
p.deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff);
|
|
p.deviceContext->OMSetRenderTargets(1, _customRenderTargetView ? _customRenderTargetView.addressof() : _renderTargetView.addressof(), nullptr);
|
|
}
|
|
|
|
// Debug aid (ATLAS_DEBUG_SHADER_HOT_RELOAD only, otherwise a no-op): once the reload
// time scheduled by the folder watcher has passed, the built-in shaders are recompiled
// from their .hlsl sources and swapped in. Compilation errors are shown in a message box
// and, like any other failure, logged without leaving this function.
void BackendD3D::_debugUpdateShaders(const RenderingPayload& p) noexcept
{
#if ATLAS_DEBUG_SHADER_HOT_RELOAD
    try
    {
        const auto reloadAt = _sourceCodeInvalidationTime.load(std::memory_order_relaxed);
        const auto now = std::chrono::steady_clock::now().time_since_epoch().count();

        // INT64_MAX means "nothing pending". Otherwise wait until the scheduled time has been reached.
        if (reloadAt == INT64_MAX || reloadAt > now)
        {
            return;
        }

        _sourceCodeInvalidationTime.store(INT64_MAX, std::memory_order_relaxed);

        static constexpr auto flags =
            D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS
#ifndef NDEBUG
            | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION
#endif
            ;

        // Compiles the "main" entry point of the given file for the given profile. Throws on failure.
        static const auto compile = [](const std::filesystem::path& path, const char* target) {
            wil::com_ptr<ID3DBlob> error;
            wil::com_ptr<ID3DBlob> blob;
            const auto hr = D3DCompileFromFile(
                /* pFileName   */ path.c_str(),
                /* pDefines    */ nullptr,
                /* pInclude    */ D3D_COMPILE_STANDARD_FILE_INCLUDE,
                /* pEntrypoint */ "main",
                /* pTarget     */ target,
                /* Flags1      */ flags,
                /* Flags2      */ 0,
                /* ppCode      */ blob.addressof(),
                /* ppErrorMsgs */ error.addressof());

            if (error)
            {
                // The message box runs on its own detached thread, which owns the error blob.
                std::thread messageThread{ [error = std::move(error)]() noexcept {
                    MessageBoxA(nullptr, static_cast<const char*>(error->GetBufferPointer()), "Compilation error", MB_ICONERROR | MB_OK);
                } };
                messageThread.detach();
            }

            THROW_IF_FAILED(hr);
            return blob;
        };

        // Each entry maps a source file to the member that receives the compiled shader.
        struct VertexShaderFile
        {
            std::wstring_view filename;
            wil::com_ptr<ID3D11VertexShader> BackendD3D::*target;
        };
        struct PixelShaderFile
        {
            std::wstring_view filename;
            wil::com_ptr<ID3D11PixelShader> BackendD3D::*target;
        };

        static constexpr std::array vertexShaderFiles{
            VertexShaderFile{ L"shader_vs.hlsl", &BackendD3D::_vertexShader },
        };
        static constexpr std::array pixelShaderFiles{
            PixelShaderFile{ L"shader_ps.hlsl", &BackendD3D::_pixelShader },
        };

        std::array<wil::com_ptr<ID3D11VertexShader>, vertexShaderFiles.size()> newVertexShaders;
        std::array<wil::com_ptr<ID3D11PixelShader>, pixelShaderFiles.size()> newPixelShaders;

        // Phase 1: Compile everything into temporaries. If anything throws,
        // `this` remains untouched and thus in a consistent state.
        for (size_t idx = 0; idx < vertexShaderFiles.size(); ++idx)
        {
            const auto bytecode = compile(_sourceDirectory / vertexShaderFiles[idx].filename, "vs_4_0");
            THROW_IF_FAILED(p.device->CreateVertexShader(bytecode->GetBufferPointer(), bytecode->GetBufferSize(), nullptr, newVertexShaders[idx].addressof()));
        }
        for (size_t idx = 0; idx < pixelShaderFiles.size(); ++idx)
        {
            const auto bytecode = compile(_sourceDirectory / pixelShaderFiles[idx].filename, "ps_4_0");
            THROW_IF_FAILED(p.device->CreatePixelShader(bytecode->GetBufferPointer(), bytecode->GetBufferSize(), nullptr, newPixelShaders[idx].addressof()));
        }

        // Phase 2: Everything compiled, so move the new shaders into `this`.
        for (size_t idx = 0; idx < vertexShaderFiles.size(); ++idx)
        {
            this->*vertexShaderFiles[idx].target = std::move(newVertexShaders[idx]);
        }
        for (size_t idx = 0; idx < pixelShaderFiles.size(); ++idx)
        {
            this->*pixelShaderFiles[idx].target = std::move(newPixelShaders[idx]);
        }

        // Rebind the pipeline so that the new shaders are actually used.
        _setupDeviceContextState(p);
    }
    CATCH_LOG()
#endif
}
|
|
|
|
void BackendD3D::_d2dBeginDrawing() noexcept
|
|
{
|
|
if (!_d2dBeganDrawing)
|
|
{
|
|
_d2dRenderTarget->BeginDraw();
|
|
_d2dBeganDrawing = true;
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_d2dEndDrawing()
|
|
{
|
|
if (_d2dBeganDrawing)
|
|
{
|
|
THROW_IF_FAILED(_d2dRenderTarget->EndDraw());
|
|
_d2dBeganDrawing = false;
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p)
|
|
{
|
|
// The index returned by _BitScanReverse is undefined when the input is 0. We can simultaneously guard
|
|
// against that and avoid unreasonably small textures, by clamping the min. texture size to `minArea`.
|
|
// `minArea` results in a 64kB RGBA texture which is the min. alignment for placed memory.
|
|
static constexpr u32 minArea = 128 * 128;
|
|
static constexpr u32 maxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
|
|
|
|
const auto cellArea = static_cast<u32>(p.s->font->cellSize.x) * p.s->font->cellSize.y;
|
|
const auto targetArea = static_cast<u32>(p.s->targetSize.x) * p.s->targetSize.y;
|
|
|
|
const auto minAreaByFont = cellArea * 95; // Covers all printable ASCII characters
|
|
const auto minAreaByGrowth = static_cast<u32>(_rectPacker.width) * _rectPacker.height * 2;
|
|
|
|
// It's hard to say what the max. size of the cache should be. Optimally I think we should use as much
|
|
// memory as is available, but the rendering code in this project is a big mess and so integrating
|
|
// memory pressure feedback (RegisterVideoMemoryBudgetChangeNotificationEvent) is rather difficult.
|
|
// As an alternative I'm using 1.25x the size of the swap chain. The 1.25x is there to avoid situations, where
|
|
// we're locked into a state, where on every render pass we're starting with a half full atlas, drawing once,
|
|
// filling it with the remaining half and drawing again, requiring two rendering passes on each frame.
|
|
const auto maxAreaByFont = targetArea + targetArea / 4;
|
|
|
|
auto area = std::min(maxAreaByFont, std::max(minAreaByFont, minAreaByGrowth));
|
|
area = clamp(area, minArea, maxArea);
|
|
|
|
// This block of code calculates the size of a power-of-2 texture that has an area larger than the given `area`.
|
|
// For instance, for an area of 985x1946 = 1916810 it would result in a u/v of 2048x1024 (area = 2097152).
|
|
// This has 2 benefits: GPUs like power-of-2 textures and it ensures that we don't resize the texture
|
|
// every time you resize the window by a pixel. Instead it only grows/shrinks by a factor of 2.
|
|
unsigned long index;
|
|
_BitScanReverse(&index, area - 1);
|
|
const auto u = static_cast<u16>(1u << ((index + 2) / 2));
|
|
const auto v = static_cast<u16>(1u << ((index + 1) / 2));
|
|
|
|
if (u != _rectPacker.width || v != _rectPacker.height)
|
|
{
|
|
_resizeGlyphAtlas(p, u, v);
|
|
}
|
|
|
|
stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), _rectPackerData.size());
|
|
|
|
// This is a little imperfect, because it only releases the memory of the glyph mappings, not the memory held by
|
|
// any DirectWrite fonts. On the other side, the amount of fonts on a system is always finite, where "finite"
|
|
// is pretty low, relatively speaking. Additionally this allows us to cache the boxGlyphs map indefinitely.
|
|
// It's not great, but it's not terrible.
|
|
for (auto& slot : _glyphAtlasMap.container())
|
|
{
|
|
for (auto& glyphs : slot.glyphs)
|
|
{
|
|
glyphs.clear();
|
|
}
|
|
}
|
|
for (auto& glyphs : _builtinGlyphs.glyphs)
|
|
{
|
|
glyphs.clear();
|
|
}
|
|
|
|
_d2dBeginDrawing();
|
|
_d2dRenderTarget->Clear();
|
|
|
|
_fontChangedResetGlyphAtlas = false;
|
|
}
|
|
|
|
void BackendD3D::_resizeGlyphAtlas(const RenderingPayload& p, const u16 u, const u16 v)
|
|
{
|
|
#if defined(_M_X64) || defined(_M_IX86)
|
|
static const auto faultyMacTypeVersion = _checkMacTypeVersion(p);
|
|
#else
|
|
// The affected versions of MacType are unavailable on ARM.
|
|
static constexpr auto faultyMacTypeVersion = false;
|
|
#endif
|
|
|
|
_d2dRenderTarget.reset();
|
|
_d2dRenderTarget4.reset();
|
|
_glyphAtlas.reset();
|
|
_glyphAtlasView.reset();
|
|
|
|
{
|
|
const D3D11_TEXTURE2D_DESC desc{
|
|
.Width = u,
|
|
.Height = v,
|
|
.MipLevels = 1,
|
|
.ArraySize = 1,
|
|
.Format = DXGI_FORMAT_B8G8R8A8_UNORM,
|
|
.SampleDesc = { 1, 0 },
|
|
.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET,
|
|
};
|
|
THROW_IF_FAILED(p.device->CreateTexture2D(&desc, nullptr, _glyphAtlas.addressof()));
|
|
THROW_IF_FAILED(p.device->CreateShaderResourceView(_glyphAtlas.get(), nullptr, _glyphAtlasView.addressof()));
|
|
}
|
|
|
|
{
|
|
const auto surface = _glyphAtlas.query<IDXGISurface>();
|
|
|
|
static constexpr D2D1_RENDER_TARGET_PROPERTIES props{
|
|
.type = D2D1_RENDER_TARGET_TYPE_DEFAULT,
|
|
.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED },
|
|
};
|
|
// ID2D1RenderTarget and ID2D1DeviceContext are the same and I'm tired of pretending they're not.
|
|
THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, reinterpret_cast<ID2D1RenderTarget**>(_d2dRenderTarget.addressof())));
|
|
_d2dRenderTarget.try_query_to(_d2dRenderTarget4.addressof());
|
|
|
|
_d2dRenderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS);
|
|
// Ensure that D2D uses the exact same gamma as our shader uses.
|
|
_d2dRenderTarget->SetTextRenderingParams(_textRenderingParams.get());
|
|
|
|
_d2dRenderTargetUpdateFontSettings(p);
|
|
}
|
|
|
|
// We have our own glyph cache so Direct2D's cache doesn't help much.
|
|
// This saves us 1MB of RAM, which is not much, but also not nothing.
|
|
if (_d2dRenderTarget4)
|
|
{
|
|
wil::com_ptr<ID2D1Device> device;
|
|
_d2dRenderTarget4->GetDevice(device.addressof());
|
|
|
|
device->SetMaximumTextureMemory(0);
|
|
|
|
if (!faultyMacTypeVersion)
|
|
{
|
|
if (const auto device4 = device.try_query<ID2D1Device4>())
|
|
{
|
|
device4->SetMaximumColorGlyphCacheMemory(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&whiteColor, nullptr, _emojiBrush.put()));
|
|
THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&whiteColor, nullptr, _brush.put()));
|
|
}
|
|
|
|
ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() };
|
|
p.deviceContext->PSSetShaderResources(0, 2, &resources[0]);
|
|
|
|
_rectPackerData = Buffer<stbrp_node>{ u };
|
|
}
|
|
|
|
// MacType is a popular 3rd party system to give the font rendering on Windows a softer look.
|
|
// It's particularly popular in China. Unfortunately, it hooks ID2D1Device4 incorrectly:
|
|
// https://github.com/snowie2000/mactype/pull/938
|
|
// This results in crashes. Not a lot of them, but enough to constantly show up.
|
|
// The issue was fixed in the MacType v1.2023.5.31 release, the only one in 2023.
|
|
//
|
|
// Please feel free to remove this check in a few years.
|
|
bool BackendD3D::_checkMacTypeVersion(const RenderingPayload& p)
|
|
{
|
|
#ifdef _WIN64
|
|
static constexpr auto name = L"MacType64.Core.dll";
|
|
#else
|
|
static constexpr auto name = L"MacType.Core.dll";
|
|
#endif
|
|
|
|
wil::unique_hmodule handle;
|
|
if (!GetModuleHandleExW(0, name, handle.addressof()))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
const auto resource = FindResourceW(handle.get(), MAKEINTRESOURCE(VS_VERSION_INFO), RT_VERSION);
|
|
if (!resource)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
const auto dataHandle = LoadResource(handle.get(), resource);
|
|
if (!dataHandle)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
const auto data = LockResource(dataHandle);
|
|
if (!data)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
VS_FIXEDFILEINFO* info;
|
|
UINT varLen = 0;
|
|
if (!VerQueryValueW(data, L"\\", reinterpret_cast<void**>(&info), &varLen))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
const auto faulty = info->dwFileVersionMS < (1 << 16 | 2023);
|
|
|
|
if (faulty && p.warningCallback)
|
|
{
|
|
p.warningCallback(ATLAS_ENGINE_ERROR_MAC_TYPE, {});
|
|
}
|
|
|
|
return faulty;
|
|
}
|
|
|
|
// Returns a reference to the most recently appended quad.
// At least one quad must have been appended since the last flush.
BackendD3D::QuadInstance& BackendD3D::_getLastQuad() noexcept
{
    assert(_instancesCount != 0);
    const auto lastIndex = _instancesCount - 1;
    return _instances[lastIndex];
}
|
|
|
|
// NOTE: Up to 5M calls per second -> no std::vector, no std::unordered_map.
|
|
// This function is an easy >100x faster than std::vector, can be
|
|
// inlined and reduces overall (!) renderer CPU usage by 5%.
|
|
BackendD3D::QuadInstance& BackendD3D::_appendQuad()
|
|
{
|
|
if (_instancesCount >= _instances.size())
|
|
{
|
|
_bumpInstancesSize();
|
|
}
|
|
|
|
return _instances[_instancesCount++];
|
|
}
|
|
|
|
void BackendD3D::_bumpInstancesSize()
|
|
{
|
|
auto newSize = std::max(_instancesCount, _instances.size() * 2);
|
|
newSize = std::max(size_t{ 256 }, newSize);
|
|
Expects(newSize > _instances.size());
|
|
|
|
// Our render loop heavily relies on memcpy() which is up to between 1.5x (Intel)
|
|
// and 40x (AMD) faster for allocations with an alignment of 32 or greater.
|
|
auto newInstances = Buffer<QuadInstance, 32>{ newSize };
|
|
std::copy_n(_instances.data(), _instances.size(), newInstances.data());
|
|
|
|
_instances = std::move(newInstances);
|
|
}
|
|
|
|
void BackendD3D::_flushQuads(const RenderingPayload& p)
|
|
{
|
|
if (!_instancesCount)
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (!_cursorRects.empty())
|
|
{
|
|
_drawCursorForeground();
|
|
}
|
|
|
|
// TODO: Shrink instances buffer
|
|
if (_instancesCount > _instanceBufferCapacity)
|
|
{
|
|
_recreateInstanceBuffers(p);
|
|
}
|
|
|
|
{
|
|
D3D11_MAPPED_SUBRESOURCE mapped{};
|
|
THROW_IF_FAILED(p.deviceContext->Map(_instanceBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped));
|
|
memcpy(mapped.pData, _instances.data(), _instancesCount * sizeof(QuadInstance));
|
|
p.deviceContext->Unmap(_instanceBuffer.get(), 0);
|
|
}
|
|
|
|
// I found 4 approaches to drawing lots of quads quickly. There are probably even more.
|
|
// They can often be found in discussions about "particle" or "point sprite" rendering in game development.
|
|
// * Compute Shader: My understanding is that at the time of writing games are moving over to bucketing
|
|
// particles into "tiles" on the screen and drawing them with a compute shader. While this improves
|
|
// performance, it doesn't mix well with our goal of allowing arbitrary overlaps between glyphs.
|
|
// Additionally none of the next 3 approaches use any significant amount of GPU time in the first place.
|
|
// * Geometry Shader: Geometry shaders can generate vertices on the fly, which would neatly replace our need
|
|
// for an index buffer. However, many sources claim they're significantly slower than the following approaches.
|
|
// * DrawIndexed & DrawInstanced: Again, many sources claim that GPU instancing (Draw(Indexed)Instanced) performs
|
|
// poorly for small meshes, and instead indexed vertices with a SRV (shader resource view) should be used.
|
|
// The popular "Vertex Shader Tricks" talk from Bill Bilodeau at GDC 2014 suggests this approach, explains
|
|
// how it works (you divide the `SV_VertexID` by 4 and index into the SRV that contains the per-instance data;
|
|
// it's basically manual instancing inside the vertex shader) and shows how it outperforms regular instancing.
|
|
// However on my own limited test hardware (built around ~2020), I found that for at least our use case,
|
|
// GPU instancing matches the performance of using a custom buffer. In fact on my Nvidia GPU in particular,
|
|
// instancing with ~10k instances appears to be about 50% faster and so DrawInstanced was chosen.
|
|
// Instead I found that packing instance data as tightly as possible made the biggest performance difference,
|
|
// and packing 16 bit integers with ID3D11InputLayout is quite a bit more convenient too.
|
|
|
|
p.deviceContext->DrawIndexedInstanced(6, static_cast<UINT>(_instancesCount), 0, 0, 0);
|
|
_instancesCount = 0;
|
|
}
|
|
|
|
void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p)
|
|
{
|
|
// We use the viewport size of the terminal as the initial estimate for the amount of instances we'll see.
|
|
const auto minCapacity = static_cast<size_t>(p.s->viewportCellCount.x) * p.s->viewportCellCount.y;
|
|
auto newCapacity = std::max(_instancesCount, minCapacity);
|
|
auto newSize = newCapacity * sizeof(QuadInstance);
|
|
// Round up to multiples of 64kB to avoid reallocating too often.
|
|
// 64kB is the minimum alignment for committed resources in D3D12.
|
|
newSize = alignForward<size_t>(newSize, 64 * 1024);
|
|
newCapacity = newSize / sizeof(QuadInstance);
|
|
|
|
_instanceBuffer.reset();
|
|
|
|
{
|
|
const D3D11_BUFFER_DESC desc{
|
|
.ByteWidth = gsl::narrow<UINT>(newSize),
|
|
.Usage = D3D11_USAGE_DYNAMIC,
|
|
.BindFlags = D3D11_BIND_VERTEX_BUFFER,
|
|
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
|
|
.StructureByteStride = sizeof(QuadInstance),
|
|
};
|
|
THROW_IF_FAILED(p.device->CreateBuffer(&desc, nullptr, _instanceBuffer.addressof()));
|
|
}
|
|
|
|
// IA: Input Assembler
|
|
ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() };
|
|
static constexpr UINT strides[]{ sizeof(f32x2), sizeof(QuadInstance) };
|
|
static constexpr UINT offsets[]{ 0, 0 };
|
|
p.deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]);
|
|
|
|
_instanceBufferCapacity = newCapacity;
|
|
}
|
|
|
|
void BackendD3D::_drawBackground(const RenderingPayload& p)
|
|
{
|
|
// Not uploading the bitmap halves (!) the GPU load for any given frame on 2023 hardware.
|
|
if (_backgroundBitmapGeneration != p.colorBitmapGenerations[0])
|
|
{
|
|
_uploadBackgroundBitmap(p);
|
|
}
|
|
|
|
_appendQuad() = {
|
|
.shadingType = static_cast<u16>(ShadingType::Background),
|
|
.size = p.s->targetSize,
|
|
};
|
|
}
|
|
|
|
void BackendD3D::_uploadBackgroundBitmap(const RenderingPayload& p)
|
|
{
|
|
D3D11_MAPPED_SUBRESOURCE mapped{};
|
|
THROW_IF_FAILED(p.deviceContext->Map(_backgroundBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped));
|
|
|
|
auto src = std::bit_cast<const char*>(p.backgroundBitmap.data());
|
|
const auto srcEnd = std::bit_cast<const char*>(p.backgroundBitmap.data() + p.backgroundBitmap.size());
|
|
const auto srcStride = p.colorBitmapRowStride * sizeof(u32);
|
|
auto dst = static_cast<char*>(mapped.pData);
|
|
|
|
while (src < srcEnd)
|
|
{
|
|
memcpy(dst, src, srcStride);
|
|
src += srcStride;
|
|
dst += mapped.RowPitch;
|
|
}
|
|
|
|
p.deviceContext->Unmap(_backgroundBitmap.get(), 0);
|
|
_backgroundBitmapGeneration = p.colorBitmapGenerations[0];
|
|
}
|
|
|
|
// Walks over all rows and their glyphs and appends one quad per non-whitespace glyph.
// Glyphs that are missing from the glyph cache are rasterized on demand via _drawGlyph().
// Along the way this tracks the vertical extents of each row and extends
// p.dirtyRectInPx vertically to cover the extents of all invalidated rows.
void BackendD3D::_drawText(RenderingPayload& p)
{
    if (_fontChangedResetGlyphAtlas)
    {
        _resetGlyphAtlas(p);
    }

    til::CoordType invalidatedTop = til::CoordTypeMax;
    til::CoordType invalidatedBottom = til::CoordTypeMin;

    u16 y = 0;
    for (const auto row : p.rows)
    {
        const auto rendition = row->lineRendition;
        const bool doubleWidth = rendition != LineRendition::SingleWidth;
        const bool doubleHeight = rendition >= LineRendition::DoubleHeightTop;

        f32 baselineX = 0;
        f32 baselineY = y * p.s->font->cellSize.y + p.s->font->baseline;
        f32 scaleX = 1;
        f32 scaleY = 1;

        if (doubleWidth)
        {
            scaleX = 2;

            if (doubleHeight)
            {
                // The positions below get multiplied by scaleY, so the baseline needs to be halved.
                scaleY = 2;
                baselineY /= 2;
            }
        }

        const u8x2 renditionScale{
            static_cast<u8>(doubleWidth ? 2 : 1),
            static_cast<u8>(doubleHeight ? 2 : 1),
        };

        for (const auto& m : row->mappings)
        {
            const auto glyphsEnd = m.glyphsTo;
            const auto fontFace = m.fontFace.get();

            // The lack of a fontFace indicates a soft font.
            AtlasFontFaceEntry* fontFaceEntry = &_builtinGlyphs;
            if (fontFace) [[likely]]
            {
                fontFaceEntry = _glyphAtlasMap.insert(fontFace).first;
            }

            // Each font face has a separate glyph map per line rendition.
            const auto& glyphs = fontFaceEntry->glyphs[WI_EnumValue(rendition)];

            auto x = m.glyphsFrom;
            while (x < glyphsEnd)
            {
                size_t consumed = 1;
                u32 glyphIndex = row->glyphIndices[x];

                // Note: !fontFace is only nullptr for builtin glyphs which then use glyphIndices for UTF16 code points.
                // In other words, this doesn't accidentally corrupt any actual glyph indices.
                // NOTE(review): This assumes that a leading surrogate is always followed by another
                // entry in glyphIndices. That's not checked here - confirm that the caller guarantees it.
                if (!fontFace && til::is_leading_surrogate(glyphIndex))
                {
                    glyphIndex = til::combine_surrogates(glyphIndex, row->glyphIndices[x + 1]);
                    consumed = 2;
                }

                auto glyphEntry = glyphs.lookup(glyphIndex);
                if (!glyphEntry)
                {
                    // Cache miss --> Rasterize the glyph now.
                    glyphEntry = _drawGlyph(p, *row, *fontFaceEntry, glyphIndex);
                }

                // A shadingType of 0 (ShadingType::Default) indicates a glyph that is whitespace.
                if (glyphEntry->shadingType != ShadingType::Default)
                {
                    auto left = static_cast<til::CoordType>(lrintf((baselineX + row->glyphOffsets[x].advanceOffset) * scaleX));
                    auto top = static_cast<til::CoordType>(lrintf((baselineY - row->glyphOffsets[x].ascenderOffset) * scaleY));

                    left += glyphEntry->offset.x;
                    top += glyphEntry->offset.y;

                    row->dirtyTop = std::min(row->dirtyTop, top);
                    row->dirtyBottom = std::max(row->dirtyBottom, top + glyphEntry->size.y);

                    auto& quad = _appendQuad();
                    quad = {
                        .shadingType = static_cast<u16>(glyphEntry->shadingType),
                        .renditionScale = renditionScale,
                        .position = { static_cast<i16>(left), static_cast<i16>(top) },
                        .size = glyphEntry->size,
                        .texcoord = glyphEntry->texcoord,
                        .color = row->colors[x],
                    };

                    if (glyphEntry->overlapSplit)
                    {
                        // Splits up the quad we just appended so that each cell gets its own foreground color.
                        _drawTextOverlapSplit(p, y);
                    }
                }

                baselineX += row->glyphAdvances[x];
                x += consumed;
            }
        }

        if (!row->gridLineRanges.empty())
        {
            _drawGridlines(p, y);
        }

        if (p.invalidatedRows.contains(y))
        {
            invalidatedTop = std::min(invalidatedTop, row->dirtyTop);
            invalidatedBottom = std::max(invalidatedBottom, row->dirtyBottom);
        }

        ++y;
    }

    // The two only cross each other if at least one invalidated row contributed its extents.
    if (invalidatedTop < invalidatedBottom)
    {
        p.dirtyRectInPx.top = std::min(p.dirtyRectInPx.top, invalidatedTop);
        p.dirtyRectInPx.bottom = std::max(p.dirtyRectInPx.bottom, invalidatedBottom);
    }

    _d2dEndDrawing();
}
|
|
|
|
// There are a number of coding-oriented fonts that feature ligatures which (for instance)
|
|
// translate text like "!=" into a glyph that looks like "≠" (just 2 columns wide and not 1).
|
|
// Glyphs like that still need to be colored in potentially multiple colors however, so this
|
|
// function will handle these ligatures by splitting them up into multiple QuadInstances.
|
|
//
|
|
// It works by iteratively splitting the wide glyph into shorter and shorter segments like so
|
|
// (whitespaces indicate that the glyph was split up in a leading and trailing half):
|
|
// <!--
|
|
// < !--
|
|
// < ! --
|
|
// < ! - -
|
|
void BackendD3D::_drawTextOverlapSplit(const RenderingPayload& p, u16 y)
|
|
{
|
|
auto& originalQuad = _getLastQuad();
|
|
|
|
// If the current row has a non-default line rendition then every glyph is scaled up by 2x horizontally.
|
|
// This requires us to make some changes: For instance, if the ligature occupies columns 3, 4 and 5 (0-indexed)
|
|
// then we need to get the foreground colors from columns 2 and 4, because columns 0,1 2,3 4,5 6,7 and so on form pairs.
|
|
// A wide glyph would be a total of 4 actual columns wide! In other words, we need to properly round our clip rects and columns.
|
|
i32 columnAdvance = 1;
|
|
i32 columnAdvanceInPx = p.s->font->cellSize.x;
|
|
i32 cellCount = p.s->viewportCellCount.x;
|
|
|
|
if (p.rows[y]->lineRendition != LineRendition::SingleWidth)
|
|
{
|
|
columnAdvance = 2;
|
|
columnAdvanceInPx <<= 1;
|
|
cellCount >>= 1;
|
|
}
|
|
|
|
i32 originalLeft = originalQuad.position.x;
|
|
i32 originalRight = originalQuad.position.x + originalQuad.size.x;
|
|
originalLeft = std::max(originalLeft, 0);
|
|
originalRight = std::min(originalRight, cellCount * columnAdvanceInPx);
|
|
|
|
if (originalLeft >= originalRight)
|
|
{
|
|
return;
|
|
}
|
|
|
|
const auto colors = &p.foregroundBitmap[p.colorBitmapRowStride * y];
|
|
|
|
// As explained in the beginning, column and clipLeft should be in multiples of columnAdvance
|
|
// and columnAdvanceInPx respectively, because that's how line renditions work.
|
|
auto column = originalLeft / columnAdvanceInPx;
|
|
auto clipLeft = column * columnAdvanceInPx;
|
|
column *= columnAdvance;
|
|
|
|
// Our loop below will split the ligature by processing it from left to right.
|
|
// Some fonts however implement ligatures by replacing a string like "&&" with a whitespace padding glyph,
|
|
// followed by the actual "&&" glyph which has a 1 column advance width. In that case the originalQuad
|
|
// will have the .color of the 2nd column and not of the 1st one. We need to fix that here.
|
|
auto lastFg = colors[column];
|
|
originalQuad.color = lastFg;
|
|
column += columnAdvance;
|
|
clipLeft += columnAdvanceInPx;
|
|
|
|
// We must ensure to exit the loop while `column` is less than `cellCount.x`,
|
|
// otherwise we cause a potential out of bounds access into foregroundBitmap.
|
|
// This may happen with glyphs that are severely overlapping their cells,
|
|
// outside of the viewport. The `clipLeft < originalRight` condition doubles
|
|
// as a `column < cellCount.x` condition thanks to us std::min()ing it above.
|
|
for (; clipLeft < originalRight; column += columnAdvance, clipLeft += columnAdvanceInPx)
|
|
{
|
|
const auto fg = colors[column];
|
|
|
|
if (lastFg != fg)
|
|
{
|
|
// NOTE: _appendQuad might reallocate and any pointers
|
|
// acquired before calling this function are now invalid.
|
|
auto& next = _appendQuad();
|
|
// The item at -1 is the quad we've just appended, which means
|
|
// that the previous quad we want to split up is at -2.
|
|
auto& prev = _instances[_instancesCount - 2];
|
|
|
|
const auto prevWidth = clipLeft - prev.position.x;
|
|
const auto nextWidth = prev.size.x - prevWidth;
|
|
|
|
prev.size.x = gsl::narrow<u16>(prevWidth);
|
|
|
|
next = prev;
|
|
next.position.x = gsl::narrow<i16>(next.position.x + prevWidth);
|
|
next.texcoord.x = gsl::narrow<u16>(next.texcoord.x + prevWidth);
|
|
next.size.x = gsl::narrow<u16>(nextWidth);
|
|
next.color = fg;
|
|
|
|
lastFg = fg;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Draws the glyph `glyphIndex` of the given font face into the glyph atlas using Direct2D and
// returns the entry that _drawGlyphAllocateEntry() provides for it, filled with the glyph's
// shading type, offset, size and atlas coordinates. Font face entries that lack a
// fontFace are forwarded to _drawBuiltinGlyph().
BackendD3D::AtlasGlyphEntry* BackendD3D::_drawGlyph(const RenderingPayload& p, const ShapedRow& row, AtlasFontFaceEntry& fontFaceEntry, u32 glyphIndex)
{
    // The lack of a fontFace indicates a soft font.
    if (!fontFaceEntry.fontFace)
    {
        return _drawBuiltinGlyph(p, row, fontFaceEntry, glyphIndex);
    }

    // A glyph run consisting of just this one glyph.
    const auto glyphIndexU16 = static_cast<u16>(glyphIndex);
    const DWRITE_GLYPH_RUN glyphRun{
        .fontFace = fontFaceEntry.fontFace.get(),
        .fontEmSize = p.s->font->fontSize,
        .glyphCount = 1,
        .glyphIndices = &glyphIndexU16,
    };

    // It took me a while to figure out how to rasterize glyphs manually with DirectWrite without depending on Direct2D.
    // The benefits are a reduction in memory usage, an increase in performance under certain circumstances and most
    // importantly, the ability to debug the renderer more easily, because many graphics debuggers don't support Direct2D.
    // Direct2D has one big advantage however: It features a clever glyph uploader with a pool of D3D11_USAGE_STAGING textures,
    // which I was too short on time to implement myself. This makes rasterization with Direct2D roughly 2x faster.
    //
    // This code remains, because it features some parts that are slightly more and some parts that are outright difficult to figure out.
#if 0
    const auto wantsClearType = p.s->font->antialiasingMode == AntialiasingMode::ClearType;
    const auto wantsAliased = p.s->font->antialiasingMode == AntialiasingMode::Aliased;
    const auto antialiasMode = wantsClearType ? DWRITE_TEXT_ANTIALIAS_MODE_CLEARTYPE : DWRITE_TEXT_ANTIALIAS_MODE_GRAYSCALE;
    const auto outlineThreshold = wantsAliased ? DWRITE_OUTLINE_THRESHOLD_ALIASED : DWRITE_OUTLINE_THRESHOLD_ANTIALIASED;

    DWRITE_RENDERING_MODE renderingMode{};
    DWRITE_GRID_FIT_MODE gridFitMode{};
    THROW_IF_FAILED(fontFaceEntry.fontFace->GetRecommendedRenderingMode(
        /* fontEmSize       */ glyphRun.fontEmSize,
        /* dpiX             */ 1, // fontEmSize is already in pixel
        /* dpiY             */ 1, // fontEmSize is already in pixel
        /* transform        */ nullptr,
        /* isSideways       */ FALSE,
        /* outlineThreshold */ outlineThreshold,
        /* measuringMode    */ DWRITE_MEASURING_MODE_NATURAL,
        /* renderingParams  */ _textRenderingParams.get(),
        /* renderingMode    */ &renderingMode,
        /* gridFitMode      */ &gridFitMode));

    wil::com_ptr<IDWriteGlyphRunAnalysis> glyphRunAnalysis;
    THROW_IF_FAILED(p.dwriteFactory->CreateGlyphRunAnalysis(
        /* glyphRun         */ &glyphRun,
        /* transform        */ nullptr,
        /* renderingMode    */ renderingMode,
        /* measuringMode    */ DWRITE_MEASURING_MODE_NATURAL,
        /* gridFitMode      */ gridFitMode,
        /* antialiasMode    */ antialiasMode,
        /* baselineOriginX  */ 0,
        /* baselineOriginY  */ 0,
        /* glyphRunAnalysis */ glyphRunAnalysis.put()));

    RECT textureBounds{};

    if (wantsClearType)
    {
        THROW_IF_FAILED(glyphRunAnalysis->GetAlphaTextureBounds(DWRITE_TEXTURE_CLEARTYPE_3x1, &textureBounds));

        // Some glyphs cannot be drawn with ClearType, such as bitmap fonts. In that case
        // GetAlphaTextureBounds() supposedly returns an empty RECT, but I haven't tested that yet.
        if (!IsRectEmpty(&textureBounds))
        {
            // Allocate a buffer of `3 * width * height` bytes.
            THROW_IF_FAILED(glyphRunAnalysis->CreateAlphaTexture(DWRITE_TEXTURE_CLEARTYPE_3x1, &textureBounds, buffer.data(), buffer.size()));
            // The buffer contains RGB ClearType weights which can now be packed into RGBA and uploaded to the glyph atlas.
            return;
        }

        // --> Retry with grayscale AA.
    }

    // Even though it says "ALIASED" and even though the docs for the flag still say:
    // > [...] that is, each pixel is either fully opaque or fully transparent [...]
    // don't be confused: It's grayscale antialiased. lol
    THROW_IF_FAILED(glyphRunAnalysis->GetAlphaTextureBounds(DWRITE_TEXTURE_ALIASED_1x1, &textureBounds));

    // Allocate a buffer of `width * height` bytes.
    THROW_IF_FAILED(glyphRunAnalysis->CreateAlphaTexture(DWRITE_TEXTURE_ALIASED_1x1, &textureBounds, buffer.data(), buffer.size()));
    // The buffer now contains a grayscale alpha mask.
#endif

    // This code finds the local font file path. Useful for debugging as it
    // gets you the font.ttf <> glyphIndex pair to uniquely identify glyphs.
#if 0
    std::vector<std::wstring> paths;

    UINT32 numberOfFiles;
    THROW_IF_FAILED(fontFaceEntry.fontFace->GetFiles(&numberOfFiles, nullptr));
    wil::com_ptr<IDWriteFontFile> fontFiles[8];
    THROW_IF_FAILED(fontFaceEntry.fontFace->GetFiles(&numberOfFiles, fontFiles[0].addressof()));

    for (UINT32 i = 0; i < numberOfFiles; ++i)
    {
        wil::com_ptr<IDWriteFontFileLoader> loader;
        THROW_IF_FAILED(fontFiles[i]->GetLoader(loader.addressof()));

        void const* fontFileReferenceKey;
        UINT32 fontFileReferenceKeySize;
        THROW_IF_FAILED(fontFiles[i]->GetReferenceKey(&fontFileReferenceKey, &fontFileReferenceKeySize));

        if (const auto localLoader = loader.try_query<IDWriteLocalFontFileLoader>())
        {
            UINT32 filePathLength;
            THROW_IF_FAILED(localLoader->GetFilePathLengthFromKey(fontFileReferenceKey, fontFileReferenceKeySize, &filePathLength));

            filePathLength++;
            std::wstring filePath(filePathLength, L'\0');
            THROW_IF_FAILED(localLoader->GetFilePathFromKey(fontFileReferenceKey, fontFileReferenceKeySize, filePath.data(), filePathLength));

            paths.emplace_back(std::move(filePath));
        }
    }
#endif

    // 1 if the row has any non-default line rendition and 0 otherwise.
    // It serves as a boolean here and as a shift amount further below.
    const int scale = row.lineRendition != LineRendition::SingleWidth;
    D2D1_MATRIX_3X2_F transform = identityTransform;

    if (scale)
    {
        // The translation (dx/dy) is only known once the glyph got its spot in the atlas and so it's set below.
        transform.m11 = 2.0f;
        transform.m22 = row.lineRendition >= LineRendition::DoubleHeightTop ? 2.0f : 1.0f;
        _d2dRenderTarget->SetTransform(&transform);
    }

    // Whichever way we leave this function, the render target goes back to the identity transform.
    const auto restoreTransform = wil::scope_exit([&]() noexcept {
        _d2dRenderTarget->SetTransform(&identityTransform);
    });

    // This calculates the black box of the glyph, or in other words,
    // its extents/size relative to its baseline origin (at 0,0).
    //
    //   bounds.top ------++-----######--+
    //   (-7)             ||  ############
    //                    ||####      ####
    //                    |###       #####
    //   baseline ______  |###      #####|
    //   origin        \  |############# |
    //   (= 0,0)        \ |###########   |
    //                    ++-------###---+
    //                    ##      ###    |
    //   bounds.bottom ---+#########-----+
    //   (+2)             |              |
    //             bounds.left     bounds.right
    //             (-1)            (+14)
    //

    bool isColorGlyph = false;
    D2D1_RECT_F bounds = GlyphRunEmptyBounds;

    // Color glyphs switch the text antialiasing mode to grayscale below. This undoes that on exit.
    const auto antialiasingCleanup = wil::scope_exit([&]() {
        if (isColorGlyph)
        {
            _d2dRenderTarget4->SetTextAntialiasMode(static_cast<D2D1_TEXT_ANTIALIAS_MODE>(p.s->font->antialiasingMode));
        }
    });

    {
        wil::com_ptr<IDWriteColorGlyphRunEnumerator1> colorRuns;

        if (p.s->font->colorGlyphs)
        {
            colorRuns = TranslateColorGlyphRun(p.dwriteFactory4.get(), {}, &glyphRun);
        }

        if (colorRuns)
        {
            isColorGlyph = true;
            _d2dRenderTarget4->SetTextAntialiasMode(D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE);

            // The bounds of a color glyph are accumulated from all of its color runs.
            while (ColorGlyphRunMoveNext(colorRuns.get()))
            {
                const auto colorGlyphRun = ColorGlyphRunGetCurrentRun(colorRuns.get());
                ColorGlyphRunAccumulateBounds(_d2dRenderTarget.get(), colorGlyphRun, bounds);
            }
        }
        else
        {
            THROW_IF_FAILED(_d2dRenderTarget->GetGlyphRunWorldBounds({}, &glyphRun, DWRITE_MEASURING_MODE_NATURAL, &bounds));
        }
    }

    // The bounds may be empty if the glyph is whitespace.
    if (bounds.left >= bounds.right || bounds.top >= bounds.bottom)
    {
        return _drawGlyphAllocateEntry(row, fontFaceEntry, glyphIndex);
    }

    const auto boxLeft = lrintf(bounds.left);
    const auto boxTop = lrintf(bounds.top);
    const auto boxRight = lrintf(bounds.right);
    const auto boxBottom = lrintf(bounds.bottom);

    stbrp_rect rect{
        .w = boxRight - boxLeft,
        .h = boxBottom - boxTop,
    };
    _drawGlyphAtlasAllocate(p, rect);
    _d2dBeginDrawing();

    // Position the baseline origin such that the top/left corner of
    // the black box coincides with the top/left corner of `rect`.
    const D2D1_POINT_2F baselineOrigin{
        static_cast<f32>(rect.x - boxLeft),
        static_cast<f32>(rect.y - boxTop),
    };

    if (scale)
    {
        // With this translation the scaling leaves the baseline origin where it is.
        transform.dx = (1.0f - transform.m11) * baselineOrigin.x;
        transform.dy = (1.0f - transform.m22) * baselineOrigin.y;
        _d2dRenderTarget->SetTransform(&transform);
    }

    if (isColorGlyph)
    {
        const auto colorRuns = TranslateColorGlyphRun(p.dwriteFactory4.get(), baselineOrigin, &glyphRun);
        while (ColorGlyphRunMoveNext(colorRuns.get()))
        {
            const auto colorGlyphRun = ColorGlyphRunGetCurrentRun(colorRuns.get());
            ColorGlyphRunDraw(_d2dRenderTarget4.get(), _emojiBrush.get(), _brush.get(), colorGlyphRun);
        }
    }
    else
    {
        _d2dRenderTarget->DrawGlyphRun(baselineOrigin, &glyphRun, _brush.get(), DWRITE_MEASURING_MODE_NATURAL);
    }

    // Ligatures are drawn with strict cell-wise foreground color, while other text allows colors to overhang
    // their cells. This makes sure that italics and such retain their color and don't look "cut off".
    //
    // The former condition makes sure to exclude diacritics and such from being considered a ligature,
    // while the latter condition-pair makes sure to exclude regular BMP wide glyphs that overlap a little.
    const auto triggerLeft = _ligatureOverhangTriggerLeft << scale;
    const auto triggerRight = _ligatureOverhangTriggerRight << scale;
    const auto overlapSplit = rect.w >= p.s->font->cellSize.x && (boxLeft <= triggerLeft || boxRight >= triggerRight);

    const auto glyphEntry = _drawGlyphAllocateEntry(row, fontFaceEntry, glyphIndex);
    glyphEntry->shadingType = isColorGlyph ? ShadingType::TextPassthrough : _textShadingType;
    glyphEntry->overlapSplit = overlapSplit;
    glyphEntry->offset.x = boxLeft;
    glyphEntry->offset.y = boxTop;
    glyphEntry->size.x = rect.w;
    glyphEntry->size.y = rect.h;
    glyphEntry->texcoord.x = rect.x;
    glyphEntry->texcoord.y = rect.y;

    if (row.lineRendition >= LineRendition::DoubleHeightTop)
    {
        _splitDoubleHeightGlyph(p, row, fontFaceEntry, glyphEntry);
    }

    return glyphEntry;
}
|
|
|
|
// Draws a glyph that doesn't come from a font face into the glyph atlas: either a soft font
// character or one of the builtin glyphs. Such glyphs always cover exactly one cell
// (scaled up according to the line rendition of the row).
// Returns the entry that _drawGlyphAllocateEntry() provides for the glyph.
BackendD3D::AtlasGlyphEntry* BackendD3D::_drawBuiltinGlyph(const RenderingPayload& p, const ShapedRow& row, AtlasFontFaceEntry& fontFaceEntry, u32 glyphIndex)
{
    auto baseline = p.s->font->baseline;
    stbrp_rect rect{
        .w = p.s->font->cellSize.x,
        .h = p.s->font->cellSize.y,
    };

    if (row.lineRendition != LineRendition::SingleWidth)
    {
        // Any non-default line rendition doubles the width,
        // but only the double height ones also double the height.
        const auto heightShift = static_cast<u8>(row.lineRendition >= LineRendition::DoubleHeightTop);
        rect.w <<= 1;
        rect.h <<= heightShift;
        baseline <<= heightShift;
    }

    _drawGlyphAtlasAllocate(p, rect);
    _d2dBeginDrawing();

    // The area inside the atlas that was allocated for this glyph.
    const D2D1_RECT_F target{
        static_cast<f32>(rect.x),
        static_cast<f32>(rect.y),
        static_cast<f32>(rect.x + rect.w),
        static_cast<f32>(rect.y + rect.h),
    };

    auto shadingType = ShadingType::TextGrayscale;

    if (BuiltinGlyphs::IsSoftFontChar(glyphIndex))
    {
        shadingType = _drawSoftFontGlyph(p, target, glyphIndex);
    }
    else
    {
        // This code works in tandem with SHADING_TYPE_TEXT_BUILTIN_GLYPH in our pixel shader.
        // Unless someone removed it, it should have a lengthy comment visually explaining
        // what each of the 3 RGB components do. The short version is:
        //   R: stretch the checkerboard pattern (Shape_Filled050) horizontally
        //   G: invert the pixels
        //   B: overrides the above and fills it
        static constexpr D2D1_COLOR_F shadeColorMap[] = {
            { 1, 0, 0, 1 }, // Shape_Filled025
            { 0, 0, 0, 1 }, // Shape_Filled050
            { 1, 1, 0, 1 }, // Shape_Filled075
            { 1, 1, 1, 1 }, // Shape_Filled100
        };
        BuiltinGlyphs::DrawBuiltinGlyph(p.d2dFactory.get(), _d2dRenderTarget.get(), _brush.get(), shadeColorMap, target, glyphIndex);
        shadingType = ShadingType::TextBuiltinGlyph;
    }

    const auto glyphEntry = _drawGlyphAllocateEntry(row, fontFaceEntry, glyphIndex);
    glyphEntry->shadingType = shadingType;
    glyphEntry->overlapSplit = 0;
    // The quad is positioned relative to the baseline and the top of the cell is `baseline` pixels above it.
    glyphEntry->offset.x = 0;
    glyphEntry->offset.y = -baseline;
    glyphEntry->size.x = rect.w;
    glyphEntry->size.y = rect.h;
    glyphEntry->texcoord.x = rect.x;
    glyphEntry->texcoord.y = rect.y;

    if (row.lineRendition >= LineRendition::DoubleHeightTop)
    {
        _splitDoubleHeightGlyph(p, row, fontFaceEntry, glyphEntry);
    }

    return glyphEntry;
}
|
|
|
|
// Draws the soft font character `glyphIndex` (soft font characters start at U+EF20) into `rect`.
// The bit pattern of the character is expanded into a small bitmap which then gets stretched to fill `rect`.
// Returns ShadingType::TextGrayscale on success and ShadingType::Default
// if the soft font pattern doesn't contain any data for the character.
BackendD3D::ShadingType BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const D2D1_RECT_F& rect, u32 glyphIndex)
{
    const auto cellWidth = static_cast<size_t>(p.s->font->softFontCellSize.width);
    const auto cellHeight = static_cast<size_t>(p.s->font->softFontCellSize.height);
    // Each character occupies `cellHeight` consecutive entries in the pattern, one per scanline.
    const auto patternIndex = glyphIndex - 0xEF20u;
    const auto scanlines = til::safe_slice_len(p.s->font->softFontPattern, cellHeight * patternIndex, cellHeight);

    // This happens if someone wrote a U+EF2x character (by accident), but we don't even have soft fonts enabled yet.
    if (scanlines.empty() || scanlines.size() != cellHeight)
    {
        return ShadingType::Default;
    }

    // The bitmap is created lazily on first use.
    if (!_softFontBitmap)
    {
        // Allocating such a tiny texture is very wasteful (min. texture size on GPUs
        // right now is 64kB), but this is a seldom used feature, so it's fine...
        const D2D1_SIZE_U bitmapSize{
            static_cast<UINT32>(cellWidth),
            static_cast<UINT32>(cellHeight),
        };
        const D2D1_BITMAP_PROPERTIES1 bitmapProperties{
            .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED },
            .dpiX = static_cast<f32>(p.s->font->dpi),
            .dpiY = static_cast<f32>(p.s->font->dpi),
        };
        THROW_IF_FAILED(_d2dRenderTarget->CreateBitmap(bitmapSize, nullptr, 0, &bitmapProperties, _softFontBitmap.addressof()));
    }

    // Expand the bit pattern into 32-bit pixels and upload them into the bitmap.
    {
        auto pixels = Buffer<u32>{ cellWidth * cellHeight };
        auto out = pixels.begin();

        for (auto bits : scanlines)
        {
            // Bit 15 (0x8000) is the leftmost pixel of the scanline. Shifting left moves on to the next one.
            for (size_t column = 0; column < cellWidth; column++)
            {
                const auto isSet = (bits & 0x8000) != 0;
                *out++ = isSet ? 0xffffffff : 0x00000000;
                bits <<= 1;
            }
        }

        const auto pitch = static_cast<UINT32>(cellWidth * sizeof(u32));
        THROW_IF_FAILED(_softFontBitmap->CopyFromMemory(nullptr, pixels.data(), pitch));
    }

    // Clip all drawing to `rect` for the duration of the DrawBitmap() call below.
    _d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED);
    const auto popClip = wil::scope_exit([&]() {
        _d2dRenderTarget->PopAxisAlignedClip();
    });

    const auto wantsAliased = p.s->font->antialiasingMode == AntialiasingMode::Aliased;
    const auto interpolation = wantsAliased ? D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR : D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC;
    _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &rect, 1, interpolation, nullptr, nullptr);
    return ShadingType::TextGrayscale;
}
|
|
|
|
void BackendD3D::_drawGlyphAtlasAllocate(const RenderingPayload& p, stbrp_rect& rect)
|
|
{
|
|
if (stbrp_pack_rects(&_rectPacker, &rect, 1))
|
|
{
|
|
return;
|
|
}
|
|
|
|
_d2dEndDrawing();
|
|
_flushQuads(p);
|
|
_resetGlyphAtlas(p);
|
|
|
|
if (!stbrp_pack_rects(&_rectPacker, &rect, 1))
|
|
{
|
|
THROW_HR(HRESULT_FROM_WIN32(ERROR_POSSIBLE_DEADLOCK));
|
|
}
|
|
}
|
|
|
|
// Inserts `glyphIndex` into the glyph map that `fontFaceEntry` keeps for the row's line rendition
// and returns the resulting entry with its shading type set to ShadingType::Default.
BackendD3D::AtlasGlyphEntry* BackendD3D::_drawGlyphAllocateEntry(const ShapedRow& row, AtlasFontFaceEntry& fontFaceEntry, u32 glyphIndex)
{
    auto& glyphsForRendition = fontFaceEntry.glyphs[WI_EnumValue(row.lineRendition)];
    const auto entry = glyphsForRendition.insert(glyphIndex).first;
    entry->shadingType = ShadingType::Default;
    return entry;
}
|
|
|
|
// If this is a double-height glyph (DECDHL), we need to split it into 2 glyph entries:
|
|
// One for the top/bottom half each, because that's how DECDHL works. This will clip the
|
|
// `glyphEntry` to only contain the one specified by `fontFaceEntry.lineRendition`
|
|
// and create a second entry in our glyph cache hashmap that contains the other half.
|
|
void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, const ShapedRow& row, AtlasFontFaceEntry& fontFaceEntry, AtlasGlyphEntry* glyphEntry)
|
|
{
|
|
// Twice the line height, twice the descender gap. For both.
|
|
glyphEntry->offset.y -= p.s->font->descender;
|
|
|
|
const auto isTop = row.lineRendition == LineRendition::DoubleHeightTop;
|
|
const auto otherLineRendition = isTop ? LineRendition::DoubleHeightBottom : LineRendition::DoubleHeightTop;
|
|
const auto entry2 = fontFaceEntry.glyphs[WI_EnumValue(otherLineRendition)].insert(glyphEntry->glyphIndex).first;
|
|
|
|
*entry2 = *glyphEntry;
|
|
|
|
const auto top = isTop ? glyphEntry : entry2;
|
|
const auto bottom = isTop ? entry2 : glyphEntry;
|
|
const auto topSize = clamp(-glyphEntry->offset.y - p.s->font->baseline, 0, static_cast<int>(glyphEntry->size.y));
|
|
|
|
top->offset.y += p.s->font->cellSize.y;
|
|
top->size.y = topSize;
|
|
bottom->offset.y += topSize;
|
|
bottom->size.y = std::max(0, bottom->size.y - topSize);
|
|
bottom->texcoord.y += topSize;
|
|
|
|
// Things like diacritics might be so small that they only exist on either half of the
|
|
// double-height row. This effectively turns the other (unneeded) side into whitespace.
|
|
if (!top->size.y)
|
|
{
|
|
top->shadingType = ShadingType::Default;
|
|
}
|
|
if (!bottom->size.y)
|
|
{
|
|
bottom->shadingType = ShadingType::Default;
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_drawGridlines(const RenderingPayload& p, u16 y)
|
|
{
|
|
const auto row = p.rows[y];
|
|
|
|
const auto horizontalShift = static_cast<u8>(row->lineRendition != LineRendition::SingleWidth);
|
|
const auto verticalShift = static_cast<u8>(row->lineRendition >= LineRendition::DoubleHeightTop);
|
|
|
|
const auto cellSize = p.s->font->cellSize;
|
|
const auto rowTop = static_cast<i16>(cellSize.y * y);
|
|
const auto rowBottom = static_cast<i16>(rowTop + cellSize.y);
|
|
|
|
auto textCellTop = rowTop;
|
|
if (row->lineRendition == LineRendition::DoubleHeightBottom)
|
|
{
|
|
textCellTop -= cellSize.y;
|
|
}
|
|
|
|
const i32 clipTop = row->lineRendition == LineRendition::DoubleHeightBottom ? rowTop : 0;
|
|
const i32 clipBottom = row->lineRendition == LineRendition::DoubleHeightTop ? rowBottom : p.s->targetSize.y;
|
|
|
|
const auto appendVerticalLines = [&](const GridLineRange& r, FontDecorationPosition pos) {
|
|
const auto textCellWidth = cellSize.x << horizontalShift;
|
|
const auto offset = pos.position << horizontalShift;
|
|
const auto width = static_cast<u16>(pos.height << horizontalShift);
|
|
|
|
auto posX = r.from * cellSize.x + offset;
|
|
const auto end = r.to * cellSize.x;
|
|
|
|
for (; posX < end; posX += textCellWidth)
|
|
{
|
|
_appendQuad() = {
|
|
.shadingType = static_cast<u16>(ShadingType::SolidLine),
|
|
.position = { static_cast<i16>(posX), rowTop },
|
|
.size = { width, p.s->font->cellSize.y },
|
|
.color = r.gridlineColor,
|
|
};
|
|
}
|
|
};
|
|
const auto appendHorizontalLine = [&](const GridLineRange& r, FontDecorationPosition pos, ShadingType shadingType, const u32 color) {
|
|
const auto offset = pos.position << verticalShift;
|
|
const auto height = static_cast<u16>(pos.height << verticalShift);
|
|
|
|
const auto left = static_cast<i16>(r.from * cellSize.x);
|
|
const auto width = static_cast<u16>((r.to - r.from) * cellSize.x);
|
|
|
|
i32 rt = textCellTop + offset;
|
|
i32 rb = rt + height;
|
|
rt = clamp(rt, clipTop, clipBottom);
|
|
rb = clamp(rb, clipTop, clipBottom);
|
|
|
|
if (rt < rb)
|
|
{
|
|
_appendQuad() = {
|
|
.shadingType = static_cast<u16>(shadingType),
|
|
.renditionScale = { static_cast<u8>(1 << horizontalShift), static_cast<u8>(1 << verticalShift) },
|
|
.position = { left, static_cast<i16>(rt) },
|
|
.size = { width, static_cast<u16>(rb - rt) },
|
|
.color = color,
|
|
};
|
|
}
|
|
};
|
|
|
|
for (const auto& r : row->gridLineRanges)
|
|
{
|
|
// AtlasEngine.cpp shouldn't add any gridlines if they don't do anything.
|
|
assert(r.lines.any());
|
|
|
|
if (r.lines.test(GridLines::Left))
|
|
{
|
|
appendVerticalLines(r, p.s->font->gridLeft);
|
|
}
|
|
if (r.lines.test(GridLines::Right))
|
|
{
|
|
appendVerticalLines(r, p.s->font->gridRight);
|
|
}
|
|
if (r.lines.test(GridLines::Top))
|
|
{
|
|
appendHorizontalLine(r, p.s->font->gridTop, ShadingType::SolidLine, r.gridlineColor);
|
|
}
|
|
if (r.lines.test(GridLines::Bottom))
|
|
{
|
|
appendHorizontalLine(r, p.s->font->gridBottom, ShadingType::SolidLine, r.gridlineColor);
|
|
}
|
|
if (r.lines.test(GridLines::Strikethrough))
|
|
{
|
|
appendHorizontalLine(r, p.s->font->strikethrough, ShadingType::SolidLine, r.gridlineColor);
|
|
}
|
|
|
|
if (r.lines.test(GridLines::Underline))
|
|
{
|
|
appendHorizontalLine(r, p.s->font->underline, ShadingType::SolidLine, r.underlineColor);
|
|
}
|
|
else if (r.lines.any(GridLines::DottedUnderline, GridLines::HyperlinkUnderline))
|
|
{
|
|
appendHorizontalLine(r, p.s->font->underline, ShadingType::DottedLine, r.underlineColor);
|
|
}
|
|
else if (r.lines.test(GridLines::DashedUnderline))
|
|
{
|
|
appendHorizontalLine(r, p.s->font->underline, ShadingType::DashedLine, r.underlineColor);
|
|
}
|
|
else if (r.lines.test(GridLines::CurlyUnderline))
|
|
{
|
|
appendHorizontalLine(r, _curlyUnderline, ShadingType::CurlyLine, r.underlineColor);
|
|
}
|
|
else if (r.lines.test(GridLines::DoubleUnderline))
|
|
{
|
|
for (const auto pos : p.s->font->doubleUnderline)
|
|
{
|
|
appendHorizontalLine(r, pos, ShadingType::SolidLine, r.underlineColor);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_drawCursorBackground(const RenderingPayload& p)
|
|
{
|
|
_cursorRects.clear();
|
|
|
|
if (p.cursorRect.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
_cursorPosition = {
|
|
p.s->font->cellSize.x * p.cursorRect.left,
|
|
p.s->font->cellSize.y * p.cursorRect.top,
|
|
p.s->font->cellSize.x * p.cursorRect.right,
|
|
p.s->font->cellSize.y * p.cursorRect.bottom,
|
|
};
|
|
|
|
const auto cursorColor = p.s->cursor->cursorColor;
|
|
const auto offset = p.cursorRect.top * p.colorBitmapRowStride;
|
|
|
|
for (auto x1 = p.cursorRect.left; x1 < p.cursorRect.right; ++x1)
|
|
{
|
|
const auto x0 = x1;
|
|
const auto bg = p.backgroundBitmap[offset + x1] | 0xff000000;
|
|
|
|
for (; x1 < p.cursorRect.right && (p.backgroundBitmap[offset + x1] | 0xff000000) == bg; ++x1)
|
|
{
|
|
}
|
|
|
|
const i16x2 position{
|
|
static_cast<i16>(p.s->font->cellSize.x * x0),
|
|
static_cast<i16>(p.s->font->cellSize.y * p.cursorRect.top),
|
|
};
|
|
const u16x2 size{
|
|
static_cast<u16>(p.s->font->cellSize.x * (x1 - x0)),
|
|
p.s->font->cellSize.y,
|
|
};
|
|
auto background = cursorColor;
|
|
auto foreground = bg;
|
|
|
|
if (cursorColor == 0xffffffff)
|
|
{
|
|
background = bg ^ 0xffffff;
|
|
foreground = 0xffffffff;
|
|
}
|
|
|
|
// The legacy console used to invert colors by just doing `bg ^ 0xc0c0c0`. This resulted
|
|
// in a minimum squared distance of just 0.029195 across all possible color combinations.
|
|
background = ColorFix::GetPerceivableColor(background, bg, 0.25f * 0.25f);
|
|
|
|
auto& c0 = _cursorRects.emplace_back(position, size, background, foreground);
|
|
|
|
switch (static_cast<CursorType>(p.s->cursor->cursorType))
|
|
{
|
|
case CursorType::Legacy:
|
|
{
|
|
const auto height = (c0.size.y * p.s->cursor->heightPercentage + 50) / 100;
|
|
c0.position.y += c0.size.y - height;
|
|
c0.size.y = height;
|
|
break;
|
|
}
|
|
case CursorType::VerticalBar:
|
|
c0.size.x = p.s->font->thinLineWidth;
|
|
break;
|
|
case CursorType::Underscore:
|
|
c0.position.y += p.s->font->underline.position;
|
|
c0.size.y = p.s->font->underline.height;
|
|
break;
|
|
case CursorType::EmptyBox:
|
|
{
|
|
auto& c1 = _cursorRects.emplace_back(c0);
|
|
if (x0 == p.cursorRect.left)
|
|
{
|
|
auto& c = _cursorRects.emplace_back(c0);
|
|
// Make line a little shorter vertically so it doesn't overlap with the top/bottom horizontal lines.
|
|
c.position.y += p.s->font->thinLineWidth;
|
|
c.size.y -= 2 * p.s->font->thinLineWidth;
|
|
// The actual adjustment...
|
|
c.size.x = p.s->font->thinLineWidth;
|
|
}
|
|
if (x1 == p.cursorRect.right)
|
|
{
|
|
auto& c = _cursorRects.emplace_back(c0);
|
|
// Make line a little shorter vertically so it doesn't overlap with the top/bottom horizontal lines.
|
|
c.position.y += p.s->font->thinLineWidth;
|
|
c.size.y -= 2 * p.s->font->thinLineWidth;
|
|
// The actual adjustment...
|
|
c.position.x += c.size.x - p.s->font->thinLineWidth;
|
|
c.size.x = p.s->font->thinLineWidth;
|
|
}
|
|
c0.size.y = p.s->font->thinLineWidth;
|
|
c1.position.y += c1.size.y - p.s->font->thinLineWidth;
|
|
c1.size.y = p.s->font->thinLineWidth;
|
|
break;
|
|
}
|
|
case CursorType::FullBox:
|
|
break;
|
|
case CursorType::DoubleUnderscore:
|
|
{
|
|
auto& c1 = _cursorRects.emplace_back(c0);
|
|
c0.position.y += p.s->font->doubleUnderline[0].position;
|
|
c0.size.y = p.s->font->thinLineWidth;
|
|
c1.position.y += p.s->font->doubleUnderline[1].position;
|
|
c1.size.y = p.s->font->thinLineWidth;
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (const auto& c : _cursorRects)
|
|
{
|
|
_appendQuad() = {
|
|
.shadingType = static_cast<u16>(ShadingType::Cursor),
|
|
.position = c.position,
|
|
.size = c.size,
|
|
.color = c.background,
|
|
};
|
|
}
|
|
}
|
|
|
|
void BackendD3D::_drawCursorForeground()
|
|
{
|
|
// NOTE: _appendQuad() may reallocate the _instances vector. It's important to iterate
|
|
// by index, because pointers (or iterators) would get invalidated. It's also important
|
|
// to cache the original _instancesCount since it'll get changed with each append.
|
|
auto instancesCount = _instancesCount;
|
|
size_t instancesOffset = 0;
|
|
|
|
assert(instancesCount != 0);
|
|
|
|
// All of the text drawing primitives are drawn as a single block, after drawing
|
|
// the background and cursor background and before drawing the selection overlay.
|
|
// To avoid having to check the shadingType in the loop below, we'll find the
|
|
// start and end of this "block" here in advance.
|
|
for (; instancesOffset < instancesCount; ++instancesOffset)
|
|
{
|
|
const auto shadingType = static_cast<ShadingType>(_instances[instancesOffset].shadingType);
|
|
if (shadingType >= ShadingType::TextDrawingFirst && shadingType <= ShadingType::TextDrawingLast)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
// We can also skip any instances (= any rows) at the beginning that are clearly not overlapping with
|
|
// the cursor. This reduces the CPU cost of this function by roughly half (a few microseconds).
|
|
for (; instancesOffset < instancesCount; ++instancesOffset)
|
|
{
|
|
const auto& it = _instances[instancesOffset];
|
|
if ((it.position.y + it.size.y) > _cursorPosition.top)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Now do the same thing as above, but backwards from the end.
|
|
for (; instancesCount > instancesOffset; --instancesCount)
|
|
{
|
|
const auto shadingType = static_cast<ShadingType>(_instances[instancesCount - 1].shadingType);
|
|
if (shadingType >= ShadingType::TextDrawingFirst && shadingType <= ShadingType::TextDrawingLast)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
for (; instancesCount > instancesOffset; --instancesCount)
|
|
{
|
|
const auto& it = _instances[instancesCount - 1];
|
|
if (it.position.y < _cursorPosition.bottom)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
// For cursors with multiple rectangles this really isn't all that fast, because it iterates
|
|
// over the instances vector multiple times. But I also don't really care, because the
|
|
// double-underline and empty-box cursors are pretty annoying to deal with in any case.
|
|
//
|
|
// It would definitely help if instead of position & size QuadInstances would use left/top/right/bottom
|
|
// with f32, because then computing the intersection would be much faster via SIMD. But that would
|
|
// make the struct size larger and cost more power to transmit more data to the GPU. ugh.
|
|
for (const auto& c : _cursorRects)
|
|
{
|
|
const int cursorL = c.position.x;
|
|
const int cursorT = c.position.y;
|
|
const int cursorR = cursorL + c.size.x;
|
|
const int cursorB = cursorT + c.size.y;
|
|
|
|
for (size_t i = instancesOffset; i < instancesCount; ++i)
|
|
{
|
|
const auto& it = _instances[i];
|
|
const int instanceL = it.position.x;
|
|
const int instanceT = it.position.y;
|
|
const int instanceR = instanceL + it.size.x;
|
|
const int instanceB = instanceT + it.size.y;
|
|
|
|
if (instanceL < cursorR && instanceR > cursorL && instanceT < cursorB && instanceB > cursorT)
|
|
{
|
|
// The _instances vector is _huge_ (easily up to 50k items) whereas only 1-2 items will actually overlap
|
|
// with the cursor. --> Make this loop more compact by putting as much as possible into a function call.
|
|
const auto added = _drawCursorForegroundSlowPath(c, i);
|
|
i += added;
|
|
instancesCount += added;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
size_t BackendD3D::_drawCursorForegroundSlowPath(const CursorRect& c, size_t offset)
|
|
{
|
|
// We won't die from copying 24 bytes. It simplifies the code below especially in
|
|
// respect to when/if we overwrite the _instances[offset] slot with a cutout.
|
|
#pragma warning(suppress : 26820) // This is a potentially expensive copy operation. Consider using a reference unless a copy is required (p.9).
|
|
const auto it = _instances[offset];
|
|
|
|
// There's one special exception to the rule: Emojis. We currently don't really support inverting
|
|
// (or reversing) colored glyphs like that, so we can return early here and avoid cutting them up.
|
|
// It'd be too expensive to check for these rare glyph types inside the _drawCursorForeground() loop.
|
|
if (static_cast<ShadingType>(it.shadingType) == ShadingType::TextPassthrough)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
const int cursorL = c.position.x;
|
|
const int cursorT = c.position.y;
|
|
const int cursorR = cursorL + c.size.x;
|
|
const int cursorB = cursorT + c.size.y;
|
|
|
|
const int instanceL = it.position.x;
|
|
const int instanceT = it.position.y;
|
|
const int instanceR = instanceL + it.size.x;
|
|
const int instanceB = instanceT + it.size.y;
|
|
|
|
const auto intersectionL = std::max(cursorL, instanceL);
|
|
const auto intersectionT = std::max(cursorT, instanceT);
|
|
const auto intersectionR = std::min(cursorR, instanceR);
|
|
const auto intersectionB = std::min(cursorB, instanceB);
|
|
|
|
// We should only get called if there's actually an intersection.
|
|
assert(intersectionL < intersectionR && intersectionT < intersectionB);
|
|
|
|
// We need to ensure that the glyph doesn't "dirty" the cursor background with its un-inverted/un-reversed color.
|
|
// If it did, and we'd draw the inverted/reversed glyph on top, it would look smudged.
|
|
// As such, this cuts a cursor-sized hole into the original glyph and splits it up.
|
|
//
|
|
// > Always initialize an object
|
|
// I would pay money if this warning was a little smarter. The array can remain uninitialized,
|
|
// because it acts like a tiny small_vector, but without the assertions.
|
|
#pragma warning(suppress : 26494) // Variable 'cutouts' is uninitialized. Always initialize an object (type.5).
|
|
rect<int> cutouts[4];
|
|
size_t cutoutCount = 0;
|
|
|
|
if (instanceT < intersectionT)
|
|
{
|
|
cutouts[cutoutCount++] = { instanceL, instanceT, instanceR, intersectionT };
|
|
}
|
|
if (instanceB > intersectionB)
|
|
{
|
|
cutouts[cutoutCount++] = { instanceL, intersectionB, instanceR, instanceB };
|
|
}
|
|
if (instanceL < intersectionL)
|
|
{
|
|
cutouts[cutoutCount++] = { instanceL, intersectionT, intersectionL, intersectionB };
|
|
}
|
|
if (instanceR > intersectionR)
|
|
{
|
|
cutouts[cutoutCount++] = { intersectionR, intersectionT, instanceR, intersectionB };
|
|
}
|
|
|
|
const auto addedInstances = cutoutCount ? cutoutCount - 1 : 0;
|
|
|
|
// Make place for cutoutCount-many items at position.
|
|
// NOTE: _bumpInstancesSize() reallocates the vector and all references to _instances will now be invalid.
|
|
if (addedInstances)
|
|
{
|
|
const auto instancesCount = _instancesCount;
|
|
|
|
_instancesCount += addedInstances;
|
|
if (_instancesCount >= _instances.size())
|
|
{
|
|
_bumpInstancesSize();
|
|
}
|
|
|
|
const auto src = _instances.data() + offset;
|
|
const auto dst = src + addedInstances;
|
|
const auto count = instancesCount - offset;
|
|
assert(src >= _instances.begin() && (src + count) < _instances.end());
|
|
assert(dst >= _instances.begin() && (dst + count) < _instances.end());
|
|
memmove(dst, src, count * sizeof(QuadInstance));
|
|
}
|
|
|
|
// Now that there's space we can write the glyph cutouts back into the instances vector.
|
|
for (size_t i = 0; i < cutoutCount; ++i)
|
|
{
|
|
const auto& cutout = cutouts[i];
|
|
auto& target = _instances[offset + i];
|
|
|
|
target.shadingType = it.shadingType;
|
|
target.renditionScale.x = it.renditionScale.x;
|
|
target.renditionScale.y = it.renditionScale.y;
|
|
target.position.x = static_cast<i16>(cutout.left);
|
|
target.position.y = static_cast<i16>(cutout.top);
|
|
target.size.x = static_cast<u16>(cutout.right - cutout.left);
|
|
target.size.y = static_cast<u16>(cutout.bottom - cutout.top);
|
|
target.texcoord.x = static_cast<u16>(it.texcoord.x + cutout.left - instanceL);
|
|
target.texcoord.y = static_cast<u16>(it.texcoord.y + cutout.top - instanceT);
|
|
target.color = it.color;
|
|
}
|
|
|
|
auto color = c.foreground == 0xffffffff ? it.color ^ 0xffffff : c.foreground;
|
|
color = ColorFix::GetPerceivableColor(color, c.background, 0.5f * 0.5f);
|
|
|
|
// If the cursor covers the entire glyph (like, let's say, a full-box cursor with an ASCII character),
|
|
// we don't append a new quad, but rather reuse the one that already exists (cutoutCount == 0).
|
|
auto& target = cutoutCount ? _appendQuad() : _instances[offset];
|
|
|
|
target.shadingType = it.shadingType;
|
|
target.renditionScale.x = it.renditionScale.x;
|
|
target.renditionScale.y = it.renditionScale.y;
|
|
target.position.x = static_cast<i16>(intersectionL);
|
|
target.position.y = static_cast<i16>(intersectionT);
|
|
target.size.x = static_cast<u16>(intersectionR - intersectionL);
|
|
target.size.y = static_cast<u16>(intersectionB - intersectionT);
|
|
target.texcoord.x = static_cast<u16>(it.texcoord.x + intersectionL - instanceL);
|
|
target.texcoord.y = static_cast<u16>(it.texcoord.y + intersectionT - instanceT);
|
|
target.color = color;
|
|
|
|
return addedInstances;
|
|
}
|
|
|
|
void BackendD3D::_drawSelection(const RenderingPayload& p)
|
|
{
|
|
u16 y = 0;
|
|
u16 lastFrom = 0;
|
|
u16 lastTo = 0;
|
|
|
|
for (const auto& row : p.rows)
|
|
{
|
|
if (row->selectionTo > row->selectionFrom)
|
|
{
|
|
// If the current selection line matches the previous one, we can just extend the previous quad downwards.
|
|
// The way this is implemented isn't very smart, but we also don't have very many rows to iterate through.
|
|
if (row->selectionFrom == lastFrom && row->selectionTo == lastTo)
|
|
{
|
|
_getLastQuad().size.y += p.s->font->cellSize.y;
|
|
}
|
|
else
|
|
{
|
|
_appendQuad() = {
|
|
.shadingType = static_cast<u16>(ShadingType::Selection),
|
|
.position = {
|
|
static_cast<i16>(p.s->font->cellSize.x * row->selectionFrom),
|
|
static_cast<i16>(p.s->font->cellSize.y * y),
|
|
},
|
|
.size = {
|
|
static_cast<u16>(p.s->font->cellSize.x * (row->selectionTo - row->selectionFrom)),
|
|
static_cast<u16>(p.s->font->cellSize.y),
|
|
},
|
|
.color = p.s->misc->selectionColor,
|
|
};
|
|
lastFrom = row->selectionFrom;
|
|
lastTo = row->selectionTo;
|
|
}
|
|
}
|
|
|
|
y++;
|
|
}
|
|
}
|
|
|
|
// Debug aid which is compiled out unless ATLAS_DEBUG_SHOW_DIRTY is set: Stores the dirty rectangle
// of this frame in the `_presentRects` ring buffer and appends a translucent, colored quad for every
// non-empty rectangle in that buffer.
void BackendD3D::_debugShowDirty(const RenderingPayload& p)
{
#if ATLAS_DEBUG_SHOW_DIRTY
    const auto slots = std::size(_presentRects);

    _presentRects[_presentRectsPos] = p.dirtyRectInPx;
    _presentRectsPos = (_presentRectsPos + 1) % slots;

    // After the increment above, _presentRectsPos refers to the oldest slot,
    // so this iterates from the oldest to the most recent rectangle.
    for (size_t age = 0; age < slots; ++age)
    {
        const auto& dirty = _presentRects[(_presentRectsPos + age) % slots];
        if (!dirty.non_empty())
        {
            continue;
        }

        _appendQuad() = {
            .shadingType = static_cast<u16>(ShadingType::Selection),
            .position = {
                static_cast<i16>(dirty.left),
                static_cast<i16>(dirty.top),
            },
            .size = {
                static_cast<u16>(dirty.right - dirty.left),
                static_cast<u16>(dirty.bottom - dirty.top),
            },
            .color = til::colorbrewer::pastel1[age] | 0x1f000000,
        };
    }
#endif
}
|
|
|
|
// Debug aid which is compiled out unless ATLAS_DEBUG_DUMP_RENDER_TARGET is set: Saves the current
// swap chain buffer as a PNG file named after the process ID and a running counter. The target
// directory is resolved from ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH and created on the first call.
void BackendD3D::_debugDumpRenderTarget(const RenderingPayload& p)
{
#if ATLAS_DEBUG_DUMP_RENDER_TARGET
    const auto isFirstDump = _dumpRenderTargetCounter == 0;
    if (isFirstDump)
    {
        const auto capacity = gsl::narrow_cast<DWORD>(std::size(_dumpRenderTargetBasePath));
        ExpandEnvironmentStringsW(ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH, &_dumpRenderTargetBasePath[0], capacity);
        std::filesystem::create_directories(_dumpRenderTargetBasePath);
    }

    wchar_t path[MAX_PATH];
    swprintf_s(path, L"%s\\%u_%08zu.png", &_dumpRenderTargetBasePath[0], GetCurrentProcessId(), _dumpRenderTargetCounter);

    const auto buffer = _swapChainManager.GetBuffer();
    SaveTextureToPNG(p.deviceContext.get(), buffer.get(), p.s->font->dpi, &path[0]);

    _dumpRenderTargetCounter++;
#endif
}
|
|
|
|
// Runs the user's custom pixel shader: It takes the offscreen texture with the terminal contents
// as input and draws the result into `_renderTargetView`. Afterwards the pipeline state
// for regular quad rendering into `_customRenderTargetView` is restored.
void BackendD3D::_executeCustomShader(RenderingPayload& p)
{
    const auto context = p.deviceContext.get();

    // Step 1: Upload the per-frame shader constants.
    {
        // See the comment in _recreateCustomShader() which initializes the two
        // _customShaderPerfTick* members used below and explains what they do.
        const auto now = queryPerfCount();
        const auto time = static_cast<int>(now % _customShaderPerfTickMod) * _customShaderSecsPerPerfTick;

        const CustomConstBuffer constants{
            .time = time,
            .scale = static_cast<f32>(p.s->font->dpi) / static_cast<f32>(USER_DEFAULT_SCREEN_DPI),
            .resolution = {
                static_cast<f32>(_viewportCellCount.x * p.s->font->cellSize.x),
                static_cast<f32>(_viewportCellCount.y * p.s->font->cellSize.y),
            },
            .background = colorFromU32Premultiply<f32x4>(p.s->misc->backgroundColor),
        };

        D3D11_MAPPED_SUBRESOURCE mapping{};
        THROW_IF_FAILED(context->Map(_customShaderConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapping));
        memcpy(mapping.pData, &constants, sizeof(constants));
        context->Unmap(_customShaderConstantBuffer.get(), 0);
    }

    // Step 2: Set up the pipeline for the custom shader pass.
    {
        // Before we do anything else we have to switch the bound render target over to _renderTargetView.
        // This unbinds the previous render target, which we otherwise couldn't use as a shader resource below.
        context->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr);

        // IA: Input Assembler
        context->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0);
        context->IASetInputLayout(nullptr);
        context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
        context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr);

        // VS: Vertex Shader
        context->VSSetShader(_customVertexShader.get(), nullptr, 0);
        context->VSSetConstantBuffers(0, 0, nullptr);

        // PS: Pixel Shader
        ID3D11ShaderResourceView* const shaderInputs[]{
            _customOffscreenTextureView.get(), // The terminal contents
            _customShaderTextureView.get(), // the experimental.pixelShaderImagePath, if there is one
        };
        // The second input is optional and only gets bound if it exists.
        const UINT shaderInputCount = shaderInputs[1] ? 2 : 1;
        context->PSSetShader(_customPixelShader.get(), nullptr, 0);
        context->PSSetConstantBuffers(0, 1, _customShaderConstantBuffer.addressof());
        context->PSSetShaderResources(0, shaderInputCount, &shaderInputs[0]);
        context->PSSetSamplers(0, 1, _customShaderSamplerState.addressof());

        // OM: Output Merger
        context->OMSetBlendState(nullptr, nullptr, 0xffffffff);
    }

    // Step 3: Draw 4 vertices as a triangle strip.
    context->Draw(4, 0);

    // Step 4: Restore the pipeline state for regular quad rendering.
    {
        // IA: Input Assembler
        ID3D11Buffer* buffers[]{ _vertexBuffer.get(), _instanceBuffer.get() };
        static constexpr UINT bufferStrides[]{ sizeof(f32x2), sizeof(QuadInstance) };
        static constexpr UINT bufferOffsets[]{ 0, 0 };
        context->IASetIndexBuffer(_indexBuffer.get(), DXGI_FORMAT_R16_UINT, 0);
        context->IASetInputLayout(_inputLayout.get());
        context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
        context->IASetVertexBuffers(0, 2, &buffers[0], &bufferStrides[0], &bufferOffsets[0]);

        // VS: Vertex Shader
        context->VSSetShader(_vertexShader.get(), nullptr, 0);
        context->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof());

        // PS: Pixel Shader
        ID3D11ShaderResourceView* quadInputs[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() };
        context->PSSetShader(_pixelShader.get(), nullptr, 0);
        context->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof());
        context->PSSetShaderResources(0, 2, &quadInputs[0]);
        context->PSSetSamplers(0, 0, nullptr);

        // OM: Output Merger
        context->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff);
        context->OMSetRenderTargets(1, _customRenderTargetView.addressof(), nullptr);
    }

    // With custom shaders, everything might be invalidated, so we have to
    // indirectly disable Present1() and its dirty rects this way.
    p.dirtyRectInPx = { 0, 0, p.s->targetSize.x, p.s->targetSize.y };
}
|
|
|
|
TIL_FAST_MATH_END
|