From 4d67453c02c47dfcef89861d03a2c587a039f2ab Mon Sep 17 00:00:00 2001 From: Carlos Zamora Date: Wed, 14 May 2025 10:30:05 -0700 Subject: [PATCH] Replace New Tab Menu Match Profiles functionality with regex support (#18654) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary of the Pull Request Updates the New Tab Menu's Match Profiles entry to support regex instead of doing a direct match. Also adds validation to ensure the regex is valid. Updated the UI to help make it more clear that this supports regexes and even added a link to some helpful docs. ## Validation Steps Performed ✅ Invalid regex displays a warning ✅ Valid regex works nicely ✅ profile matcher with source=`Windows.Terminal.VisualStudio` still works as expected ## PR Checklist Closes #18553 --- src/buffer/out/UTextAdapter.cpp | 11 --- src/buffer/out/UTextAdapter.h | 2 - src/buffer/out/textBuffer.cpp | 3 +- .../Resources/en-US/Resources.resw | 3 + src/cascadia/TerminalApp/TerminalWindow.cpp | 1 + src/cascadia/TerminalCore/Terminal.cpp | 11 +-- .../TerminalSettingsEditor/NewTabMenu.xaml | 2 + .../Resources/en-US/Resources.resw | 11 +-- .../CascadiaSettings.cpp | 37 ++++++++++ .../TerminalSettingsModel/CascadiaSettings.h | 1 + .../MatchProfilesEntry.cpp | 70 +++++++++++++------ .../MatchProfilesEntry.h | 31 +++++++- .../TerminalWarnings.idl | 1 + src/inc/til/regex.h | 22 ++++++ 14 files changed, 160 insertions(+), 46 deletions(-) create mode 100644 src/inc/til/regex.h diff --git a/src/buffer/out/UTextAdapter.cpp b/src/buffer/out/UTextAdapter.cpp index 717d97812a..1b392f3d23 100644 --- a/src/buffer/out/UTextAdapter.cpp +++ b/src/buffer/out/UTextAdapter.cpp @@ -400,17 +400,6 @@ Microsoft::Console::ICU::unique_utext Microsoft::Console::ICU::UTextFromTextBuff return ut; } -Microsoft::Console::ICU::unique_uregex Microsoft::Console::ICU::CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept -{ -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - const auto re = uregex_open(reinterpret_cast(pattern.data()), gsl::narrow_cast(pattern.size()), flags, nullptr, status); - // ICU describes the time unit as being dependent on CPU performance and "typically [in] the order of milliseconds", - // but this claim seems highly outdated already. On my CPU from 2021, a limit of 4096 equals roughly 600ms. - uregex_setTimeLimit(re, 4096, status); - uregex_setStackLimit(re, 4 * 1024 * 1024, status); - return unique_uregex{ re }; -} - // Returns a half-open [beg,end) range given a text start and end position. // This function is designed to be used with uregex_start64/uregex_end64. til::point_span Microsoft::Console::ICU::BufferRangeFromMatch(UText* ut, URegularExpression* re) diff --git a/src/buffer/out/UTextAdapter.h b/src/buffer/out/UTextAdapter.h index 39903627b5..fe6bacafd4 100644 --- a/src/buffer/out/UTextAdapter.h +++ b/src/buffer/out/UTextAdapter.h @@ -9,10 +9,8 @@ class TextBuffer; namespace Microsoft::Console::ICU { - using unique_uregex = wistd::unique_ptr>; using unique_utext = wil::unique_struct; unique_utext UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept; - unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept; til::point_span BufferRangeFromMatch(UText* ut, URegularExpression* re); } diff --git a/src/buffer/out/textBuffer.cpp b/src/buffer/out/textBuffer.cpp index 095195133b..11f48ec0f6 100644 --- a/src/buffer/out/textBuffer.cpp +++ b/src/buffer/out/textBuffer.cpp @@ -10,6 +10,7 @@ #include "../../types/inc/CodepointWidthDetector.hpp" #include "../renderer/base/renderer.hpp" #include "../types/inc/utils.hpp" +#include #include "search.h" // BODGY: Misdiagnosis in MSVC 17.11: Referencing global constants in the member @@ -3353,7 +3354,7 @@ std::optional> TextBuffer::SearchText(const std::ws } UErrorCode status = U_ZERO_ERROR; - const auto re = ICU::CreateRegex(needle, icuFlags, &status); + const auto re = til::ICU::CreateRegex(needle, icuFlags, &status); if (status > U_ZERO_ERROR) { return std::nullopt; diff --git a/src/cascadia/TerminalApp/Resources/en-US/Resources.resw b/src/cascadia/TerminalApp/Resources/en-US/Resources.resw index 74d09214ba..79a9155134 100644 --- a/src/cascadia/TerminalApp/Resources/en-US/Resources.resw +++ b/src/cascadia/TerminalApp/Resources/en-US/Resources.resw @@ -944,4 +944,7 @@ Move right + + An invalid regex was found. + \ No newline at end of file diff --git a/src/cascadia/TerminalApp/TerminalWindow.cpp b/src/cascadia/TerminalApp/TerminalWindow.cpp index ff3a66acae..9776679691 100644 --- a/src/cascadia/TerminalApp/TerminalWindow.cpp +++ b/src/cascadia/TerminalApp/TerminalWindow.cpp @@ -55,6 +55,7 @@ static const std::array settingsLoadWarningsLabels{ USES_RESOURCE(L"UnknownTheme"), USES_RESOURCE(L"DuplicateRemainingProfilesEntry"), USES_RESOURCE(L"InvalidUseOfContent"), + USES_RESOURCE(L"InvalidRegex"), }; static_assert(settingsLoadWarningsLabels.size() == static_cast(SettingsLoadWarnings::WARNINGS_SIZE)); diff --git a/src/cascadia/TerminalCore/Terminal.cpp b/src/cascadia/TerminalCore/Terminal.cpp index 91def44107..68d7178187 100644 --- a/src/cascadia/TerminalCore/Terminal.cpp +++ b/src/cascadia/TerminalCore/Terminal.cpp @@ -12,6 +12,7 @@ #include "../../buffer/out/UTextAdapter.h" #include +#include #include using namespace winrt::Microsoft::Terminal::Core; @@ -1375,7 +1376,7 @@ struct URegularExpressionInterner // // An alternative approach would be to not make this method thread-safe and give each // Terminal instance its own cache. I'm not sure which approach would have been better. - ICU::unique_uregex Intern(const std::wstring_view& pattern) + til::ICU::unique_uregex Intern(const std::wstring_view& pattern) { UErrorCode status = U_ZERO_ERROR; @@ -1383,14 +1384,14 @@ struct URegularExpressionInterner const auto guard = _lock.lock_shared(); if (const auto it = _cache.find(pattern); it != _cache.end()) { - return ICU::unique_uregex{ uregex_clone(it->second.re.get(), &status) }; + return til::ICU::unique_uregex{ uregex_clone(it->second.re.get(), &status) }; } } // Even if the URegularExpression creation failed, we'll insert it into the cache, because there's no point in retrying. // (Apart from OOM but in that case this application will crash anyways in 3.. 2.. 1..) - auto re = ICU::CreateRegex(pattern, 0, &status); - ICU::unique_uregex clone{ uregex_clone(re.get(), &status) }; + auto re = til::ICU::CreateRegex(pattern, 0, &status); + til::ICU::unique_uregex clone{ uregex_clone(re.get(), &status) }; std::wstring key{ pattern }; const auto guard = _lock.lock_exclusive(); @@ -1412,7 +1413,7 @@ struct URegularExpressionInterner private: struct CacheValue { - ICU::unique_uregex re; + til::ICU::unique_uregex re; size_t generation = 0; }; diff --git a/src/cascadia/TerminalSettingsEditor/NewTabMenu.xaml b/src/cascadia/TerminalSettingsEditor/NewTabMenu.xaml index 018a00d532..54acab56e1 100644 --- a/src/cascadia/TerminalSettingsEditor/NewTabMenu.xaml +++ b/src/cascadia/TerminalSettingsEditor/NewTabMenu.xaml @@ -449,6 +449,8 @@ FontIconGlyph="" Style="{StaticResource ExpanderSettingContainerStyleWithIcon}"> + Header for a control that adds any remaining profiles to the new tab menu. - Add a group of profiles that match at least one of the defined properties + Add a group of profiles that match at least one of the defined regex properties Additional information for a control that adds a terminal profile matcher to the new tab menu. Presented near "NewTabMenu_AddMatchProfiles". @@ -2121,15 +2121,15 @@ Header for a control that adds a folder to the new tab menu. - Profile name + Profile name (Regex) Header for a text box used to define a regex for the names of profiles to add. - Profile source + Profile source (Regex) Header for a text box used to define a regex for the sources of profiles to add. - Commandline + Commandline (Regex) Header for a text box used to define a regex for the commandlines of profiles to add. @@ -2344,6 +2344,9 @@ This option is managed by enterprise policy and cannot be changed here. This is displayed in concordance with Globals_StartOnUserLogin if the enterprise administrator has taken control of this setting. + + Learn more about regular expressions + None Text displayed when the background image path is not defined. diff --git a/src/cascadia/TerminalSettingsModel/CascadiaSettings.cpp b/src/cascadia/TerminalSettingsModel/CascadiaSettings.cpp index eeb3d7cc2e..9dc2433403 100644 --- a/src/cascadia/TerminalSettingsModel/CascadiaSettings.cpp +++ b/src/cascadia/TerminalSettingsModel/CascadiaSettings.cpp @@ -4,6 +4,7 @@ #include "pch.h" #include "CascadiaSettings.h" #include "CascadiaSettings.g.cpp" +#include "MatchProfilesEntry.h" #include "DefaultTerminal.h" #include "FileUtils.h" @@ -429,6 +430,7 @@ void CascadiaSettings::_validateSettings() _validateColorSchemesInCommands(); _validateThemeExists(); _validateProfileEnvironmentVariables(); + _validateRegexes(); } // Method Description: @@ -583,6 +585,41 @@ void CascadiaSettings::_validateProfileEnvironmentVariables() } } +// Returns true if all regexes in the new tab menu are valid, false otherwise +static bool _validateNTMEntries(const IVector& entries) +{ + if (!entries) + { + return true; + } + for (const auto& ntmEntry : entries) + { + if (const auto& folderEntry = ntmEntry.try_as()) + { + if (!_validateNTMEntries(folderEntry.RawEntries())) + { + return false; + } + } + if (const auto& matchProfilesEntry = ntmEntry.try_as()) + { + if (!winrt::get_self(matchProfilesEntry)->ValidateRegexes()) + { + return false; + } + } + } + return true; +} + +void CascadiaSettings::_validateRegexes() +{ + if (!_validateNTMEntries(_globals->NewTabMenu())) + { + _warnings.Append(SettingsLoadWarnings::InvalidRegex); + } +} + // Method Description: // - Helper to get the GUID of a profile, given an optional index and a possible // "profile" value to override that. diff --git a/src/cascadia/TerminalSettingsModel/CascadiaSettings.h b/src/cascadia/TerminalSettingsModel/CascadiaSettings.h index e91c43885e..a6e50e370a 100644 --- a/src/cascadia/TerminalSettingsModel/CascadiaSettings.h +++ b/src/cascadia/TerminalSettingsModel/CascadiaSettings.h @@ -175,6 +175,7 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation void _validateColorSchemesInCommands() const; bool _hasInvalidColorScheme(const Model::Command& command) const; void _validateThemeExists(); + void _validateRegexes(); void _researchOnLoad(); diff --git a/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.cpp b/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.cpp index 6946eb98e8..5dc4021954 100644 --- a/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.cpp +++ b/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.cpp @@ -36,41 +36,71 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation auto entry = winrt::make_self(); JsonUtils::GetValueForKey(json, NameKey, entry->_Name); + entry->_validateName(); + JsonUtils::GetValueForKey(json, CommandlineKey, entry->_Commandline); + entry->_validateCommandline(); + JsonUtils::GetValueForKey(json, SourceKey, entry->_Source); + entry->_validateSource(); return entry; } + // Returns true if all regexes are valid, false otherwise + bool MatchProfilesEntry::ValidateRegexes() const + { + return !(_invalidName || _invalidCommandline || _invalidSource); + } + +#define DEFINE_VALIDATE_FUNCTION(name) \ + void MatchProfilesEntry::_validate##name() noexcept \ + { \ + _invalid##name = false; \ + if (_##name.empty()) \ + { \ + /* empty field is valid*/ \ + return; \ + } \ + UErrorCode status = U_ZERO_ERROR; \ + _##name##Regex = til::ICU::CreateRegex(_##name, 0, &status); \ + if (U_FAILURE(status)) \ + { \ + _invalid##name = true; \ + _##name##Regex.reset(); \ + } \ + } + + DEFINE_VALIDATE_FUNCTION(Name); + DEFINE_VALIDATE_FUNCTION(Commandline); + DEFINE_VALIDATE_FUNCTION(Source); + bool MatchProfilesEntry::MatchesProfile(const Model::Profile& profile) { - // We use an optional here instead of a simple bool directly, since there is no - // sensible default value for the desired semantics: the first property we want - // to match on should always be applied (so one would set "true" as a default), - // but if none of the properties are set, the default return value should be false - // since this entry type is expected to behave like a positive match/whitelist. - // - // The easiest way to deal with this neatly is to use an optional, then for any - // property to match we consider a null value to be "true", and for the return - // value of the function we consider the null value to be "false". - auto isMatching = std::optional{}; + auto isMatch = [](const til::ICU::unique_uregex& regex, std::wstring_view text) { + if (text.empty()) + { + return false; + } + UErrorCode status = U_ZERO_ERROR; + uregex_setText(regex.get(), reinterpret_cast(text.data()), static_cast(text.size()), &status); + const auto match = uregex_matches(regex.get(), 0, &status); + return status == U_ZERO_ERROR && match; + }; - if (!_Name.empty()) + if (!_Name.empty() && isMatch(_NameRegex, profile.Name())) { - isMatching = { isMatching.value_or(true) && _Name == profile.Name() }; + return true; } - - if (!_Source.empty()) + else if (!_Source.empty() && isMatch(_SourceRegex, profile.Source())) { - isMatching = { isMatching.value_or(true) && _Source == profile.Source() }; + return true; } - - if (!_Commandline.empty()) + else if (!_Commandline.empty() && isMatch(_CommandlineRegex, profile.Commandline())) { - isMatching = { isMatching.value_or(true) && _Commandline == profile.Commandline() }; + return true; } - - return isMatching.value_or(false); + return false; } Model::NewTabMenuEntry MatchProfilesEntry::Copy() const diff --git a/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.h b/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.h index 815d1be3e8..8849291bbf 100644 --- a/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.h +++ b/src/cascadia/TerminalSettingsModel/MatchProfilesEntry.h @@ -17,6 +17,30 @@ Author(s): #include "ProfileCollectionEntry.h" #include "MatchProfilesEntry.g.h" +#include + +// This macro defines the getter and setter for a regex property. +// The setter tries to instantiate the regex immediately and caches +// it if successful. If it fails, it sets a boolean flag to track that +// it failed. +#define DEFINE_MATCH_PROFILE_REGEX_PROPERTY(name) \ +public: \ + hstring name() const noexcept \ + { \ + return _##name; \ + } \ + void name(const hstring& value) noexcept \ + { \ + _##name = value; \ + _validate##name(); \ + } \ + \ +private: \ + void _validate##name() noexcept; \ + \ + hstring _##name; \ + til::ICU::unique_uregex _##name##Regex; \ + bool _invalid##name{ false }; namespace winrt::Microsoft::Terminal::Settings::Model::implementation { @@ -30,11 +54,12 @@ namespace winrt::Microsoft::Terminal::Settings::Model::implementation Json::Value ToJson() const override; static com_ptr FromJson(const Json::Value& json); + bool ValidateRegexes() const; bool MatchesProfile(const Model::Profile& profile); - WINRT_PROPERTY(winrt::hstring, Name); - WINRT_PROPERTY(winrt::hstring, Commandline); - WINRT_PROPERTY(winrt::hstring, Source); + DEFINE_MATCH_PROFILE_REGEX_PROPERTY(Name) + DEFINE_MATCH_PROFILE_REGEX_PROPERTY(Commandline) + DEFINE_MATCH_PROFILE_REGEX_PROPERTY(Source) }; } diff --git a/src/cascadia/TerminalSettingsModel/TerminalWarnings.idl b/src/cascadia/TerminalSettingsModel/TerminalWarnings.idl index aa02d18ff4..c8923ce34a 100644 --- a/src/cascadia/TerminalSettingsModel/TerminalWarnings.idl +++ b/src/cascadia/TerminalSettingsModel/TerminalWarnings.idl @@ -25,6 +25,7 @@ namespace Microsoft.Terminal.Settings.Model UnknownTheme, DuplicateRemainingProfilesEntry, InvalidUseOfContent, + InvalidRegex, WARNINGS_SIZE // IMPORTANT: This MUST be the last value in this enum. It's an unused placeholder. }; diff --git a/src/inc/til/regex.h b/src/inc/til/regex.h new file mode 100644 index 0000000000..0ddc84eda6 --- /dev/null +++ b/src/inc/til/regex.h @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include + +namespace til::ICU // Terminal Implementation Library. Also: "Today I Learned" +{ + using unique_uregex = wistd::unique_ptr>; + + _TIL_INLINEPREFIX unique_uregex CreateRegex(const std::wstring_view& pattern, uint32_t flags, UErrorCode* status) noexcept + { +#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). + const auto re = uregex_open(reinterpret_cast(pattern.data()), gsl::narrow_cast(pattern.size()), flags, nullptr, status); + // ICU describes the time unit as being dependent on CPU performance and "typically [in] the order of milliseconds", + // but this claim seems highly outdated already. On my CPU from 2021, a limit of 4096 equals roughly 600ms. + uregex_setTimeLimit(re, 4096, status); + uregex_setStackLimit(re, 4 * 1024 * 1024, status); + return unique_uregex{ re }; + } +} \ No newline at end of file