mirror of
https://github.com/shlinkio/shlink.git
synced 2025-12-10 11:05:50 -06:00
Fix error when creating short URL for page with unsupported encoding
This commit is contained in:
parent
88e97f18ad
commit
e9fe1ac5d4
@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this
|
||||
|
||||
### Fixed
|
||||
* [#2341](https://github.com/shlinkio/shlink/issues/2341) Ensure all asynchronous jobs that interact with the database do not leave idle connections open.
|
||||
* [#2334](https://github.com/shlinkio/shlink/issues/2334) Improve how page titles are encoded to UTF-8, falling back from mbstring to iconv if available, and ultimately using the original title in case of error, but never causing the short URL creation to fail.
|
||||
|
||||
|
||||
# [4.4.0] - 2024-12-27
|
||||
|
||||
@ -227,6 +227,7 @@ return [
|
||||
ShortUrl\Helper\ShortUrlTitleResolutionHelper::class => [
|
||||
'httpClient',
|
||||
Config\Options\UrlShortenerOptions::class,
|
||||
'Logger_Shlink',
|
||||
],
|
||||
ShortUrl\Helper\ShortUrlRedirectionBuilder::class => [
|
||||
Config\Options\TrackingOptions::class,
|
||||
|
||||
@ -4,14 +4,19 @@ declare(strict_types=1);
|
||||
|
||||
namespace Shlinkio\Shlink\Core\ShortUrl\Helper;
|
||||
|
||||
use Closure;
|
||||
use Fig\Http\Message\RequestMethodInterface;
|
||||
use GuzzleHttp\ClientInterface;
|
||||
use GuzzleHttp\RequestOptions;
|
||||
use Laminas\Stdlib\ErrorHandler;
|
||||
use Psr\Http\Message\ResponseInterface;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Shlinkio\Shlink\Core\Config\Options\UrlShortenerOptions;
|
||||
use Throwable;
|
||||
|
||||
use function function_exists;
|
||||
use function html_entity_decode;
|
||||
use function iconv;
|
||||
use function mb_convert_encoding;
|
||||
use function preg_match;
|
||||
use function str_contains;
|
||||
@ -30,9 +35,14 @@ readonly class ShortUrlTitleResolutionHelper implements ShortUrlTitleResolutionH
|
||||
// Matches the charset inside a Content-Type header
|
||||
private const string CHARSET_VALUE = '/charset=([^;]+)/i';
|
||||
|
||||
/**
|
||||
* @param (Closure(): bool)|null $isIconvInstalled
|
||||
*/
|
||||
public function __construct(
|
||||
private ClientInterface $httpClient,
|
||||
private UrlShortenerOptions $options,
|
||||
private LoggerInterface $logger,
|
||||
private Closure|null $isIconvInstalled = null,
|
||||
) {
|
||||
}
|
||||
|
||||
@ -58,7 +68,7 @@ readonly class ShortUrlTitleResolutionHelper implements ShortUrlTitleResolutionH
|
||||
}
|
||||
|
||||
$title = $this->tryToResolveTitle($response, $contentType);
|
||||
return $title !== null ? $data->withResolvedTitle($title) : $data;
|
||||
return $title !== null ? $data->withResolvedTitle(html_entity_decode(trim($title))) : $data;
|
||||
}
|
||||
|
||||
private function fetchUrl(string $url): ResponseInterface|null
|
||||
@ -84,6 +94,7 @@ readonly class ShortUrlTitleResolutionHelper implements ShortUrlTitleResolutionH
|
||||
{
|
||||
$collectedBody = '';
|
||||
$body = $response->getBody();
|
||||
|
||||
// With streaming enabled, we can walk the body until the </title> tag is found, and then stop
|
||||
while (! str_contains($collectedBody, '</title>') && ! $body->eof()) {
|
||||
$collectedBody .= $body->read(1024);
|
||||
@ -95,12 +106,48 @@ readonly class ShortUrlTitleResolutionHelper implements ShortUrlTitleResolutionH
|
||||
return null;
|
||||
}
|
||||
|
||||
// Get the page's charset from Content-Type header
|
||||
preg_match(self::CHARSET_VALUE, $contentType, $charsetMatches);
|
||||
$titleInOriginalEncoding = $titleMatches[1];
|
||||
|
||||
$title = isset($charsetMatches[1])
|
||||
? mb_convert_encoding($titleMatches[1], 'utf8', $charsetMatches[1])
|
||||
: $titleMatches[1];
|
||||
return html_entity_decode(trim($title));
|
||||
// Get the page's charset from Content-Type header, or return title as is if not found
|
||||
preg_match(self::CHARSET_VALUE, $contentType, $charsetMatches);
|
||||
if (! isset($charsetMatches[1])) {
|
||||
return $titleInOriginalEncoding;
|
||||
}
|
||||
|
||||
$pageCharset = $charsetMatches[1];
|
||||
return $this->encodeToUtf8WithMbString($titleInOriginalEncoding, $pageCharset)
|
||||
?? $this->encodeToUtf8WithIconv($titleInOriginalEncoding, $pageCharset)
|
||||
?? $titleInOriginalEncoding;
|
||||
}
|
||||
|
||||
private function encodeToUtf8WithMbString(string $titleInOriginalEncoding, string $pageCharset): string|null
|
||||
{
|
||||
try {
|
||||
return mb_convert_encoding($titleInOriginalEncoding, 'utf-8', $pageCharset);
|
||||
} catch (Throwable $e) {
|
||||
$this->logger->warning('It was impossible to encode page title in UTF-8 with mb_convert_encoding. {e}', [
|
||||
'e' => $e,
|
||||
]);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private function encodeToUtf8WithIconv(string $titleInOriginalEncoding, string $pageCharset): string|null
|
||||
{
|
||||
$isIconvInstalled = ($this->isIconvInstalled ?? fn () => function_exists('iconv'))();
|
||||
if (! $isIconvInstalled) {
|
||||
$this->logger->warning('Missing iconv extension. Skipping title encoding');
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
ErrorHandler::start();
|
||||
$title = iconv($pageCharset, 'utf-8', $titleInOriginalEncoding);
|
||||
ErrorHandler::stop(throw: true);
|
||||
return $title ?: null;
|
||||
} catch (Throwable $e) {
|
||||
$this->logger->warning('It was impossible to encode page title in UTF-8 with iconv. {e}', ['e' => $e]);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@ use PHPUnit\Framework\Attributes\TestWith;
|
||||
use PHPUnit\Framework\MockObject\Builder\InvocationMocker;
|
||||
use PHPUnit\Framework\MockObject\MockObject;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Shlinkio\Shlink\Core\Config\Options\UrlShortenerOptions;
|
||||
use Shlinkio\Shlink\Core\ShortUrl\Helper\ShortUrlTitleResolutionHelper;
|
||||
use Shlinkio\Shlink\Core\ShortUrl\Model\ShortUrlCreation;
|
||||
@ -25,10 +26,12 @@ class ShortUrlTitleResolutionHelperTest extends TestCase
|
||||
private const string LONG_URL = 'http://foobar.com/12345/hello?foo=bar';
|
||||
|
||||
private MockObject & ClientInterface $httpClient;
|
||||
private MockObject & LoggerInterface $logger;
|
||||
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->httpClient = $this->createMock(ClientInterface::class);
|
||||
$this->logger = $this->createMock(LoggerInterface::class);
|
||||
}
|
||||
|
||||
#[Test]
|
||||
@ -90,14 +93,59 @@ class ShortUrlTitleResolutionHelperTest extends TestCase
|
||||
}
|
||||
|
||||
#[Test]
|
||||
#[TestWith(['TEXT/html; charset=utf-8'], 'charset')]
|
||||
#[TestWith(['TEXT/html'], 'no charset')]
|
||||
public function titleIsUpdatedWhenItCanBeResolvedFromResponse(string $contentType): void
|
||||
#[TestWith(['TEXT/html', false], 'no charset')]
|
||||
#[TestWith(['TEXT/html; charset=utf-8', false], 'mbstring-supported charset')]
|
||||
#[TestWith(['TEXT/html; charset=Windows-1255', true], 'mbstring-unsupported charset')]
|
||||
public function titleIsUpdatedWhenItCanBeResolvedFromResponse(string $contentType, bool $expectsWarning): void
|
||||
{
|
||||
$data = ShortUrlCreation::fromRawData(['longUrl' => self::LONG_URL]);
|
||||
$this->expectRequestToBeCalled()->willReturn($this->respWithTitle($contentType));
|
||||
if ($expectsWarning) {
|
||||
$this->logger->expects($this->once())->method('warning')->with(
|
||||
'It was impossible to encode page title in UTF-8 with mb_convert_encoding. {e}',
|
||||
$this->isArray(),
|
||||
);
|
||||
} else {
|
||||
$this->logger->expects($this->never())->method('warning');
|
||||
}
|
||||
|
||||
$result = $this->helper(autoResolveTitles: true)->processTitle($data);
|
||||
$data = ShortUrlCreation::fromRawData(['longUrl' => self::LONG_URL]);
|
||||
$result = $this->helper(autoResolveTitles: true, iconvEnabled: true)->processTitle($data);
|
||||
|
||||
self::assertNotSame($data, $result);
|
||||
self::assertEquals('Resolved "title"', $result->title);
|
||||
}
|
||||
|
||||
#[Test]
|
||||
#[TestWith([
|
||||
'contentType' => 'text/html; charset=Windows-1255',
|
||||
'iconvEnabled' => false,
|
||||
'expectedSecondMessage' => 'Missing iconv extension. Skipping title encoding',
|
||||
])]
|
||||
#[TestWith([
|
||||
'contentType' => 'text/html; charset=foo',
|
||||
'iconvEnabled' => true,
|
||||
'expectedSecondMessage' => 'It was impossible to encode page title in UTF-8 with iconv. {e}',
|
||||
])]
|
||||
public function warningsLoggedWhenTitleCannotBeEncodedToUtf8(
|
||||
string $contentType,
|
||||
bool $iconvEnabled,
|
||||
string $expectedSecondMessage,
|
||||
): void {
|
||||
$this->expectRequestToBeCalled()->willReturn($this->respWithTitle($contentType));
|
||||
$callCount = 0;
|
||||
$this->logger->expects($this->exactly(2))->method('warning')->with($this->callback(
|
||||
function (string $message) use (&$callCount, $expectedSecondMessage): bool {
|
||||
$callCount++;
|
||||
if ($callCount === 1) {
|
||||
return $message === 'It was impossible to encode page title in UTF-8 with mb_convert_encoding. {e}';
|
||||
}
|
||||
|
||||
return $message === $expectedSecondMessage;
|
||||
},
|
||||
));
|
||||
|
||||
$data = ShortUrlCreation::fromRawData(['longUrl' => self::LONG_URL]);
|
||||
$result = $this->helper(autoResolveTitles: true, iconvEnabled: $iconvEnabled)->processTitle($data);
|
||||
|
||||
self::assertNotSame($data, $result);
|
||||
self::assertEquals('Resolved "title"', $result->title);
|
||||
@ -143,11 +191,13 @@ class ShortUrlTitleResolutionHelperTest extends TestCase
|
||||
return $body;
|
||||
}
|
||||
|
||||
private function helper(bool $autoResolveTitles = false): ShortUrlTitleResolutionHelper
|
||||
private function helper(bool $autoResolveTitles = false, bool $iconvEnabled = false): ShortUrlTitleResolutionHelper
|
||||
{
|
||||
return new ShortUrlTitleResolutionHelper(
|
||||
$this->httpClient,
|
||||
new UrlShortenerOptions(autoResolveTitles: $autoResolveTitles),
|
||||
$this->logger,
|
||||
fn () => $iconvEnabled,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user