From cbfde9d9811a75691123dfc83dee72714c74ec00 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 13 May 2025 01:52:19 +0200 Subject: [PATCH] Fix ArenaString::from_utf8_lossy duplicating invalid chars --- src/arena/string.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/arena/string.rs b/src/arena/string.rs index 74d2716..487c750 100644 --- a/src/arena/string.rs +++ b/src/arena/string.rs @@ -31,26 +31,22 @@ impl<'a> ArenaString<'a> { Self { vec: bytes } } - #[must_use] - pub fn from_utf8_lossy_owned(v: Vec) -> Self { - let mut res = Self::new_in(v.allocator()); - + pub fn from_utf8_lossy<'s>(arena: &'a Arena, v: &'s [u8]) -> Result<&'s str, ArenaString<'a>> { let mut iter = v.utf8_chunks(); let Some(mut chunk) = iter.next() else { - return res; + return Ok(""); }; let valid = chunk.valid(); if chunk.invalid().is_empty() { debug_assert_eq!(valid.len(), v.len()); - return unsafe { Self::from_utf8_unchecked(v) }; + return Ok(unsafe { str::from_utf8_unchecked(v) }); } const REPLACEMENT: &str = "\u{FFFD}"; + let mut res = Self::new_in(arena); res.reserve(v.len()); - res.push_str(chunk.valid()); - res.push_str(REPLACEMENT); loop { res.push_str(chunk.valid()); @@ -63,7 +59,15 @@ impl<'a> ArenaString<'a> { }; } - res + Err(res) + } + + #[must_use] + pub fn from_utf8_lossy_owned(v: Vec) -> Self { + match Self::from_utf8_lossy(v.allocator(), &v) { + Ok(..) => unsafe { Self::from_utf8_unchecked(v) }, + Err(s) => s, + } } pub fn is_empty(&self) -> bool {