wip: lower language-definition regexes into index-based states instead of arena nodes

2026-06-15 21:16:40 -05:00 · 2025-07-12 03:23:49 +02:00
parent f377e9f4bf
commit dd4fc0ca8f
2 changed files with 284 additions and 317 deletions
--- a/src/highlighter/lang.rs
+++ b/src/highlighter/lang.rs
@@ -10,7 +10,6 @@ use regex_syntax::hir::{Class, ClassBytes, ClassBytesRange, Hir, HirKind, Look};

 use super::{Action, Consume, HighlightKind};
 use crate::arena::{Arena, ArenaString, scratch_arena};
-use crate::cell::SemiRefCell;
 use crate::highlighter::{CharsetFormatter, Transition};

 pub struct LanguageDefinition {
@@ -48,7 +47,8 @@ pub const JSON: LanguageDefinition = {
                    // Strings
                    (r#"""#, String, Push("string")),
                    // Numbers (start: minus or digit)
-                    (r#"-?\d*(?:\.\d+)?(?:[eE][+-]?\d+)?"#, Number, Pop(1)),
+                    (r#"-\d*(?:\.\d+)?(?:[eE][+-]?\d+)?"#, Number, Pop(1)),
+                    (r#"\d*(?:\.\d+)?(?:[eE][+-]?\d+)?"#, Number, Pop(1)),
                    // Booleans/null
                    (r#"true\b"#, Keyword, Pop(1)),
                    (r#"false\b"#, Keyword, Pop(1)),
@@ -65,348 +65,315 @@ pub const JSON: LanguageDefinition = {
    }
 };

-type NodeCell<'a> = SemiRefCell<Node<'a>>;
-
-// Nodes form a DFA graph which is mostly shaped like a tree.
-// Each group of sibling nodes represent the edges coming out of a DFA state.
-struct Node<'a> {
-    edge_first: Option<&'a EdgeCell<'a>>,
-    edge_last: Option<&'a EdgeCell<'a>>,
+struct WipState {
+    transitions: Vec<WipTransition>,
 }

-impl<'a> Node<'a> {
-    fn new_in(arena: &'a Arena) -> &'a mut NodeCell<'a> {
-        arena.alloc_uninit().write(NodeCell::new(Node { edge_first: None, edge_last: None }))
-    }
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum WipAction {
+    Change(usize),
+    Push(usize),
+    Pop(usize),
 }

-type EdgeCell<'a> = SemiRefCell<Edge<'a>>;
-
-struct Edge<'a> {
-    edge_next: Option<&'a EdgeCell<'a>>,
-    dst: &'a NodeCell<'a>,
-    test: Consume<'a>,
+#[derive(PartialEq, Eq)]
+enum WipConsume {
+    Chars(usize),
+    Prefix(String),
+    PrefixInsensitive(String),
+    Charset(Box<[bool; 256]>),
+    Line,
 }

-fn add_edge<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    test: Consume<'a>,
-) -> &'a NodeCell<'a> {
-    let mut src = src.borrow_mut();
+struct WipTransition {
+    test: WipConsume,
+    kind: HighlightKind,
+    action: WipAction,
+}

-    // Check if the edge already exists.
-    {
-        let mut edge = src.edge_first;
-        while let Some(e) = edge {
-            let e = e.borrow();
-            if e.test == test {
-                return e.dst;
-            }
-            edge = e.edge_next;
-        }
+struct WipContext<'a> {
+    states: &'a mut Vec<WipState>,
+    kind: HighlightKind,
+}
+
+impl WipContext<'_> {
+    fn add_state(&mut self) -> usize {
+        self.states.push(WipState { transitions: Vec::new() });
+        self.states.len() - 1
    }

-    let edge = arena.alloc_uninit().write(EdgeCell::new(Edge { edge_next: None, dst, test }));
+    fn add_transition(&mut self, src: usize, dst: WipAction, test: WipConsume) -> WipAction {
+        let src = &mut self.states[src].transitions;

-    if let Some(last) = src.edge_last {
-        last.borrow_mut().edge_next = Some(edge);
-    } else {
-        src.edge_first = Some(edge);
-    }
-
-    src.edge_last = Some(edge);
-    dst
-}
-
-fn transform<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    hir: &Hir,
-) -> &'a NodeCell<'a> {
-    fn is_any_class(class: &ClassBytes) -> bool {
-        class.ranges() == [ClassBytesRange::new(0, 255)]
-    }
-
-    match hir.kind() {
-        HirKind::Literal(lit) => transform_literal(arena, src, dst, &lit.0),
-        HirKind::Class(Class::Bytes(class)) if is_any_class(class) => {
-            transform_any(arena, src, dst)
-        }
-        HirKind::Class(Class::Bytes(class)) => transform_class(arena, src, dst, class),
-        HirKind::Look(Look::WordAscii) => dst,
-        HirKind::Repetition(rep) => match (rep.min, rep.max, rep.sub.kind()) {
-            (0, None, HirKind::Class(Class::Bytes(class))) if is_any_class(class) => {
-                transform_any_star(arena, src, dst)
-            }
-            (0, None, HirKind::Class(Class::Bytes(class))) => {
-                let dst = transform_class_plus(arena, src, dst, class);
-                transform_option(arena, src, dst);
-                dst
-            }
-            (0, Some(1), _) => {
-                let dst = transform(arena, src, dst, &rep.sub);
-                transform_option(arena, src, dst);
-                dst
-            }
-            (1, None, HirKind::Class(Class::Bytes(class))) => {
-                transform_class_plus(arena, src, dst, class)
-            }
-            _ => panic!("Unsupported HIR: {hir:?}"),
-        },
-        HirKind::Concat(hirs) if hirs.len() >= 2 => transform_concat(arena, src, dst, hirs),
-        HirKind::Alternation(hirs) if hirs.len() >= 2 => transform_alt(arena, src, dst, hirs),
-        _ => panic!("Unsupported HIR: {hir:?}"),
-    }
-}
-
-// string
-fn transform_literal<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    lit: &[u8],
-) -> &'a NodeCell<'a> {
-    let copy = arena.alloc_uninit_slice(lit.len()).write_clone_of_slice(lit);
-    let copy = str::from_utf8(copy).unwrap();
-    add_edge(arena, src, dst, Consume::Prefix(copy))
-}
-
-// [a-z]+
-fn transform_class_plus<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    class: &ClassBytes,
-) -> &'a NodeCell<'a> {
-    let charset = class_to_charset(arena, class);
-    add_edge(arena, src, dst, Consume::Charset(charset))
-}
-
-// [eE]
-fn transform_class<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    class: &ClassBytes,
-) -> &'a NodeCell<'a> {
-    let charset = class_to_charset(arena, class);
-    let mut actual_dst = None;
-
-    for i in 0..256 {
-        if !charset[i] {
-            continue;
-        }
-
-        if i >= 128 {
-            panic!("Invalid non-ASCII class character {i}");
-        }
-
-        let ch = i as u8;
-        let copy = arena.alloc_uninit().write(ch.to_ascii_lowercase());
-        let copy = str::from_utf8(slice::from_ref(copy)).unwrap();
-
-        // NOTE: Uppercase chars have a lower numeric value than lowercase chars.
-        // As such, we need to test for `is_ascii_uppercase`.
-        let test = if ch.is_ascii_uppercase()
-            && let upper = ch.to_ascii_lowercase() as usize
-            && charset[upper]
-        {
-            charset[upper] = false;
-            Consume::PrefixInsensitive(copy)
-        } else {
-            Consume::Prefix(copy)
-        };
-
-        let node = add_edge(arena, src, dst, test);
-        if !ptr::eq(node, *actual_dst.get_or_insert(node)) {
-            panic!("Diverging destinations for class transformer: {class:?}");
-        }
-    }
-
-    actual_dst.unwrap_or(dst)
-}
-
-// .?
-fn transform_option<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-) -> &'a NodeCell<'a> {
-    add_edge(arena, src, dst, Consume::Chars(0))
-}
-
-// .*
-fn transform_any_star<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-) -> &'a NodeCell<'a> {
-    add_edge(arena, src, dst, Consume::Line)
-}
-
-// .
-fn transform_any<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-) -> &'a NodeCell<'a> {
-    add_edge(arena, src, dst, Consume::Chars(1))
-}
-
-fn transform_concat<'a>(
-    arena: &'a Arena,
-    mut src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    hirs: &[Hir],
-) -> &'a NodeCell<'a> {
-    fn check_lowercase_literal(hir: &Hir) -> Option<u8> {
-        if let HirKind::Class(Class::Bytes(class)) = hir.kind()
-            && let ranges = class.ranges()
-            && ranges.len() == 2
-            && ranges[0].len() == 1
-            && ranges[1].len() == 1
-            && let lower_a = ranges[0].start().to_ascii_lowercase()
-            && let lower_b = ranges[1].start().to_ascii_lowercase()
-            && lower_a == lower_b
-        {
-            Some(lower_a)
-        } else {
-            None
-        }
-    }
-
-    let mut it = hirs.iter().peekable();
-
-    while let Some(mut hir) = it.next() {
-        if let Some(ch) = check_lowercase_literal(hir) {
-            // Transform [aA][bB][cC] into PrefixInsensitive("abc")
-            let mut str = ManuallyDrop::new(ArenaString::new_in(arena));
-            str.push(ch as char);
-
-            while let Some(next_hir) = it.peek() {
-                if let Some(next_ch) = check_lowercase_literal(next_hir) {
-                    str.push(next_ch as char);
-                    it.next();
-                } else {
-                    break;
+        // Check if the edge already exists.
+        for t in src.iter() {
+            if t.test == test {
+                match t.action {
+                    WipAction::Change(_) => return t.action,
+                    _ => panic!("Existing edge with non-change action"),
                }
            }
-
-            let next = if it.peek().is_some() { Node::new_in(arena) } else { dst };
-            let str: &'a str = unsafe { mem::transmute(str.as_str()) };
-            src = add_edge(arena, src, next, Consume::PrefixInsensitive(str));
-        } else {
-            let next = if it.peek().is_some() { Node::new_in(arena) } else { dst };
-            src = transform(arena, src, next, hir);
        }
+
+        src.push(WipTransition { test, kind: self.kind, action: dst });
+        dst
    }

-    src
-}
-
-fn transform_alt<'a>(
-    arena: &'a Arena,
-    src: &'a NodeCell<'a>,
-    dst: &'a NodeCell<'a>,
-    hirs: &[Hir],
-) -> &'a NodeCell<'a> {
-    let mut actual_dst = None;
-
-    for hir in hirs {
-        let node = transform(arena, src, dst, hir);
-        if !ptr::eq(node, *actual_dst.get_or_insert(node)) {
-            panic!("Diverging destinations for alternation transformer: {hirs:?}");
+    fn transform(&mut self, src: usize, dst: WipAction, hir: &Hir) -> WipAction {
+        fn is_any_class(class: &ClassBytes) -> bool {
+            class.ranges() == [ClassBytesRange::new(0, 255)]
        }
-    }

-    actual_dst.unwrap_or(dst)
-}
-
-fn class_to_charset<'a>(arena: &'a Arena, class: &ClassBytes) -> &'a mut [bool; 256] {
-    let mut charset = arena.alloc_uninit().write([false; 256]);
-
-    for r in class.iter() {
-        charset[r.start() as usize..=r.end() as usize].fill(true);
-    }
-
-    // If the class includes \w, we also set any non-ASCII characters.
-    // That's not how Unicode works, but it simplifies the implementation.
-    if [(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')]
-        .iter()
-        .all(|&(beg, end)| charset[beg as usize..=end as usize].iter().all(|&b| b))
-    {
-        charset[0x80..=0xFF].fill(true);
-    }
-
-    charset
-}
-
-fn print_mermaid<'a>(root: &'a NodeCell<'a>) {
-    fn node_id<'a, 'v>(
-        visited: &'v mut HashMap<*const NodeCell<'a>, (usize, bool)>,
-        ptr: &'a NodeCell<'a>,
-    ) -> &'v mut (usize, bool) {
-        let num = visited.len();
-        match visited.entry(ptr as *const _) {
-            Entry::Occupied(mut e) => e.into_mut(),
-            Entry::Vacant(mut e) => e.insert((num, false)),
-        }
-    }
-
-    fn walk<'a>(
-        node: &'a NodeCell<'a>,
-        visited: &mut HashMap<*const NodeCell<'a>, (usize, bool)>,
-        out: &mut String,
-    ) {
-        let node_ptr = node as *const _;
-        let src_id = match node_id(visited, node) {
-            (num, visited) if !*visited => {
-                *visited = true;
-                *num
+        match hir.kind() {
+            HirKind::Literal(lit) => self.transform_literal(src, dst, &lit.0),
+            HirKind::Class(Class::Bytes(class)) if is_any_class(class) => {
+                self.transform_any(src, dst)
            }
-            _ => return, // Already visited
-        };
+            HirKind::Class(Class::Bytes(class)) => self.transform_class(src, dst, class),
+            HirKind::Look(Look::WordAscii) => dst,
+            HirKind::Repetition(rep) => match (rep.min, rep.max, rep.sub.kind()) {
+                (0, None, HirKind::Class(Class::Bytes(class))) if is_any_class(class) => {
+                    self.transform_any_star(src, dst)
+                }
+                (0, None, HirKind::Class(Class::Bytes(class))) => {
+                    let dst = self.transform_class_plus(src, dst, class);
+                    self.transform_option(src, dst);
+                    dst
+                }
+                (0, Some(1), _) => {
+                    let dst = self.transform(src, dst, &rep.sub);
+                    self.transform_option(src, dst);
+                    dst
+                }
+                (1, None, HirKind::Class(Class::Bytes(class))) => {
+                    self.transform_class_plus(src, dst, class)
+                }
+                _ => panic!("Unsupported HIR: {hir:?}"),
+            },
+            HirKind::Concat(hirs) if hirs.len() >= 2 => self.transform_concat(src, dst, hirs),
+            HirKind::Alternation(hirs) if hirs.len() >= 2 => self.transform_alt(src, dst, hirs),
+            _ => panic!("Unsupported HIR: {hir:?}"),
+        }
+    }

-        let node_ref = node.borrow();
-        let mut edge = node_ref.edge_first;
+    // "string": a `Prefix(lit)` edge from src towards dst; panics if lit is not valid UTF-8.
+    fn transform_literal(&mut self, src: usize, dst: WipAction, lit: &[u8]) -> WipAction {
+        self.add_transition(src, dst, WipConsume::Prefix(String::from_utf8(lit.to_vec()).unwrap()))
+    }

-        while let Some(edge_cell) = edge {
-            let edge_ref = edge_cell.borrow();
-            let &mut (dst_id, _) = node_id(visited, edge_ref.dst);
-            let label = match &edge_ref.test {
-                Consume::Prefix(s) => format!("Prefix({s})"),
-                Consume::PrefixInsensitive(s) => format!("PrefixInsensitive({s})"),
-                Consume::Charset(c) => format!("Charset({:?})", CharsetFormatter(c)),
-                Consume::Chars(n) => format!("Chars({n})"),
-                Consume::Line => "Line".to_string(),
+    // [a-z]+: a single `Charset` edge from src; `transform` also reuses it for [a-z]*.
+    fn transform_class_plus(
+        &mut self,
+        src: usize,
+        dst: WipAction,
+        class: &ClassBytes,
+    ) -> WipAction {
+        let charset = self.class_to_charset(class);
+        self.add_transition(src, dst, WipConsume::Charset(charset))
+    }
+
+    // [eE]: one single-character edge per class member; all must resolve to the same action.
+    fn transform_class(&mut self, src: usize, dst: WipAction, class: &ClassBytes) -> WipAction {
+        let mut charset = self.class_to_charset(class);
+        let mut actual_dst = None;
+
+        for i in 0..256 {
+            if !charset[i] {
+                continue;
+            }
+
+            // Each member becomes a one-byte `String`, so only ASCII bytes can be represented.
+            if i >= 128 {
+                panic!("Invalid non-ASCII class character {i}");
+            }
+
+            let ch = i as u8;
+            let lower = ch.to_ascii_lowercase();
+
+            // NOTE: Uppercase chars have a lower numeric value than lowercase chars, so for a
+            // pair like `E`/`e` we reach `E` first. Emit one lowercase PrefixInsensitive edge
+            // (the spelling transform_concat uses) and clear `e` so it is not emitted again.
+            let test = if ch.is_ascii_uppercase() && charset[lower as usize] {
+                charset[lower as usize] = false;
+                WipConsume::PrefixInsensitive(char::from(lower).to_string())
+            } else {
+                // Only one case is in the class: keep the character exactly as written.
+                WipConsume::Prefix(char::from(ch).to_string())
+            };
+
+            let d = self.add_transition(src, dst, test);
+            if d != *actual_dst.get_or_insert(d) {
+                panic!("Diverging destinations for class transformer: {class:?}");
+            }
+        }
+
+        actual_dst.unwrap_or(dst)
+    }
+
+    // x?: the skip branch of an optional: a `Chars(0)` edge src -> dst (presumably zero-width).
+    fn transform_option(&mut self, src: usize, dst: WipAction) -> WipAction {
+        self.add_transition(src, dst, WipConsume::Chars(0))
+    }
+
+    // .*: a `Line` edge from src to dst (presumably consumes the remainder of the line).
+    fn transform_any_star(&mut self, src: usize, dst: WipAction) -> WipAction {
+        self.add_transition(src, dst, WipConsume::Line)
+    }
+
+    // .: a `Chars(1)` edge from src to dst; `transform` uses it for the full 0..=255 byte class.
+    fn transform_any(&mut self, src: usize, dst: WipAction) -> WipAction {
+        self.add_transition(src, dst, WipConsume::Chars(1))
+    }
+
+    // a b c: chains the sub-patterns through fresh intermediate states; the last one targets dst.
+    fn transform_concat(&mut self, src: usize, dst: WipAction, hirs: &[Hir]) -> WipAction {
+        fn check_lowercase_literal(hir: &Hir) -> Option<u8> {
+            if let HirKind::Class(Class::Bytes(class)) = hir.kind()
+                && let ranges = class.ranges()
+                && ranges.len() == 2
+                && ranges[0].len() == 1
+                && ranges[1].len() == 1
+                && let lower_a = ranges[0].start().to_ascii_lowercase()
+                && let lower_b = ranges[1].start().to_ascii_lowercase()
+                && lower_a == lower_b
+            {
+                Some(lower_a)
+            } else {
+                None
+            }
+        }
+
+        let mut it = hirs.iter().peekable();
+        let mut src = WipAction::Change(src);
+
+        while let Some(hir) = it.next() {
+            // Only a `Change` names a state that further edges can be attached to.
+            let WipAction::Change(src_idx) = src else {
+                panic!("Unexpected action in transform_concat");
+            };
+
+            if let Some(ch) = check_lowercase_literal(hir) {
+                // Transform [aA][bB][cC] into PrefixInsensitive("abc").
+                let mut str = String::from(ch as char);
+
+                while let Some(next_hir) = it.peek() {
+                    if let Some(next_ch) = check_lowercase_literal(next_hir) {
+                        str.push(next_ch as char);
+                        it.next();
+                    } else {
+                        break;
+                    }
+                }
+
+                let next =
+                    if it.peek().is_some() { WipAction::Change(self.add_state()) } else { dst };
+                src = self.add_transition(src_idx, next, WipConsume::PrefixInsensitive(str));
+            } else {
+                // Any other sequence is simply concatenated.
+                let next =
+                    if it.peek().is_some() { WipAction::Change(self.add_state()) } else { dst };
+                src = self.transform(src_idx, next, hir);
+            }
+        }
+
+        src
+    }
+
+    fn transform_alt(&mut self, src: usize, dst: WipAction, hirs: &[Hir]) -> WipAction {
+        let mut actual_dst = None;
+
+        for hir in hirs {
+            let d = self.transform(src, dst, hir);
+            if d != *actual_dst.get_or_insert(d) {
+                panic!("Diverging destinations for alternation transformer: {hirs:?}");
+            }
+        }
+
+        actual_dst.unwrap_or(dst)
+    }
+
+    fn class_to_charset(&mut self, class: &ClassBytes) -> Box<[bool; 256]> {
+        let mut charset = Box::new([false; 256]);
+
+        for r in class.iter() {
+            charset[r.start() as usize..=r.end() as usize].fill(true);
+        }
+
+        // If the class includes \w, we also set any non-ASCII characters.
+        // That's not how Unicode works, but it simplifies the implementation.
+        if [(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')]
+            .iter()
+            .all(|&(beg, end)| charset[beg as usize..=end as usize].iter().all(|&b| b))
+        {
+            charset[0x80..=0xFF].fill(true);
+        }
+
+        charset
+    }
+}
+
+fn print_mermaid(def_states: &[StateDefinition], states: &[WipState]) {
+    // Print header for Mermaid graph
+    println!("%%{{init:{{'fontFamily':'monospace','flowchart':{{'defaultRenderer':'elk'}}}}}}%%");
+    println!("graph TD");
+
+    // Print nodes (states)
+    for (idx, _state) in states.iter().enumerate() {
+        println!(
+            "    {idx}[\"{}\"]",
+            match def_states.get(idx) {
+                Some(state) => state.name,
+                None => &format!("{idx}"),
+            }
+        );
+    }
+
+    // Print edges (transitions)
+    for (src_idx, state) in states.iter().enumerate() {
+        for t in &state.transitions {
+            let dst = match t.action {
+                WipAction::Change(idx) => format!("{idx}"),
+                WipAction::Push(idx) => {
+                    format!("push{}[/\"Push({})\"/]", src_idx << 16 | idx, def_states[idx].name)
+                }
+                WipAction::Pop(count) => {
+                    format!("pop{}[/\"Pop({count})\"/]", src_idx << 16 | count)
+                }
+            };
+            let label = match &t.test {
+                WipConsume::Prefix(s) => format!("Prefix({s})"),
+                WipConsume::PrefixInsensitive(s) => format!("PrefixInsensitive({s})"),
+                WipConsume::Charset(c) => format!("Charset({:?})", CharsetFormatter(c)),
+                WipConsume::Chars(n) => format!("Chars({n})"),
+                WipConsume::Line => "Line".to_string(),
            };
            let label = label.replace('"', "&quot;");
-            out.push_str(&format!("    {src_id} -->|\"{label}\"| {dst_id}\n"));
-
-            walk(edge_ref.dst, visited, out);
-
-            edge = edge_ref.edge_next;
+            let label = label.replace('\\', r#"\\"#);
+            println!("    {src_idx} -->|\"{label}\"| {dst}");
        }
    }
-
-    let mut out = String::from(
-        "%%{init:{'fontFamily':'monospace','flowchart':{'defaultRenderer':'elk'}}}%%\ngraph TD\n",
-    );
-    let mut visited = HashMap::new();
-    walk(root, &mut visited, &mut out);
-    println!("{out}");
 }

 #[allow(dead_code)]
 pub fn parse_language_definition(def: &LanguageDefinition) {
-    let scratch = scratch_arena(None);
-    let root = Node::new_in(&scratch);
+    let mut state_names = HashMap::new();
+    let mut states = Vec::new();

    for state in def.states {
+        state_names.insert(state.name, states.len());
+        states.push(WipState { transitions: Vec::new() });
+    }
+
+    for (ground_idx, state) in def.states.iter().enumerate() {
        for (pattern, kind, action) in state.rules {
+            let mut ctx = WipContext { states: &mut states, kind: *kind };
+            let dst = match action {
+                ActionDefinition::Push(name) => match state_names.get(name) {
+                    Some(&idx) => WipAction::Push(idx),
+                    None => panic!("Unknown state name: {name}"),
+                },
+                ActionDefinition::Pop(count) => WipAction::Pop(*count),
+            };
            let hir = regex_syntax::ParserBuilder::new()
                .utf8(false)
                .unicode(false)
@@ -414,11 +381,11 @@ pub fn parse_language_definition(def: &LanguageDefinition) {
                .build()
                .parse(pattern)
                .unwrap();
-            transform(&scratch, root, root, &hir);
+            ctx.transform(ground_idx, dst, &hir);
        }
    }

-    print_mermaid(root);
+    print_mermaid(def.states, &states);
 }

 #[cfg(test)]
--- a/src/highlighter/mod.rs
+++ b/src/highlighter/mod.rs
@@ -83,7 +83,7 @@ impl Language {
 struct Transition<'s> {
    test: Consume<'s>,
    kind: HighlightKind,
-    state: Action,
+    action: Action,
 }

 #[derive(PartialEq, Eq)]
@@ -337,7 +337,7 @@ impl<'doc> Highlighter<'doc> {
                    }
                }

-                match t.state {
+                match t.action {
                    Action::Change(to) => {
                        if let Some(last) = res.last_mut() {
                            last.kind = t.kind;