From 030d1dd33091f0249dfe1952654a15e4c9feb170 Mon Sep 17 00:00:00 2001
From: kronberger-droid <kronberger@proton.me>
Date: Fri, 15 May 2026 17:33:34 +0200
Subject: [PATCH 1/5] edit_mode: add MotionTarget and resolve_motion (no impls)

Default-none trait method; every motion still falls through to LineBuffer.
---
 src/edit_mode/base.rs | 26 ++++++++++++++++++++++++++
 src/edit_mode/mod.rs  |  2 +-
 src/engine.rs         | 29 +++++++++++++++++++++++++++--
 3 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/src/edit_mode/base.rs b/src/edit_mode/base.rs
index 408098b32..ac89cd931 100644
--- a/src/edit_mode/base.rs
+++ b/src/edit_mode/base.rs
@@ -3,6 +3,25 @@ use crate::{
     PromptEditMode,
 };
 
+/// Buffer snapshot passed to an [`EditMode`] when resolving a [`MotionTarget`].
+pub struct EditContext<'a> {
+    pub buffer: &'a str,
+    pub cursor: usize,
+}
+
+/// The target shape of a motion. Each [`EditMode`] decides what buffer offset
+/// the target resolves to using its own segmentation rules.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MotionTarget {
+    WordLeft,
+    WordRight,
+    WordRightStart,
+    WordRightEnd,
+    BigWordLeft,
+    BigWordRightStart,
+    BigWordRightEnd,
+}
+
 /// Define the style of parsing for the edit events
 /// Available default options:
 /// - Emacs
@@ -18,4 +37,11 @@ pub trait EditMode: Send {
     fn handle_mode_specific_event(&mut self, _event: ReedlineEvent) -> EventStatus {
         EventStatus::Inapplicable
     }
+
+    /// Resolve a [`MotionTarget`] to a byte offset using this mode's segmentation rules.
+    /// Returning `None` defers to the default `LineBuffer` behavior for the
+    /// equivalent legacy command.
+    fn resolve_motion(&self, _target: MotionTarget, _ctx: &EditContext) -> Option<usize> {
+        None
+    }
 }
diff --git a/src/edit_mode/mod.rs b/src/edit_mode/mod.rs
index 0b6f2f6ff..87cb76ab5 100644
--- a/src/edit_mode/mod.rs
+++ b/src/edit_mode/mod.rs
@@ -6,7 +6,7 @@ mod helix;
 mod keybindings;
 mod vi;
 
-pub use base::EditMode;
+pub use base::{EditContext, EditMode, MotionTarget};
 pub use cursors::CursorConfig;
 pub use emacs::{default_emacs_keybindings, Emacs};
 #[cfg(feature = "helix")]
diff --git a/src/engine.rs b/src/engine.rs
index 2f33a09f7..0840187fd 100644
--- a/src/engine.rs
+++ b/src/engine.rs
@@ -19,7 +19,7 @@ use {
     crate::{
         completion::{Completer, DefaultCompleter},
         core_editor::Editor,
-        edit_mode::{EditMode, Emacs},
+        edit_mode::{EditContext, EditMode, Emacs, MotionTarget},
         enums::{EventStatus, ReedlineEvent},
         highlighter::SimpleMatchHighlighter,
         hinter::Hinter,
@@ -1718,7 +1718,17 @@ impl Reedline {
 
         // Run the commands over the edit buffer
         for command in commands {
-            self.editor.run_edit_command(command);
+            let resolved = motion_intent(command).and_then(|(target, select)| {
+                let ctx = EditContext {
+                    buffer: self.editor.get_buffer(),
+                    cursor: self.editor.line_buffer().insertion_point(),
+                };
+                self.edit_mode
+                    .resolve_motion(target, &ctx)
+                    .map(|position| EditCommand::MoveToPosition { position, select })
+            });
+            self.editor
+                .run_edit_command(resolved.as_ref().unwrap_or(command));
         }
     }
 
@@ -2254,6 +2264,21 @@ impl Reedline {
     }
 }
 
+/// Classify an [`EditCommand`] as a word motion. Returns `Some((target, select))`
+/// for commands that map to a [`MotionTarget`], `None` for everything else.
+fn motion_intent(command: &EditCommand) -> Option<(MotionTarget, bool)> {
+    Some(match *command {
+        EditCommand::MoveWordLeft { select } => (MotionTarget::WordLeft, select),
+        EditCommand::MoveWordRight { select } => (MotionTarget::WordRight, select),
+        EditCommand::MoveWordRightStart { select } => (MotionTarget::WordRightStart, select),
+        EditCommand::MoveWordRightEnd { select } => (MotionTarget::WordRightEnd, select),
+        EditCommand::MoveBigWordLeft { select } => (MotionTarget::BigWordLeft, select),
+        EditCommand::MoveBigWordRightStart { select } => (MotionTarget::BigWordRightStart, select),
+        EditCommand::MoveBigWordRightEnd { select } => (MotionTarget::BigWordRightEnd, select),
+        _ => return None,
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From b5b7be621abd01188eb5d2de3835f97a657284bd Mon Sep 17 00:00:00 2001
From: kronberger-droid <kronberger@proton.me>
Date: Fri, 15 May 2026 18:14:14 +0200
Subject: [PATCH 2/5] vi: implement resolve_motion

Three-class word segmentation for w/W/e/E/b/B.
Operator support is left as a follow-up.
---
 src/edit_mode/vi/mod.rs  |  21 ++++-
 src/edit_mode/vi/word.rs | 188 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 src/edit_mode/vi/word.rs

diff --git a/src/edit_mode/vi/mod.rs b/src/edit_mode/vi/mod.rs
index 721e45d94..d6eddb117 100644
--- a/src/edit_mode/vi/mod.rs
+++ b/src/edit_mode/vi/mod.rs
@@ -2,6 +2,7 @@ mod command;
 mod motion;
 mod parser;
 mod vi_keybindings;
+mod word;
 
 use std::str::FromStr;
 
@@ -10,7 +11,7 @@ pub use vi_keybindings::{default_vi_insert_keybindings, default_vi_normal_keybin
 
 use self::motion::ViCharSearch;
 
-use super::EditMode;
+use super::{EditContext, EditMode, MotionTarget};
 use crate::{
     edit_mode::{keybindings::Keybindings, vi::parser::parse},
     enums::{EditCommand, EventStatus, ReedlineEvent, ReedlineRawEvent},
@@ -223,6 +224,24 @@ impl EditMode for Vi {
             _ => EventStatus::Inapplicable,
         }
     }
+
+    fn resolve_motion(&self, target: MotionTarget, ctx: &EditContext) -> Option<usize> {
+        use word::WordKind::{BigWord, Word};
+        let (buffer, cursor) = (ctx.buffer, ctx.cursor);
+        Some(match target {
+            MotionTarget::WordLeft => word::word_left_index(buffer, cursor, Word),
+            MotionTarget::WordRightStart => word::word_right_start_index(buffer, cursor, Word),
+            MotionTarget::WordRightEnd => word::word_right_end_index(buffer, cursor, Word),
+            MotionTarget::BigWordLeft => word::word_left_index(buffer, cursor, BigWord),
+            MotionTarget::BigWordRightStart => {
+                word::word_right_start_index(buffer, cursor, BigWord)
+            }
+            MotionTarget::BigWordRightEnd => word::word_right_end_index(buffer, cursor, BigWord),
+            // Emacs-style `M-f`; Vi never emits this. Defer to LineBuffer's
+            // UAX #29 path so behavior stays consistent if it ever reaches Vi.
+            MotionTarget::WordRight => return None,
+        })
+    }
 }
 
 #[cfg(test)]
diff --git a/src/edit_mode/vi/word.rs b/src/edit_mode/vi/word.rs
new file mode 100644
index 000000000..678272759
--- /dev/null
+++ b/src/edit_mode/vi/word.rs
@@ -0,0 +1,188 @@
+//! Vi-standard word segmentation.
+//!
+//! Vi groups characters into three classes:
+//! - **Keyword**: alphanumeric and underscore
+//! - **Punctuation**: any non-keyword, non-whitespace char
+//! - **Whitespace**
+//!
+//! Word boundaries occur at any class transition. This differs from reedline's
+//! existing UAX #29 word boundaries (used by Emacs), which treat `foo.bar` as
+//! one word; Vi sees three (`foo`, `.`, `bar`).
+//!
+//! `BigWord` motions (W/E/B) collapse Keyword and Punctuation: only whitespace
+//! creates a boundary.
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum CharClass {
+    Keyword,
+    Punctuation,
+    Whitespace,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) enum WordKind {
+    Word,
+    BigWord,
+}
+
+fn char_class(c: char, kind: WordKind) -> CharClass {
+    if c.is_whitespace() {
+        CharClass::Whitespace
+    } else if matches!(kind, WordKind::BigWord) || c.is_alphanumeric() || c == '_' {
+        CharClass::Keyword
+    } else {
+        CharClass::Punctuation
+    }
+}
+
+/// Vi `w`/`W` motion: jump to the start of the next class segment, skipping
+/// any whitespace between segments. Returns a byte offset.
+pub(super) fn word_right_start_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
+    let mut iter = buffer[cursor..].char_indices().map(|(i, c)| (cursor + i, c));
+
+    let Some((_, first)) = iter.next() else {
+        return buffer.len();
+    };
+    let start_class = char_class(first, kind);
+
+    let Some((boundary_pos, boundary_char)) =
+        iter.find(|&(_, c)| char_class(c, kind) != start_class)
+    else {
+        return buffer.len();
+    };
+
+    if char_class(boundary_char, kind) != CharClass::Whitespace {
+        return boundary_pos;
+    }
+
+    buffer[boundary_pos..]
+        .char_indices()
+        .find(|&(_, c)| char_class(c, kind) != CharClass::Whitespace)
+        .map_or(buffer.len(), |(rel, _)| boundary_pos + rel)
+}
+
+/// Vi `e`/`E` motion: jump to the end of the current class segment, or to the
+/// end of the next segment if already at an end. Returns a byte offset of the
+/// last char in the segment.
+pub(super) fn word_right_end_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
+    let mut iter = buffer[cursor..].char_indices().map(|(i, c)| (cursor + i, c));
+
+    // Always advance past the cursor's char first, so that being already at the
+    // end of a word jumps to the end of the *next* one.
+    if iter.next().is_none() {
+        return buffer.len();
+    }
+
+    let Some((mut last_pos, start_char)) =
+        iter.find(|&(_, c)| char_class(c, kind) != CharClass::Whitespace)
+    else {
+        return buffer.len();
+    };
+    let start_class = char_class(start_char, kind);
+
+    for (pos, c) in iter {
+        if char_class(c, kind) != start_class {
+            break;
+        }
+        last_pos = pos;
+    }
+    last_pos
+}
+
+/// Vi `b`/`B` motion: jump to the start of the current class segment, or to
+/// the start of the previous segment if already at a start. Returns a byte
+/// offset.
+pub(super) fn word_left_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
+    if cursor == 0 {
+        return 0;
+    }
+
+    let mut iter = buffer[..cursor].char_indices().rev();
+
+    let Some((mut pos, c)) = iter.find(|&(_, c)| char_class(c, kind) != CharClass::Whitespace)
+    else {
+        return 0;
+    };
+    let target_class = char_class(c, kind);
+
+    for (i, c) in iter {
+        if char_class(c, kind) != target_class {
+            break;
+        }
+        pos = i;
+    }
+    pos
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rstest::rstest;
+
+    // --- small word `w` ---
+    #[rstest]
+    #[case("hello world", 0, 6)]
+    #[case("hello world", 4, 6)]
+    #[case("foo.bar", 0, 3)]
+    #[case("foo.bar", 3, 4)]
+    #[case("foo  bar", 0, 5)]
+    #[case("hello", 0, 5)]
+    #[case("", 0, 0)]
+    #[case("  hello", 0, 2)]
+    #[case("a_b foo", 0, 4)]
+    fn small_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
+        assert_eq!(word_right_start_index(buffer, cursor, WordKind::Word), expected);
+    }
+
+    // --- big word `W` ---
+    #[rstest]
+    #[case("foo.bar baz", 0, 8)]
+    #[case("foo bar", 0, 4)]
+    #[case("foo   bar", 0, 6)]
+    #[case("", 0, 0)]
+    fn big_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
+        assert_eq!(word_right_start_index(buffer, cursor, WordKind::BigWord), expected);
+    }
+
+    // --- small word `e` ---
+    #[rstest]
+    #[case("hello world", 0, 4)]
+    #[case("hello world", 4, 10)]
+    #[case("foo.bar", 0, 2)]
+    #[case("foo.bar", 2, 3)]
+    #[case("foo.bar", 3, 6)]
+    #[case("", 0, 0)]
+    #[case("  hello", 0, 6)]
+    fn small_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
+        assert_eq!(word_right_end_index(buffer, cursor, WordKind::Word), expected);
+    }
+
+    // --- big word `E` ---
+    #[rstest]
+    #[case("foo.bar baz", 0, 6)]
+    #[case("foo bar", 0, 2)]
+    fn big_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
+        assert_eq!(word_right_end_index(buffer, cursor, WordKind::BigWord), expected);
+    }
+
+    // --- small word `b` ---
+    #[rstest]
+    #[case("hello world", 6, 0)]
+    #[case("hello world", 10, 6)]
+    #[case("hello world", 0, 0)]
+    #[case("foo.bar", 4, 3)]
+    #[case("foo.bar", 3, 0)]
+    #[case("   abc", 6, 3)]
+    #[case("   ", 3, 0)]
+    fn small_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
+        assert_eq!(word_left_index(buffer, cursor, WordKind::Word), expected);
+    }
+
+    // --- big word `B` ---
+    #[rstest]
+    #[case("foo.bar baz", 8, 0)]
+    #[case("foo.bar", 4, 0)]
+    fn big_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
+        assert_eq!(word_left_index(buffer, cursor, WordKind::BigWord), expected);
+    }
+}

From 8c9a6d28a4c13e1f39ccb2078b5721c616ed6f79 Mon Sep 17 00:00:00 2001
From: kronberger-droid <kronberger@proton.me>
Date: Fri, 15 May 2026 18:58:55 +0200
Subject: [PATCH 3/5] vi: cleanup and grapheme awareness

Iterate by grapheme cluster so multi-codepoint sequences move as one
unit. Minor cleanup in resolve_motion dispatch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/edit_mode/vi/mod.rs  | 10 +++---
 src/edit_mode/vi/word.rs | 72 ++++++++++++++++++++++++++--------------
 2 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/src/edit_mode/vi/mod.rs b/src/edit_mode/vi/mod.rs
index d6eddb117..abc63032a 100644
--- a/src/edit_mode/vi/mod.rs
+++ b/src/edit_mode/vi/mod.rs
@@ -225,9 +225,12 @@ impl EditMode for Vi {
         }
     }
 
-    fn resolve_motion(&self, target: MotionTarget, ctx: &EditContext) -> Option<usize> {
+    fn resolve_motion(
+        &self,
+        target: MotionTarget,
+        &EditContext { buffer, cursor }: &EditContext,
+    ) -> Option<usize> {
         use word::WordKind::{BigWord, Word};
-        let (buffer, cursor) = (ctx.buffer, ctx.cursor);
         Some(match target {
             MotionTarget::WordLeft => word::word_left_index(buffer, cursor, Word),
             MotionTarget::WordRightStart => word::word_right_start_index(buffer, cursor, Word),
@@ -237,8 +240,7 @@ impl EditMode for Vi {
                 word::word_right_start_index(buffer, cursor, BigWord)
             }
             MotionTarget::BigWordRightEnd => word::word_right_end_index(buffer, cursor, BigWord),
-            // Emacs-style `M-f`; Vi never emits this. Defer to LineBuffer's
-            // UAX #29 path so behavior stays consistent if it ever reaches Vi.
+            // Vi never emits WordRight; defer to LineBuffer's UAX #29 path.
             MotionTarget::WordRight => return None,
         })
     }
diff --git a/src/edit_mode/vi/word.rs b/src/edit_mode/vi/word.rs
index 678272759..2666aba80 100644
--- a/src/edit_mode/vi/word.rs
+++ b/src/edit_mode/vi/word.rs
@@ -11,6 +11,13 @@
 //!
 //! `BigWord` motions (W/E/B) collapse Keyword and Punctuation: only whitespace
 //! creates a boundary.
+//!
+//! Iteration walks grapheme clusters (via `UnicodeSegmentation::grapheme_indices`)
+//! so multi-codepoint sequences (combining marks, ZWJ emoji) are treated as one
+//! unit, matching the rest of `core_editor`. Each cluster is classified by its
+//! first scalar.
+
+use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 enum CharClass {
@@ -25,7 +32,11 @@ pub(super) enum WordKind {
     BigWord,
 }
 
-fn char_class(c: char, kind: WordKind) -> CharClass {
+fn class_of(grapheme: &str, kind: WordKind) -> CharClass {
+    let c = grapheme
+        .chars()
+        .next()
+        .expect("grapheme cluster is non-empty");
     if c.is_whitespace() {
         CharClass::Whitespace
     } else if matches!(kind, WordKind::BigWord) || c.is_alphanumeric() || c == '_' {
@@ -38,50 +49,52 @@ fn char_class(c: char, kind: WordKind) -> CharClass {
 /// Vi `w`/`W` motion: jump to the start of the next class segment, skipping
 /// any whitespace between segments. Returns a byte offset.
 pub(super) fn word_right_start_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
-    let mut iter = buffer[cursor..].char_indices().map(|(i, c)| (cursor + i, c));
+    let mut iter = buffer[cursor..]
+        .grapheme_indices(true)
+        .map(|(i, g)| (cursor + i, g));
 
     let Some((_, first)) = iter.next() else {
         return buffer.len();
     };
-    let start_class = char_class(first, kind);
+    let start_class = class_of(first, kind);
 
-    let Some((boundary_pos, boundary_char)) =
-        iter.find(|&(_, c)| char_class(c, kind) != start_class)
+    let Some((boundary_pos, boundary_g)) =
+        iter.find(|&(_, g)| class_of(g, kind) != start_class)
     else {
         return buffer.len();
     };
 
-    if char_class(boundary_char, kind) != CharClass::Whitespace {
+    if class_of(boundary_g, kind) != CharClass::Whitespace {
         return boundary_pos;
     }
 
-    buffer[boundary_pos..]
-        .char_indices()
-        .find(|&(_, c)| char_class(c, kind) != CharClass::Whitespace)
-        .map_or(buffer.len(), |(rel, _)| boundary_pos + rel)
+    iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace)
+        .map_or(buffer.len(), |(pos, _)| pos)
 }
 
 /// Vi `e`/`E` motion: jump to the end of the current class segment, or to the
-/// end of the next segment if already at an end. Returns a byte offset of the
-/// last char in the segment.
+/// end of the next segment if already at an end. Returns the byte offset of
+/// the last grapheme in the segment.
 pub(super) fn word_right_end_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
-    let mut iter = buffer[cursor..].char_indices().map(|(i, c)| (cursor + i, c));
+    let mut iter = buffer[cursor..]
+        .grapheme_indices(true)
+        .map(|(i, g)| (cursor + i, g));
 
-    // Always advance past the cursor's char first, so that being already at the
-    // end of a word jumps to the end of the *next* one.
+    // Always advance past the cursor's grapheme first, so that being already at
+    // the end of a word jumps to the end of the *next* one.
     if iter.next().is_none() {
         return buffer.len();
     }
 
-    let Some((mut last_pos, start_char)) =
-        iter.find(|&(_, c)| char_class(c, kind) != CharClass::Whitespace)
+    let Some((mut last_pos, start_g)) =
+        iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace)
     else {
         return buffer.len();
     };
-    let start_class = char_class(start_char, kind);
+    let start_class = class_of(start_g, kind);
 
-    for (pos, c) in iter {
-        if char_class(c, kind) != start_class {
+    for (pos, g) in iter {
+        if class_of(g, kind) != start_class {
             break;
         }
         last_pos = pos;
@@ -97,16 +110,16 @@ pub(super) fn word_left_index(buffer: &str, cursor: usize, kind: WordKind) -> us
         return 0;
     }
 
-    let mut iter = buffer[..cursor].char_indices().rev();
+    let mut iter = buffer[..cursor].grapheme_indices(true).rev();
 
-    let Some((mut pos, c)) = iter.find(|&(_, c)| char_class(c, kind) != CharClass::Whitespace)
+    let Some((mut pos, g)) = iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace)
     else {
         return 0;
     };
-    let target_class = char_class(c, kind);
+    let target_class = class_of(g, kind);
 
-    for (i, c) in iter {
-        if char_class(c, kind) != target_class {
+    for (i, g) in iter {
+        if class_of(g, kind) != target_class {
             break;
         }
         pos = i;
@@ -130,6 +143,10 @@ mod tests {
     #[case("", 0, 0)]
     #[case("  hello", 0, 2)]
     #[case("a_b foo", 0, 4)]
+    // Unicode: precomposed multi-byte, combining mark, ZWJ emoji
+    #[case("café foo", 0, 6)]
+    #[case("e\u{0301} foo", 0, 4)]
+    #[case("👨‍👩‍👧 foo", 0, 19)]
     fn small_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
         assert_eq!(word_right_start_index(buffer, cursor, WordKind::Word), expected);
     }
@@ -153,6 +170,8 @@ mod tests {
     #[case("foo.bar", 3, 6)]
     #[case("", 0, 0)]
     #[case("  hello", 0, 6)]
+    // Unicode: lands on the byte offset of the last grapheme's start
+    #[case("café foo", 0, 3)]
     fn small_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
         assert_eq!(word_right_end_index(buffer, cursor, WordKind::Word), expected);
     }
@@ -174,6 +193,9 @@ mod tests {
     #[case("foo.bar", 3, 0)]
     #[case("   abc", 6, 3)]
     #[case("   ", 3, 0)]
+    // Unicode: backwards iteration must not split a grapheme cluster
+    #[case("café foo", 9, 6)]
+    #[case("café", 5, 0)]
     fn small_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
         assert_eq!(word_left_index(buffer, cursor, WordKind::Word), expected);
     }

From db208fb4bfd36e158d327608e36bfd9789d75bf5 Mon Sep 17 00:00:00 2001
From: kronberger-droid <kronberger@proton.me>
Date: Fri, 15 May 2026 20:58:29 +0200
Subject: [PATCH 4/5] chore: run cargo fmt

---
 src/edit_mode/vi/word.rs | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/edit_mode/vi/word.rs b/src/edit_mode/vi/word.rs
index 2666aba80..e93636479 100644
--- a/src/edit_mode/vi/word.rs
+++ b/src/edit_mode/vi/word.rs
@@ -58,8 +58,7 @@ pub(super) fn word_right_start_index(buffer: &str, cursor: usize, kind: WordKind
     };
     let start_class = class_of(first, kind);
 
-    let Some((boundary_pos, boundary_g)) =
-        iter.find(|&(_, g)| class_of(g, kind) != start_class)
+    let Some((boundary_pos, boundary_g)) = iter.find(|&(_, g)| class_of(g, kind) != start_class)
     else {
         return buffer.len();
     };
@@ -112,8 +111,7 @@ pub(super) fn word_left_index(buffer: &str, cursor: usize, kind: WordKind) -> us
 
     let mut iter = buffer[..cursor].grapheme_indices(true).rev();
 
-    let Some((mut pos, g)) = iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace)
-    else {
+    let Some((mut pos, g)) = iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace) else {
         return 0;
     };
     let target_class = class_of(g, kind);
@@ -148,7 +146,10 @@ mod tests {
     #[case("e\u{0301} foo", 0, 4)]
     #[case("👨‍👩‍👧 foo", 0, 19)]
     fn small_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
-        assert_eq!(word_right_start_index(buffer, cursor, WordKind::Word), expected);
+        assert_eq!(
+            word_right_start_index(buffer, cursor, WordKind::Word),
+            expected
+        );
     }
 
     // --- big word `W` ---
@@ -158,7 +159,10 @@ mod tests {
     #[case("foo   bar", 0, 6)]
     #[case("", 0, 0)]
     fn big_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
-        assert_eq!(word_right_start_index(buffer, cursor, WordKind::BigWord), expected);
+        assert_eq!(
+            word_right_start_index(buffer, cursor, WordKind::BigWord),
+            expected
+        );
     }
 
     // --- small word `e` ---
@@ -173,7 +177,10 @@ mod tests {
     // Unicode: lands on the byte offset of the last grapheme's start
     #[case("café foo", 0, 3)]
     fn small_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
-        assert_eq!(word_right_end_index(buffer, cursor, WordKind::Word), expected);
+        assert_eq!(
+            word_right_end_index(buffer, cursor, WordKind::Word),
+            expected
+        );
     }
 
     // --- big word `E` ---
@@ -181,7 +188,10 @@ mod tests {
     #[case("foo.bar baz", 0, 6)]
     #[case("foo bar", 0, 2)]
     fn big_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
-        assert_eq!(word_right_end_index(buffer, cursor, WordKind::BigWord), expected);
+        assert_eq!(
+            word_right_end_index(buffer, cursor, WordKind::BigWord),
+            expected
+        );
     }
 
     // --- small word `b` ---

From e126057a3a16c1570965ec66d6ed911e0d06d084 Mon Sep 17 00:00:00 2001
From: kronberger-droid <kronberger@proton.me>
Date: Fri, 15 May 2026 21:12:12 +0200
Subject: [PATCH 5/5] chore: mention divergence from vim default

---
 src/edit_mode/vi/word.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/edit_mode/vi/word.rs b/src/edit_mode/vi/word.rs
index e93636479..2f935c876 100644
--- a/src/edit_mode/vi/word.rs
+++ b/src/edit_mode/vi/word.rs
@@ -16,6 +16,9 @@
 //! so multi-codepoint sequences (combining marks, ZWJ emoji) are treated as one
 //! unit, matching the rest of `core_editor`. Each cluster is classified by its
 //! first scalar.
+//!
+//! Classification uses Unicode-aware predicates, aligning with Helix's model
+//! rather than Vim's strict default `iskeyword` (Latin-1 only).
 
 use unicode_segmentation::UnicodeSegmentation;