diff --git a/src/edit_mode/base.rs b/src/edit_mode/base.rs index 408098b3..ac89cd93 100644 --- a/src/edit_mode/base.rs +++ b/src/edit_mode/base.rs @@ -3,6 +3,25 @@ use crate::{ PromptEditMode, }; +/// Buffer snapshot passed to an [`EditMode`] when resolving a [`MotionTarget`]. +pub struct EditContext<'a> { + /// Contents of the line buffer at the time the motion is resolved. + pub buffer: &'a str, + /// Insertion point, as a byte offset into `buffer`. + pub cursor: usize, +} + +/// The target shape of a motion. Each [`EditMode`] decides what buffer offset +/// the target resolves to using its own segmentation rules. +/// +/// The Vi keys named on the variants are the motions the `Vi` mode resolves them as. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MotionTarget { + /// Start of the current or previous word (Vi `b`). + WordLeft, + /// Word motion to the right; the `Vi` mode leaves this target unresolved. + WordRight, + /// Start of the next word (Vi `w`). + WordRightStart, + /// End of the current or next word (Vi `e`). + WordRightEnd, + /// Start of the current or previous whitespace-delimited word (Vi `B`). + BigWordLeft, + /// Start of the next whitespace-delimited word (Vi `W`). + BigWordRightStart, + /// End of the current or next whitespace-delimited word (Vi `E`). + BigWordRightEnd, +} + /// Define the style of parsing for the edit events /// Available default options: /// - Emacs @@ -18,4 +37,11 @@ pub trait EditMode: Send { fn handle_mode_specific_event(&mut self, _event: ReedlineEvent) -> EventStatus { EventStatus::Inapplicable } + + /// Resolve a [`MotionTarget`] to a buffer offset using this mode's segmentation rules. + /// Returning `None` defers to the default `LineBuffer` behavior for the + /// equivalent legacy command. The provided default resolves nothing and always returns `None`.
+ fn resolve_motion(&self, _target: MotionTarget, _ctx: &EditContext) -> Option<usize> { + None + } } diff --git a/src/edit_mode/mod.rs b/src/edit_mode/mod.rs index 0b6f2f6f..87cb76ab 100644 --- a/src/edit_mode/mod.rs +++ b/src/edit_mode/mod.rs @@ -6,7 +6,7 @@ mod helix; mod keybindings; mod vi; -pub use base::EditMode; +pub use base::{EditContext, EditMode, MotionTarget}; pub use cursors::CursorConfig; pub use emacs::{default_emacs_keybindings, Emacs}; #[cfg(feature = "helix")] diff --git a/src/edit_mode/vi/mod.rs b/src/edit_mode/vi/mod.rs index 721e45d9..abc63032 100644 --- a/src/edit_mode/vi/mod.rs +++ b/src/edit_mode/vi/mod.rs @@ -2,6 +2,7 @@ mod command; mod motion; mod parser; mod vi_keybindings; +mod word; use std::str::FromStr; @@ -10,7 +11,7 @@ pub use vi_keybindings::{default_vi_insert_keybindings, default_vi_normal_keybin use self::motion::ViCharSearch; -use super::EditMode; +use super::{EditContext, EditMode, MotionTarget}; use crate::{ edit_mode::{keybindings::Keybindings, vi::parser::parse}, enums::{EditCommand, EventStatus, ReedlineEvent, ReedlineRawEvent}, @@ -223,6 +224,26 @@ impl EditMode for Vi { _ => EventStatus::Inapplicable, } } + + /// Resolve word and big-word motions to a byte offset using the Vi + /// segmentation rules in the `word` module. + fn resolve_motion( + &self, + target: MotionTarget, + &EditContext { buffer, cursor }: &EditContext, + ) -> Option<usize> { + use word::WordKind::{BigWord, Word}; + Some(match target { + MotionTarget::WordLeft => word::word_left_index(buffer, cursor, Word), + MotionTarget::WordRightStart => word::word_right_start_index(buffer, cursor, Word), + MotionTarget::WordRightEnd => word::word_right_end_index(buffer, cursor, Word), + MotionTarget::BigWordLeft => word::word_left_index(buffer, cursor, BigWord), + MotionTarget::BigWordRightStart => { + word::word_right_start_index(buffer, cursor, BigWord) + } + MotionTarget::BigWordRightEnd => word::word_right_end_index(buffer, cursor, BigWord), + // Vi never emits WordRight; defer to LineBuffer's UAX #29 path.
+ MotionTarget::WordRight => return None, + }) + } } #[cfg(test)] diff --git a/src/edit_mode/vi/word.rs b/src/edit_mode/vi/word.rs new file mode 100644 index 00000000..2f935c87 --- /dev/null +++ b/src/edit_mode/vi/word.rs @@ -0,0 +1,223 @@ +//! Vi-standard word segmentation. +//! +//! Vi groups characters into three classes: +//! - **Keyword**: alphanumeric and underscore +//! - **Punctuation**: any non-keyword, non-whitespace char +//! - **Whitespace** +//! +//! Word boundaries occur at any class transition. This differs from reedline's +//! existing UAX #29 word boundaries (used by Emacs), which treat `foo.bar` as +//! one word; Vi sees three (`foo`, `.`, `bar`). +//! +//! `BigWord` motions (W/E/B) collapse Keyword and Punctuation: only whitespace +//! creates a boundary. +//! +//! Iteration walks grapheme clusters (via `UnicodeSegmentation::grapheme_indices`) +//! so multi-codepoint sequences (combining marks, ZWJ emoji) are treated as one +//! unit, matching the rest of `core_editor`. Each cluster is classified by its +//! first scalar. +//! +//! Classification uses Unicode-aware predicates, aligning with Helix's model +//! rather than Vim's strict default `iskeyword` (Latin-1 only). + +use unicode_segmentation::UnicodeSegmentation; + +/// Class assigned to a grapheme cluster; a change of class between +/// neighbouring clusters is a segment boundary. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CharClass { + Keyword, + Punctuation, + Whitespace, +} + +/// Selects small-word (`w`/`e`/`b`) or big-word (`W`/`E`/`B`) segmentation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum WordKind { + Word, + BigWord, +} + +/// Classify `grapheme` by its first `char`. +/// +/// With [`WordKind::BigWord`] every non-whitespace cluster is `Keyword`, so +/// `Punctuation` is only produced for [`WordKind::Word`]. Panics if `grapheme` +/// is empty. +fn class_of(grapheme: &str, kind: WordKind) -> CharClass { + let c = grapheme + .chars() + .next() + .expect("grapheme cluster is non-empty"); + if c.is_whitespace() { + CharClass::Whitespace + } else if matches!(kind, WordKind::BigWord) || c.is_alphanumeric() || c == '_' { + CharClass::Keyword + } else { + CharClass::Punctuation + } +} + +/// Vi `w`/`W` motion: jump to the start of the next class segment, skipping +/// any whitespace between segments. Returns a byte offset.
+pub(super) fn word_right_start_index(buffer: &str, cursor: usize, kind: WordKind) -> usize { + // Slicing panics if `cursor` is past the end or not on a `char` boundary. + // Offsets yielded by the iterator are rebased onto the whole buffer. + let mut iter = buffer[cursor..] + .grapheme_indices(true) + .map(|(i, g)| (cursor + i, g)); + + // Nothing at or after the cursor: stay at the end of the buffer. + let Some((_, first)) = iter.next() else { + return buffer.len(); + }; + let start_class = class_of(first, kind); + + // First grapheme whose class differs from the one under the cursor. + let Some((boundary_pos, boundary_g)) = iter.find(|&(_, g)| class_of(g, kind) != start_class) + else { + return buffer.len(); + }; + + if class_of(boundary_g, kind) != CharClass::Whitespace { + return boundary_pos; + } + + // The boundary is whitespace: skip it to reach the next segment, or the + // end of the buffer when only whitespace remains. + iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace) + .map_or(buffer.len(), |(pos, _)| pos) +} + +/// Vi `e`/`E` motion: jump to the end of the current class segment, or to the +/// end of the next segment if already at an end. Returns the byte offset of +/// the last grapheme in the segment, or `buffer.len()` when no non-whitespace +/// grapheme follows the cursor. +pub(super) fn word_right_end_index(buffer: &str, cursor: usize, kind: WordKind) -> usize { + // Slicing panics if `cursor` is past the end or not on a `char` boundary. + let mut iter = buffer[cursor..] + .grapheme_indices(true) + .map(|(i, g)| (cursor + i, g)); + + // Always advance past the cursor's grapheme first, so that being already at + // the end of a word jumps to the end of the *next* one. + if iter.next().is_none() { + return buffer.len(); + } + + // Skip any whitespace; the first non-whitespace grapheme opens the target segment. + let Some((mut last_pos, start_g)) = + iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace) + else { + return buffer.len(); + }; + let start_class = class_of(start_g, kind); + + // Extend to the last grapheme that still shares the segment's class. + for (pos, g) in iter { + if class_of(g, kind) != start_class { + break; + } + last_pos = pos; + } + last_pos +} + +/// Vi `b`/`B` motion: jump to the start of the current class segment, or to +/// the start of the previous segment if already at a start. Returns a byte +/// offset.
+pub(super) fn word_left_index(buffer: &str, cursor: usize, kind: WordKind) -> usize { + if cursor == 0 { + return 0; + } + + // Walk the graphemes before the cursor from right to left. Slicing panics + // if `cursor` is past the end or not on a `char` boundary. + let mut iter = buffer[..cursor].grapheme_indices(true).rev(); + + // Skip whitespace directly left of the cursor; only whitespace means offset 0. + let Some((mut pos, g)) = iter.find(|&(_, g)| class_of(g, kind) != CharClass::Whitespace) else { + return 0; + }; + let target_class = class_of(g, kind); + + // Keep moving left while the class is unchanged; `pos` ends on the segment start. + for (i, g) in iter { + if class_of(g, kind) != target_class { + break; + } + pos = i; + } + pos +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + // Cases are (buffer, cursor byte offset, expected byte offset). + // --- small word `w` --- + #[rstest] + #[case("hello world", 0, 6)] + #[case("hello world", 4, 6)] + #[case("foo.bar", 0, 3)] + #[case("foo.bar", 3, 4)] + #[case("foo  bar", 0, 5)] + #[case("hello", 0, 5)] + #[case("", 0, 0)] + #[case("  hello", 0, 2)] + #[case("a_b foo", 0, 4)] + // Unicode: precomposed multi-byte, combining mark, ZWJ emoji + #[case("café foo", 0, 6)] + #[case("e\u{0301} foo", 0, 4)] + #[case("👨‍👩‍👧 foo", 0, 19)] + fn small_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) { + assert_eq!( + word_right_start_index(buffer, cursor, WordKind::Word), + expected + ); + } + + // --- big word `W` --- + #[rstest] + #[case("foo.bar baz", 0, 8)] + #[case("foo bar", 0, 4)] + #[case("foo   bar", 0, 6)] + #[case("", 0, 0)] + fn big_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) { + assert_eq!( + word_right_start_index(buffer, cursor, WordKind::BigWord), + expected + ); + } + + // --- small word `e` --- + #[rstest] + #[case("hello world", 0, 4)] + #[case("hello world", 4, 10)] + #[case("foo.bar", 0, 2)] + #[case("foo.bar", 2, 3)] + #[case("foo.bar", 3, 6)] + #[case("", 0, 0)] + #[case("  hello", 0, 6)] + // Unicode: lands on the byte offset of the last grapheme's start + #[case("café foo", 0, 3)] + fn small_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) { + assert_eq!( + word_right_end_index(buffer, cursor, WordKind::Word), + expected + ); + } + + // --- big word `E` --- + #[rstest] +
#[case("foo.bar baz", 0, 6)] + #[case("foo bar", 0, 2)] + fn big_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) { + assert_eq!( + word_right_end_index(buffer, cursor, WordKind::BigWord), + expected + ); + } + + // --- small word `b` --- + #[rstest] + #[case("hello world", 6, 0)] + #[case("hello world", 10, 6)] + #[case("hello world", 0, 0)] + #[case("foo.bar", 4, 3)] + #[case("foo.bar", 3, 0)] + // Leading whitespace and whitespace-only buffers; the cursor is at the end of the buffer + #[case("   abc", 6, 3)] + #[case("   ", 3, 0)] + // Unicode: backwards iteration must not split a grapheme cluster + #[case("café foo", 9, 6)] + #[case("café", 5, 0)] + fn small_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) { + assert_eq!(word_left_index(buffer, cursor, WordKind::Word), expected); + } + + // --- big word `B` --- + #[rstest] + #[case("foo.bar baz", 8, 0)] + #[case("foo.bar", 4, 0)] + fn big_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) { + assert_eq!(word_left_index(buffer, cursor, WordKind::BigWord), expected); + } +} diff --git a/src/engine.rs b/src/engine.rs index 2f33a09f..0840187f 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -19,7 +19,7 @@ use { crate::{ completion::{Completer, DefaultCompleter}, core_editor::Editor, - edit_mode::{EditMode, Emacs}, + edit_mode::{EditContext, EditMode, Emacs, MotionTarget}, enums::{EventStatus, ReedlineEvent}, highlighter::SimpleMatchHighlighter, hinter::Hinter, @@ -1718,7 +1718,17 @@ impl Reedline { // Run the commands over the edit buffer for command in commands { - self.editor.run_edit_command(command); + // Offer word motions to the active edit mode first; when it resolves one, + // run an absolute `MoveToPosition` instead, otherwise run the command unchanged. + let resolved = motion_intent(command).and_then(|(target, select)| { + let ctx = EditContext { + buffer: self.editor.get_buffer(), + cursor: self.editor.line_buffer().insertion_point(), + }; + self.edit_mode + .resolve_motion(target, &ctx) + .map(|position| EditCommand::MoveToPosition { position, select }) + }); + self.editor + .run_edit_command(resolved.as_ref().unwrap_or(command)); } } @@ -2254,6 +2264,21 @@ impl Reedline { } } +/// 
Classify an [`EditCommand`] as a motion. Returns `Some((target, select))` +/// for motion-bearing commands, `None` for everything else. +/// +/// Only the word and big-word move commands are recognised; `select` is the +/// command's own `select` flag, passed through unchanged. +fn motion_intent(command: &EditCommand) -> Option<(MotionTarget, bool)> { + Some(match *command { + EditCommand::MoveWordLeft { select } => (MotionTarget::WordLeft, select), + EditCommand::MoveWordRight { select } => (MotionTarget::WordRight, select), + EditCommand::MoveWordRightStart { select } => (MotionTarget::WordRightStart, select), + EditCommand::MoveWordRightEnd { select } => (MotionTarget::WordRightEnd, select), + EditCommand::MoveBigWordLeft { select } => (MotionTarget::BigWordLeft, select), + EditCommand::MoveBigWordRightStart { select } => (MotionTarget::BigWordRightStart, select), + EditCommand::MoveBigWordRightEnd { select } => (MotionTarget::BigWordRightEnd, select), + _ => return None, + }) +} + #[cfg(test)] mod tests { use super::*;