Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/edit_mode/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ use crate::{
PromptEditMode,
};

/// Buffer snapshot passed to an [`EditMode`] when resolving a [`MotionTarget`].
///
/// The snapshot only borrows the buffer, so it is cheap to build, copy and
/// print while debugging a custom edit mode.
#[derive(Debug, Clone, Copy)]
pub struct EditContext<'a> {
    /// Text of the line buffer at the moment the motion is resolved.
    pub buffer: &'a str,
    /// Cursor position inside `buffer`, expressed as a byte offset.
    pub cursor: usize,
}

/// The target shape of a motion. Each [`EditMode`] decides what buffer offset
/// the target resolves to using its own segmentation rules.
///
/// Each variant mirrors one word-motion `EditCommand` (`MoveWordLeft`,
/// `MoveWordRight`, ...); the `select` flag of the command is carried
/// separately and is not part of the target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MotionTarget {
    /// Backwards to the start of a word (counterpart of `MoveWordLeft`).
    WordLeft,
    /// Forwards by one word (counterpart of `MoveWordRight`).
    WordRight,
    /// Forwards to the start of the next word (counterpart of `MoveWordRightStart`).
    WordRightStart,
    /// Forwards to the end of a word (counterpart of `MoveWordRightEnd`).
    WordRightEnd,
    /// Backwards to the start of a whitespace-delimited word
    /// (counterpart of `MoveBigWordLeft`).
    BigWordLeft,
    /// Forwards to the start of the next whitespace-delimited word
    /// (counterpart of `MoveBigWordRightStart`).
    BigWordRightStart,
    /// Forwards to the end of a whitespace-delimited word
    /// (counterpart of `MoveBigWordRightEnd`).
    BigWordRightEnd,
}

/// Define the style of parsing for the edit events
/// Available default options:
/// - Emacs
Expand All @@ -18,4 +37,11 @@ pub trait EditMode: Send {
fn handle_mode_specific_event(&mut self, _event: ReedlineEvent) -> EventStatus {
    // Default implementation: the event is ignored and reported as not
    // applicable to this mode.
    EventStatus::Inapplicable
}

/// Resolve a [`MotionTarget`] to a buffer offset using this mode's segmentation rules.
///
/// `_ctx` carries the buffer text and the current cursor position the
/// motion starts from.
///
/// Returning `None` defers to the default `LineBuffer` behavior for the
/// equivalent legacy command. The default implementation always returns
/// `None`, so modes that do not override it keep the legacy behavior.
fn resolve_motion(&self, _target: MotionTarget, _ctx: &EditContext) -> Option<usize> {
    None
}
}
2 changes: 1 addition & 1 deletion src/edit_mode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod helix;
mod keybindings;
mod vi;

pub use base::EditMode;
pub use base::{EditContext, EditMode, MotionTarget};
pub use cursors::CursorConfig;
pub use emacs::{default_emacs_keybindings, Emacs};
#[cfg(feature = "helix")]
Expand Down
23 changes: 22 additions & 1 deletion src/edit_mode/vi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod command;
mod motion;
mod parser;
mod vi_keybindings;
mod word;

use std::str::FromStr;

Expand All @@ -10,7 +11,7 @@ pub use vi_keybindings::{default_vi_insert_keybindings, default_vi_normal_keybin

use self::motion::ViCharSearch;

use super::EditMode;
use super::{EditContext, EditMode, MotionTarget};
use crate::{
edit_mode::{keybindings::Keybindings, vi::parser::parse},
enums::{EditCommand, EventStatus, ReedlineEvent, ReedlineRawEvent},
Expand Down Expand Up @@ -223,6 +224,26 @@ impl EditMode for Vi {
_ => EventStatus::Inapplicable,
}
}

fn resolve_motion(
&self,
target: MotionTarget,
&EditContext { buffer, cursor }: &EditContext,
) -> Option<usize> {
use word::WordKind::{BigWord, Word};
Some(match target {
MotionTarget::WordLeft => word::word_left_index(buffer, cursor, Word),
MotionTarget::WordRightStart => word::word_right_start_index(buffer, cursor, Word),
MotionTarget::WordRightEnd => word::word_right_end_index(buffer, cursor, Word),
MotionTarget::BigWordLeft => word::word_left_index(buffer, cursor, BigWord),
MotionTarget::BigWordRightStart => {
word::word_right_start_index(buffer, cursor, BigWord)
}
MotionTarget::BigWordRightEnd => word::word_right_end_index(buffer, cursor, BigWord),
// Vi never emits WordRight; defer to LineBuffer's UAX #29 path.
MotionTarget::WordRight => return None,
})
}
}

#[cfg(test)]
Expand Down
223 changes: 223 additions & 0 deletions src/edit_mode/vi/word.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
//! Vi-standard word segmentation.
//!
//! Vi groups characters into three classes:
//! - **Keyword**: alphanumeric and underscore
//! - **Punctuation**: any non-keyword, non-whitespace char
//! - **Whitespace**
//!
//! Word boundaries occur at any class transition. This differs from reedline's
//! existing UAX #29 word boundaries (used by Emacs), which treat `foo.bar` as
//! one word; Vi sees three (`foo`, `.`, `bar`).
//!
//! `BigWord` motions (W/E/B) collapse Keyword and Punctuation: only whitespace
//! creates a boundary.
//!
//! Iteration walks grapheme clusters (via `unicode_segmentation::grapheme_indices`)
//! so multi-codepoint sequences (combining marks, ZWJ emoji) are treated as one
//! unit, matching the rest of `core_editor`. Each cluster is classified by its
//! first scalar.
//!
//! Classification uses Unicode-aware predicates, aligning with Helix's model
//! rather than Vim's strict default `iskeyword` (Latin-1 only).

use unicode_segmentation::UnicodeSegmentation;

/// Class a grapheme cluster falls into for word segmentation; a word
/// boundary occurs wherever two neighbouring clusters differ in class.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharClass {
    /// Alphanumeric or underscore. For `BigWord` motions every
    /// non-whitespace cluster is reported as this class.
    Keyword,
    /// Any non-keyword, non-whitespace character.
    Punctuation,
    /// Whitespace, which separates segments.
    Whitespace,
}

/// Selects which segmentation a word motion uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum WordKind {
    /// Small-word motions: keyword, punctuation and whitespace are
    /// three distinct classes.
    Word,
    /// Big-word motions (W/E/B): keyword and punctuation collapse, so only
    /// whitespace creates a boundary.
    BigWord,
}

/// Classify a grapheme cluster by its first scalar value.
///
/// Whitespace always wins. For [`WordKind::BigWord`] everything else is a
/// keyword; for [`WordKind::Word`] only alphanumerics and `_` are keywords and
/// the remainder is punctuation.
///
/// # Panics
///
/// Panics if `grapheme` is empty.
fn class_of(grapheme: &str, kind: WordKind) -> CharClass {
    let lead = grapheme
        .chars()
        .next()
        .expect("grapheme cluster is non-empty");

    match kind {
        _ if lead.is_whitespace() => CharClass::Whitespace,
        WordKind::BigWord => CharClass::Keyword,
        WordKind::Word if lead.is_alphanumeric() || lead == '_' => CharClass::Keyword,
        WordKind::Word => CharClass::Punctuation,
    }
}

/// Vi `w`/`W` motion: move to the first grapheme of the next class segment.
///
/// The segment under the cursor is skipped, then any whitespace that follows
/// it. Returns a byte offset into `buffer`; when no further segment exists the
/// result is `buffer.len()`.
pub(super) fn word_right_start_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
    let end = buffer.len();
    let mut classes = buffer[cursor..]
        .grapheme_indices(true)
        .map(|(offset, cluster)| (cursor + offset, class_of(cluster, kind)));

    // Class of the grapheme the cursor sits on; nothing there means end of buffer.
    let Some((_, origin)) = classes.next() else {
        return end;
    };

    match classes.find(|&(_, class)| class != origin) {
        // The current segment runs to the end of the buffer.
        None => end,
        // Left a word into whitespace: the target is whatever follows the gap.
        Some((_, CharClass::Whitespace)) => classes
            .find(|&(_, class)| class != CharClass::Whitespace)
            .map_or(end, |(pos, _)| pos),
        // Direct transition into another non-whitespace segment.
        Some((pos, _)) => pos,
    }
}

/// Vi `e`/`E` motion: move to the last grapheme of the current class segment,
/// or of the next segment when the cursor already sits on a segment end.
///
/// Returns the byte offset at which that last grapheme starts. When no
/// non-whitespace grapheme follows the cursor the result is `buffer.len()`.
pub(super) fn word_right_end_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
    // The grapheme under the cursor is always skipped, so that resting on the
    // end of a word advances to the end of the *next* one.
    let mut ahead = buffer[cursor..]
        .grapheme_indices(true)
        .skip(1)
        .map(|(offset, cluster)| (cursor + offset, class_of(cluster, kind)));

    // First non-whitespace grapheme opens the segment whose end we want.
    let Some((segment_start, segment)) = ahead.find(|&(_, class)| class != CharClass::Whitespace)
    else {
        return buffer.len();
    };

    // Walk the remainder of that segment and keep its final position.
    ahead
        .take_while(|&(_, class)| class == segment)
        .last()
        .map_or(segment_start, |(pos, _)| pos)
}

/// Vi `b`/`B` motion: move to the first grapheme of the current class segment,
/// or of the previous segment when the cursor already sits on a segment start.
///
/// Only the text before `cursor` is inspected. Returns a byte offset into
/// `buffer`; the result is `0` when nothing but whitespace (or nothing at all)
/// precedes the cursor.
pub(super) fn word_left_index(buffer: &str, cursor: usize, kind: WordKind) -> usize {
    // Scan backwards from the cursor. An empty prefix (cursor at 0) yields no
    // graphemes and falls through to the `0` result below.
    let mut behind = buffer[..cursor]
        .grapheme_indices(true)
        .rev()
        .map(|(pos, cluster)| (pos, class_of(cluster, kind)));

    // Nearest non-whitespace grapheme decides which segment we land in.
    let Some((nearest, segment)) = behind.find(|&(_, class)| class != CharClass::Whitespace)
    else {
        return 0;
    };

    // Keep walking left while the class is unchanged; the last hit is the start.
    behind
        .take_while(|&(_, class)| class == segment)
        .last()
        .map_or(nearest, |(pos, _)| pos)
}

#[cfg(test)]
mod tests {
    //! Table-driven checks for the Vi word motions.
    //!
    //! Every case is `(buffer, cursor, expected)` where both `cursor` and
    //! `expected` are byte offsets. Runs of spaces inside the literals are
    //! significant: the expected offsets depend on their exact width.

    use super::*;
    use rstest::rstest;

    // --- small word `w` ---
    #[rstest]
    #[case("hello world", 0, 6)]
    #[case("hello world", 4, 6)]
    #[case("foo.bar", 0, 3)]
    #[case("foo.bar", 3, 4)]
    // Two spaces between the words: the whole gap is skipped.
    #[case("foo  bar", 0, 5)]
    #[case("hello", 0, 5)]
    #[case("", 0, 0)]
    // Starting on leading whitespace (two spaces) lands on the first word.
    #[case("  hello", 0, 2)]
    #[case("a_b foo", 0, 4)]
    // Unicode: precomposed multi-byte, combining mark, ZWJ emoji
    #[case("café foo", 0, 6)]
    #[case("e\u{0301} foo", 0, 4)]
    #[case("👨‍👩‍👧 foo", 0, 19)]
    fn small_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
        assert_eq!(
            word_right_start_index(buffer, cursor, WordKind::Word),
            expected
        );
    }

    // --- big word `W` ---
    #[rstest]
    #[case("foo.bar baz", 0, 8)]
    #[case("foo bar", 0, 4)]
    // Three spaces between the words.
    #[case("foo   bar", 0, 6)]
    #[case("", 0, 0)]
    fn big_w(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
        assert_eq!(
            word_right_start_index(buffer, cursor, WordKind::BigWord),
            expected
        );
    }

    // --- small word `e` ---
    #[rstest]
    #[case("hello world", 0, 4)]
    #[case("hello world", 4, 10)]
    #[case("foo.bar", 0, 2)]
    #[case("foo.bar", 2, 3)]
    #[case("foo.bar", 3, 6)]
    #[case("", 0, 0)]
    // Two leading spaces: the end of `hello` is at byte 6.
    #[case("  hello", 0, 6)]
    // Unicode: lands on the byte offset of the last grapheme's start
    #[case("café foo", 0, 3)]
    fn small_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
        assert_eq!(
            word_right_end_index(buffer, cursor, WordKind::Word),
            expected
        );
    }

    // --- big word `E` ---
    #[rstest]
    #[case("foo.bar baz", 0, 6)]
    #[case("foo bar", 0, 2)]
    fn big_e(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
        assert_eq!(
            word_right_end_index(buffer, cursor, WordKind::BigWord),
            expected
        );
    }

    // --- small word `b` ---
    #[rstest]
    #[case("hello world", 6, 0)]
    #[case("hello world", 10, 6)]
    #[case("hello world", 0, 0)]
    #[case("foo.bar", 4, 3)]
    #[case("foo.bar", 3, 0)]
    // Three leading spaces, cursor at the end of the buffer.
    #[case("   abc", 6, 3)]
    // Whitespace only (three spaces): nothing to land on, so go to 0.
    #[case("   ", 3, 0)]
    // Unicode: backwards iteration must not split a grapheme cluster
    #[case("café foo", 9, 6)]
    #[case("café", 5, 0)]
    fn small_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
        assert_eq!(word_left_index(buffer, cursor, WordKind::Word), expected);
    }

    // --- big word `B` ---
    #[rstest]
    #[case("foo.bar baz", 8, 0)]
    #[case("foo.bar", 4, 0)]
    fn big_b(#[case] buffer: &str, #[case] cursor: usize, #[case] expected: usize) {
        assert_eq!(word_left_index(buffer, cursor, WordKind::BigWord), expected);
    }
}
29 changes: 27 additions & 2 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use {
crate::{
completion::{Completer, DefaultCompleter},
core_editor::Editor,
edit_mode::{EditMode, Emacs},
edit_mode::{EditContext, EditMode, Emacs, MotionTarget},
enums::{EventStatus, ReedlineEvent},
highlighter::SimpleMatchHighlighter,
hinter::Hinter,
Expand Down Expand Up @@ -1718,7 +1718,17 @@ impl Reedline {

// Run the commands over the edit buffer
for command in commands {
self.editor.run_edit_command(command);
let resolved = motion_intent(command).and_then(|(target, select)| {
let ctx = EditContext {
buffer: self.editor.get_buffer(),
cursor: self.editor.line_buffer().insertion_point(),
};
self.edit_mode
.resolve_motion(target, &ctx)
.map(|position| EditCommand::MoveToPosition { position, select })
});
self.editor
.run_edit_command(resolved.as_ref().unwrap_or(command));
}
}

Expand Down Expand Up @@ -2254,6 +2264,21 @@ impl Reedline {
}
}

/// Map a word-motion [`EditCommand`] to its [`MotionTarget`].
///
/// Yields `Some((target, select))` for the word and big-word movement
/// commands, where `select` is the command's own `select` flag, and `None`
/// for every other command.
fn motion_intent(command: &EditCommand) -> Option<(MotionTarget, bool)> {
    let (target, select) = match command {
        EditCommand::MoveWordLeft { select } => (MotionTarget::WordLeft, select),
        EditCommand::MoveWordRight { select } => (MotionTarget::WordRight, select),
        EditCommand::MoveWordRightStart { select } => (MotionTarget::WordRightStart, select),
        EditCommand::MoveWordRightEnd { select } => (MotionTarget::WordRightEnd, select),
        EditCommand::MoveBigWordLeft { select } => (MotionTarget::BigWordLeft, select),
        EditCommand::MoveBigWordRightStart { select } => (MotionTarget::BigWordRightStart, select),
        EditCommand::MoveBigWordRightEnd { select } => (MotionTarget::BigWordRightEnd, select),
        // Not a word motion: the caller runs the command unchanged.
        _ => return None,
    };
    Some((target, *select))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading