Patch summary: adds two squat checks and registers them in src/checks/mod.rs.

src/checks/homoglyph.rs (new file): `Homoglyph` proposes names obtained by
swapping visually similar characters. Single characters are substituted one
position at a time; multi-character patterns are replaced in all occurrences
at once.

src/checks/keyboard.rs (new file): `KeyboardAdjacent` proposes names obtained
by replacing one character at a time with a neighbouring key taken from a
layout table; `qwerty()` (also the `Default`) supplies a US QWERTY table.

src/checks/mod.rs: declares and re-exports both checks, and adds the
`Squat::Homoglyph` and `Squat::KeyboardAdjacent` variants together with their
`package()` and `Display` arms.

NOTE(review): in `KeyboardAdjacent::qwerty` the digit keys list the letters
below them (for example '1' maps to "q"), but the top-row letters do not list
the digits above them, so the table is asymmetric. Confirm that is intended.

diff --git a/src/checks/homoglyph.rs b/src/checks/homoglyph.rs
new file mode 100644
index 0000000..996e350
--- /dev/null
+++ b/src/checks/homoglyph.rs
@@ -0,0 +1,147 @@
+use std::collections::HashMap;
+
+use crate::Corpus;
+
+use super::{util, Check, Package, Squat};
+
+/// Checks whether a package only differs from a package in the corpus by substituting visually
+/// similar characters.
+///
+/// This covers both single character confusables (`0` ↔ `o`, `1` ↔ `l`) and multi-character
+/// sequences that render similarly to a single glyph (`rn` ↔ `m`, `vv` ↔ `w`).
+///
+/// Documented attacks include `1odash` targeting `lodash` on npm and `r3quests` targeting
+/// `requests` on PyPI.
+pub struct Homoglyph {
+    glyphs: HashMap<char, Vec<String>>,
+    multi: Vec<(String, Vec<String>)>,
+}
+
+impl Homoglyph {
+    /// Instantiates a homoglyph check with custom substitution tables.
+    ///
+    /// `glyphs` maps a single character to its visually similar replacements.
+    ///
+    /// `multi` maps a multi-character sequence to its visually similar replacements. All
+    /// occurrences of a multi-character pattern are replaced at once.
+    pub fn new(
+        glyphs: impl Iterator<Item = (char, Vec<String>)>,
+        multi: impl Iterator<Item = (String, Vec<String>)>,
+    ) -> Self {
+        Self {
+            glyphs: glyphs.collect(),
+            multi: multi.collect(),
+        }
+    }
+}
+
+impl Default for Homoglyph {
+    fn default() -> Self {
+        let glyphs = [
+            ('a', vec!["4"]),
+            ('b', vec!["8", "6"]),
+            ('d', vec!["cl"]),
+            ('e', vec!["3"]),
+            ('g', vec!["9", "6"]),
+            ('i', vec!["1", "l"]),
+            ('l', vec!["1", "i"]),
+            ('m', vec!["rn", "nn"]),
+            ('o', vec!["0"]),
+            ('s', vec!["5"]),
+            ('t', vec!["7"]),
+            ('w', vec!["vv", "uu"]),
+            ('z', vec!["2"]),
+            ('0', vec!["o"]),
+            ('1', vec!["l", "i"]),
+            ('2', vec!["z"]),
+            ('3', vec!["e"]),
+            ('4', vec!["a"]),
+            ('5', vec!["s"]),
+            ('6', vec!["b", "g"]),
+            ('7', vec!["t"]),
+            ('8', vec!["b"]),
+            ('9', vec!["g", "q"]),
+        ];
+
+        let multi = [
+            ("rn", vec!["m"]),
+            ("nn", vec!["m"]),
+            ("vv", vec!["w"]),
+            ("uu", vec!["w"]),
+            ("cl", vec!["d"]),
+        ];
+
+        Self::new(
+            glyphs
+                .into_iter()
+                .map(|(c, v)| (c, v.into_iter().map(String::from).collect())),
+            multi
+                .into_iter()
+                .map(|(p, v)| (String::from(p), v.into_iter().map(String::from).collect())),
+        )
+    }
+}
+
+impl Check for Homoglyph {
+    fn check(
+        &self,
+        corpus: &dyn Corpus,
+        name: &str,
+        package: &dyn Package,
+    ) -> crate::Result<Vec<Squat>> {
+        let mut squats = Vec::new();
+
+        for (i, c) in name.char_indices() {
+            if let Some(glyphs) = self.glyphs.get(&c) {
+                for glyph in glyphs.iter() {
+                    let name_to_check = util::rebuild_name(name, i, c.len_utf8(), glyph);
+                    if corpus.possible_squat(&name_to_check, name, package)? {
+                        squats.push(Squat::Homoglyph(name_to_check));
+                    }
+                }
+            }
+        }
+
+        for (pattern, replacements) in self.multi.iter() {
+            if name.contains(pattern.as_str()) {
+                for replacement in replacements.iter() {
+                    let name_to_check = name.replace(pattern.as_str(), replacement);
+                    if corpus.possible_squat(&name_to_check, name, package)? {
+                        squats.push(Squat::Homoglyph(name_to_check));
+                    }
+                }
+            }
+        }
+
+        Ok(squats)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::checks::testutil::assert_check;
+
+    use super::*;
+
+    #[test]
+    fn test_homoglyph() -> crate::Result<()> {
+        #[track_caller]
+        fn test(input: &str, want: &[&str]) -> crate::Result<()> {
+            assert_check(Homoglyph::default(), input, want)
+        }
+
+        test("", &[])?;
+        test("x", &[])?;
+        test("lo", &["1o", "io", "l0"])?;
+        test("rn", &["m"])?;
+        test("m", &["rn", "nn"])?;
+        test("cl", &["d", "c1", "ci"])?;
+        test(
+            "1odash",
+            &["lodash", "iodash", "10dash", "1oclash", "1od4sh", "1oda5h"],
+        )?;
+        test("élé", &["é1é", "éié"])?;
+
+        Ok(())
+    }
+}
diff --git a/src/checks/keyboard.rs b/src/checks/keyboard.rs
new file mode 100644
index 0000000..e654f60
--- /dev/null
+++ b/src/checks/keyboard.rs
@@ -0,0 +1,134 @@
+use std::collections::HashMap;
+
+use crate::Corpus;
+
+use super::{util, Check, Package, Squat};
+
+/// Checks whether a package only differs from a package in the corpus by replacing one character
+/// with an adjacent key on a keyboard.
+///
+/// This is distinct from [`super::Typos`], which targets curated misspellings: this check
+/// systematically generates every single-character replacement based on physical key proximity.
+///
+/// Documented attacks include `requezts` and `requeats` targeting `requests` on PyPI.
+pub struct KeyboardAdjacent {
+    adjacent: HashMap<char, Vec<String>>,
+}
+
+impl KeyboardAdjacent {
+    /// Instantiates a keyboard-adjacent check with a custom layout.
+    ///
+    /// Each entry maps a key to the keys physically surrounding it.
+    pub fn new(adjacent: impl Iterator<Item = (char, Vec<String>)>) -> Self {
+        Self {
+            adjacent: adjacent.collect(),
+        }
+    }
+
+    /// Instantiates a keyboard-adjacent check using a US QWERTY layout.
+    pub fn qwerty() -> Self {
+        let layout = [
+            ('q', vec!["w", "a", "s"]),
+            ('w', vec!["q", "e", "a", "s", "d"]),
+            ('e', vec!["w", "r", "s", "d", "f"]),
+            ('r', vec!["e", "t", "d", "f", "g"]),
+            ('t', vec!["r", "y", "f", "g", "h"]),
+            ('y', vec!["t", "u", "g", "h", "j"]),
+            ('u', vec!["y", "i", "h", "j", "k"]),
+            ('i', vec!["u", "o", "j", "k", "l"]),
+            ('o', vec!["i", "p", "k", "l"]),
+            ('p', vec!["o", "l"]),
+            ('a', vec!["q", "w", "s", "z"]),
+            ('s', vec!["q", "w", "e", "a", "d", "z", "x"]),
+            ('d', vec!["w", "e", "r", "s", "f", "x", "c"]),
+            ('f', vec!["e", "r", "t", "d", "g", "c", "v"]),
+            ('g', vec!["r", "t", "y", "f", "h", "v", "b"]),
+            ('h', vec!["t", "y", "u", "g", "j", "b", "n"]),
+            ('j', vec!["y", "u", "i", "h", "k", "n", "m"]),
+            ('k', vec!["u", "i", "o", "j", "l", "m"]),
+            ('l', vec!["i", "o", "p", "k"]),
+            ('z', vec!["a", "s", "x"]),
+            ('x', vec!["s", "d", "z", "c"]),
+            ('c', vec!["d", "f", "x", "v"]),
+            ('v', vec!["f", "g", "c", "b"]),
+            ('b', vec!["g", "h", "v", "n"]),
+            ('n', vec!["h", "j", "b", "m"]),
+            ('m', vec!["j", "k", "n"]),
+            ('1', vec!["2", "q"]),
+            ('2', vec!["1", "3", "q", "w"]),
+            ('3', vec!["2", "4", "w", "e"]),
+            ('4', vec!["3", "5", "e", "r"]),
+            ('5', vec!["4", "6", "r", "t"]),
+            ('6', vec!["5", "7", "t", "y"]),
+            ('7', vec!["6", "8", "y", "u"]),
+            ('8', vec!["7", "9", "u", "i"]),
+            ('9', vec!["8", "0", "i", "o"]),
+            ('0', vec!["9", "o", "p"]),
+        ];
+
+        Self::new(
+            layout
+                .into_iter()
+                .map(|(c, v)| (c, v.into_iter().map(String::from).collect())),
+        )
+    }
+}
+
+impl Default for KeyboardAdjacent {
+    fn default() -> Self {
+        Self::qwerty()
+    }
+}
+
+impl Check for KeyboardAdjacent {
+    fn check(
+        &self,
+        corpus: &dyn Corpus,
+        name: &str,
+        package: &dyn Package,
+    ) -> crate::Result<Vec<Squat>> {
+        let mut squats = Vec::new();
+
+        for (i, c) in name.char_indices() {
+            if let Some(keys) = self.adjacent.get(&c) {
+                for key in keys.iter() {
+                    let name_to_check = util::rebuild_name(name, i, c.len_utf8(), key);
+                    if corpus.possible_squat(&name_to_check, name, package)? {
+                        squats.push(Squat::KeyboardAdjacent(name_to_check));
+                    }
+                }
+            }
+        }
+
+        Ok(squats)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::checks::testutil::assert_check;
+
+    use super::*;
+
+    #[test]
+    fn test_keyboard_adjacent() -> crate::Result<()> {
+        #[track_caller]
+        fn test(input: &str, want: &[&str]) -> crate::Result<()> {
+            assert_check(KeyboardAdjacent::qwerty(), input, want)
+        }
+
+        test("", &[])?;
+        test("-", &[])?;
+        test("p", &["o", "l"])?;
+        test("qz", &["wz", "az", "sz", "qa", "qs", "qx"])?;
+        test(
+            "ts",
+            &[
+                "rs", "ys", "fs", "gs", "hs", "tq", "tw", "te", "ta", "td", "tz", "tx",
+            ],
+        )?;
+        test("épé", &["éoé", "élé"])?;
+
+        Ok(())
+    }
+}
diff --git a/src/checks/mod.rs b/src/checks/mod.rs
index 7be8b39..66f68e5 100644
--- a/src/checks/mod.rs
+++ b/src/checks/mod.rs
@@ -8,6 +8,8 @@ use std::fmt::Display;
 use crate::{Corpus, Package};
 
 mod bitflips;
+mod homoglyph;
+mod keyboard;
 mod omitted;
 mod repeated;
 mod swapped;
@@ -19,6 +21,8 @@ mod version;
 mod testutil;
 
 pub use bitflips::Bitflips;
+pub use homoglyph::Homoglyph;
+pub use keyboard::KeyboardAdjacent;
 pub use omitted::Omitted;
 pub use repeated::Repeated;
 pub use swapped::{Characters as SwappedCharacters, Words as SwappedWords};
@@ -39,6 +43,8 @@ pub trait Check: Sync + Send {
 #[derive(Debug, Clone)]
 pub enum Squat {
     Bitflip(String),
+    Homoglyph(String),
+    KeyboardAdjacent(String),
     OmittedCharacter(String),
     RepeatedCharacter(String),
     SwappedCharacters(String),
@@ -61,6 +67,8 @@ impl Squat {
     pub fn package(&self) -> &str {
         match self {
             Squat::Bitflip(package) => package,
+            Squat::Homoglyph(package) => package,
+            Squat::KeyboardAdjacent(package) => package,
             Squat::OmittedCharacter(package) => package,
             Squat::RepeatedCharacter(package) => package,
             Squat::SwappedCharacters(package) => package,
@@ -79,6 +87,12 @@ impl Display for Squat {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Squat::Bitflip(package) => write!(f, "may be a bitflip of {package}"),
+            Squat::Homoglyph(package) => {
+                write!(f, "uses visually similar characters to {package}")
+            }
+            Squat::KeyboardAdjacent(package) => {
+                write!(f, "uses a keyboard-adjacent key from {package}")
+            }
             Squat::OmittedCharacter(package) => write!(f, "omits characters in {package}"),
             Squat::RepeatedCharacter(package) => write!(f, "repeats characters in {package}"),
             Squat::SwappedCharacters(package) => write!(f, "swaps characters in {package}"),