Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions src/checks/homoglyph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use std::collections::HashMap;

use crate::Corpus;

use super::{util, Check, Package, Squat};

/// Checks whether a package only differs from a package in the corpus by substituting visually
/// similar characters.
///
/// This covers both single character confusables (`0` ↔ `o`, `1` ↔ `l`) and multi-character
/// sequences that render similarly to a single glyph (`rn` ↔ `m`, `vv` ↔ `w`).
///
/// Documented attacks include `1odash` targeting `lodash` on npm and `r3quests` targeting
/// `requests` on PyPI.
#[derive(Debug, Clone)]
pub struct Homoglyph {
    /// Single characters and the strings that may visually stand in for them.
    glyphs: HashMap<char, Vec<String>>,
    /// Multi-character sequences and their replacements, in the order they were supplied.
    multi: Vec<(String, Vec<String>)>,
}

impl Homoglyph {
    /// Instantiates a homoglyph check with custom substitution tables.
    ///
    /// `glyphs` maps a single character to its visually similar replacements. The entries are
    /// collected into a [`HashMap`], so if the same character is supplied more than once the
    /// last entry replaces the earlier ones.
    ///
    /// `multi` maps a multi-character sequence to its visually similar replacements. All
    /// occurrences of a multi-character pattern are replaced at once.
    ///
    /// Both arguments accept anything that implements [`IntoIterator`], so a `Vec`, an array, a
    /// `HashMap` or an arbitrary iterator can be passed directly.
    pub fn new(
        glyphs: impl IntoIterator<Item = (char, Vec<String>)>,
        multi: impl IntoIterator<Item = (String, Vec<String>)>,
    ) -> Self {
        Self {
            glyphs: glyphs.into_iter().collect(),
            multi: multi.into_iter().collect(),
        }
    }
}

impl Default for Homoglyph {
    /// Builds the check with the built-in substitution tables.
    ///
    /// The single-character table pairs lowercase ASCII letters with look-alike digits (and the
    /// reverse), and also maps a few letters to two-letter sequences (`m` → `rn`). The
    /// multi-character table holds the opposite direction of those sequences (`rn` → `m`).
    fn default() -> Self {
        // Static tables: nothing is allocated until the entries are copied into owned strings.
        const GLYPHS: &[(char, &[&str])] = &[
            ('a', &["4"]),
            ('b', &["8", "6"]),
            ('d', &["cl"]),
            ('e', &["3"]),
            ('g', &["9", "6"]),
            ('i', &["1", "l"]),
            ('l', &["1", "i"]),
            ('m', &["rn", "nn"]),
            ('o', &["0"]),
            ('s', &["5"]),
            ('t', &["7"]),
            ('w', &["vv", "uu"]),
            ('z', &["2"]),
            ('0', &["o"]),
            ('1', &["l", "i"]),
            ('2', &["z"]),
            ('3', &["e"]),
            ('4', &["a"]),
            ('5', &["s"]),
            ('6', &["b", "g"]),
            ('7', &["t"]),
            ('8', &["b"]),
            ('9', &["g", "q"]),
        ];

        const MULTI: &[(&str, &[&str])] = &[
            ("rn", &["m"]),
            ("nn", &["m"]),
            ("vv", &["w"]),
            ("uu", &["w"]),
            ("cl", &["d"]),
        ];

        /// Copies a list of string literals into owned strings, preserving order.
        fn owned(items: &[&str]) -> Vec<String> {
            items.iter().map(|&item| String::from(item)).collect()
        }

        Self::new(
            GLYPHS.iter().map(|&(c, subs)| (c, owned(subs))),
            MULTI
                .iter()
                .map(|&(pattern, subs)| (String::from(pattern), owned(subs))),
        )
    }
}

impl Check for Homoglyph {
    /// Generates homoglyph variants of `name` and returns those the corpus reports as possible
    /// squats.
    ///
    /// Two passes are made, and results are returned in generation order:
    ///
    /// 1. For every character of `name` that has an entry in the single-character table, one
    ///    candidate is built per replacement, substituting only that occurrence.
    /// 2. For every multi-character pattern contained in `name`, one candidate is built per
    ///    replacement, substituting every occurrence of the pattern at once.
    ///
    /// Any error returned by `corpus.possible_squat` aborts the check and is propagated.
    fn check(
        &self,
        corpus: &dyn Corpus,
        name: &str,
        package: &dyn Package,
    ) -> crate::Result<Vec<Squat>> {
        let mut found = Vec::new();

        // Pass 1: single-character confusables, one position at a time.
        for (offset, ch) in name.char_indices() {
            let lookalikes = match self.glyphs.get(&ch) {
                Some(lookalikes) => lookalikes,
                None => continue,
            };

            for lookalike in lookalikes {
                // `offset` is a byte index and `len_utf8` the byte width of `ch`, so the
                // replaced span is exactly this character even in non-ASCII names.
                let candidate = util::rebuild_name(name, offset, ch.len_utf8(), lookalike);
                if corpus.possible_squat(&candidate, name, package)? {
                    found.push(Squat::Homoglyph(candidate));
                }
            }
        }

        // Pass 2: multi-character sequences, all occurrences replaced together.
        for (pattern, replacements) in &self.multi {
            if !name.contains(pattern.as_str()) {
                continue;
            }

            for replacement in replacements {
                let candidate = name.replace(pattern.as_str(), replacement);
                if corpus.possible_squat(&candidate, name, package)? {
                    found.push(Squat::Homoglyph(candidate));
                }
            }
        }

        Ok(found)
    }
}

#[cfg(test)]
mod tests {
    use crate::checks::testutil::assert_check;

    use super::*;

    /// Runs the default homoglyph tables against a handful of representative names.
    #[test]
    fn test_homoglyph() -> crate::Result<()> {
        /// Checks `name` with the default tables and compares the outcome with `expected`.
        #[track_caller]
        fn expect(name: &str, expected: &[&str]) -> crate::Result<()> {
            assert_check(Homoglyph::default(), name, expected)
        }

        // Names without any substitutable character yield nothing.
        expect("", &[])?;
        expect("x", &[])?;

        // Single-character substitutions are applied one position at a time.
        expect("lo", &["1o", "io", "l0"])?;

        // Multi-character sequences collapse to one glyph, and the reverse expansion works too.
        expect("rn", &["m"])?;
        expect("m", &["rn", "nn"])?;

        // A name can trigger both tables at once.
        expect("cl", &["d", "c1", "ci"])?;

        // The documented npm attack on `lodash`.
        expect(
            "1odash",
            &["lodash", "iodash", "10dash", "1oclash", "1od4sh", "1oda5h"],
        )?;

        // Multi-byte characters around the substituted one are left intact.
        expect("élé", &["é1é", "éié"])
    }
}
134 changes: 134 additions & 0 deletions src/checks/keyboard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use std::collections::HashMap;

use crate::Corpus;

use super::{util, Check, Package, Squat};

/// Checks whether a package only differs from a package in the corpus by replacing one character
/// with an adjacent key on a keyboard.
///
/// This is distinct from [`super::Typos`], which targets curated misspellings: this check
/// systematically generates every single-character replacement based on physical key proximity.
///
/// Documented attacks include `requezts` and `requeats` targeting `requests` on PyPI.
#[derive(Debug, Clone)]
pub struct KeyboardAdjacent {
    /// Each key mapped to the keys considered adjacent to it.
    adjacent: HashMap<char, Vec<String>>,
}

impl KeyboardAdjacent {
    /// Instantiates a keyboard-adjacent check with a custom layout.
    ///
    /// Each entry maps a key to the keys physically surrounding it. The entries are collected
    /// into a [`HashMap`], so if the same key is supplied more than once the last entry replaces
    /// the earlier ones.
    ///
    /// Anything that implements [`IntoIterator`] is accepted, so a `Vec`, an array, a `HashMap`
    /// or an arbitrary iterator can be passed directly.
    pub fn new(adjacent: impl IntoIterator<Item = (char, Vec<String>)>) -> Self {
        Self {
            adjacent: adjacent.into_iter().collect(),
        }
    }

    /// Instantiates a keyboard-adjacent check using a US QWERTY layout.
    ///
    /// The table covers the lowercase letters `a`-`z` and the digits `0`-`9`. Digit keys list
    /// the letter keys beneath them, but letter keys do not list digits.
    // NOTE(review): the digit/letter asymmetry looks deliberate, but confirm with the author.
    pub fn qwerty() -> Self {
        // Static table: nothing is allocated until the entries are copied into owned strings.
        const LAYOUT: &[(char, &[&str])] = &[
            // Top letter row.
            ('q', &["w", "a", "s"]),
            ('w', &["q", "e", "a", "s", "d"]),
            ('e', &["w", "r", "s", "d", "f"]),
            ('r', &["e", "t", "d", "f", "g"]),
            ('t', &["r", "y", "f", "g", "h"]),
            ('y', &["t", "u", "g", "h", "j"]),
            ('u', &["y", "i", "h", "j", "k"]),
            ('i', &["u", "o", "j", "k", "l"]),
            ('o', &["i", "p", "k", "l"]),
            ('p', &["o", "l"]),
            // Home row.
            ('a', &["q", "w", "s", "z"]),
            ('s', &["q", "w", "e", "a", "d", "z", "x"]),
            ('d', &["w", "e", "r", "s", "f", "x", "c"]),
            ('f', &["e", "r", "t", "d", "g", "c", "v"]),
            ('g', &["r", "t", "y", "f", "h", "v", "b"]),
            ('h', &["t", "y", "u", "g", "j", "b", "n"]),
            ('j', &["y", "u", "i", "h", "k", "n", "m"]),
            ('k', &["u", "i", "o", "j", "l", "m"]),
            ('l', &["i", "o", "p", "k"]),
            // Bottom letter row.
            ('z', &["a", "s", "x"]),
            ('x', &["s", "d", "z", "c"]),
            ('c', &["d", "f", "x", "v"]),
            ('v', &["f", "g", "c", "b"]),
            ('b', &["g", "h", "v", "n"]),
            ('n', &["h", "j", "b", "m"]),
            ('m', &["j", "k", "n"]),
            // Number row.
            ('1', &["2", "q"]),
            ('2', &["1", "3", "q", "w"]),
            ('3', &["2", "4", "w", "e"]),
            ('4', &["3", "5", "e", "r"]),
            ('5', &["4", "6", "r", "t"]),
            ('6', &["5", "7", "t", "y"]),
            ('7', &["6", "8", "y", "u"]),
            ('8', &["7", "9", "u", "i"]),
            ('9', &["8", "0", "i", "o"]),
            ('0', &["9", "o", "p"]),
        ];

        Self::new(LAYOUT.iter().map(|&(key, neighbours)| {
            (
                key,
                neighbours
                    .iter()
                    .map(|&neighbour| String::from(neighbour))
                    .collect(),
            )
        }))
    }
}

impl Default for KeyboardAdjacent {
    /// Returns the US QWERTY check; equivalent to [`KeyboardAdjacent::qwerty`].
    fn default() -> Self {
        Self::qwerty()
    }
}

impl Check for KeyboardAdjacent {
    /// Generates keyboard-adjacent variants of `name` and returns those the corpus reports as
    /// possible squats.
    ///
    /// For every character of `name` that has an entry in the layout, one candidate is built per
    /// adjacent key, substituting only that occurrence. Results are returned in generation
    /// order. Any error returned by `corpus.possible_squat` aborts the check and is propagated.
    fn check(
        &self,
        corpus: &dyn Corpus,
        name: &str,
        package: &dyn Package,
    ) -> crate::Result<Vec<Squat>> {
        let mut found = Vec::new();

        for (offset, ch) in name.char_indices() {
            // Characters missing from the layout (punctuation, non-ASCII, ...) are skipped.
            let neighbours = match self.adjacent.get(&ch) {
                Some(neighbours) => neighbours,
                None => continue,
            };

            for neighbour in neighbours {
                // `offset` is a byte index and `len_utf8` the byte width of `ch`, so the
                // replaced span is exactly this character even in non-ASCII names.
                let candidate = util::rebuild_name(name, offset, ch.len_utf8(), neighbour);
                if corpus.possible_squat(&candidate, name, package)? {
                    found.push(Squat::KeyboardAdjacent(candidate));
                }
            }
        }

        Ok(found)
    }
}

#[cfg(test)]
mod tests {
    use crate::checks::testutil::assert_check;

    use super::*;

    /// Runs the QWERTY layout against a handful of representative names.
    #[test]
    fn test_keyboard_adjacent() -> crate::Result<()> {
        /// Checks `name` with the QWERTY layout and compares the outcome with `expected`.
        #[track_caller]
        fn expect(name: &str, expected: &[&str]) -> crate::Result<()> {
            assert_check(KeyboardAdjacent::qwerty(), name, expected)
        }

        // Empty names and characters missing from the layout yield nothing.
        expect("", &[])?;
        expect("-", &[])?;

        // A corner key has only two neighbours.
        expect("p", &["o", "l"])?;

        // Each position is substituted independently of the others.
        expect("qz", &["wz", "az", "sz", "qa", "qs", "qx"])?;
        expect(
            "ts",
            &[
                "rs", "ys", "fs", "gs", "hs", "tq", "tw", "te", "ta", "td", "tz", "tx",
            ],
        )?;

        // Multi-byte characters around the substituted one are left intact.
        expect("épé", &["éoé", "élé"])
    }
}
14 changes: 14 additions & 0 deletions src/checks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use std::fmt::Display;
use crate::{Corpus, Package};

mod bitflips;
mod homoglyph;
mod keyboard;
mod omitted;
mod repeated;
mod swapped;
Expand All @@ -19,6 +21,8 @@ mod version;
mod testutil;

pub use bitflips::Bitflips;
pub use homoglyph::Homoglyph;
pub use keyboard::KeyboardAdjacent;
pub use omitted::Omitted;
pub use repeated::Repeated;
pub use swapped::{Characters as SwappedCharacters, Words as SwappedWords};
Expand All @@ -39,6 +43,8 @@ pub trait Check: Sync + Send {
#[derive(Debug, Clone)]
pub enum Squat {
Bitflip(String),
Homoglyph(String),
KeyboardAdjacent(String),
OmittedCharacter(String),
RepeatedCharacter(String),
SwappedCharacters(String),
Expand All @@ -61,6 +67,8 @@ impl Squat {
pub fn package(&self) -> &str {
match self {
Squat::Bitflip(package) => package,
Squat::Homoglyph(package) => package,
Squat::KeyboardAdjacent(package) => package,
Squat::OmittedCharacter(package) => package,
Squat::RepeatedCharacter(package) => package,
Squat::SwappedCharacters(package) => package,
Expand All @@ -79,6 +87,12 @@ impl Display for Squat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Squat::Bitflip(package) => write!(f, "may be a bitflip of {package}"),
Squat::Homoglyph(package) => {
write!(f, "uses visually similar characters to {package}")
}
Squat::KeyboardAdjacent(package) => {
write!(f, "uses a keyboard-adjacent key from {package}")
}
Squat::OmittedCharacter(package) => write!(f, "omits characters in {package}"),
Squat::RepeatedCharacter(package) => write!(f, "repeats characters in {package}"),
Squat::SwappedCharacters(package) => write!(f, "swaps characters in {package}"),
Expand Down