Patch summary
=============
* Adds a cargo-fuzz crate under fuzz/ with a single target, `checks`, that
  builds a harness of typomania checks over a small in-memory corpus and runs
  it against arbitrary input names (inputs longer than 128 bytes are skipped).
* Omitted, Repeated, SwappedCharacters and Typos now take their positions from
  `char_indices()` (byte offsets) instead of `chars().enumerate()` / a plain
  `0..=len` range; SwappedCharacters and Typos additionally pass
  `len_utf8()`-derived lengths to `util::rebuild_name` instead of the
  constants 2 and 1.
* Adds test cases with non-ASCII names for each of the four checks.

diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 0000000..5c404b9
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,5 @@
+target
+corpus
+artifacts
+coverage
+Cargo.lock
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 0000000..cef2ad0
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "typomania-fuzz"
+version = "0.0.0"
+publish = false
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+[dependencies.typomania]
+path = ".."
+
+[[bin]]
+name = "checks"
+path = "fuzz_targets/checks.rs"
+test = false
+doc = false
+bench = false
+
+[workspace]
diff --git a/fuzz/fuzz_targets/checks.rs b/fuzz/fuzz_targets/checks.rs
new file mode 100644
index 0000000..28a71a5
--- /dev/null
+++ b/fuzz/fuzz_targets/checks.rs
@@ -0,0 +1,95 @@
+#![no_main]
+
+use std::{
+    collections::{HashMap, HashSet},
+    sync::OnceLock,
+};
+
+use libfuzzer_sys::fuzz_target;
+use typomania::{
+    checks::{Bitflips, Omitted, Repeated, SwappedCharacters, SwappedWords, Typos, Version},
+    AuthorSet, Corpus, Harness, Package,
+};
+
+const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz0123456789_-";
+const POPULAR: &[&str] = &["serde", "tokio", "syn", "rand", "log"];
+
+struct FuzzPackage;
+
+impl AuthorSet for FuzzPackage {
+    fn contains(&self, _author: &str) -> bool {
+        false
+    }
+}
+
+impl Package for FuzzPackage {
+    fn authors(&self) -> &dyn AuthorSet {
+        self
+    }
+
+    fn description(&self) -> Option<&str> {
+        None
+    }
+
+    fn shared_authors(&self, _other: &dyn AuthorSet) -> bool {
+        false
+    }
+}
+
+struct FuzzCorpus(HashMap<String, FuzzPackage>);
+
+impl FuzzCorpus {
+    fn new() -> Self {
+        Self(
+            POPULAR
+                .iter()
+                .map(|n| (String::from(*n), FuzzPackage))
+                .collect(),
+        )
+    }
+}
+
+impl Corpus for FuzzCorpus {
+    fn contains_name(&self, name: &str) -> typomania::Result<bool> {
+        Ok(self.0.contains_key(name))
+    }
+
+    fn get(&self, name: &str) -> typomania::Result<Option<&dyn Package>> {
+        Ok(self.0.get(name).map(|p| p as &dyn Package))
+    }
+}
+
+fn harness() -> &'static Harness<FuzzCorpus> {
+    static HARNESS: OnceLock<Harness<FuzzCorpus>> = OnceLock::new();
+    HARNESS.get_or_init(|| {
+        Harness::empty_builder()
+            .with_check(Repeated)
+            .with_check(SwappedCharacters)
+            .with_check(Version)
+            .with_check(Omitted::new(ALPHABET))
+            .with_check(SwappedWords::new("-_").with_max_k(3))
+            .with_check(Bitflips::new(ALPHABET, POPULAR.iter().copied()))
+            .with_check(Typos::new(
+                [
+                    ('a', vec![String::from("s"), String::from("q")]),
+                    ('e', vec![String::from("3"), String::from("r")]),
+                ]
+                .into_iter(),
+            ))
+            .build(FuzzCorpus::new())
+    })
+}
+
+fuzz_target!(|name: &str| {
+    if name.len() > 128 {
+        return;
+    }
+
+    let mut seen = HashSet::new();
+    if let Ok(squats) = harness().check_package(name, Box::new(FuzzPackage)) {
+        for squat in squats {
+            seen.insert(String::from(squat.package()));
+        }
+    }
+    let _ = seen;
+});
diff --git a/src/checks/omitted.rs b/src/checks/omitted.rs
index 757b3e4..0ed695d 100644
--- a/src/checks/omitted.rs
+++ b/src/checks/omitted.rs
@@ -27,7 +27,11 @@ impl Check for Omitted {
     ) -> crate::Result<Vec<Squat>> {
         let mut squats = Vec::new();
 
-        for i in 0..=name.len() {
+        for i in name
+            .char_indices()
+            .map(|(i, _)| i)
+            .chain(std::iter::once(name.len()))
+        {
             for c in self.alphabet.iter() {
                 let name_to_check = util::rebuild_name(name, i, 0, c);
                 if corpus.possible_squat(&name_to_check, name, package)? {
@@ -55,6 +59,9 @@ mod tests {
                 "axyz", "bxyz", "cxyz", "xayz", "xbyz", "xcyz", "xyaz", "xybz", "xycz", "xyza",
                 "xyzb", "xyzc",
             ],
-        )
+        )?;
+        assert_check(Omitted::new("a"), "-ۊ-", &["a-ۊ-", "-aۊ-", "-ۊa-", "-ۊ-a"])?;
+
+        Ok(())
     }
 }
diff --git a/src/checks/repeated.rs b/src/checks/repeated.rs
index aa9eae2..7ae5cbb 100644
--- a/src/checks/repeated.rs
+++ b/src/checks/repeated.rs
@@ -16,7 +16,7 @@ impl Check for Repeated {
     ) -> crate::Result<Vec<Squat>> {
         let mut squats = Vec::new();
 
-        for (i, (a, b)) in name.chars().tuple_windows().enumerate() {
+        for ((i, a), (_, b)) in name.char_indices().tuple_windows() {
             if a == b && a.is_ascii() {
                 let name_to_check = util::rebuild_name(name, i, 2, &format!("{a}"));
                 if corpus.possible_squat(&name_to_check, name, package)? {
@@ -50,6 +50,8 @@ mod tests {
         test("abbbc", &["abbc"])?;
         test("abbbbc", &["abbbc"])?;
         test("aaaaaa", &["aaaaa"])?;
+        test("ۊۊ", &[])?;
+        test("ۊaaۊ", &["ۊaۊ"])?;
 
         Ok(())
     }
diff --git a/src/checks/swapped.rs b/src/checks/swapped.rs
index 3a667dd..ced9930 100644
--- a/src/checks/swapped.rs
+++ b/src/checks/swapped.rs
@@ -14,9 +14,10 @@ impl Check for Characters {
     ) -> crate::Result<Vec<Squat>> {
         let mut squats = Vec::new();
 
-        for (i, (a, b)) in name.chars().tuple_windows().enumerate() {
+        for ((i, a), (_, b)) in name.char_indices().tuple_windows() {
             if a != b {
-                let name_to_check = util::rebuild_name(name, i, 2, &format!("{b}{a}"));
+                let len = a.len_utf8() + b.len_utf8();
+                let name_to_check = util::rebuild_name(name, i, len, &format!("{b}{a}"));
                 if corpus.possible_squat(&name_to_check, name, package)? {
                     squats.push(Squat::SwappedCharacters(name_to_check));
                 }
@@ -117,6 +118,7 @@ mod tests {
         test("a", &[])?;
         test("ab", &["ba"])?;
         test("abc", &["bac", "acb"])?;
+        test("aۊb", &["ۊab", "abۊ"])?;
 
         Ok(())
     }
diff --git a/src/checks/typos.rs b/src/checks/typos.rs
index e661796..9233b53 100644
--- a/src/checks/typos.rs
+++ b/src/checks/typos.rs
@@ -34,10 +34,10 @@ impl Check for Typos {
     ) -> crate::Result<Vec<Squat>> {
         let mut squats = Vec::new();
 
-        for (i, c) in name.chars().enumerate() {
+        for (i, c) in name.char_indices() {
             if let Some(typos) = self.typos.get(&c) {
                 for typo in typos.iter() {
-                    let name_to_check = util::rebuild_name(name, i, 1, typo);
+                    let name_to_check = util::rebuild_name(name, i, c.len_utf8(), typo);
                     if corpus.possible_squat(&name_to_check, name, package)? {
                         squats.push(Squat::Typo(name_to_check));
                     }
@@ -70,6 +70,7 @@ mod tests {
         test("x", &[])?;
         test("a", &["ab", "b"])?;
         test("xax", &["xabx", "xbx"])?;
+        test("ۊaۊ", &["ۊabۊ", "ۊbۊ"])?;
 
         Ok(())
     }