From 997d98d75fcf5debfb18e9c77a585a8b6774f081 Mon Sep 17 00:00:00 2001 From: Adam Harvey Date: Thu, 11 Jan 2024 11:23:57 -0800 Subject: [PATCH] typosquat: check for prefixes being manipulated like suffixes In #7571, we added checks for crate names that added or removed suffixes from popular crates. This has turned out to be a useful check! (Spoiler alert for the blog post I'm publishing next week.) @Turbo87 pointed out that this can also apply to prefixes, especially `cargo-`. This generalises the suffix check to also check prefixes, and adjusts the typomania configuration to add `cargo` to the list of interesting affixes. For now, the same set of affixes will be used for both, but depending on what we see, a future tweak would be to separate the prefix and suffix lists. Let's see how that pans out. In terms of implementation, I briefly toyed with making this generic over the prefix/suffix combination to remove the copy/paste code, then was reminded by rust-analyzer that `std::str::pattern::Pattern` isn't stable. I'd rather duplicate 20 lines than deal with that, so here we are. 
--- src/typosquat/cache.rs | 6 ++--- src/typosquat/checks.rs | 57 +++++++++++++++++++++++++++++------------ src/typosquat/config.rs | 2 +- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/src/typosquat/cache.rs b/src/typosquat/cache.rs index 0cf6ee092b7..f1617f2451c 100644 --- a/src/typosquat/cache.rs +++ b/src/typosquat/cache.rs @@ -7,7 +7,7 @@ use typomania::{ Harness, }; -use super::{checks::Suffixes, config, database::TopCrates}; +use super::{checks::Affixes, config, database::TopCrates}; static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS"; @@ -72,9 +72,9 @@ impl Cache { .with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| { (*c, typos.iter().map(|ss| ss.to_string()).collect()) }))) - .with_check(Suffixes::new( - config::SUFFIX_SEPARATORS.iter(), + .with_check(Affixes::new( config::SUFFIXES.iter(), + config::SUFFIX_SEPARATORS.iter(), )) .build(top), ), diff --git a/src/typosquat/checks.rs b/src/typosquat/checks.rs index a69b9f49490..c5b8cefa97f 100644 --- a/src/typosquat/checks.rs +++ b/src/typosquat/checks.rs @@ -3,28 +3,29 @@ use typomania::{ Corpus, Package, }; -/// A typomania check that checks if commonly used suffixes have been added or removed. -pub struct Suffixes { +/// A typomania check that checks if commonly used prefixes or suffixes have been added to or +/// removed from a package name. 
+pub struct Affixes { + affixes: Vec<String>, separators: Vec<String>, - suffixes: Vec<String>, } -impl Suffixes { - pub fn new<Sep, Suf>(separators: Sep, suffixes: Suf) -> Self +impl Affixes { + pub fn new<Aff, Sep>(affixes: Aff, separators: Sep) -> Self where + Aff: Iterator, + Aff::Item: ToString, Sep: Iterator, Sep::Item: ToString, - Suf: Iterator, - Suf::Item: ToString, { Self { + affixes: affixes.map(|s| s.to_string()).collect(), separators: separators.map(|s| s.to_string()).collect(), - suffixes: suffixes.map(|s| s.to_string()).collect(), } } } -impl Check for Suffixes { +impl Check for Affixes { fn check( &self, corpus: &dyn Corpus, @@ -34,11 +35,32 @@ impl Check for Suffixes { let mut squats = Vec::new(); for separator in self.separators.iter() { - for suffix in self.suffixes.iter() { - let combo = format!("{separator}{suffix}"); + for affix in self.affixes.iter() { + // If the package being examined starts with this prefix and separator combo, then + // we should see if it exists without that prefix in the popular crate corpus. + let combo = format!("{affix}{separator}"); + if let Some(stem) = name.strip_prefix(&combo) { + if corpus.possible_squat(stem, name, package)? { + squats.push(Squat::Custom { + message: format!("adds the {combo} prefix"), + package: stem.to_string(), + }) + } + } + + // Alternatively, let's see if adding the prefix and separator combo to the package + // results in something popular; eg somebody trying to squat `rs-foo` with `foo`. + let prefixed = format!("{combo}{name}"); + if corpus.possible_squat(&prefixed, name, package)? { + squats.push(Squat::Custom { + message: format!("removes the {combo} prefix"), + package: prefixed, + }); + } // If the package being examined ends in this separator and suffix combo, then we - // should see if it exists in the popular crate corpus. 
+ let combo = format!("{separator}{affix}"); if let Some(stem) = name.strip_suffix(&combo) { if corpus.possible_squat(stem, name, package)? { squats.push(Squat::Custom { @@ -74,16 +96,17 @@ mod tests { use super::*; #[test] - fn test_suffixes() -> anyhow::Result<()> { + fn test_affixes() -> anyhow::Result<()> { let popular = TestCorpus::default() .with_package(TestPackage::new("foo", "foo", ["Alice", "Bob"])) .with_package(TestPackage::new("bar-rs", "Rust bar", ["Charlie"])) - .with_package(TestPackage::new("quux_sys", "libquux", ["Alice"])); + .with_package(TestPackage::new("quux_sys", "libquux", ["Alice"])) + .with_package(TestPackage::new("core-xyz", "Core xyz", ["Alice"])); let harness = Harness::empty_builder() - .with_check(Suffixes::new( - ["-", "_"].iter(), + .with_check(Affixes::new( ["core", "rs", "sys"].iter(), + ["-", "_"].iter(), )) .build(popular); @@ -103,8 +126,10 @@ mod tests { // Now try some packages that should be. for package in [ TestPackage::new("foo-rs", "no shared author", ["Charlie"]), + TestPackage::new("rs-foo", "no shared author", ["Charlie"]), TestPackage::new("quux", "libquux", ["Charlie"]), TestPackage::new("quux_sys_rs", "libquux... for Rust?", ["Charlie"]), + TestPackage::new("xyz", "unprefixed core-xyz", ["Charlie"]), ] .into_iter() { diff --git a/src/typosquat/config.rs b/src/typosquat/config.rs index 6df9541ce02..d351175eba6 100644 --- a/src/typosquat/config.rs +++ b/src/typosquat/config.rs @@ -9,7 +9,7 @@ pub(super) static CRATE_NAME_ALPHABET: &str = pub(super) static SUFFIX_SEPARATORS: &[&str] = &["-", "_"]; /// Commonly used suffixes when building crate names. -pub(super) static SUFFIXES: &[&str] = &["api", "cli", "core", "lib", "rs", "rust", "sys"]; +pub(super) static SUFFIXES: &[&str] = &["api", "cargo", "cli", "core", "lib", "rs", "rust", "sys"]; /// The number of crates to consider in the "top crates" corpus. pub(super) static TOP_CRATES: i64 = 3000;