Skip to content

Commit 97513c9

Browse files
committed
test(crawlers): python + cargo coverage
* python: `PythonCrawler` `Default`, `find_by_purls` canonicalized-name match, qualifier stripping, empty/missing/mismatched purls, `crawl_all` over staged .dist-info dirs (well-formed + corrupt METADATA), `global_prefix` passthrough, and the METADATA early-break arm at the first blank line after headers.
* cargo: `parse_cargo_toml_name_version` `version.workspace` bail-out test, `verify_crate_at_path` dir-name fallback rejection on name mismatch, hidden-dir skip in `scan_crate_source`, dedup on identical purls across distinct directories, and local-mode fallback through `get_registry_src_paths` with CARGO_HOME stubbed (both with and without a staged registry/src tree).

Assisted-by: Claude Code:claude-opus-4-7
1 parent 0f3c39b commit 97513c9

2 files changed

Lines changed: 337 additions & 0 deletions

File tree

crates/socket-patch-core/tests/crawler_cargo_e2e.rs

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,3 +361,152 @@ async fn find_by_purls_verify_fallback_via_dir_name() {
361361
.unwrap();
362362
assert_eq!(result.len(), 1, "verify must fall back to dir name");
363363
}
364+
365+
/// A top-level `[package]` table declaring `version.workspace = true`
/// carries no concrete version in this file, so
/// `parse_cargo_toml_name_version` must bail and return `None` up front
/// (the bail-out noted at lines 49-52 of the parser). `find_by_purls` is
/// then left to fall back to dir-name parsing.
#[test]
fn parse_cargo_toml_version_workspace_returns_none() {
    let manifest = "[package]\nname = \"foo\"\nversion.workspace = true\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert_eq!(parsed, None);
}
375+
376+
/// `verify_crate_at_path` with a dir-name-only match (workspace
377+
/// version) but a mismatched purl name — must return false. Exercises
378+
/// the `parsed_name == name && parsed_version == version` false arm
379+
/// (cargo_crawler.rs:344-346).
380+
#[tokio::test]
381+
async fn find_by_purls_verify_fallback_dir_name_mismatch_returns_empty() {
382+
let tmp = tempfile::tempdir().unwrap();
383+
let pkg = tmp.path().join("real-crate-1.0.0");
384+
tokio::fs::create_dir(&pkg).await.unwrap();
385+
tokio::fs::write(
386+
pkg.join("Cargo.toml"),
387+
"[package]\nname = \"real-crate\"\nversion.workspace = true\n",
388+
)
389+
.await
390+
.unwrap();
391+
392+
let crawler = CargoCrawler;
393+
// Ask for a name that doesn't match the dir layout.
394+
let result = crawler
395+
.find_by_purls(tmp.path(), &["pkg:cargo/other-crate@1.0.0".to_string()])
396+
.await
397+
.unwrap();
398+
assert!(result.is_empty(), "dir-name mismatch must reject");
399+
}
400+
401+
/// Hidden directory entries inside the crate source root must be
402+
/// skipped by `scan_crate_source` (line 274).
403+
#[tokio::test]
404+
async fn crawl_all_skips_hidden_dirs() {
405+
let tmp = tempfile::tempdir().unwrap();
406+
// Stage a hidden dir that looks like a registry crate — must be skipped.
407+
let hidden = tmp.path().join(".hidden-crate-1.0.0");
408+
tokio::fs::create_dir(&hidden).await.unwrap();
409+
tokio::fs::write(
410+
hidden.join("Cargo.toml"),
411+
"[package]\nname = \"hidden-crate\"\nversion = \"1.0.0\"\n",
412+
)
413+
.await
414+
.unwrap();
415+
// Also stage a real one to confirm the scan actually runs.
416+
stage_registry_crate(tmp.path(), "real-crate", "1.0.0").await;
417+
418+
let crawler = CargoCrawler;
419+
let opts = CrawlerOptions {
420+
cwd: tmp.path().to_path_buf(),
421+
global: true,
422+
global_prefix: Some(tmp.path().to_path_buf()),
423+
batch_size: 100,
424+
};
425+
let result = crawler.crawl_all(&opts).await;
426+
let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect();
427+
assert!(names.contains(&"real-crate"));
428+
assert!(!names.contains(&"hidden-crate"), "hidden dir must be skipped");
429+
}
430+
431+
/// `read_crate_cargo_toml` early-returns when the purl has already
432+
/// been recorded in `seen` (line 310-311). Drive this by staging two
433+
/// registry dirs for the same crate — the second one is deduped.
434+
#[tokio::test]
435+
async fn crawl_all_dedups_same_purl() {
436+
let tmp = tempfile::tempdir().unwrap();
437+
// Two physical dirs with identical Cargo.toml -> same purl.
438+
stage_registry_crate(tmp.path(), "foo", "1.0.0").await;
439+
let dup = tmp.path().join("dup-mirror");
440+
tokio::fs::create_dir(&dup).await.unwrap();
441+
tokio::fs::write(
442+
dup.join("Cargo.toml"),
443+
"[package]\nname = \"foo\"\nversion = \"1.0.0\"\n",
444+
)
445+
.await
446+
.unwrap();
447+
448+
let crawler = CargoCrawler;
449+
let opts = CrawlerOptions {
450+
cwd: tmp.path().to_path_buf(),
451+
global: true,
452+
global_prefix: Some(tmp.path().to_path_buf()),
453+
batch_size: 100,
454+
};
455+
let result = crawler.crawl_all(&opts).await;
456+
assert_eq!(result.len(), 1, "duplicate purls must dedup; got {result:?}");
457+
}
458+
459+
/// `get_crate_source_paths` in local mode without a vendor dir but
460+
/// with a Cargo.toml falls through to `get_registry_src_paths`. With
461+
/// CARGO_HOME pointed at an empty tempdir, the registry/src subdir
462+
/// doesn't exist → returns empty. Covers line 130.
463+
#[tokio::test]
464+
#[serial_test::serial]
465+
async fn get_crate_source_paths_local_cargo_toml_falls_back_to_registry() {
466+
let tmp = tempfile::tempdir().unwrap();
467+
tokio::fs::write(tmp.path().join("Cargo.toml"), b"[package]\n").await.unwrap();
468+
// CARGO_HOME points at an empty tempdir → no registry/src to scan.
469+
let cargo_home = tempfile::tempdir().unwrap();
470+
let prev = std::env::var("CARGO_HOME").ok();
471+
std::env::set_var("CARGO_HOME", cargo_home.path());
472+
473+
let crawler = CargoCrawler;
474+
let paths = crawler.get_crate_source_paths(&options_at(tmp.path())).await.unwrap();
475+
476+
if let Some(v) = prev {
477+
std::env::set_var("CARGO_HOME", v);
478+
} else {
479+
std::env::remove_var("CARGO_HOME");
480+
}
481+
482+
assert!(
483+
paths.is_empty(),
484+
"missing registry/src must yield empty; got {paths:?}"
485+
);
486+
}
487+
488+
/// Same as above but with a registry/src tree staged — the discovered
489+
/// index dirs must surface. Covers lines 228-235 (entry walk).
490+
#[tokio::test]
491+
#[serial_test::serial]
492+
async fn get_crate_source_paths_local_cargo_toml_with_registry_src() {
493+
let tmp = tempfile::tempdir().unwrap();
494+
tokio::fs::write(tmp.path().join("Cargo.toml"), b"[package]\n").await.unwrap();
495+
let cargo_home = tempfile::tempdir().unwrap();
496+
let index_dir = cargo_home.path().join("registry").join("src").join("index.crates.io-stub");
497+
tokio::fs::create_dir_all(&index_dir).await.unwrap();
498+
499+
let prev = std::env::var("CARGO_HOME").ok();
500+
std::env::set_var("CARGO_HOME", cargo_home.path());
501+
502+
let crawler = CargoCrawler;
503+
let paths = crawler.get_crate_source_paths(&options_at(tmp.path())).await.unwrap();
504+
505+
if let Some(v) = prev {
506+
std::env::set_var("CARGO_HOME", v);
507+
} else {
508+
std::env::remove_var("CARGO_HOME");
509+
}
510+
511+
assert!(paths.iter().any(|p| p == &index_dir));
512+
}

crates/socket-patch-core/tests/crawler_python_e2e.rs

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ use socket_patch_core::crawlers::python_crawler::{
1818
find_local_venv_site_packages, find_python_dirs, get_global_python_site_packages,
1919
read_python_metadata,
2020
};
21+
use socket_patch_core::crawlers::types::CrawlerOptions;
22+
use socket_patch_core::crawlers::PythonCrawler;
2123

2224
/// Helper: stage a fake `python3.X/lib/python3.X/site-packages` tree
2325
/// under `root` so `find_python_dirs(root, ["python3.*", "lib",
@@ -273,6 +275,192 @@ async fn read_python_metadata_missing_name_returns_none() {
273275
assert_eq!(result, None);
274276
}
275277

278+
/// Smoke test: both `PythonCrawler::default()` and `PythonCrawler::new()`
/// can be called; `default()` is expected to forward to `new()`.
#[test]
fn python_crawler_default_and_new_construct_cleanly() {
    let _via_default = PythonCrawler::default();
    let _via_new = PythonCrawler::new();
}
284+
285+
// ── find_by_purls + crawl_all over a staged site-packages ─────
286+
287+
/// Helper: stage a well-formed `<pkg>-<version>.dist-info/METADATA`
288+
/// inside a fake site-packages directory.
289+
async fn stage_dist_info(site_packages: &Path, raw_name: &str, version: &str) {
290+
let dist = site_packages.join(format!("{raw_name}-{version}.dist-info"));
291+
tokio::fs::create_dir_all(&dist).await.unwrap();
292+
let metadata = format!("Metadata-Version: 2.1\nName: {raw_name}\nVersion: {version}\n");
293+
tokio::fs::write(dist.join("METADATA"), metadata).await.unwrap();
294+
}
295+
296+
#[tokio::test]
297+
async fn find_by_purls_matches_canonicalized_name() {
298+
let tmp = tempfile::tempdir().unwrap();
299+
// PEP 503 canonicalization: "Requests" -> "requests"
300+
stage_dist_info(tmp.path(), "Requests", "2.28.0").await;
301+
302+
let crawler = PythonCrawler;
303+
let result = crawler
304+
.find_by_purls(tmp.path(), &["pkg:pypi/requests@2.28.0".to_string()])
305+
.await
306+
.unwrap();
307+
assert_eq!(result.len(), 1, "canonical lookup must hit");
308+
}
309+
310+
#[tokio::test]
311+
async fn find_by_purls_strips_qualifiers() {
312+
let tmp = tempfile::tempdir().unwrap();
313+
stage_dist_info(tmp.path(), "requests", "2.28.0").await;
314+
315+
let crawler = PythonCrawler;
316+
let result = crawler
317+
.find_by_purls(
318+
tmp.path(),
319+
&["pkg:pypi/requests@2.28.0?extension=tar.gz".to_string()],
320+
)
321+
.await
322+
.unwrap();
323+
assert_eq!(result.len(), 1, "qualifiers must be stripped before lookup");
324+
}
325+
326+
#[tokio::test]
327+
async fn find_by_purls_empty_purls_returns_empty() {
328+
let tmp = tempfile::tempdir().unwrap();
329+
stage_dist_info(tmp.path(), "requests", "2.28.0").await;
330+
331+
let crawler = PythonCrawler;
332+
let result = crawler.find_by_purls(tmp.path(), &[]).await.unwrap();
333+
assert!(result.is_empty());
334+
}
335+
336+
#[tokio::test]
337+
async fn find_by_purls_missing_site_packages_returns_empty() {
338+
let tmp = tempfile::tempdir().unwrap();
339+
let crawler = PythonCrawler;
340+
// site_packages_path doesn't exist — read_dir Err arm must yield empty.
341+
let result = crawler
342+
.find_by_purls(
343+
&tmp.path().join("no-such-dir"),
344+
&["pkg:pypi/requests@2.28.0".to_string()],
345+
)
346+
.await
347+
.unwrap();
348+
assert!(result.is_empty());
349+
}
350+
351+
#[tokio::test]
352+
async fn find_by_purls_invalid_purl_skipped() {
353+
let tmp = tempfile::tempdir().unwrap();
354+
stage_dist_info(tmp.path(), "requests", "2.28.0").await;
355+
356+
let crawler = PythonCrawler;
357+
let result = crawler
358+
.find_by_purls(tmp.path(), &["pkg:not-pypi/foo@1.0".to_string()])
359+
.await
360+
.unwrap();
361+
assert!(result.is_empty());
362+
}
363+
364+
#[tokio::test]
365+
async fn find_by_purls_version_mismatch_returns_empty() {
366+
let tmp = tempfile::tempdir().unwrap();
367+
stage_dist_info(tmp.path(), "requests", "2.28.0").await;
368+
369+
let crawler = PythonCrawler;
370+
let result = crawler
371+
.find_by_purls(tmp.path(), &["pkg:pypi/requests@99.99.99".to_string()])
372+
.await
373+
.unwrap();
374+
assert!(result.is_empty());
375+
}
376+
377+
#[tokio::test]
378+
async fn crawl_all_via_site_packages_finds_dist_info_packages() {
379+
let tmp = tempfile::tempdir().unwrap();
380+
stage_dist_info(tmp.path(), "Requests", "2.28.0").await;
381+
stage_dist_info(tmp.path(), "urllib3", "2.0.0").await;
382+
// A non-dist-info dir should be skipped.
383+
tokio::fs::create_dir_all(tmp.path().join("ignore-me")).await.unwrap();
384+
385+
let crawler = PythonCrawler;
386+
let opts = CrawlerOptions {
387+
cwd: tmp.path().to_path_buf(),
388+
global: true,
389+
global_prefix: Some(tmp.path().to_path_buf()),
390+
batch_size: 100,
391+
};
392+
let result = crawler.crawl_all(&opts).await;
393+
let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect();
394+
assert!(names.contains(&"requests"));
395+
assert!(names.contains(&"urllib3"));
396+
assert_eq!(result.len(), 2);
397+
}
398+
399+
#[tokio::test]
400+
async fn crawl_all_with_corrupt_metadata_skips() {
401+
let tmp = tempfile::tempdir().unwrap();
402+
let dist = tmp.path().join("broken-1.0.0.dist-info");
403+
tokio::fs::create_dir_all(&dist).await.unwrap();
404+
// Empty METADATA — read_python_metadata returns None.
405+
tokio::fs::write(dist.join("METADATA"), b"").await.unwrap();
406+
407+
let crawler = PythonCrawler;
408+
let opts = CrawlerOptions {
409+
cwd: tmp.path().to_path_buf(),
410+
global: true,
411+
global_prefix: Some(tmp.path().to_path_buf()),
412+
batch_size: 100,
413+
};
414+
let result = crawler.crawl_all(&opts).await;
415+
assert!(result.is_empty(), "broken METADATA must be skipped");
416+
}
417+
418+
/// `get_site_packages_paths` with `global_prefix` set returns just that
419+
/// prefix — exercises the early-return arm at python_crawler.rs:473-474.
420+
#[tokio::test]
421+
async fn get_site_packages_paths_with_global_prefix_passthrough() {
422+
let tmp = tempfile::tempdir().unwrap();
423+
let custom = tmp.path().join("custom-sp");
424+
tokio::fs::create_dir_all(&custom).await.unwrap();
425+
426+
let crawler = PythonCrawler;
427+
let opts = CrawlerOptions {
428+
cwd: tmp.path().to_path_buf(),
429+
global: false,
430+
global_prefix: Some(custom.clone()),
431+
batch_size: 100,
432+
};
433+
let paths = crawler.get_site_packages_paths(&opts).await.unwrap();
434+
assert_eq!(paths, vec![custom]);
435+
}
436+
437+
// ── METADATA early-break arm ───────────────────────────────────
438+
439+
/// METADATA with extra header lines AFTER the blank line should NOT be
440+
/// parsed — the parser must stop at the first blank line after
441+
/// collecting name+version. Covers `python_crawler.rs:80-81`.
442+
#[tokio::test]
443+
async fn read_python_metadata_stops_at_blank_line_after_headers() {
444+
let tmp = tempfile::tempdir().unwrap();
445+
let dist = tmp.path().join("requests-2.28.0.dist-info");
446+
tokio::fs::create_dir(&dist).await.unwrap();
447+
// Headers block, then blank line, then garbage that would otherwise
448+
// (re-)set Name to something else — the parser must NOT pick it up.
449+
tokio::fs::write(
450+
dist.join("METADATA"),
451+
"Name: requests\nVersion: 2.28.0\n\nName: would-be-overwritten\nVersion: 9.9.9\n",
452+
)
453+
.await
454+
.unwrap();
455+
456+
let result = read_python_metadata(&dist).await;
457+
assert_eq!(
458+
result,
459+
Some(("requests".to_string(), "2.28.0".to_string())),
460+
"parser must stop at first blank line; got {result:?}"
461+
);
462+
}
463+
276464
/// METADATA missing Version field → None.
277465
#[tokio::test]
278466
async fn read_python_metadata_missing_version_returns_none() {

0 commit comments

Comments
 (0)