Skip to content

Commit 690e648

Browse files
committed
test(crawler/python): 14 integration tests for find_python_dirs + venv + metadata
New `crawler_python_e2e.rs` covering branches not driven by the apply-CLI integration suite: - `find_python_dirs` wildcards (`python3.*`, `*`, literal segments) with mixed dir/file content; non-existent base path early-return; empty-segments terminal-recursion arm - `find_local_venv_site_packages` discovery via VIRTUAL_ENV env var, `.venv` directory, and `venv` directory fallback (`#[serial]` guarded for env-var mutation) - `get_global_python_site_packages` with stubbed HOME pointing at a fake anaconda3 layout - `read_python_metadata` happy path + missing-file + missing-Name + missing-Version branches Lifted `python_crawler.rs` integration-test regions from 86.3% to 90.8%. Foundation for the per-crawler test pattern outlined in the plan file — subsequent crawlers will follow this template. Assisted-by: Claude Code:claude-opus-4-7
1 parent d01478f commit 690e648

1 file changed

Lines changed: 291 additions & 0 deletions

File tree

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
//! Integration coverage for `crawlers::python_crawler` paths the
2+
//! apply-CLI suite doesn't drive. Specifically:
3+
//!
4+
//! - `find_python_dirs` wildcard segments (`python3.*` and `*`)
5+
//! - `find_python_dirs` recursive descent with intermediate
6+
//! non-directory entries
7+
//! - `find_local_venv_site_packages` with VIRTUAL_ENV env var
8+
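//! - `get_global_python_site_packages` with stubbed HOME
//! - `read_python_metadata` happy path plus the missing-file,
//!   missing-Name and missing-Version branches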
9+
//!
10+
//! Built around `tempfile::tempdir()` + serial env-var mutation
11+
//! (via `serial_test::serial`) so tests can rebind HOME / VIRTUAL_ENV
12+
//! without racing each other.
13+
14+
use std::path::Path;
15+
16+
use serial_test::serial;
17+
use socket_patch_core::crawlers::python_crawler::{
18+
find_local_venv_site_packages, find_python_dirs, get_global_python_site_packages,
19+
read_python_metadata,
20+
};
21+
22+
/// Helper: stage a fake `python3.X/lib/python3.X/site-packages` tree
23+
/// under `root` so `find_python_dirs(root, ["python3.*", "lib",
24+
/// "python3.*", "site-packages"])` returns it.
25+
async fn stage_python_layout(root: &Path, py_ver: &str) -> std::path::PathBuf {
26+
let sp = root
27+
.join(format!("python{py_ver}"))
28+
.join("lib")
29+
.join(format!("python{py_ver}"))
30+
.join("site-packages");
31+
tokio::fs::create_dir_all(&sp).await.unwrap();
32+
sp
33+
}
34+
35+
// ── find_python_dirs wildcards ─────────────────────────────────
36+
37+
/// `python3.*` wildcard matches directories whose name starts with
38+
/// `python3.`. Covers the wildcard arm + the `name.starts_with`
39+
/// filter.
40+
#[tokio::test]
41+
async fn find_python_dirs_python3_wildcard_matches_versions() {
42+
let tmp = tempfile::tempdir().unwrap();
43+
let p1 = stage_python_layout(tmp.path(), "3.11").await;
44+
let _p2 = stage_python_layout(tmp.path(), "3.12").await;
45+
// Also create a non-matching subdir that should be filtered out.
46+
tokio::fs::create_dir_all(tmp.path().join("python2.7").join("lib"))
47+
.await
48+
.unwrap();
49+
50+
let result =
51+
find_python_dirs(tmp.path(), &["python3.*", "lib", "python3.*", "site-packages"]).await;
52+
assert!(
53+
result.iter().any(|r| r == &p1),
54+
"must find python3.11 layout; got {result:?}"
55+
);
56+
assert_eq!(result.len(), 2, "must find exactly python3.11 + python3.12");
57+
}
58+
59+
/// `*` generic wildcard matches every directory entry. Covers the
60+
/// generic wildcard branch (L142-L160 of python_crawler.rs).
61+
#[tokio::test]
62+
async fn find_python_dirs_star_wildcard_matches_all() {
63+
let tmp = tempfile::tempdir().unwrap();
64+
tokio::fs::create_dir_all(tmp.path().join("pkg_a").join("lib").join("python3.11").join("site-packages"))
65+
.await
66+
.unwrap();
67+
tokio::fs::create_dir_all(tmp.path().join("pkg_b").join("lib").join("python3.11").join("site-packages"))
68+
.await
69+
.unwrap();
70+
71+
let result =
72+
find_python_dirs(tmp.path(), &["*", "lib", "python3.*", "site-packages"]).await;
73+
assert_eq!(result.len(), 2, "* must match both pkg_a and pkg_b");
74+
}
75+
76+
/// `*` wildcard skips non-directory entries (regular files). Covers
77+
/// the `if !ft.is_dir() { continue; }` arm.
78+
#[tokio::test]
79+
async fn find_python_dirs_star_wildcard_skips_files() {
80+
let tmp = tempfile::tempdir().unwrap();
81+
// A regular file at the wildcard position must NOT cause issues.
82+
tokio::fs::write(tmp.path().join("not_a_dir.txt"), b"x").await.unwrap();
83+
// And one real match.
84+
tokio::fs::create_dir_all(tmp.path().join("real").join("lib").join("python3.11").join("site-packages"))
85+
.await
86+
.unwrap();
87+
88+
let result =
89+
find_python_dirs(tmp.path(), &["*", "lib", "python3.*", "site-packages"]).await;
90+
assert_eq!(result.len(), 1, "regular file must be skipped");
91+
}
92+
93+
/// `find_python_dirs` against a non-existent base path returns empty
94+
/// — the early-return arm.
95+
#[tokio::test]
96+
async fn find_python_dirs_nonexistent_base_returns_empty() {
97+
let tmp = tempfile::tempdir().unwrap();
98+
let absent = tmp.path().join("does-not-exist");
99+
let result = find_python_dirs(&absent, &["python3.*", "site-packages"]).await;
100+
assert!(result.is_empty());
101+
}
102+
103+
/// `find_python_dirs` with empty segments returns the base path
104+
/// itself (terminal-recursion arm).
105+
#[tokio::test]
106+
async fn find_python_dirs_empty_segments_returns_base() {
107+
let tmp = tempfile::tempdir().unwrap();
108+
let result = find_python_dirs(tmp.path(), &[]).await;
109+
assert_eq!(result.len(), 1);
110+
assert_eq!(result[0], tmp.path());
111+
}
112+
113+
/// Literal segment branch: non-wildcard segment is treated as a
114+
/// literal subdir.
115+
#[tokio::test]
116+
async fn find_python_dirs_literal_segment_descends() {
117+
let tmp = tempfile::tempdir().unwrap();
118+
let target = tmp.path().join("literal_subdir").join("more");
119+
tokio::fs::create_dir_all(&target).await.unwrap();
120+
121+
let result = find_python_dirs(tmp.path(), &["literal_subdir", "more"]).await;
122+
assert_eq!(result.len(), 1);
123+
assert_eq!(result[0], target);
124+
}
125+
126+
// ── find_local_venv_site_packages ──────────────────────────────
127+
128+
/// VIRTUAL_ENV env var pointing at a real venv layout adds it to
129+
/// the discovered list. Covers the first arm of
130+
/// find_local_venv_site_packages.
131+
#[tokio::test]
132+
#[serial]
133+
async fn find_local_venv_site_packages_honors_virtual_env_var() {
134+
let tmp = tempfile::tempdir().unwrap();
135+
let venv = tmp.path().join("custom-venv");
136+
let sp = venv.join("lib").join("python3.11").join("site-packages");
137+
tokio::fs::create_dir_all(&sp).await.unwrap();
138+
139+
let prev = std::env::var("VIRTUAL_ENV").ok();
140+
std::env::set_var("VIRTUAL_ENV", &venv);
141+
let result = find_local_venv_site_packages(tmp.path()).await;
142+
std::env::remove_var("VIRTUAL_ENV");
143+
if let Some(v) = prev {
144+
std::env::set_var("VIRTUAL_ENV", v);
145+
}
146+
147+
assert!(
148+
result.iter().any(|p| p == &sp),
149+
"VIRTUAL_ENV path must surface; got {result:?}"
150+
);
151+
}
152+
153+
/// `.venv` directory in cwd is discovered when VIRTUAL_ENV is unset.
154+
#[tokio::test]
155+
#[serial]
156+
async fn find_local_venv_site_packages_discovers_dot_venv() {
157+
let tmp = tempfile::tempdir().unwrap();
158+
let sp = tmp.path().join(".venv").join("lib").join("python3.11").join("site-packages");
159+
tokio::fs::create_dir_all(&sp).await.unwrap();
160+
161+
let prev = std::env::var("VIRTUAL_ENV").ok();
162+
std::env::remove_var("VIRTUAL_ENV");
163+
let result = find_local_venv_site_packages(tmp.path()).await;
164+
if let Some(v) = prev {
165+
std::env::set_var("VIRTUAL_ENV", v);
166+
}
167+
assert!(
168+
result.iter().any(|p| p == &sp),
169+
".venv must be discovered; got {result:?}"
170+
);
171+
}
172+
173+
/// `venv` directory in cwd is discovered when neither VIRTUAL_ENV
174+
/// nor .venv exists.
175+
#[tokio::test]
176+
#[serial]
177+
async fn find_local_venv_site_packages_discovers_venv_dir() {
178+
let tmp = tempfile::tempdir().unwrap();
179+
let sp = tmp.path().join("venv").join("lib").join("python3.11").join("site-packages");
180+
tokio::fs::create_dir_all(&sp).await.unwrap();
181+
182+
let prev = std::env::var("VIRTUAL_ENV").ok();
183+
std::env::remove_var("VIRTUAL_ENV");
184+
let result = find_local_venv_site_packages(tmp.path()).await;
185+
if let Some(v) = prev {
186+
std::env::set_var("VIRTUAL_ENV", v);
187+
}
188+
assert!(
189+
result.iter().any(|p| p == &sp),
190+
"venv must be discovered; got {result:?}"
191+
);
192+
}
193+
194+
// ── get_global_python_site_packages ─────────────────────────────
195+
196+
/// With HOME stubbed to a tempdir containing a fake anaconda3 layout,
197+
/// the global discovery includes the anaconda site-packages.
198+
#[tokio::test]
199+
#[serial]
200+
async fn get_global_python_site_packages_discovers_anaconda() {
201+
let tmp = tempfile::tempdir().unwrap();
202+
let anaconda_sp = tmp
203+
.path()
204+
.join("anaconda3")
205+
.join("lib")
206+
.join("python3.11")
207+
.join("site-packages");
208+
tokio::fs::create_dir_all(&anaconda_sp).await.unwrap();
209+
210+
let prev_home = std::env::var("HOME").ok();
211+
std::env::set_var("HOME", tmp.path());
212+
let result = get_global_python_site_packages().await;
213+
if let Some(v) = prev_home {
214+
std::env::set_var("HOME", v);
215+
}
216+
// Anaconda must surface; other production paths may also surface
217+
// since they're scanned unconditionally. The check is "at least
218+
// the staged path is in the result."
219+
assert!(
220+
result.iter().any(|p| p == &anaconda_sp),
221+
"staged anaconda path must surface; got {result:?}"
222+
);
223+
}
224+
225+
// ── read_python_metadata ───────────────────────────────────────
226+
227+
/// Well-formed METADATA returns (name, version).
228+
#[tokio::test]
229+
async fn read_python_metadata_well_formed() {
230+
let tmp = tempfile::tempdir().unwrap();
231+
let dist_info = tmp.path().join("requests-2.28.0.dist-info");
232+
tokio::fs::create_dir(&dist_info).await.unwrap();
233+
tokio::fs::write(
234+
dist_info.join("METADATA"),
235+
"Metadata-Version: 2.1\nName: requests\nVersion: 2.28.0\n",
236+
)
237+
.await
238+
.unwrap();
239+
240+
let result = read_python_metadata(&dist_info).await;
241+
assert_eq!(
242+
result,
243+
Some(("requests".to_string(), "2.28.0".to_string()))
244+
);
245+
}
246+
247+
/// Missing METADATA file → None.
248+
#[tokio::test]
249+
async fn read_python_metadata_missing_file_returns_none() {
250+
let tmp = tempfile::tempdir().unwrap();
251+
let dist_info = tmp.path().join("requests-2.28.0.dist-info");
252+
tokio::fs::create_dir(&dist_info).await.unwrap();
253+
// No METADATA file.
254+
255+
let result = read_python_metadata(&dist_info).await;
256+
assert_eq!(result, None);
257+
}
258+
259+
/// METADATA missing Name field → None.
260+
#[tokio::test]
261+
async fn read_python_metadata_missing_name_returns_none() {
262+
let tmp = tempfile::tempdir().unwrap();
263+
let dist_info = tmp.path().join("requests-2.28.0.dist-info");
264+
tokio::fs::create_dir(&dist_info).await.unwrap();
265+
tokio::fs::write(
266+
dist_info.join("METADATA"),
267+
"Metadata-Version: 2.1\nVersion: 2.28.0\n",
268+
)
269+
.await
270+
.unwrap();
271+
272+
let result = read_python_metadata(&dist_info).await;
273+
assert_eq!(result, None);
274+
}
275+
276+
/// METADATA missing Version field → None.
277+
#[tokio::test]
278+
async fn read_python_metadata_missing_version_returns_none() {
279+
let tmp = tempfile::tempdir().unwrap();
280+
let dist_info = tmp.path().join("requests-2.28.0.dist-info");
281+
tokio::fs::create_dir(&dist_info).await.unwrap();
282+
tokio::fs::write(
283+
dist_info.join("METADATA"),
284+
"Metadata-Version: 2.1\nName: requests\n",
285+
)
286+
.await
287+
.unwrap();
288+
289+
let result = read_python_metadata(&dist_info).await;
290+
assert_eq!(result, None);
291+
}

0 commit comments

Comments
 (0)