@@ -18,6 +18,8 @@ use socket_patch_core::crawlers::python_crawler::{
1818 find_local_venv_site_packages, find_python_dirs, get_global_python_site_packages,
1919 read_python_metadata,
2020} ;
21+ use socket_patch_core:: crawlers:: types:: CrawlerOptions ;
22+ use socket_patch_core:: crawlers:: PythonCrawler ;
2123
2224/// Helper: stage a fake `python3.X/lib/python3.X/site-packages` tree
2325/// under `root` so `find_python_dirs(root, ["python3.*", "lib",
@@ -273,6 +275,192 @@ async fn read_python_metadata_missing_name_returns_none() {
273275 assert_eq ! ( result, None ) ;
274276}
275277
/// Smoke test: a `PythonCrawler` can be obtained through both
/// `Default::default()` and `new()` without panicking.
#[test]
fn python_crawler_default_and_new_construct_cleanly() {
    // The values are intentionally unused; only construction is exercised.
    let _via_default = PythonCrawler::default();
    let _via_new = PythonCrawler::new();
}
284+
285+ // ── find_by_purls + crawl_all over a staged site-packages ─────
286+
287+ /// Helper: stage a well-formed `<pkg>-<version>.dist-info/METADATA`
288+ /// inside a fake site-packages directory.
289+ async fn stage_dist_info ( site_packages : & Path , raw_name : & str , version : & str ) {
290+ let dist = site_packages. join ( format ! ( "{raw_name}-{version}.dist-info" ) ) ;
291+ tokio:: fs:: create_dir_all ( & dist) . await . unwrap ( ) ;
292+ let metadata = format ! ( "Metadata-Version: 2.1\n Name: {raw_name}\n Version: {version}\n " ) ;
293+ tokio:: fs:: write ( dist. join ( "METADATA" ) , metadata) . await . unwrap ( ) ;
294+ }
295+
296+ #[ tokio:: test]
297+ async fn find_by_purls_matches_canonicalized_name ( ) {
298+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
299+ // PEP 503 canonicalization: "Requests" -> "requests"
300+ stage_dist_info ( tmp. path ( ) , "Requests" , "2.28.0" ) . await ;
301+
302+ let crawler = PythonCrawler ;
303+ let result = crawler
304+ . find_by_purls ( tmp. path ( ) , & [ "pkg:pypi/requests@2.28.0" . to_string ( ) ] )
305+ . await
306+ . unwrap ( ) ;
307+ assert_eq ! ( result. len( ) , 1 , "canonical lookup must hit" ) ;
308+ }
309+
310+ #[ tokio:: test]
311+ async fn find_by_purls_strips_qualifiers ( ) {
312+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
313+ stage_dist_info ( tmp. path ( ) , "requests" , "2.28.0" ) . await ;
314+
315+ let crawler = PythonCrawler ;
316+ let result = crawler
317+ . find_by_purls (
318+ tmp. path ( ) ,
319+ & [ "pkg:pypi/requests@2.28.0?extension=tar.gz" . to_string ( ) ] ,
320+ )
321+ . await
322+ . unwrap ( ) ;
323+ assert_eq ! ( result. len( ) , 1 , "qualifiers must be stripped before lookup" ) ;
324+ }
325+
326+ #[ tokio:: test]
327+ async fn find_by_purls_empty_purls_returns_empty ( ) {
328+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
329+ stage_dist_info ( tmp. path ( ) , "requests" , "2.28.0" ) . await ;
330+
331+ let crawler = PythonCrawler ;
332+ let result = crawler. find_by_purls ( tmp. path ( ) , & [ ] ) . await . unwrap ( ) ;
333+ assert ! ( result. is_empty( ) ) ;
334+ }
335+
336+ #[ tokio:: test]
337+ async fn find_by_purls_missing_site_packages_returns_empty ( ) {
338+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
339+ let crawler = PythonCrawler ;
340+ // site_packages_path doesn't exist — read_dir Err arm must yield empty.
341+ let result = crawler
342+ . find_by_purls (
343+ & tmp. path ( ) . join ( "no-such-dir" ) ,
344+ & [ "pkg:pypi/requests@2.28.0" . to_string ( ) ] ,
345+ )
346+ . await
347+ . unwrap ( ) ;
348+ assert ! ( result. is_empty( ) ) ;
349+ }
350+
351+ #[ tokio:: test]
352+ async fn find_by_purls_invalid_purl_skipped ( ) {
353+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
354+ stage_dist_info ( tmp. path ( ) , "requests" , "2.28.0" ) . await ;
355+
356+ let crawler = PythonCrawler ;
357+ let result = crawler
358+ . find_by_purls ( tmp. path ( ) , & [ "pkg:not-pypi/foo@1.0" . to_string ( ) ] )
359+ . await
360+ . unwrap ( ) ;
361+ assert ! ( result. is_empty( ) ) ;
362+ }
363+
364+ #[ tokio:: test]
365+ async fn find_by_purls_version_mismatch_returns_empty ( ) {
366+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
367+ stage_dist_info ( tmp. path ( ) , "requests" , "2.28.0" ) . await ;
368+
369+ let crawler = PythonCrawler ;
370+ let result = crawler
371+ . find_by_purls ( tmp. path ( ) , & [ "pkg:pypi/requests@99.99.99" . to_string ( ) ] )
372+ . await
373+ . unwrap ( ) ;
374+ assert ! ( result. is_empty( ) ) ;
375+ }
376+
377+ #[ tokio:: test]
378+ async fn crawl_all_via_site_packages_finds_dist_info_packages ( ) {
379+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
380+ stage_dist_info ( tmp. path ( ) , "Requests" , "2.28.0" ) . await ;
381+ stage_dist_info ( tmp. path ( ) , "urllib3" , "2.0.0" ) . await ;
382+ // A non-dist-info dir should be skipped.
383+ tokio:: fs:: create_dir_all ( tmp. path ( ) . join ( "ignore-me" ) ) . await . unwrap ( ) ;
384+
385+ let crawler = PythonCrawler ;
386+ let opts = CrawlerOptions {
387+ cwd : tmp. path ( ) . to_path_buf ( ) ,
388+ global : true ,
389+ global_prefix : Some ( tmp. path ( ) . to_path_buf ( ) ) ,
390+ batch_size : 100 ,
391+ } ;
392+ let result = crawler. crawl_all ( & opts) . await ;
393+ let names: Vec < & str > = result. iter ( ) . map ( |p| p. name . as_str ( ) ) . collect ( ) ;
394+ assert ! ( names. contains( & "requests" ) ) ;
395+ assert ! ( names. contains( & "urllib3" ) ) ;
396+ assert_eq ! ( result. len( ) , 2 ) ;
397+ }
398+
399+ #[ tokio:: test]
400+ async fn crawl_all_with_corrupt_metadata_skips ( ) {
401+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
402+ let dist = tmp. path ( ) . join ( "broken-1.0.0.dist-info" ) ;
403+ tokio:: fs:: create_dir_all ( & dist) . await . unwrap ( ) ;
404+ // Empty METADATA — read_python_metadata returns None.
405+ tokio:: fs:: write ( dist. join ( "METADATA" ) , b"" ) . await . unwrap ( ) ;
406+
407+ let crawler = PythonCrawler ;
408+ let opts = CrawlerOptions {
409+ cwd : tmp. path ( ) . to_path_buf ( ) ,
410+ global : true ,
411+ global_prefix : Some ( tmp. path ( ) . to_path_buf ( ) ) ,
412+ batch_size : 100 ,
413+ } ;
414+ let result = crawler. crawl_all ( & opts) . await ;
415+ assert ! ( result. is_empty( ) , "broken METADATA must be skipped" ) ;
416+ }
417+
418+ /// `get_site_packages_paths` with `global_prefix` set returns just that
419+ /// prefix — exercises the early-return arm at python_crawler.rs:473-474.
420+ #[ tokio:: test]
421+ async fn get_site_packages_paths_with_global_prefix_passthrough ( ) {
422+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
423+ let custom = tmp. path ( ) . join ( "custom-sp" ) ;
424+ tokio:: fs:: create_dir_all ( & custom) . await . unwrap ( ) ;
425+
426+ let crawler = PythonCrawler ;
427+ let opts = CrawlerOptions {
428+ cwd : tmp. path ( ) . to_path_buf ( ) ,
429+ global : false ,
430+ global_prefix : Some ( custom. clone ( ) ) ,
431+ batch_size : 100 ,
432+ } ;
433+ let paths = crawler. get_site_packages_paths ( & opts) . await . unwrap ( ) ;
434+ assert_eq ! ( paths, vec![ custom] ) ;
435+ }
436+
437+ // ── METADATA early-break arm ───────────────────────────────────
438+
439+ /// METADATA with extra header lines AFTER the blank line should NOT be
440+ /// parsed — the parser must stop at the first blank line after
441+ /// collecting name+version. Covers `python_crawler.rs:80-81`.
442+ #[ tokio:: test]
443+ async fn read_python_metadata_stops_at_blank_line_after_headers ( ) {
444+ let tmp = tempfile:: tempdir ( ) . unwrap ( ) ;
445+ let dist = tmp. path ( ) . join ( "requests-2.28.0.dist-info" ) ;
446+ tokio:: fs:: create_dir ( & dist) . await . unwrap ( ) ;
447+ // Headers block, then blank line, then garbage that would otherwise
448+ // (re-)set Name to something else — the parser must NOT pick it up.
449+ tokio:: fs:: write (
450+ dist. join ( "METADATA" ) ,
451+ "Name: requests\n Version: 2.28.0\n \n Name: would-be-overwritten\n Version: 9.9.9\n " ,
452+ )
453+ . await
454+ . unwrap ( ) ;
455+
456+ let result = read_python_metadata ( & dist) . await ;
457+ assert_eq ! (
458+ result,
459+ Some ( ( "requests" . to_string( ) , "2.28.0" . to_string( ) ) ) ,
460+ "parser must stop at first blank line; got {result:?}"
461+ ) ;
462+ }
463+
276464/// METADATA missing Version field → None.
277465#[ tokio:: test]
278466async fn read_python_metadata_missing_version_returns_none ( ) {
0 commit comments