Skip to content

Commit 30e36bc

Browse files
authored
Merge pull request #198 from Peter554/parallel-import-scanning
Parallel import scanning (python)
2 parents 23a1e85 + ab2b66a commit 30e36bc

3 files changed

Lines changed: 106 additions & 20 deletions

File tree

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
Changelog
33
=========
44

5+
Unreleased
6+
----------
7+
8+
* Accelerate import scanning via CPU parallelism (multiprocessing).
9+
510
3.7.1 (2025-03-12)
611
------------------
712

src/grimp/application/usecases.py

Lines changed: 96 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
"""
22
Use cases handle application logic.
33
"""
4-
from typing import Dict, Sequence, Set, Type, Union, cast
4+
5+
from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection
6+
import multiprocessing
7+
import math
58

69
from ..application.ports import caching
710
from ..application.ports.filesystem import AbstractFileSystem
811
from ..application.ports.graph import ImportGraph
912
from ..application.ports.importscanner import AbstractImportScanner
10-
from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage
13+
from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile
1114
from ..application.ports.packagefinder import AbstractPackageFinder
1215
from ..domain.valueobjects import DirectImport, Module
1316
from .config import settings
1417

18+
N_CPUS = multiprocessing.cpu_count()
19+
1520

1621
class NotSupplied:
1722
pass
@@ -106,7 +111,6 @@ def _scan_packages(
106111
exclude_type_checking_imports: bool,
107112
cache_dir: Union[str, Type[NotSupplied], None],
108113
) -> Dict[Module, Set[DirectImport]]:
109-
imports_by_module: Dict[Module, Set[DirectImport]] = {}
110114
if cache_dir is not None:
111115
cache_dir_if_supplied = cache_dir if cache_dir != NotSupplied else None
112116
cache: caching.Cache = settings.CACHE_CLASS.setup(
@@ -116,24 +120,33 @@ def _scan_packages(
116120
exclude_type_checking_imports=exclude_type_checking_imports,
117121
cache_dir=cache_dir_if_supplied,
118122
)
119-
import_scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS(
120-
file_system=file_system,
121-
found_packages=found_packages,
122-
include_external_packages=include_external_packages,
123-
)
124123

125-
for found_package in found_packages:
126-
for module_file in found_package.module_files:
127-
module = module_file.module
128-
try:
129-
if cache_dir is None:
130-
raise caching.CacheMiss
131-
direct_imports = cache.read_imports(module_file)
132-
except caching.CacheMiss:
133-
direct_imports = import_scanner.scan_for_imports(
134-
module, exclude_type_checking_imports=exclude_type_checking_imports
135-
)
136-
imports_by_module[module] = direct_imports
124+
module_files_to_scan = {
125+
module_file
126+
for found_package in found_packages
127+
for module_file in found_package.module_files
128+
}
129+
130+
imports_by_module_file: Dict[ModuleFile, Set[DirectImport]] = {}
131+
132+
if cache_dir is not None:
133+
imports_by_module_file.update(_read_imports_from_cache(module_files_to_scan, cache=cache))
134+
135+
remaining_module_files_to_scan = module_files_to_scan.difference(imports_by_module_file)
136+
if remaining_module_files_to_scan:
137+
imports_by_module_file.update(
138+
_scan_imports(
139+
remaining_module_files_to_scan,
140+
file_system=file_system,
141+
found_packages=found_packages,
142+
include_external_packages=include_external_packages,
143+
exclude_type_checking_imports=exclude_type_checking_imports,
144+
)
145+
)
146+
147+
imports_by_module: Dict[Module, Set[DirectImport]] = {
148+
k.module: v for k, v in imports_by_module_file.items()
149+
}
137150

138151
if cache_dir is not None:
139152
cache.write(imports_by_module)
@@ -172,3 +185,66 @@ def _is_external(module: Module, found_packages: Set[FoundPackage]) -> bool:
172185
module.is_descendant_of(package_module) or module == package_module
173186
for package_module in package_modules
174187
)
188+
189+
190+
def _read_imports_from_cache(
191+
module_files: Iterable[ModuleFile], *, cache: caching.Cache
192+
) -> Dict[ModuleFile, Set[DirectImport]]:
193+
imports_by_module_file: Dict[ModuleFile, Set[DirectImport]] = {}
194+
for module_file in module_files:
195+
try:
196+
direct_imports = cache.read_imports(module_file)
197+
except caching.CacheMiss:
198+
continue
199+
else:
200+
imports_by_module_file[module_file] = direct_imports
201+
return imports_by_module_file
202+
203+
204+
def _scan_imports(
    module_files: Collection[ModuleFile],
    *,
    file_system: AbstractFileSystem,
    found_packages: Set[FoundPackage],
    include_external_packages: bool,
    exclude_type_checking_imports: bool,
) -> Dict[ModuleFile, Set[DirectImport]]:
    """Scan the given module files for imports, spreading work across CPUs.

    The files are split into at most one chunk per available CPU (and at most
    one chunk per file) and each chunk is scanned in a worker process.
    """
    scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS(
        file_system=file_system,
        found_packages=found_packages,
        include_external_packages=include_external_packages,
    )

    worker_count = min(N_CPUS, len(module_files))
    chunks = _create_chunks(list(module_files), n_chunks=worker_count)
    # The scanner and flag are pickled and sent to each worker alongside its chunk.
    job_args = [(scanner, exclude_type_checking_imports, chunk) for chunk in chunks]

    with multiprocessing.Pool(worker_count) as pool:
        chunk_results = pool.starmap(_scan_chunk, job_args)

    merged: Dict[ModuleFile, Set[DirectImport]] = {}
    for chunk_result in chunk_results:
        merged.update(chunk_result)
    return merged
231+
232+
233+
def _create_chunks(
234+
module_files: Sequence[ModuleFile], *, n_chunks: int
235+
) -> Iterable[Iterable[ModuleFile]]:
236+
chunk_size = math.ceil(len(module_files) / n_chunks)
237+
return [module_files[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks)]
238+
239+
240+
def _scan_chunk(
241+
import_scanner: AbstractImportScanner,
242+
exclude_type_checking_imports: bool,
243+
chunk: Iterable[ModuleFile],
244+
) -> Dict[ModuleFile, Set[DirectImport]]:
245+
return {
246+
module_file: import_scanner.scan_for_imports(
247+
module_file.module, exclude_type_checking_imports=exclude_type_checking_imports
248+
)
249+
for module_file in chunk
250+
}

src/grimp/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ def __eq__(self, other):
6262
other.text,
6363
)
6464

65+
def __reduce__(self):
    """Make the exception pickleable so it can cross process boundaries.

    Returns the class together with the constructor arguments needed to
    rebuild an equal instance in the receiving process.
    """
    constructor_args = (self.filename, self.lineno, self.text)
    return (SourceSyntaxError, constructor_args)
69+
6570

6671
class InvalidModuleExpression(GrimpException):
6772
pass

0 commit comments

Comments
 (0)