|
15 | 15 | from ..domain.valueobjects import DirectImport, Module |
16 | 16 | from .config import settings |
17 | 17 |
|
18 | | -N_CPUS = multiprocessing.cpu_count() |
19 | | - |
20 | 18 |
|
class NotSupplied:
    """
    Sentinel type — presumably used to distinguish "argument not supplied"
    from an explicit ``None``; confirm against call sites (not visible here).
    """
    pass
23 | 21 |
|
24 | 22 |
|
# Minimum workload size at which scanning switches from a single process to a
# multiprocessing pool. This is an arbitrary number, but setting it too low
# slows down our functional tests considerably.
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
| 26 | + |
25 | 27 | def build_graph( |
26 | 28 | package_name, |
27 | 29 | *additional_package_names, |
@@ -209,32 +211,66 @@ def _scan_imports( |
209 | 211 | include_external_packages: bool, |
210 | 212 | exclude_type_checking_imports: bool, |
211 | 213 | ) -> Dict[ModuleFile, Set[DirectImport]]: |
212 | | - import_scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS( |
213 | | - file_system=file_system, |
214 | | - found_packages=found_packages, |
215 | | - include_external_packages=include_external_packages, |
| 214 | + chunks = _create_chunks(module_files) |
| 215 | + return _scan_chunks( |
| 216 | + chunks, |
| 217 | + file_system, |
| 218 | + found_packages, |
| 219 | + include_external_packages, |
| 220 | + exclude_type_checking_imports, |
216 | 221 | ) |
217 | 222 |
|
218 | | - imports_by_module_file: Dict[ModuleFile, Set[DirectImport]] = {} |
219 | 223 |
|
220 | | - n_chunks = min(N_CPUS, len(module_files)) |
221 | | - chunks = _create_chunks(list(module_files), n_chunks=n_chunks) |
222 | | - with multiprocessing.Pool(n_chunks) as pool: |
223 | | - import_scanning_jobs = pool.starmap( |
224 | | - _scan_chunk, |
225 | | - [(import_scanner, exclude_type_checking_imports, chunk) for chunk in chunks], |
226 | | - ) |
227 | | - for chunk_imports_by_module_file in import_scanning_jobs: |
228 | | - imports_by_module_file.update(chunk_imports_by_module_file) |
def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFile, ...], ...]:
    """
    Split the module files into chunks, each to be worked on by a separate OS process.

    Returns only non-empty chunks, except when there are no module files at all,
    in which case a single empty chunk is returned so callers still receive one
    unit of work.
    """
    module_files_tuple = tuple(module_files)
    number_of_module_files = len(module_files_tuple)

    n_chunks = _decide_number_of_of_processes(number_of_module_files)
    chunk_size = math.ceil(number_of_module_files / n_chunks)

    # ceil() rounds the chunk size up, so the trailing slice(s) can be empty:
    # e.g. 50 files across 48 CPUs gives a chunk size of 2, filling only 25 of
    # the 48 chunks. Callers size their process pool by the number of chunks,
    # so drop empty chunks to avoid spawning processes with no work to do.
    chunks = tuple(
        chunk
        for i in range(n_chunks)
        if (chunk := module_files_tuple[i * chunk_size : (i + 1) * chunk_size])
    )
    # Preserve the "at least one chunk" contract for an empty collection.
    return chunks or ((),)
231 | 237 |
|
232 | 238 |
|
233 | | -def _create_chunks( |
234 | | - module_files: Sequence[ModuleFile], *, n_chunks: int |
235 | | -) -> Iterable[Iterable[ModuleFile]]: |
236 | | - chunk_size = math.ceil(len(module_files) / n_chunks) |
237 | | - return [module_files[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks)] |
def _decide_number_of_of_processes(number_of_module_files: int) -> int:
    """
    Choose how many OS processes to use for scanning the supplied number of modules.

    Small workloads are scanned in a single process, since the start-up overhead
    of multiprocessing would outweigh any gains.
    """
    # NOTE(review): "of_of" in the name looks like a typo; left unchanged here
    # because callers reference the function by this exact name.
    small_workload = (
        number_of_module_files < MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING
    )
    if small_workload:
        return 1
    # Never use more processes than there are modules to scan.
    return min(multiprocessing.cpu_count(), number_of_module_files)
| 244 | + |
| 245 | + |
def _scan_chunks(
    chunks: Collection[Collection[ModuleFile]],
    file_system: AbstractFileSystem,
    found_packages: Set[FoundPackage],
    include_external_packages: bool,
    exclude_type_checking_imports: bool,
) -> Dict[ModuleFile, Set[DirectImport]]:
    """
    Scan each chunk of module files for imports, one OS process per chunk.

    A single chunk is scanned directly in this process, avoiding the overhead
    of spawning a worker.
    """
    scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS(
        file_system=file_system,
        found_packages=found_packages,
        include_external_packages=include_external_packages,
    )

    if len(chunks) == 1:
        # Guard clause: only one chunk, so no process pool is needed.
        (only_chunk,) = chunks
        return _scan_chunk(scanner, exclude_type_checking_imports, only_chunk)

    job_args = [(scanner, exclude_type_checking_imports, chunk) for chunk in chunks]
    with multiprocessing.Pool(len(chunks)) as pool:
        chunk_results = pool.starmap(_scan_chunk, job_args)

    # Combine the per-chunk mappings into a single result.
    merged: Dict[ModuleFile, Set[DirectImport]] = {}
    for chunk_result in chunk_results:
        merged.update(chunk_result)
    return merged
238 | 274 |
|
239 | 275 |
|
240 | 276 | def _scan_chunk( |
|
0 commit comments