|
1 | | -import math |
2 | | -import os |
3 | | -from typing import Collection, Set, Dict, Iterable |
4 | | - |
5 | | -import joblib # type: ignore |
| 1 | +from typing import Collection, Set, Dict |
6 | 2 |
|
7 | 3 | from grimp import _rustgrimp as rust # type: ignore[attr-defined] |
8 | 4 | from grimp.domain.valueobjects import DirectImport, Module |
|
11 | 7 | from grimp.application.ports.modulefinder import ModuleFile, FoundPackage |
12 | 8 |
|
13 | 9 |
|
14 | | -# Calling code can set this environment variable if it wants to tune when to switch to |
15 | | -# multiprocessing, or set it to a large number to disable it altogether. |
16 | | -MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES" |
17 | | -# This is an arbitrary number, but setting it too low slows down our functional tests considerably. |
18 | | -# If you change this, update docs/usage.rst too! |
19 | | -DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50 |
20 | | - |
21 | | - |
def scan_imports(
    module_files: Collection[ModuleFile],
    *,
    found_packages: Set[FoundPackage],
    include_external_packages: bool,
    exclude_type_checking_imports: bool,
) -> Dict[ModuleFile, Set[DirectImport]]:
    """
    Scan the supplied module files for their direct imports.

    Delegates the parsing work to the Rust extension, then re-keys the
    result by ModuleFile rather than by Module.

    Args:
        module_files: The module files to scan.
        found_packages: The packages the modules belong to.
        include_external_packages: Whether imports of external packages
            should be included in the results.
        exclude_type_checking_imports: Whether imports guarded by
            TYPE_CHECKING should be excluded from the results.

    Returns:
        A mapping of each supplied module file to the set of direct
        imports discovered within it.
    """
    file_system: AbstractFileSystem = settings.FILE_SYSTEM
    # Convert to the basic file system flavour that the Rust layer accepts.
    basic_file_system = file_system.convert_to_basic()

    # Materialize once up front; reused both for the Rust call and for
    # building the return mapping.
    module_files_tuple = tuple(module_files)

    imports_by_module: dict[Module, set[DirectImport]] = rust.scan_for_imports(
        module_files=module_files_tuple,
        found_packages=found_packages,
        # Ensure that the passed exclude_type_checking_imports is definitely a boolean,
        # otherwise the Rust class will error.
        include_external_packages=bool(include_external_packages),
        exclude_type_checking_imports=exclude_type_checking_imports,
        file_system=basic_file_system,
    )

    result: Dict[ModuleFile, Set[DirectImport]] = {}
    for module_file in module_files_tuple:
        result[module_file] = imports_by_module[module_file.module]
    return result
0 commit comments