Skip to content

Commit f764d6c

Browse files
committed
Remove Python multiprocessing
1 parent 7c8c958 commit f764d6c

5 files changed

Lines changed: 3 additions & 192 deletions

File tree

docs/usage.rst

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -84,11 +84,6 @@ Building the graph
8484
:return: An import graph that you can use to analyse the package.
8585
:rtype: ``ImportGraph``
8686

87-
This method uses multiple operating system processes to build the graph, if the number of modules to scan (not
88-
including modules in the cache) is 50 or more. This threshold can be adjusted by setting the ``GRIMP_MIN_MULTIPROCESSING_MODULES``
89-
environment variable to a different number. To disable multiprocessing altogether, set it to a large number (more than
90-
the number of modules in the codebase being analyzed).
91-
9287
.. _typing module documentation: https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING
9388

9489
Methods for analysing the module tree

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@ authors = [
1616
]
1717
requires-python = ">=3.9"
1818
dependencies = [
19-
"joblib>=1.3.0",
2019
"typing-extensions>=3.10.0.0",
2120
]
2221
classifiers = [

src/grimp/application/scanning.py

Lines changed: 3 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
import math
2-
import os
3-
from typing import Collection, Set, Dict, Iterable
4-
5-
import joblib # type: ignore
1+
from typing import Collection, Set, Dict
62

73
from grimp import _rustgrimp as rust # type: ignore[attr-defined]
84
from grimp.domain.valueobjects import DirectImport, Module
@@ -11,93 +7,22 @@
117
from grimp.application.ports.modulefinder import ModuleFile, FoundPackage
128

139

14-
# Calling code can set this environment variable if it wants to tune when to switch to
15-
# multiprocessing, or set it to a large number to disable it altogether.
16-
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES"
17-
# This is an arbitrary number, but setting it too low slows down our functional tests considerably.
18-
# If you change this, update docs/usage.rst too!
19-
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
20-
21-
2210
def scan_imports(
2311
module_files: Collection[ModuleFile],
2412
*,
2513
found_packages: Set[FoundPackage],
2614
include_external_packages: bool,
2715
exclude_type_checking_imports: bool,
28-
) -> Dict[ModuleFile, Set[DirectImport]]:
29-
chunks = _create_chunks(module_files)
30-
return _scan_chunks(
31-
chunks,
32-
found_packages,
33-
include_external_packages,
34-
exclude_type_checking_imports,
35-
)
36-
37-
38-
def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFile, ...], ...]:
39-
"""
40-
Split the module files into chunks, each to be worked on by a separate OS process.
41-
"""
42-
module_files_tuple = tuple(module_files)
43-
44-
number_of_module_files = len(module_files_tuple)
45-
n_chunks = _decide_number_of_processes(number_of_module_files)
46-
chunk_size = math.ceil(number_of_module_files / n_chunks)
47-
48-
return tuple(
49-
module_files_tuple[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks)
50-
)
51-
52-
53-
def _decide_number_of_processes(number_of_module_files: int) -> int:
54-
min_number_of_modules = int(
55-
os.environ.get(
56-
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME,
57-
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
58-
)
59-
)
60-
if number_of_module_files < min_number_of_modules:
61-
# Don't incur the overhead of multiple processes.
62-
return 1
63-
return min(joblib.cpu_count(), number_of_module_files)
64-
65-
66-
def _scan_chunk(
67-
found_packages: Set[FoundPackage],
68-
include_external_packages: bool,
69-
exclude_type_checking_imports: bool,
70-
chunk: Iterable[ModuleFile],
7116
) -> Dict[ModuleFile, Set[DirectImport]]:
7217
file_system: AbstractFileSystem = settings.FILE_SYSTEM
7318
basic_file_system = file_system.convert_to_basic()
7419
imports_by_module: dict[Module, set[DirectImport]] = rust.scan_for_imports(
75-
module_files=chunk,
20+
module_files=tuple(module_files),
7621
found_packages=found_packages,
7722
# Ensure that the passed exclude_type_checking_imports is definitely a boolean,
7823
# otherwise the Rust class will error.
7924
include_external_packages=bool(include_external_packages),
8025
exclude_type_checking_imports=exclude_type_checking_imports,
8126
file_system=basic_file_system,
8227
)
83-
return {module_file: imports_by_module[module_file.module] for module_file in chunk}
84-
85-
86-
def _scan_chunks(
87-
chunks: Collection[Collection[ModuleFile]],
88-
found_packages: Set[FoundPackage],
89-
include_external_packages: bool,
90-
exclude_type_checking_imports: bool,
91-
) -> Dict[ModuleFile, Set[DirectImport]]:
92-
number_of_processes = len(chunks)
93-
import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)(
94-
joblib.delayed(_scan_chunk)(
95-
found_packages, include_external_packages, exclude_type_checking_imports, chunk
96-
)
97-
for chunk in chunks
98-
)
99-
100-
imports_by_module_file = {}
101-
for chunk_imports_by_module_file in import_scanning_jobs:
102-
imports_by_module_file.update(chunk_imports_by_module_file)
103-
return imports_by_module_file
28+
return {module_file: imports_by_module[module_file.module] for module_file in module_files}

tests/functional/test_build_and_use_graph.py

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
from grimp import build_graph
22
from typing import Set, Tuple, Optional
33
import pytest
4-
from unittest.mock import patch
5-
from grimp.application import scanning
64

75

86
"""
@@ -56,33 +54,6 @@ def test_modules():
5654
}
5755

5856

59-
@patch.object(scanning, "DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0)
60-
def test_modules_multiprocessing():
61-
"""
62-
This test runs relatively slowly, but it's important we cover the multiprocessing code.
63-
"""
64-
graph = build_graph("testpackage", cache_dir=None)
65-
66-
assert graph.modules == {
67-
"testpackage",
68-
"testpackage.one",
69-
"testpackage.one.alpha",
70-
"testpackage.one.beta",
71-
"testpackage.one.gamma",
72-
"testpackage.one.delta",
73-
"testpackage.one.delta.blue",
74-
"testpackage.two",
75-
"testpackage.two.alpha",
76-
"testpackage.two.beta",
77-
"testpackage.two.gamma",
78-
"testpackage.utils",
79-
"testpackage.three",
80-
"testpackage.three.beta",
81-
"testpackage.three.gamma",
82-
"testpackage.three.alpha",
83-
}
84-
85-
8657
def test_add_module():
8758
graph = build_graph("testpackage", cache_dir=None)
8859
number_of_modules = len(graph.modules)

tests/unit/application/test_scanning.py

Lines changed: 0 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,12 @@
11
from typing import Set
22

3-
import os
4-
from unittest.mock import patch
53
import pytest # type: ignore
6-
import joblib # type: ignore
74

85
from grimp.application.ports.modulefinder import FoundPackage, ModuleFile
96
from grimp.application import scanning
107
from grimp.domain.valueobjects import DirectImport, Module
118
from tests.config import override_settings
129
from grimp import _rustgrimp as rust # type: ignore[attr-defined]
13-
from tests.adaptors.filesystem import FakeFileSystem
14-
15-
16-
SOME_CPU_COUNT = 8
1710

1811

1912
@pytest.mark.parametrize(
@@ -944,78 +937,6 @@ def test_exclude_type_checking_imports(
944937
assert {module_foo_one_file: expected_result} == result
945938

946939

947-
@patch.object(scanning, "_scan_chunks", return_value={})
948-
@patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT)
949-
@pytest.mark.parametrize(
950-
"number_of_modules, fake_environ, expected_number_of_chunks",
951-
[
952-
(
953-
scanning.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1,
954-
{},
955-
1,
956-
),
957-
(
958-
scanning.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
959-
{},
960-
SOME_CPU_COUNT,
961-
),
962-
(
963-
scanning.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1,
964-
{},
965-
SOME_CPU_COUNT,
966-
),
967-
(
968-
149,
969-
{scanning.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
970-
1,
971-
),
972-
(
973-
150,
974-
{scanning.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
975-
SOME_CPU_COUNT,
976-
),
977-
(
978-
151,
979-
{scanning.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
980-
SOME_CPU_COUNT,
981-
),
982-
],
983-
)
984-
def test_scanning_multiprocessing_respects_min_number_of_modules(
985-
mock_cpu_count,
986-
mock_scan_chunks,
987-
number_of_modules,
988-
fake_environ,
989-
expected_number_of_chunks,
990-
):
991-
module_files = frozenset(
992-
{
993-
ModuleFile(
994-
module=Module(f"mypackage.mod_{i}"),
995-
mtime=999,
996-
)
997-
for i in range(number_of_modules)
998-
}
999-
)
1000-
found_packages = {
1001-
FoundPackage(name="mypackage", directory="/path/to/mypackage", module_files=module_files)
1002-
}
1003-
1004-
with override_settings(
1005-
FILE_SYSTEM=FakeFileSystem(),
1006-
), patch.object(os, "environ", fake_environ):
1007-
scanning.scan_imports(
1008-
module_files,
1009-
found_packages=found_packages,
1010-
include_external_packages=False,
1011-
exclude_type_checking_imports=False,
1012-
)
1013-
1014-
[call] = mock_scan_chunks.call_args_list
1015-
chunks = call.args[0]
1016-
assert len(chunks) == expected_number_of_chunks
1017-
1018-
1019940
def _module_to_module_file(module: Module) -> ModuleFile:
1020941
some_mtime = 100933.4
1021942
return ModuleFile(module=module, mtime=some_mtime)

0 commit comments

Comments
 (0)