Skip to content

Commit f5ae0e0

Browse files
committed
Allow control of multiprocessing using env var
This provides the ability to adjust the arbitrary cutoff (the minimum number of modules at which multiprocessing is used), or to turn off multiprocessing altogether.
1 parent 6db751e commit f5ae0e0

5 files changed

Lines changed: 71 additions & 12 deletions

File tree

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
Changelog
33
=========
44

5+
latest
6+
------
7+
8+
* Provide more control of multiprocessing via the ``GRIMP_MIN_MULTIPROCESSING_MODULES``
9+
environment variable.
10+
511
3.8.1 (2025-04-23)
612
------------------
713

docs/usage.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,12 @@ Building the graph
8282
:param str, optional cache_dir: The directory to use for caching the graph. Defaults to ``.grimp_cache``. To disable caching,
8383
pass ``None``. See :doc:`caching`.
8484
:return: An import graph that you can use to analyse the package.
85-
:rtype: ImportGraph
85+
:rtype: ``ImportGraph``
86+
87+
This function uses multiple operating system processes to build the graph if the number of modules to scan (not
88+
including modules in the cache) is 50 or more. This threshold can be adjusted by setting the ``GRIMP_MIN_MULTIPROCESSING_MODULES``
89+
environment variable to a different number. To disable multiprocessing altogether, set it to a large number (more than
90+
the number of modules in the codebase being analyzed).
8691

8792
.. _typing module documentation: https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING
8893

src/grimp/application/usecases.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,19 @@
1515
from ..application.ports.packagefinder import AbstractPackageFinder
1616
from ..domain.valueobjects import DirectImport, Module
1717
from .config import settings
18+
import os
1819

1920

2021
class NotSupplied:
2122
pass
2223

2324

25+
# Calling code can set this environment variable if it wants to tune when to switch to
26+
# multiprocessing, or set it to a large number to disable it altogether.
27+
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES"
2428
# This is an arbitrary number, but setting it too low slows down our functional tests considerably.
25-
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
29+
# If you change this, update docs/usage.rst too!
30+
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
2631

2732

2833
def build_graph(
@@ -238,7 +243,13 @@ def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFi
238243

239244

240245
def _decide_number_of_processes(number_of_module_files: int) -> int:
241-
if number_of_module_files < MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING:
246+
min_number_of_modules = int(
247+
os.environ.get(
248+
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME,
249+
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
250+
)
251+
)
252+
if number_of_module_files < min_number_of_modules:
242253
# Don't incur the overhead of multiple processes.
243254
return 1
244255
return min(joblib.cpu_count(), number_of_module_files)

tests/functional/test_build_and_use_graph.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44
from unittest.mock import patch
55
from grimp.application import usecases
6-
6+
import os
77
"""
88
For ease of reference, these are the imports of all the files:
99
@@ -55,7 +55,9 @@ def test_modules():
5555
}
5656

5757

58-
@patch.object(usecases, "MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0)
58+
@patch.object(os, "environ", {
59+
usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: "0"
60+
})
5961
def test_modules_multiprocessing():
6062
"""
6163
This test runs relatively slowly, but it's important we cover the multiprocessing code.

tests/unit/application/test_usecases.py

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from typing import Dict, Optional, Set
23
from unittest.mock import sentinel, patch
34

@@ -13,6 +14,8 @@
1314
from tests.adaptors.modulefinder import BaseFakeModuleFinder
1415
from tests.config import override_settings
1516

17+
SOME_CPU_COUNT = 8
18+
1619

1720
class TestBuildGraph:
1821
@pytest.mark.parametrize("include_external_packages", (True, False))
@@ -135,17 +138,49 @@ def write(
135138
usecases.build_graph("mypackage", **kwargs)
136139

137140
@patch.object(usecases, "_scan_chunks", return_value={})
138-
@patch.object(joblib, "cpu_count", return_value=8)
141+
@patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT)
139142
@pytest.mark.parametrize(
140-
"number_of_modules, expected_number_of_chunks",
143+
"number_of_modules, fake_environ, expected_number_of_chunks",
141144
[
142-
(49, 1), # Below threshold - just use one.
143-
(50, 8), # At threshold - use number of CPUs.
144-
(1000, 8), # Above threshold - use number of CPUs.
145+
(
146+
usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1,
147+
{},
148+
1,
149+
),
150+
(
151+
usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
152+
{},
153+
SOME_CPU_COUNT,
154+
),
155+
(
156+
usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1,
157+
{},
158+
SOME_CPU_COUNT,
159+
),
160+
(
161+
149,
162+
{usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
163+
1,
164+
),
165+
(
166+
150,
167+
{usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
168+
SOME_CPU_COUNT,
169+
),
170+
(
171+
151,
172+
{usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
173+
SOME_CPU_COUNT,
174+
),
145175
],
146176
)
147177
def test_scanning_multiprocessing_respects_min_number_of_modules(
148-
self, mock_cpu_count, mock_scan_chunks, number_of_modules, expected_number_of_chunks
178+
self,
179+
mock_cpu_count,
180+
mock_scan_chunks,
181+
number_of_modules,
182+
fake_environ,
183+
expected_number_of_chunks,
149184
):
150185
class FakePackageFinder(BaseFakePackageFinder):
151186
directory_map = {"mypackage": "/path/to/mypackage"}
@@ -167,7 +202,7 @@ class FakeModuleFinder(BaseFakeModuleFinder):
167202
FILE_SYSTEM=FakeFileSystem(),
168203
PACKAGE_FINDER=FakePackageFinder(),
169204
MODULE_FINDER=FakeModuleFinder(),
170-
):
205+
), patch.object(os, "environ", fake_environ):
171206
usecases.build_graph("mypackage", cache_dir=None)
172207

173208
[call] = mock_scan_chunks.call_args_list

0 commit comments

Comments
 (0)