diff --git a/concore.py b/concore.py index 2147e75..d2f5b6f 100644 --- a/concore.py +++ b/concore.py @@ -21,13 +21,122 @@ logging.getLogger('requests').setLevel(logging.WARNING) -# if windows, create script to kill this process -# because batch files don't provide easy way to know pid of last command -# ignored for posix != windows, because "concorepid" is handled by script -# ignored for docker (linux != windows), because handled by docker stop +# if windows, register this process PID for safe termination +# Previous approach: single "concorekill.bat" overwritten by each node (race condition). +# New approach: append PID to shared registry; generate validated kill script. +# See: https://github.com/ControlCore-Project/concore/issues/391 + +_LOCK_LEN = 0x7FFFFFFF # lock range large enough to cover entire file +_BASE_DIR = os.path.abspath(".") # capture CWD before atexit can shift it +_PID_REGISTRY_FILE = os.path.join(_BASE_DIR, "concorekill_pids.txt") +_KILL_SCRIPT_FILE = os.path.join(_BASE_DIR, "concorekill.bat") + +def _register_pid(): + """Append current PID to the shared registry file. Uses file locking on Windows.""" + try: + with open(_PID_REGISTRY_FILE, "a") as f: + if hasattr(sys, 'getwindowsversion'): + import msvcrt + try: + msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, _LOCK_LEN) + f.write(str(os.getpid()) + "\n") + finally: + try: + f.seek(0) + msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, _LOCK_LEN) + except OSError: + pass + else: + f.write(str(os.getpid()) + "\n") + except OSError: + pass + +def _cleanup_pid(): + """Remove current PID from registry on exit. Uses file locking on Windows.""" + pid = str(os.getpid()) + try: + if not os.path.exists(_PID_REGISTRY_FILE): + return + with open(_PID_REGISTRY_FILE, "r+") as f: + if hasattr(sys, 'getwindowsversion'): + import msvcrt + try: + msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, _LOCK_LEN) + pids = [line.strip() for line in f if line.strip()] + remaining = [p for p in pids if p != pid] + if remaining: + f.seek(0) + f.truncate() + for p in remaining: + f.write(p + "\n") + else: + f.close() + try: + os.remove(_PID_REGISTRY_FILE) + except OSError: + pass + try: + os.remove(_KILL_SCRIPT_FILE) + except OSError: + pass + return + finally: + try: + f.seek(0) + msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, _LOCK_LEN) + except (OSError, ValueError): + pass + else: + pids = [line.strip() for line in f if line.strip()] + remaining = [p for p in pids if p != pid] + if remaining: + f.seek(0) + f.truncate() + for p in remaining: + f.write(p + "\n") + else: + f.close() + try: + os.remove(_PID_REGISTRY_FILE) + except OSError: + pass + try: + os.remove(_KILL_SCRIPT_FILE) + except OSError: + pass + except OSError: + pass + +def _write_kill_script(): + """Generate concorekill.bat that validates each PID before killing.""" + try: + reg_name = os.path.basename(_PID_REGISTRY_FILE) + bat_name = os.path.basename(_KILL_SCRIPT_FILE) + script = "@echo off\r\n" + script += 'if not exist "%~dp0' + reg_name + '" (\r\n' + script += " echo No PID registry found. Nothing to kill.\r\n" + script += " exit /b 0\r\n" + script += ")\r\n" + script += 'for /f "usebackq tokens=*" %%p in ("%~dp0' + reg_name + '") do (\r\n' + script += ' wmic process where "ProcessId=%%p" get CommandLine /value 2>nul | find /i "concore" >nul\r\n' + script += " if not errorlevel 1 (\r\n" + script += " echo Killing concore process %%p\r\n" + script += " taskkill /F /PID %%p >nul 2>&1\r\n" + script += " ) else (\r\n" + script += " echo Skipping PID %%p - not a concore process or not running\r\n" + script += " )\r\n" + script += ")\r\n" + script += 'del /q "%~dp0' + reg_name + '" 2>nul\r\n' + script += 'del /q "%~dp0' + bat_name + '" 2>nul\r\n' + with open(_KILL_SCRIPT_FILE, "w", newline="") as f: + f.write(script) + except OSError: + pass + if hasattr(sys, 'getwindowsversion'): - with open("concorekill.bat","w") as fpid: - fpid.write("taskkill /F /PID "+str(os.getpid())+"\n") + _register_pid() + _write_kill_script() + atexit.register(_cleanup_pid) ZeroMQPort = concore_base.ZeroMQPort convert_numpy_to_python = concore_base.convert_numpy_to_python diff --git a/tests/test_concore.py b/tests/test_concore.py index b1a980e..267b17a 100644 --- a/tests/test_concore.py +++ b/tests/test_concore.py @@ -1,5 +1,6 @@ import pytest import os +import sys import numpy as np @@ -450,3 +451,142 @@ def test_write_timestamp_matches_cpp_semantics(self, temp_dir): "After 3 writes with delta=1 simtime must remain 0 " "(matching C++/MATLAB/Verilog); got %s" % concore.simtime ) + + +class TestPidRegistry: + """Tests for the Windows PID registry mechanism (Issue #391).""" + + @pytest.fixture(autouse=True) + def use_temp_dir(self, temp_dir, monkeypatch): + self.temp_dir = temp_dir + monkeypatch.chdir(temp_dir) + import concore + + monkeypatch.setattr(concore, "_BASE_DIR", temp_dir) + monkeypatch.setattr( + concore, + "_PID_REGISTRY_FILE", + os.path.join(temp_dir, "concorekill_pids.txt"), + ) + monkeypatch.setattr( + concore, + "_KILL_SCRIPT_FILE", + os.path.join(temp_dir, "concorekill.bat"), + ) + + def test_register_pid_creates_registry_file(self): + from concore import _register_pid, _PID_REGISTRY_FILE + + _register_pid() + assert os.path.exists(_PID_REGISTRY_FILE) + with open(_PID_REGISTRY_FILE) as f: + pids = [line.strip() for line in f if line.strip()] + assert str(os.getpid()) in pids + + def test_register_pid_appends_not_overwrites(self): + from concore import _register_pid, _PID_REGISTRY_FILE + + with open(_PID_REGISTRY_FILE, "w") as f: + f.write("11111\n") + f.write("22222\n") + _register_pid() + with open(_PID_REGISTRY_FILE) as f: + pids = [line.strip() for line in f if line.strip()] + assert "11111" in pids + assert "22222" in pids + assert str(os.getpid()) in pids + assert len(pids) == 3 + + def test_cleanup_pid_removes_current_pid(self): + from concore import _cleanup_pid, _PID_REGISTRY_FILE + + current_pid = str(os.getpid()) + with open(_PID_REGISTRY_FILE, "w") as f: + f.write("99999\n") + f.write(current_pid + "\n") + f.write("88888\n") + _cleanup_pid() + with open(_PID_REGISTRY_FILE) as f: + pids = [line.strip() for line in f if line.strip()] + assert current_pid not in pids + assert "99999" in pids + assert "88888" in pids + + def test_cleanup_pid_deletes_files_when_last_pid(self): + from concore import _cleanup_pid, _PID_REGISTRY_FILE, _KILL_SCRIPT_FILE + + current_pid = str(os.getpid()) + with open(_PID_REGISTRY_FILE, "w") as f: + f.write(current_pid + "\n") + with open(_KILL_SCRIPT_FILE, "w") as f: + f.write("@echo off\n") + _cleanup_pid() + assert not os.path.exists(_PID_REGISTRY_FILE) + assert not os.path.exists(_KILL_SCRIPT_FILE) + + def test_cleanup_pid_handles_missing_registry(self): + from concore import _cleanup_pid, _PID_REGISTRY_FILE + + assert not os.path.exists(_PID_REGISTRY_FILE) + _cleanup_pid() # Should not raise + + def test_write_kill_script_generates_bat_file(self): + from concore import _write_kill_script, _KILL_SCRIPT_FILE, _PID_REGISTRY_FILE + + _write_kill_script() + assert os.path.exists(_KILL_SCRIPT_FILE) + with open(_KILL_SCRIPT_FILE) as f: + content = f.read() + assert os.path.basename(_PID_REGISTRY_FILE) in content + assert "wmic" in content + assert "taskkill" in content + assert "concore" in content.lower() + + def test_multi_node_registration(self): + from concore import _register_pid, _PID_REGISTRY_FILE + + fake_pids = ["1204", "1932", "8120"] + with open(_PID_REGISTRY_FILE, "w") as f: + for pid in fake_pids: + f.write(pid + "\n") + _register_pid() + with open(_PID_REGISTRY_FILE) as f: + pids = [line.strip() for line in f if line.strip()] + for pid in fake_pids: + assert pid in pids + assert str(os.getpid()) in pids + assert len(pids) == 4 + + def test_cleanup_preserves_other_pids(self): + from concore import _cleanup_pid, _PID_REGISTRY_FILE + + current_pid = str(os.getpid()) + other_pids = ["1111", "2222", "3333"] + with open(_PID_REGISTRY_FILE, "w") as f: + for pid in other_pids: + f.write(pid + "\n") + f.write(current_pid + "\n") + _cleanup_pid() + with open(_PID_REGISTRY_FILE) as f: + pids = [line.strip() for line in f if line.strip()] + assert len(pids) == 3 + assert current_pid not in pids + for pid in other_pids: + assert pid in pids + + @pytest.mark.skipif( + not hasattr(sys, "getwindowsversion"), reason="Windows-only test" + ) + def test_import_registers_pid_on_windows(self): + """Verify module-level PID registration on Windows.""" + import importlib + + for mod_name in ("concore", "concore_base"): + sys.modules.pop(mod_name, None) + import concore + + assert os.path.exists(concore._PID_REGISTRY_FILE) + with open(concore._PID_REGISTRY_FILE) as f: + pids = [line.strip() for line in f if line.strip()] + assert str(os.getpid()) in pids + importlib.reload(concore)