Skip to content

Commit 6f3dc8a

Browse files
committed
feat: differentiated exit code 3 for infrastructure errors
Adds explicit handling and structured logging for API/infrastructure failures so they are distinguishable from real blocking security findings in CI.

Exit code semantics (NEW for 2.3.0):
  0 - Clean scan, no blocking issues (or --disable-blocking)
  1 - Blocking security finding(s) detected
  2 - Process interrupted (SIGINT) -- already in place
  3 - Infrastructure / API error -- NEW
  5 - Warning-level alerts only -- preserved

This is a BREAKING change for any pipeline that previously caught exit 1 to mean "anything went wrong." Such pipelines now need to handle 3 separately for infrastructure failures, or use --exit-code-on-api-error to remap.

Changes:
- socketsecurity/core/__init__.py
  * Import RequestTimeoutExceeded and `requests`
  * Wrap fullscans.stream_diff with requests.exceptions.Timeout -> RequestTimeoutExceeded
  * Wrap fullscans.post (create_full_scan) with the same pattern
- socketsecurity/socketcli.py
  * Import RequestTimeoutExceeded + APIFailure
  * IS_BUILDKITE constant (gates BK-specific markers per spec §3)
  * New _emit_infrastructure_error helper emits Buildkite log section markers (^^^ +++ and ---) when BUILDKITE=true, plus a soft_fail hint; bare log.error otherwise. Markers go to stdout via print() so they aren't prefixed with log formatting; markers are literal strings on other CI platforms so the gate is required.
  * Explicit RequestTimeoutExceeded and APIFailure handlers added before the generic Exception handler, all using config.exit_code_on_api_error
- socketsecurity/config.py
  * New CliConfig field: exit_code_on_api_error (default 3)
  * New flag: --exit-code-on-api-error <int>
    Customers can remap to 0 (swallow), 100 (Buildkite soft_fail), etc.

Note: --disable-blocking now only zeroes out exit 1 (security findings), not exit 3 (infrastructure). This separation is the whole point of the new code -- callers who want to also swallow infra errors should use --exit-code-on-api-error 0.
Motivated by customer incidents (Plaid 413s and timeouts; Anthropic 'other' SocketCategory crash). Signed-off-by: lelia <2418071+lelia@users.noreply.github.com>
1 parent d727dcc commit 6f3dc8a

3 files changed

Lines changed: 90 additions & 10 deletions

File tree

socketsecurity/config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ class CliConfig:
9898
pending_head: bool = False
9999
enable_diff: bool = False
100100
timeout: Optional[int] = 1200
101+
exit_code_on_api_error: int = 3
101102
exclude_license_details: bool = False
102103
include_module_folders: bool = False
103104
repo_is_public: bool = False
@@ -224,6 +225,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
224225
'integration_type': args.integration,
225226
'pending_head': args.pending_head,
226227
'timeout': args.timeout,
228+
'exit_code_on_api_error': args.exit_code_on_api_error,
227229
'exclude_license_details': args.exclude_license_details,
228230
'include_module_folders': args.include_module_folders,
229231
'repo_is_public': args.repo_is_public,
@@ -754,6 +756,20 @@ def create_argument_parser() -> argparse.ArgumentParser:
754756
help="Timeout in seconds for API requests",
755757
required=False
756758
)
759+
advanced_group.add_argument(
760+
"--exit-code-on-api-error",
761+
dest="exit_code_on_api_error",
762+
type=int,
763+
default=3,
764+
metavar="<int>",
765+
help=(
766+
"Exit code to use when the CLI encounters an API or infrastructure "
767+
"error (timeout, network failure, unexpected exception). Default: 3. "
768+
"Use this to distinguish infrastructure failures from security findings "
769+
"in your CI pipeline. Example for Buildkite soft_fail: set to 100. "
770+
"Set to 0 to swallow infrastructure errors entirely."
771+
)
772+
)
757773
advanced_group.add_argument(
758774
"--allow-unverified",
759775
action="store_true",

socketsecurity/core/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
if TYPE_CHECKING:
1313
from socketsecurity.config import CliConfig
14+
import requests
1415
from socketdev import socketdev
1516
from socketdev.exceptions import APIFailure
1617
from socketdev.fullscans import FullScanParams, SocketArtifact
@@ -26,7 +27,7 @@
2627
Package,
2728
Purl
2829
)
29-
from socketsecurity.core.exceptions import APIResourceNotFound
30+
from socketsecurity.core.exceptions import APIResourceNotFound, RequestTimeoutExceeded
3031
from .socket_config import SocketConfig
3132
from .utils import socket_globs
3233
from .resource_utils import check_file_count_against_ulimit
@@ -538,7 +539,13 @@ def create_full_scan(self, files: List[str], params: FullScanParams, base_paths:
538539
log.info("Creating new full scan")
539540
create_full_start = time.time()
540541

541-
res = self.sdk.fullscans.post(files, params, use_types=True, use_lazy_loading=True, max_open_files=50, base_paths=base_paths)
542+
try:
543+
res = self.sdk.fullscans.post(files, params, use_types=True, use_lazy_loading=True, max_open_files=50, base_paths=base_paths)
544+
except requests.exceptions.Timeout as e:
545+
raise RequestTimeoutExceeded(
546+
f"Request timed out while creating full scan for org "
547+
f"'{self.config.org_slug}': {e}"
548+
)
542549
if not res.success:
543550
log.error(f"Error creating full scan: {res.message}, status: {res.status}")
544551
raise Exception(f"Error creating full scan: {res.message}, status: {res.status}")
@@ -945,6 +952,11 @@ def get_added_and_removed_packages(
945952
include_license_details=str(include_license_details).lower()
946953
).data
947954
)
955+
except requests.exceptions.Timeout as e:
956+
raise RequestTimeoutExceeded(
957+
f"Request timed out while comparing scans "
958+
f"(head: {head_full_scan_id}, new: {new_full_scan_id}): {e}"
959+
)
948960
except APIFailure as e:
949961
log.error(f"API Error: {e}")
950962
raise

socketsecurity/socketcli.py

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
from dotenv import load_dotenv
1111
from git import InvalidGitRepositoryError, NoSuchPathError
1212
from socketdev import socketdev
13+
from socketdev.exceptions import APIFailure
1314
from socketdev.fullscans import FullScanParams
1415
from socketsecurity.config import CliConfig
1516
from socketsecurity.core import Core
1617
from socketsecurity.core.classes import Diff
1718
from socketsecurity.core.cli_client import CliClient
19+
from socketsecurity.core.exceptions import RequestTimeoutExceeded
1820
from socketsecurity.core.git_interface import Git
1921
from socketsecurity.core.logging import initialize_logging, set_debug_mode
2022
from socketsecurity.core.messages import Messages
@@ -28,6 +30,11 @@
2830

2931
DEFAULT_API_TIMEOUT = 1200
3032

33+
# Buildkite sets BUILDKITE=true in every job environment. Used to gate
34+
# log section markers (^^^ +++, ---) that render as literal strings in
35+
# GitHub Actions / GitLab CI / other platforms.
36+
IS_BUILDKITE = os.getenv("BUILDKITE") == "true"
37+
3138

3239
def get_api_request_timeout(config: CliConfig) -> int:
    """Return the API request timeout (in seconds) to use for this run.

    Args:
        config: Parsed CLI configuration; only its ``timeout`` field is read.

    Returns:
        ``config.timeout`` when it is set, otherwise ``DEFAULT_API_TIMEOUT``.
        The check is ``is None`` rather than truthiness, so an explicit
        ``0`` is returned unchanged.
    """
    configured = config.timeout
    if configured is None:
        return DEFAULT_API_TIMEOUT
    return configured
@@ -43,6 +50,39 @@ def build_socket_sdk(config: CliConfig) -> socketdev:
4350
)
4451

4552

53+
def _emit_infrastructure_error(
    message: str,
    hint: "str | None" = None,
    include_traceback: bool = False,
    exit_code: int = 3,
) -> None:
    """Emit a structured error for infrastructure failures.

    When ``IS_BUILDKITE`` is true, Buildkite log-group markers are printed
    around the error so it is visible in the Buildkite UI without manual
    expansion. Markers go to stdout via ``print()`` (not ``log.error``) so
    they are not prefixed with log formatting.

    Args:
        message: Primary error text; always logged at error level.
        hint: Optional follow-up line logged after ``message``. Skipped when
            ``None`` or empty.
        include_traceback: When true, print the traceback of the exception
            currently being handled via ``traceback.print_exc()``. Only
            meaningful when called from inside an ``except`` block.
        exit_code: Exit status the caller is about to terminate with. It is
            interpolated into the Buildkite ``soft_fail`` tip so the advice
            stays correct when ``--exit-code-on-api-error`` remaps the code.
            Defaults to 3, which reproduces the previous hard-coded text.
    """
    if IS_BUILDKITE:
        # "^^^ +++" expands the group that was open when the failure
        # happened, so the output leading up to the error stays visible.
        print("^^^ +++", flush=True)
        # "+++" (not "---") starts an *expanded* group. A "---" header
        # would put the error itself inside a collapsed group, hiding it.
        print("+++ :warning: Socket Infrastructure Error", flush=True)

    log.error(message)

    if hint:
        log.error(hint)

    if IS_BUILDKITE:
        log.error(
            "Tip: to prevent this from blocking your pipeline, add "
            f"`soft_fail: [{{exit_status: {exit_code}}}]` to this step, or use "
            "`--exit-code-on-api-error` to set a custom exit code."
        )

    if include_traceback:
        traceback.print_exc()
85+
4686
def cli():
4787
try:
4888
main_code()
@@ -53,15 +93,27 @@ def cli():
5393
sys.exit(2)
5494
else:
5595
sys.exit(0)
96+
except RequestTimeoutExceeded as error:
97+
config = CliConfig.from_args()
98+
_emit_infrastructure_error(
99+
f"Request timed out: {error}",
100+
hint="This is an infrastructure issue, not a security finding.",
101+
)
102+
sys.exit(config.exit_code_on_api_error)
103+
except APIFailure as error:
104+
config = CliConfig.from_args()
105+
_emit_infrastructure_error(
106+
f"API error: {error}",
107+
hint="This is an infrastructure issue, not a security finding.",
108+
)
109+
sys.exit(config.exit_code_on_api_error)
56110
except Exception as error:
57-
log.error("Unexpected error when running the cli")
58-
log.error(error)
59-
traceback.print_exc()
60-
config = CliConfig.from_args() # Get current config
61-
if not config.disable_blocking:
62-
sys.exit(3)
63-
else:
64-
sys.exit(0)
111+
config = CliConfig.from_args()
112+
_emit_infrastructure_error(
113+
f"Unexpected error when running the CLI: {error}",
114+
include_traceback=True,
115+
)
116+
sys.exit(config.exit_code_on_api_error)
65117

66118

67119
def main_code():

0 commit comments

Comments
 (0)