Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,21 @@ The server dynamically filters the available tools based on the permissions asso
- **Required Permission**: `policy-events.read`
- **Sample Prompt**: "Get the process tree for event ID abc123"

- **`count_runtime_events`**
- **Description**: Count runtime security events matching a filter expression in the last N hours, without paginating event bodies. Returns a histogram across 16 event categories, each bucketed by severity codes "0"-"7".
- **Required Permission**: `policy-events.read`
- **Sample Prompt**: "How many high-severity runtime events fired in cluster 'prod-gke' in the last 24 hours?"

- **`runtime_events_timeseries`**
- **Description**: Bucket runtime security event counts over time, grouped by a categorical field (default `severity`). Use it to find when a burst started or ended without paginating through events; the minimum bucket width is 1 minute.
- **Required Permission**: `policy-events.read`
- **Sample Prompt**: "When did the spike in Suspicious Outbound Connection events on cluster 'prod-gke' start and stop?"

- **`discover_runtime_event_field_values`**
- **Description**: Discover the distinct values of a runtime-events field present in a time window. Returns `suggested` (values active in the window) and `other` (values known to the tenant but inactive). Use BEFORE writing filters to avoid guessing cluster, rule, or image names.
- **Required Permission**: `policy-events.read`
- **Sample Prompt**: "Which clusters produced any runtime events in the last hour?" or "What rule names are firing right now?"

- **`run_sysql`**
- **Description**: Execute a pre-written SysQL query directly (use only when user provides explicit query).
- **Required Permission**: `sage.exec`, `risks.read`
Expand Down
3 changes: 3 additions & 0 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
tools.NewToolListRuntimeEvents(sysdigClient, systemClock),
tools.NewToolGetEventInfo(sysdigClient),
tools.NewToolGetEventProcessTree(sysdigClient),
tools.NewToolCountRuntimeEvents(sysdigClient, systemClock),
tools.NewToolRuntimeEventsTimeseries(sysdigClient, systemClock),
tools.NewToolDiscoverRuntimeEventFieldValues(sysdigClient, systemClock),
tools.NewToolRunSysql(sysdigClient),
tools.NewToolGenerateSysql(sysdigClient),

Expand Down
38 changes: 38 additions & 0 deletions internal/infra/mcp/tools/secure_events_common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package tools

import "strings"

// secureEventsBaseFilter is the filter prefix that composeSecureEventsFilter
// places in front of user-supplied runtime-events filters. It hides classes of
// events that are noisy at investigation time (benchmarks, posture findings,
// scanning activity) so that user-supplied filters target the runtime signal.
const secureEventsBaseFilter = `not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`

// composeSecureEventsFilter merges the user-supplied filter expression with
// the baseline.
//
// Leading and trailing whitespace in userFilter is ignored. An empty or
// whitespace-only userFilter returns the baseline unchanged, so the result
// never ends in a dangling "and" with nothing after it.
//
// NOTE(review): userFilter is appended without grouping. If the backend binds
// "and" tighter than "or", a filter such as `a or b` would let `b` match
// events the baseline is meant to hide — confirm the grammar and whether it
// supports grouping before relying on the baseline as a hard exclusion.
func composeSecureEventsFilter(userFilter string) string {
	userFilter = strings.TrimSpace(userFilter)
	if userFilter == "" {
		return secureEventsBaseFilter
	}
	return secureEventsBaseFilter + " and " + userFilter
}

// secureEventsFilterDSL is the shared filter-expression description intended
// for list_runtime_events, count_runtime_events, runtime_events_timeseries, and
// discover_runtime_event_field_values. Keeping the prose in one place lets the
// LLM apply identical filter intuition across all four tools.
//
// count_runtime_events passes this text to mcp.Description for its
// filter_expr parameter. The text lists the supported operators, the key
// attributes, the machine-learning detection filters, and the mapping from
// severity names (high/medium/low/info) to the string codes "0"-"7".
//
// NOTE(review): discover_runtime_event_field_values currently describes its
// filter_expr parameter with its own shorter inline text rather than this
// constant — confirm which of the two is intended.
const secureEventsFilterDSL = `Logical filter expression to select runtime security events.
Supports operators: =, !=, in, contains, starts with, exists.
Combine with and/or/not.
Key attributes include: severity (codes "0"-"7"), originator, sourceType, ruleName, rawEventCategory, engine, source, category, kubernetes.cluster.name, host.hostName, container.image.repo, container.image.tag, aws.accountId, azure.subscriptionId, gcp.projectId, policyId, trigger.

To find machine learning (ML) detections (e.g. crypto mining, anomalous logins), use engine or source filters:
- All ML events: 'engine = "machineLearning"'
- AWS ML detections: 'source = "agentless-aws-ml"'
- Okta ML detections: 'source = "agentless-okta-ml"'
- By category: 'category = "machine-learning"'

You can specify the severity of the events based on the following cases:
- high-severity: 'severity in ("0","1","2","3")'
- medium: 'severity in ("4","5")'
- low: 'severity in ("6")'
- info: 'severity in ("7")'
`
77 changes: 77 additions & 0 deletions internal/infra/mcp/tools/tool_count_runtime_events.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package tools

import (
"context"
"time"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
)

type ToolCountRuntimeEvents struct {
sysdigClient sysdig.ExtendedClientWithResponsesInterface
clock clock.Clock
}

// NewToolCountRuntimeEvents builds a ToolCountRuntimeEvents that talks to the
// Sysdig API through client and reads the current time from clock.
func NewToolCountRuntimeEvents(client sysdig.ExtendedClientWithResponsesInterface, clock clock.Clock) *ToolCountRuntimeEvents {
	tool := new(ToolCountRuntimeEvents)
	tool.sysdigClient = client
	tool.clock = clock
	return tool
}

// handle serves one count_runtime_events call. It converts the request into
// count parameters, asks the Sysdig client for the counts, and returns the
// decoded 200 body as JSON.
//
// Failures are reported as tool-error results, never as a Go error: a
// transport error from the client, or any HTTP status of 400 and above
// (the status code and raw body are included in the message).
func (h *ToolCountRuntimeEvents) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	countParams := toolRequestToCountParams(request, h.clock)
	resp, callErr := h.sysdigClient.GetSecureEventsCountWithResponse(ctx, countParams)

	switch {
	case callErr != nil:
		return mcp.NewToolResultErrorFromErr("error triggering request", callErr), nil
	case resp.StatusCode() >= 400:
		return mcp.NewToolResultErrorf("error counting events, status code: %d, response: %s", resp.StatusCode(), resp.Body), nil
	}

	return mcp.NewToolResultJSON(resp.JSON200)
}

// toolRequestToCountParams translates a count_runtime_events request into the
// parameters of the Sysdig count call.
//
// The window ends at clock.Now() and starts scope_hours (default 1) earlier;
// both bounds are sent as Unix nanoseconds. The filter_expr argument (default
// empty) is merged with the baseline via composeSecureEventsFilter, so a
// filter is always set.
func toolRequestToCountParams(request mcp.CallToolRequest, clock clock.Clock) *sysdig.GetSecureEventsCountParams {
	lookback := time.Duration(request.GetInt("scope_hours", 1)) * time.Hour
	windowEnd := clock.Now()
	windowStart := windowEnd.Add(-lookback)

	combinedFilter := composeSecureEventsFilter(request.GetString("filter_expr", ""))

	result := new(sysdig.GetSecureEventsCountParams)
	result.From = windowStart.UnixNano()
	result.To = windowEnd.UnixNano()
	result.Filter = &combinedFilter
	return result
}

// RegisterInServer adds the count_runtime_events tool to s with h.handle as
// its handler. The tool takes an optional scope_hours number (default 1) and
// an optional filter_expr string described by secureEventsFilterDSL. It is
// annotated read-only and non-destructive, and requires the
// policy-events.read permission.
func (h *ToolCountRuntimeEvents) RegisterInServer(s *server.MCPServer) {
	const (
		toolName        = "count_runtime_events"
		toolDescription = "Count runtime security events matching a filter expression in the last N hours, without paginating event bodies. Returns a histogram across 16 event categories (policyEvents, scanningEvents, cloudTrailEvents, mlCloudEvents, oktaEvents, githubEvents, gcpEvents, falcoCloudEvents, admissionControllerEvents, profilingDetectionEvents, awsMlConsoleLoginEvents, hostScanningEvents, benchmarkEvents, complianceEvents, cloudsecEvents, statefulDetectionEvents) where each category carries a `countBySeverity` map keyed \"0\" (highest) through \"7\" (info). Use this when the question is \"how many\" rather than \"which ones\" — it is one call regardless of result size."
		scopeHoursHelp  = "Number of hours back from now to count events over. Maximum 336 (14 days) — the backend rejects wider windows. Default 1."
	)

	// Width of the counting window, in hours back from now.
	scopeHoursParam := mcp.WithNumber("scope_hours",
		mcp.Description(scopeHoursHelp),
		mcp.DefaultNumber(1),
	)

	// Optional user filter, documented with the shared DSL description.
	filterExprParam := mcp.WithString("filter_expr",
		mcp.Description(secureEventsFilterDSL),
		Examples(
			`severity in ("0","1","2","3")`,
			`ruleName = "Malware Detection"`,
			`kubernetes.cluster.name = "cluster1" and severity in ("0","1","2","3")`,
			`engine = "machineLearning"`,
			`aws.accountId = "123456789012"`,
		),
	)

	s.AddTool(
		mcp.NewTool(toolName,
			mcp.WithDescription(toolDescription),
			scopeHoursParam,
			filterExprParam,
			mcp.WithOutputSchema[map[string]any](),
			mcp.WithReadOnlyHintAnnotation(true),
			mcp.WithDestructiveHintAnnotation(false),
			WithRequiredPermissions("policy-events.read"),
		),
		h.handle,
	)
}
145 changes: 145 additions & 0 deletions internal/infra/mcp/tools/tool_count_runtime_events_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package tools_test

import (
"context"
"fmt"
"net/http"
"time"

"github.com/mark3labs/mcp-go/client"
"github.com/mark3labs/mcp-go/mcp"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"go.uber.org/mock/gomock"

mocks_clock "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock/mocks"
inframcp "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
)

// Specs for the count_runtime_events tool. Every spec calls the tool through
// an in-process MCP client; the Sysdig client and the clock are gomock mocks,
// so the parameters sent to the backend can be asserted exactly.
var _ = Describe("ToolCountRuntimeEvents", func() {
var (
mockClient *mocks.MockExtendedClientWithResponsesInterface
mockClock *mocks_clock.MockClock
tool *tools.ToolCountRuntimeEvents
ctrl *gomock.Controller
handler *inframcp.Handler
mcpClient *client.Client
)

// Builds a fresh mock set, registers the tool on a new handler, and
// initializes the in-process client before each spec.
BeforeEach(func() {
ctrl = gomock.NewController(GinkgoT())
mockClient = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
// Answer every permissions lookup with policy-events.read, the permission
// the tool declares as required. Presumably the handler consults this
// when deciding which tools to expose — confirm against inframcp.Handler.
mockClient.EXPECT().GetMyPermissionsWithResponse(gomock.Any(), gomock.Any()).Return(&sysdig.GetMyPermissionsResponse{
HTTPResponse: &http.Response{StatusCode: 200},
JSON200: &sysdig.UserPermissions{
Permissions: []string{"policy-events.read"},
},
}, nil).AnyTimes()
mockClock = mocks_clock.NewMockClock(ctrl)
// Pin "now" to 2000-01-01T00:00:00Z so the From/To assertions are exact.
mockClock.EXPECT().Now().AnyTimes().Return(time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC))
tool = tools.NewToolCountRuntimeEvents(mockClient, mockClock)
handler = inframcp.NewHandler("dev", mockClient)
handler.RegisterTools(tool)

var err error
mcpClient, err = handler.ServeInProcessClient()
Expect(err).NotTo(HaveOccurred())

_, err = mcpClient.Initialize(context.Background(), mcp.InitializeRequest{})
Expect(err).NotTo(HaveOccurred())
})

// Verifies the expectations recorded on the mocks.
AfterEach(func() {
ctrl.Finish()
})

// Explicit arguments: a 2-hour window and a user filter. The filter sent to
// the backend must contain both the baseline and the user expression.
It("converts a request into count params with baseline filter prepended", func(ctx SpecContext) {
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).DoAndReturn(
func(_ context.Context, params *sysdig.GetSecureEventsCountParams, _ ...sysdig.RequestEditorFn) (*sysdig.GetSecureEventsCountResponse, error) {
Expect(params.From).To(Equal(int64(946677600000000000))) // 2000-01-01 minus 2h
Expect(params.To).To(Equal(int64(946684800000000000))) // 2000-01-01 00:00:00 UTC
Expect(*params.Filter).To(ContainSubstring(`not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`))
Expect(*params.Filter).To(ContainSubstring(`severity = 4`))

body := map[string]any{
"policyEvents": map[string]any{"countBySeverity": map[string]any{"0": 1.0}},
}
return &sysdig.GetSecureEventsCountResponse{
HTTPResponse: &http.Response{StatusCode: 200},
JSON200: &body,
}, nil
})

result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "count_runtime_events",
Arguments: map[string]any{
"scope_hours": 2,
"filter_expr": "severity = 4",
},
},
})

Expect(err).NotTo(HaveOccurred())
Expect(result.IsError).To(BeFalse())
})

// No arguments: the window defaults to 1 hour and the filter is exactly the
// baseline, with nothing appended.
It("uses defaults (1h window, baseline filter only) when no args provided", func(ctx SpecContext) {
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).DoAndReturn(
func(_ context.Context, params *sysdig.GetSecureEventsCountParams, _ ...sysdig.RequestEditorFn) (*sysdig.GetSecureEventsCountResponse, error) {
Expect(params.From).To(Equal(int64(946681200000000000))) // 2000-01-01 minus 1h
Expect(params.To).To(Equal(int64(946684800000000000)))
Expect(*params.Filter).To(Equal(`not originator in ("benchmarks","compliance","cloudsec","scanning","hostscanning")`))

body := map[string]any{}
return &sysdig.GetSecureEventsCountResponse{
HTTPResponse: &http.Response{StatusCode: 200},
JSON200: &body,
}, nil
})

result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "count_runtime_events",
Arguments: map[string]any{},
},
})

Expect(err).NotTo(HaveOccurred())
Expect(result.IsError).To(BeFalse())
})

// A Go error from the Sysdig client must come back as a tool-level error
// (IsError set), not as an error from CallTool.
It("surfaces a client error as a tool error", func(ctx SpecContext) {
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("client error"))

result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "count_runtime_events",
Arguments: map[string]any{},
},
})

Expect(err).NotTo(HaveOccurred())
Expect(result.IsError).To(BeTrue())
})

// An HTTP 401 from the backend must likewise be reported as a tool-level
// error rather than as a successful result.
It("surfaces a non-2xx HTTP response as a tool error", func(ctx SpecContext) {
mockClient.EXPECT().GetSecureEventsCountWithResponse(gomock.Any(), gomock.Any()).Return(&sysdig.GetSecureEventsCountResponse{
HTTPResponse: &http.Response{StatusCode: 401},
Body: []byte("Unauthorized"),
}, nil)

result, err := mcpClient.CallTool(ctx, mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "count_runtime_events",
Arguments: map[string]any{},
},
})

Expect(err).NotTo(HaveOccurred())
Expect(result.IsError).To(BeTrue())
})
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package tools

import (
"context"
"time"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
)

type ToolDiscoverRuntimeEventFieldValues struct {
sysdigClient sysdig.ExtendedClientWithResponsesInterface
clock clock.Clock
}

// NewToolDiscoverRuntimeEventFieldValues builds a
// ToolDiscoverRuntimeEventFieldValues that talks to the Sysdig API through
// client and reads the current time from clock.
func NewToolDiscoverRuntimeEventFieldValues(client sysdig.ExtendedClientWithResponsesInterface, clock clock.Clock) *ToolDiscoverRuntimeEventFieldValues {
	tool := new(ToolDiscoverRuntimeEventFieldValues)
	tool.sysdigClient = client
	tool.clock = clock
	return tool
}

// handle serves one discover_runtime_event_field_values call.
//
// The field argument is mandatory; an empty value yields a tool-error result
// without contacting the backend. The scanned window ends at the clock's
// current time and starts scope_hours (default 1) earlier, both sent as Unix
// nanoseconds. A non-empty filter_expr is forwarded exactly as given — no
// baseline filter is added here — and an empty one leaves the filter unset.
//
// Transport errors and HTTP statuses of 400 and above are reported as
// tool-error results, never as a Go error. Otherwise the decoded 200 body is
// returned as JSON.
func (h *ToolDiscoverRuntimeEventFieldValues) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	fieldName := request.GetString("field", "")
	if len(fieldName) == 0 {
		return mcp.NewToolResultErrorf("field is required"), nil
	}

	lookback := time.Duration(request.GetInt("scope_hours", 1)) * time.Hour
	windowEnd := h.clock.Now()
	windowStart := windowEnd.Add(-lookback)

	query := sysdig.GetEventFieldValuesParams{
		Field: fieldName,
		From:  windowStart.UnixNano(),
		To:    windowEnd.UnixNano(),
	}
	userFilter := request.GetString("filter_expr", "")
	if userFilter != "" {
		query.Filter = &userFilter
	}

	resp, callErr := h.sysdigClient.GetEventFieldValuesWithResponse(ctx, &query)
	switch {
	case callErr != nil:
		return mcp.NewToolResultErrorFromErr("error triggering request", callErr), nil
	case resp.StatusCode() >= 400:
		return mcp.NewToolResultErrorf("error discovering field values, status code: %d, response: %s", resp.StatusCode(), resp.Body), nil
	}

	return mcp.NewToolResultJSON(resp.JSON200)
}

// RegisterInServer adds the discover_runtime_event_field_values tool to s
// with h.handle as its handler. The tool takes a required field string, an
// optional scope_hours number (default 1), and an optional filter_expr
// string. It is annotated read-only and non-destructive, and requires the
// policy-events.read permission.
func (h *ToolDiscoverRuntimeEventFieldValues) RegisterInServer(s *server.MCPServer) {
	const (
		toolName        = "discover_runtime_event_field_values"
		toolDescription = "Discover the distinct values of a runtime-events field present in a time window. Returns two buckets: `suggested` = values producing events in the window (fire order — what's actually happening); `other` = values known to the tenant but inactive in the window (catalog — what's possible). Use BEFORE writing a filter to learn which cluster / rule / image / namespace names are real, instead of guessing and getting empty results. Common fields to discover: kubernetes.cluster.name, kubernetes.namespace.name, ruleName, container.image.repo, host.hostName, aws.accountId, source, engine."
		fieldHelp       = "Field whose distinct values to enumerate. Examples: kubernetes.cluster.name, ruleName, container.image.repo, host.hostName, severity, source, engine."
		scopeHoursHelp  = "Number of hours back from now to scan. Maximum 336 (14 days). Default 1."
		filterExprHelp  = "Optional filter expression to scope the search before enumerating values. Same DSL as other runtime-event tools. Without a filter, the enumeration spans all categories of events in the window."
	)

	// Name of the field to enumerate; the only required argument.
	fieldParam := mcp.WithString("field",
		mcp.Description(fieldHelp),
		mcp.Required(),
		Examples(
			"kubernetes.cluster.name",
			"ruleName",
			"container.image.repo",
			"host.hostName",
			"severity",
			"source",
			"engine",
			"aws.accountId",
		),
	)

	// Width of the scanned window, in hours back from now.
	scopeHoursParam := mcp.WithNumber("scope_hours",
		mcp.Description(scopeHoursHelp),
		mcp.DefaultNumber(1),
	)

	// Optional filter narrowing the events considered.
	filterExprParam := mcp.WithString("filter_expr",
		mcp.Description(filterExprHelp),
		Examples(
			`kubernetes.cluster.name = "production-gke"`,
			`engine = "machineLearning"`,
			`severity in ("0","1","2","3")`,
		),
	)

	s.AddTool(
		mcp.NewTool(toolName,
			mcp.WithDescription(toolDescription),
			fieldParam,
			scopeHoursParam,
			filterExprParam,
			mcp.WithOutputSchema[map[string]any](),
			mcp.WithReadOnlyHintAnnotation(true),
			mcp.WithDestructiveHintAnnotation(false),
			WithRequiredPermissions("policy-events.read"),
		),
		h.handle,
	)
}
Loading
Loading