Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
0cf0d2c
Wire vendor assessment and LLM file parts
aureliensibiril Apr 2, 2026
0448636
Add vendor assessment agent with composable tool framework
aureliensibiril Apr 2, 2026
f2f4bfc
Add structured output types for vetting agents
aureliensibiril Apr 6, 2026
7541ee1
Wire output types into vetting sub-agents
aureliensibiril Apr 6, 2026
78be83f
Update agent prompts for JSON output format
aureliensibiril Apr 6, 2026
90a3b4f
Add JSON schema support to Anthropic provider
aureliensibiril Apr 6, 2026
74f2c1f
Validate JSON output in agent-as-tool results
aureliensibiril Apr 6, 2026
39dd32c
Fix WithTx callback signature in vendor assessment
aureliensibiril Apr 6, 2026
f32e6a0
Fix e2e config for agents key rename
aureliensibiril Apr 6, 2026
d395f48
Fix Anthropic streaming for thinking and tool use
aureliensibiril Apr 6, 2026
e144da1
Fix Anthropic streaming for thinking and tool use
aureliensibiril Apr 6, 2026
b2bd314
Retry on empty structured output response
aureliensibiril Apr 6, 2026
a7894d2
Use typed ErrStreamingRequired for Anthropic fallback
aureliensibiril Apr 7, 2026
62736af
Factor probod LLM client provisioning
aureliensibiril Apr 7, 2026
39d8763
Factor vetting sub-agent constructors
aureliensibiril Apr 7, 2026
3468bed
Tighten vetting assessment validation and config
aureliensibiril Apr 7, 2026
df7edbf
Add assessVendor MCP tool
aureliensibiril Apr 7, 2026
08027ad
Simplify vetting orchestrator entries
aureliensibiril Apr 7, 2026
4d44f96
Detect Anthropic SDK client-side streaming requirement
aureliensibiril Apr 7, 2026
853e538
Fix gofmt: trailing blank line in vendor_service.go
aureliensibiril Apr 7, 2026
1b4908c
Move vetting prompts into prompts/ subdirectory
aureliensibiril Apr 7, 2026
f0fadbe
Remove redundant JSON examples from sub-agent prompts
aureliensibiril Apr 7, 2026
a8dc059
Describe VendorInfo fields with jsonschema tags
aureliensibiril Apr 7, 2026
13a33e6
Reduce vetting extraction prompt to a schema stub
aureliensibiril Apr 7, 2026
84c03d5
Rewrite vetting orchestrator prompts in XML
aureliensibiril Apr 7, 2026
0002d20
Rewrite vetting sub-agent prompts in XML
aureliensibiril Apr 7, 2026
29e3aee
Defer structured output until synthesis turn
aureliensibiril Apr 7, 2026
eb747cf
Hoist agent loop constants to package level
aureliensibiril Apr 7, 2026
6a3d3e8
Enforce VendorInfo enums via schema decoration
aureliensibiril Apr 7, 2026
eb995c8
Tighten vetting output type schema tests
aureliensibiril Apr 7, 2026
606cdb2
Merge progress hooks into a single parametrised struct
aureliensibiril Apr 7, 2026
7ce5f7f
Drop unused agent toolset indirection
aureliensibiril Apr 8, 2026
5d5d0f3
Harden browser SSRF guards against internal hosts
aureliensibiril Apr 8, 2026
57035ae
Fix correctness bugs in browser and diff tools
aureliensibiril Apr 8, 2026
c731532
Document SSL inspector InsecureSkipVerify intent
aureliensibiril Apr 8, 2026
4ff60c0
Rewrite vetting prompt examples as valid JSON
aureliensibiril Apr 8, 2026
027d752
Give vendor info extractor its own timeout budget
aureliensibiril Apr 8, 2026
8073b27
Dedupe SearXNG request via searxngSearch helper
aureliensibiril Apr 17, 2026
1e01082
Dedupe VendorSubprocessor conversion via helpers
aureliensibiril Apr 17, 2026
8798139
Restore EvidenceDescriber worker config
aureliensibiril Apr 17, 2026
e81fb47
Introduce VendorAssessor interface
aureliensibiril Apr 17, 2026
2762587
Add stub vendor assessor for end-to-end tests
aureliensibiril Apr 17, 2026
2678fc3
Add AssessVendor e2e test
aureliensibiril Apr 17, 2026
aef6b8c
Gate vendor assessor stub behind e2e build tag
aureliensibiril Apr 17, 2026
42ee649
Fold TypedTool into the Tool constructor
aureliensibiril Apr 17, 2026
2fd87a5
Expose empty-output retries as an agent option
aureliensibiril Apr 17, 2026
01f86d1
Surface synthesis-turn retries and fix append style
aureliensibiril Apr 17, 2026
6d82d02
Share probod target prerequisites
aureliensibiril Apr 17, 2026
a72c8ec
Parameterize probod target names
aureliensibiril Apr 17, 2026
064c61e
Keep original llm config keys
aureliensibiril Apr 17, 2026
c494256
Add DisabledVendorAssessor for unconfigured deployments
aureliensibiril Apr 17, 2026
5426285
Gate vendor assessor on explicit provider config
aureliensibiril Apr 17, 2026
ff0c7b9
Drop e2e-specific probod binary
aureliensibiril Apr 17, 2026
0dfbac5
Move vendor vetting suite to pkg/vetting
aureliensibiril Apr 17, 2026
dc918e0
Inline changelog generator into DocumentService
aureliensibiril Apr 17, 2026
af94afd
Embed changelog prompt from a separate file
aureliensibiril Apr 17, 2026
44b1371
Delete empty pkg/agents package
aureliensibiril Apr 17, 2026
75fc28f
Sort vetting import after the pkg/vetting rename
aureliensibiril Apr 17, 2026
c366dee
Document vendor-assessor opt-in in sample config
aureliensibiril Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ E2E_COVER_DIR ?= $(CURDIR)/coverage/e2e
DOCKER_IMAGE_NAME= ghcr.io/getprobo/probo
DOCKER_TAG_NAME?= latest

Comment thread
aureliensibiril marked this conversation as resolved.
# Prerequisites of the $(PROBOD_BIN) target; additional prerequisites can be
# supplied separately through PROBOD_BIN_EXTRA_DEPS.
PROBOD_BIN_DEPS= pkg/server/api/connect/v1/schema/schema.go \
pkg/server/api/connect/v1/types/types.go \
pkg/server/api/console/v1/schema/schema.go \
pkg/server/api/console/v1/types/types.go \
pkg/server/api/trust/v1/schema/schema.go \
pkg/server/api/trust/v1/types/types.go \
pkg/server/api/mcp/v1/server/server.go \
pkg/server/api/mcp/v1/types/types.go \
apps/console/dist/index.html \
apps/trust/dist/index.html \
@probo/emails

PROBOD_BIN_EXTRA_DEPS=
PROBOD_BIN= bin/probod
PROBOD_SRC= cmd/probod/main.go
Expand Down Expand Up @@ -127,8 +139,8 @@ test-bench: test ## Run benchmark tests

.PHONY: test-e2e
test-e2e: CGO_ENABLED=1
test-e2e: bin/probod ## Run console e2e tests
PROBO_E2E_BINARY=$(CURDIR)/bin/probod \
test-e2e: $(PROBOD_BIN) ## Run console e2e tests
PROBO_E2E_BINARY=$(CURDIR)/$(PROBOD_BIN) \
PROBO_E2E_CONFIG=$(E2E_CONFIG) \
GOTESTSUM_FORMAT=testname $(GO_TEST) -count=1 ./e2e/console/...

Expand All @@ -152,7 +164,7 @@ coverage-combined: coverage-report test-e2e-coverage ## Generate combined covera
$(GO) tool cover -html=coverage-combined.out -o=coverage-combined.html

.PHONY: build
build: bin/probod bin/prb bin/probod-bootstrap
build: $(PROBOD_BIN) bin/prb bin/probod-bootstrap

.PHONY: sbom-docker
sbom-docker: docker-build
Expand Down Expand Up @@ -191,19 +203,8 @@ scan-license: ## Check dependencies licenses compliance
docker-build:
$(DOCKER_BUILD) --tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG_NAME) --file Dockerfile .

.PHONY: bin/probod
bin/probod: pkg/server/api/connect/v1/schema/schema.go \
pkg/server/api/connect/v1/types/types.go \
pkg/server/api/console/v1/schema/schema.go \
pkg/server/api/console/v1/types/types.go \
pkg/server/api/trust/v1/schema/schema.go \
pkg/server/api/trust/v1/types/types.go \
pkg/server/api/mcp/v1/server/server.go \
pkg/server/api/mcp/v1/types/types.go \
apps/console/dist/index.html \
apps/trust/dist/index.html \
$(PROBOD_BIN_EXTRA_DEPS) \
@probo/emails
.PHONY: $(PROBOD_BIN)
$(PROBOD_BIN): $(PROBOD_BIN_DEPS) $(PROBOD_BIN_EXTRA_DEPS)
$(GO_BUILD) -o $(PROBOD_BIN) $(PROBOD_SRC)

.PHONY: bin/prb
Expand Down
5 changes: 5 additions & 0 deletions cfg/dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ probod:
model-name: gpt-4o
temperature: 0.1
max-tokens: 4096
# vendor-assessor is opt-in: uncomment to enable AI-driven
# vendor assessment. Without this block the feature returns
# UNAVAILABLE to callers.
# vendor-assessor:
# provider: openai

evidence-describer:
interval: 10
Expand Down
112 changes: 112 additions & 0 deletions e2e/console/vendor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,118 @@ func TestVendor_OmittableWebsiteUrl(t *testing.T) {
})
}

// TestVendor_Assess drives the assessVendor mutation end to end, covering
// role-based authorization and tenant isolation while never running the real
// LLM/browser pipeline. Because the e2e config leaves
// `llm.vendor-assessor.provider` unset, a call that passes authorization
// lands on DisabledVendorAssessor and fails with a stable UNAVAILABLE error.
// The happy-path payload shape is left to the unit tests in pkg/probo.
func TestVendor_Assess(t *testing.T) {
	t.Parallel()

	const query = `
mutation AssessVendor($input: AssessVendorInput!) {
assessVendor(input: $input) {
vendor {
id
}
}
}
`

	// assessPayload mirrors the selection set of the mutation above.
	type assessPayload struct {
		AssessVendor struct {
			Vendor struct {
				ID string `json:"id"`
			} `json:"vendor"`
		} `json:"assessVendor"`
	}

	// variables builds the GraphQL variables for one call. Entries of extra
	// (which may be nil) are added to the input object next to the mandatory
	// id and websiteUrl keys.
	variables := func(vendorID any, websiteURL string, extra map[string]any) map[string]any {
		input := map[string]any{
			"id":         vendorID,
			"websiteUrl": websiteURL,
		}
		for key, value := range extra {
			input[key] = value
		}
		return map[string]any{"input": input}
	}

	t.Run("owner call surfaces the disabled error", func(t *testing.T) {
		t.Parallel()

		client := testutil.NewClient(t, testutil.RoleOwner)
		id := factory.NewVendor(client).WithName("Unconfigured assess").Create()

		var payload assessPayload
		err := client.Execute(query, variables(id, "https://vendor.example.com", nil), &payload)
		testutil.RequireErrorCode(t, err, "UNAVAILABLE")
	})

	t.Run("admin call surfaces the disabled error", func(t *testing.T) {
		t.Parallel()

		orgOwner := testutil.NewClient(t, testutil.RoleOwner)
		orgAdmin := testutil.NewClientInOrg(t, testutil.RoleAdmin, orgOwner)
		id := factory.NewVendor(orgOwner).WithName("Admin-assessed vendor").Create()

		var payload assessPayload
		err := orgAdmin.Execute(query, variables(id, "https://admin.example.com", nil), &payload)
		testutil.RequireErrorCode(t, err, "UNAVAILABLE")
	})

	t.Run("viewer cannot assess a vendor", func(t *testing.T) {
		t.Parallel()

		orgOwner := testutil.NewClient(t, testutil.RoleOwner)
		orgViewer := testutil.NewClientInOrg(t, testutil.RoleViewer, orgOwner)
		id := factory.NewVendor(orgOwner).WithName("Viewer attempt").Create()

		var payload assessPayload
		err := orgViewer.Execute(query, variables(id, "https://viewer.example.com", nil), &payload)
		testutil.RequireForbiddenError(t, err)
	})

	t.Run("cannot assess vendor from another organization", func(t *testing.T) {
		t.Parallel()

		vendorOrgOwner := testutil.NewClient(t, testutil.RoleOwner)
		foreignOrgOwner := testutil.NewClient(t, testutil.RoleOwner)
		id := factory.NewVendor(vendorOrgOwner).WithName("Org1 vendor").Create()

		var payload assessPayload
		err := foreignOrgOwner.Execute(query, variables(id, "https://cross-tenant.example.com", nil), &payload)
		// NOTE(review): authorized calls in this suite also fail (with
		// UNAVAILABLE), so a bare error check presumably cannot tell a
		// tenant-isolation rejection from the disabled-assessor error —
		// consider asserting the specific error once its shape is confirmed.
		require.Error(t, err, "vendor assess must not cross tenant boundaries")
	})

	t.Run("procedure is accepted on the input", func(t *testing.T) {
		t.Parallel()

		client := testutil.NewClient(t, testutil.RoleOwner)
		id := factory.NewVendor(client).WithName("Procedure test").Create()

		extra := map[string]any{
			"procedure": "Focus on SOC 2 controls and data residency",
		}

		var payload assessPayload
		err := client.Execute(query, variables(id, "https://procedure.example.com", extra), &payload)
		testutil.RequireErrorCode(t, err, "UNAVAILABLE")
	})
}

func TestVendor_TenantIsolation(t *testing.T) {
t.Parallel()

Expand Down
90 changes: 58 additions & 32 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,48 +23,53 @@ import (
"go.probo.inc/probo/pkg/llm"
)

const DefaultMaxTurns = 10
const (
// DefaultMaxTurns is the turn limit New gives an agent unless an
// option overrides it.
DefaultMaxTurns = 10
// DefaultMaxEmptyOutputRetries is the empty-output retry limit New
// gives an agent unless WithMaxEmptyOutputRetries overrides it.
DefaultMaxEmptyOutputRetries = 2
)

type (
Option func(*Agent)

Agent struct {
name string
handoffDescription string
instructions string
instructionsFunc func(ctx context.Context, a *Agent) string
model string
modelSettings ModelSettings
tools []Tool
handoffs []*Handoff
mcpServers []*MCPServer
maxTurns int
maxToolDepth int
client *llm.Client
logger *log.Logger
hooks []RunHooks
agentHooks AgentHooks
inputGuardrails []InputGuardrail
outputGuardrails []OutputGuardrail
session Session
sessionID string
outputType *OutputType
toolUseBehavior ToolUseBehavior
resetToolChoice bool
responseFormat *llm.ResponseFormat
approval *ApprovalConfig
name string
handoffDescription string
instructions string
instructionsFunc func(ctx context.Context, a *Agent) string
model string
modelSettings ModelSettings
tools []Tool
handoffs []*Handoff
mcpServers []*MCPServer
maxTurns int
maxEmptyOutputRetries int
maxToolDepth int
client *llm.Client
logger *log.Logger
hooks []RunHooks
agentHooks AgentHooks
inputGuardrails []InputGuardrail
outputGuardrails []OutputGuardrail
session Session
sessionID string
outputType *OutputType
toolUseBehavior ToolUseBehavior
resetToolChoice bool
responseFormat *llm.ResponseFormat
approval *ApprovalConfig
}
)

func New(name string, client *llm.Client, opts ...Option) *Agent {
a := &Agent{
name: name,
client: client,
maxTurns: DefaultMaxTurns,
maxToolDepth: DefaultMaxToolDepth,
toolUseBehavior: RunLLMAgain(),
resetToolChoice: true,
logger: log.NewLogger(log.WithOutput(io.Discard)),
name: name,
client: client,
maxTurns: DefaultMaxTurns,
maxEmptyOutputRetries: DefaultMaxEmptyOutputRetries,
maxToolDepth: DefaultMaxToolDepth,
toolUseBehavior: RunLLMAgain(),
resetToolChoice: true,
logger: log.NewLogger(log.WithOutput(io.Discard)),
}

for _, opt := range opts {
Expand Down Expand Up @@ -204,6 +209,18 @@ func WithMaxTurns(n int) Option {
}
}

// WithMaxEmptyOutputRetries caps how many times the core loop asks the
// model again for a structured output after a synthesis turn came back
// as a thinking-only, empty response. Negative values are treated as 0.
func WithMaxEmptyOutputRetries(n int) Option {
	retries := n
	if retries < 0 {
		retries = 0
	}

	return func(a *Agent) {
		a.maxEmptyOutputRetries = retries
	}
}

func WithMaxToolDepth(n int) Option {
return func(a *Agent) {
if n < 1 {
Expand Down Expand Up @@ -255,6 +272,15 @@ func WithParallelToolCalls(enabled bool) Option {
}
}

// WithThinking stores a thinking configuration in the agent's model
// settings, marked as enabled and carrying budgetTokens as its token
// budget. The value is passed through unchanged.
func WithThinking(budgetTokens int) Option {
	return func(a *Agent) {
		thinking := llm.ThinkingConfig{Enabled: true}
		thinking.BudgetTokens = budgetTokens
		a.modelSettings.Thinking = &thinking
	}
}

func WithLogger(l *log.Logger) Option {
return func(a *Agent) {
a.logger = l
Expand Down
Loading
Loading