---
# Pathfinder — agentic knowledge server for AI agents
# Copy this file to pathfinder.yaml and customize for your project.
# Full documentation: https://pathfinder.copilotkit.dev
# Server identity, session limits, and reverse-proxy trust settings.
# Commented-out keys show each option together with its documented default.
server:
  name: my-project-docs
  version: "1.0.0"

  # max_sessions: 1000              # Global max concurrent sessions across all IPs (default: 1000)
  #                                 # When exceeded, returns HTTP 503 with JSON
  #                                 # {error, reason, totalSessions, maxSessions, retryAfterSeconds, contact}
  #                                 # and a Retry-After header.
  # max_sessions_per_ip: 20         # Max concurrent MCP sessions per IP (default: 20)
  #                                 # When exceeded, returns HTTP 429 with JSON
  #                                 # {error, reason, limit, currentCount, retryAfterSeconds, contact}
  #                                 # and a Retry-After header.
  # session_ttl_minutes: 30         # Idle session timeout for active sessions (default: 30)
  #                                 # "Active" = has invoked at least one tool (search, bash, etc.)
  # session_unused_ttl_minutes: 15  # Idle session timeout for unused sessions (default: 15)
  #                                 # "Unused" = connected but never invoked a tool.
  #                                 # Set to match Railway's 15-min SSE hard limit.
  # allowlist:                      # IPs / CIDRs that bypass max_sessions_per_ip. Empty by default.
  #   - "160.79.106.35"             # Example: Anthropic Assistant crawler
  #   - "10.0.0.0/8"                # Example: internal health-probe CIDR
  # NOTE: behind a reverse proxy (Railway, Fly, etc.), allowlist entries only match
  # when trust_proxy: true. Otherwise the server sees the proxy IP for every request.
  #
  # ⚠️ SECURITY WARNING — trust_proxy ⚠️
  # When true, the server honors X-Forwarded-For and populates req.ip from
  # the leftmost entry. This is REQUIRED when the server runs behind a
  # reverse proxy (Railway, Fly, Nginx, etc.) that sets X-Forwarded-For.
  #
  # Only enable this when the proxy discards any client-supplied
  # X-Forwarded-For AND sets its own trusted value. If the proxy passes
  # through client-supplied X-Forwarded-For, attackers can send
  # `X-Forwarded-For: 160.79.106.35` to be seen as an allowlisted IP and
  # BYPASS the rate limiter entirely.
  #
  # When false (the default), X-Forwarded-For is ignored and the server
  # uses the TCP socket's peer address. Leave this false for any server
  # exposed directly to the public internet.
  #
  # Hosted note — Railway, Fly, Render, Heroku, Cloud Run, AWS App
  # Runner, Vercel, and similar PaaS edges ALWAYS sit in front of your
  # container and terminate TLS, so the socket peer the server sees is
  # the platform proxy — never the real client. On these platforms you
  # MUST set trust_proxy (to true, a hop count, or a trusted-proxy CIDR
  # allowlist matching your platform's docs) or every rate-limit bucket
  # and CIDR allowlist entry evaluates against the proxy IP instead of
  # the real caller.
  #
  # Accepted values:
  #   false          — ignore X-Forwarded-For (hardened default)
  #   true           — trust every hop (only safe when the platform
  #                    strips client-supplied X-Forwarded-For)
  #   <integer>      — trust N hops (e.g. 1 for single-proxy Railway)
  #   [<cidr>, ...]  — allowlist of trusted proxy CIDRs
  trust_proxy: false
# Content sources to index. Each list item is one named source; the
# commented-out items below are templates for the other source types.
sources:
  - name: docs
    type: markdown
    repo: https://github.com/your-org/your-repo.git
    path: docs/
    file_patterns: ["**/*.mdx", "**/*.md"]
    chunk:
      target_tokens: 600
      overlap_tokens: 50

  # ── Notion source (requires NOTION_TOKEN) ──
  # - name: wiki
  #   type: notion
  #   root_pages: ["your-page-id"]
  #   databases: ["your-db-id"]
  #   max_depth: 5
  #   include_properties: true
  #   chunk:
  #     target_tokens: 600
  #     overlap_tokens: 50

  # ── Document source (requires pdf-parse and/or mammoth) ──
  # - name: specs
  #   type: document
  #   path: docs/specs/
  #   file_patterns: ["**/*.pdf", "**/*.docx"]
  #   chunk:
  #     target_tokens: 600
  #     overlap_tokens: 50

  # ── Slack source (requires SLACK_BOT_TOKEN + OPENAI_API_KEY) ──
  # - name: community
  #   type: slack
  #   channels: ["C0123456789"]
  #   min_thread_replies: 2
  #   chunk:
  #     target_tokens: 600
# Tools exposed to agents. Each list item is one tool; `source` / `sources`
# refer to entries by the `name` given in the `sources` list.
tools:
  # Semantic search (RAG) — requires embedding config below
  - name: search-docs
    type: search
    description: "Search documentation for relevant information."
    source: docs
    default_limit: 5
    max_limit: 20
    result_format: docs
    # search_mode: vector  # 'vector' (default) | 'keyword' | 'hybrid'

  # Filesystem exploration — no database or API keys needed
  - name: explore-docs
    type: bash
    description: "Explore documentation files using bash commands (find, grep, cat, ls, head)."
    sources: [docs]
    bash:
      session_state: true
      grep_strategy: hybrid  # memory | vector | hybrid
      virtual_files: true

  # ── Knowledge tool (for FAQ/Q&A sources) ──
  # - name: get-faq
  #   type: knowledge
  #   description: "Browse and search community Q&A"
  #   sources: [community]
# Required for search tools (omit for bash-only mode)
# Exactly one `embedding:` mapping may be active at a time; to switch
# provider, comment out this one and uncomment one of the alternatives.
embedding:
  provider: openai
  model: text-embedding-3-small
  dimensions: 1536

# ── Ollama (local, requires ollama running with model pulled) ──
# embedding:
#   provider: ollama
#   model: nomic-embed-text
#   dimensions: 768
#   base_url: http://localhost:11434

# ── Local transformers.js (zero external deps, CPU-only) ──
# embedding:
#   provider: local
#   model: Xenova/all-MiniLM-L6-v2
#   dimensions: 384
# Re-indexing schedule for the configured sources.
# NOTE(review): semantics below are inferred from the key names — confirm
# against the Pathfinder documentation.
indexing:
  auto_reindex: true
  reindex_hour_utc: 3  # presumably hour of day in UTC — confirm
  stale_threshold_hours: 24
# ── Analytics (query logging and dashboard) ──
# analytics:
#   enabled: true
#   log_queries: true
#   retention_days: 90