From fc51741581098d27b0409e69e0f91d9e769c4cba Mon Sep 17 00:00:00 2001 From: gaurav-gaikwad-12071998 <122888092+gaurav-gaikwad-12071998@users.noreply.github.com> Date: Tue, 14 Apr 2026 18:58:25 +0530 Subject: [PATCH 1/2] added jsonb suuport --- CUSTOM_QUERY_GUIDE_V2.md | 868 ++++++++++++++++++ pkg/database/postgres/repo.go | 218 ++++- pkg/database/postgres/repo_filter_test.go | 177 ++++ .../postgres/repo_jsonb_buildquery_test.go | 74 ++ pkg/database/postgres/repo_jsonb_test.go | 302 ++++++ pkg/models/schema.go | 1 + 6 files changed, 1631 insertions(+), 9 deletions(-) create mode 100644 CUSTOM_QUERY_GUIDE_V2.md create mode 100644 pkg/database/postgres/repo_jsonb_buildquery_test.go create mode 100644 pkg/database/postgres/repo_jsonb_test.go diff --git a/CUSTOM_QUERY_GUIDE_V2.md b/CUSTOM_QUERY_GUIDE_V2.md new file mode 100644 index 0000000..fd5e1fc --- /dev/null +++ b/CUSTOM_QUERY_GUIDE_V2.md @@ -0,0 +1,868 @@ +# Custom Store Query API - xAPI Database Guide + +## Overview + +The Custom Query API allows you to execute complex SQL queries against xAPI Learning Record Store tables. This guide explains the actual table structure, available columns, and how to query them effectively. + +--- + +## Table Structure & Relationships + +### Core Tables + +#### 1. **statements** (table: `xapi_statements`) + +Stores xAPI statements with core activity tracking data. + +**Primary Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `id` | uuid | Primary key | +| `statement_id` | uuid | Unique xAPI statement ID | +| `agent_sha` | varchar(64) | SHA hash of actor/agent | +| `verb_id` | varchar(255) | Verb IRI (e.g., http://adlnet.gov/expapi/verbs/attempted) | +| `object_id` | varchar(255) | ID of object (Activity, Agent, etc.) | +| `object_type` | text | Type of object (Activity, Agent, SubStatement, etc.) 
| +| `registration` | uuid | Activity session registration ID | +| `timestamp` | timestamptz | When activity occurred | +| `stored` | timestamptz | When stored in LRS | +| `voided` | boolean | Whether statement is voided | +| `created_at` | timestamptz | Record creation time | +| `updated_at` | timestamptz | Record update time | + +**JSONB Columns (Queryable):** +- `result` - Score, success, completion, extensions +- `context` - Contextual information, instructor, team +- `object` - Full object definition +- `authority` - Authority/system information +- `attachments` - Attached files + +**Relationships:** +- `agent_sha` → `agents.agent_sha` +- `verb_id` → `verbs.id` +- `object_id` → `activities.activity_id` (when object_type = 'Activity') + +--- + +#### 2. **agents** (table: `agents`) + +Stores actor/agent information (persons and groups). + +**Primary Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `id` | uuid | Primary key | +| `agent_sha` | varchar(64) | Unique SHA hash of agent | +| `name` | jsonb | Agent name | +| `mbox` | varchar(255) | Email address (mailto:user@example.com) | +| `mbox_sha1sum` | varchar(40) | SHA1 of email | +| `openid` | varchar(2048) | OpenID identifier | +| `object_type` | varchar(20) | 'Agent' or 'Group' | +| `created_at` | timestamptz | Creation time | +| `updated_at` | timestamptz | Update time | + +**JSONB Columns:** +- `agent_json` - Full agent object +- `account` - Account with homePage and name + +--- + +#### 3. **activities** (table: `activities`) + +Stores activity/course definitions. 
+ +**Primary Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `id` | uuid | Primary key | +| `activity_id` | varchar(2048) | Unique activity IRI | +| `type` | varchar(512) | Activity type (http://adlnet.gov/expapi/activities/course) | +| `name` | jsonb | Activity name in multiple languages | +| `description` | jsonb | Activity description in multiple languages | +| `more_info` | varchar(2048) | URL for more information | +| `created_at` | timestamptz | Creation time | +| `updated_at` | timestamptz | Update time | + +**JSONB Columns:** +- `definition` - Full activity definition including interaction type and components + +--- + +#### 4. **verbs** (table: `verbs`) + +Predefined xAPI verbs. + +**Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `id` | varchar(255) | Verb IRI (PRIMARY KEY) | +| `display` | jsonb | Display names in multiple languages | +| `created_at` | timestamptz | Creation time | +| `updated_at` | timestamptz | Update time | + +**Example verb IDs:** +- `http://adlnet.gov/expapi/verbs/attempted` +- `http://adlnet.gov/expapi/verbs/completed` +- `http://adlnet.gov/expapi/verbs/passed` +- `http://adlnet.gov/expapi/verbs/experienced` + +--- + +#### 5. **states** (table: `xapi_states`) + +Stores learner state documents. 
+ +**Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `state_id` | varchar(255) | State identifier | +| `activity_id` | varchar(2048) | Activity IRI | +| `agent_sha` | varchar(64) | Agent SHA | +| `registration` | uuid | Activity session registration | +| `content` | bytea | Binary state document content | +| `content_type` | varchar(255) | MIME type (application/json) | +| `etag` | varchar(64) | Cache validation tag | +| `last_modified` | timestamptz | Last modification time | +| `created_at` | timestamptz | Creation time | +| `updated_at` | timestamptz | Update time | + +**Primary Key:** (`state_id`, `activity_id`, `agent_sha`, `registration`) + +--- + +#### 6. **agent_profiles** (table: `agent_profiles`) + +Agent profile documents. + +**Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `id` | uuid | Primary key | +| `agent_sha` | varchar(64) | Agent SHA | +| `profile_id` | varchar(255) | Profile identifier | +| `content_type` | varchar(255) | MIME type | +| `last_modified` | timestamptz | Last modification | +| `created_at` | timestamptz | Creation time | +| `updated_at` | timestamptz | Update time | + +**JSONB Columns:** +- `profile_data` - Profile content + +--- + +#### 7. **activity_profiles** (table: `activity_profiles`) + +Activity profile documents. + +**Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `id` | uuid | Primary key | +| `activity_id` | varchar(2048) | Activity IRI | +| `profile_id` | varchar(255) | Profile identifier | +| `content_type` | varchar(255) | MIME type | +| `last_modified` | timestamptz | Last modification | +| `created_at` | timestamptz | Creation time | +| `updated_at` | timestamptz | Update time | + +**JSONB Columns:** +- `profile_data` - Profile content + +--- + +#### 8. **voided_statements** (table: `voided_statements`) + +Tracks voided statements. 
+ +**Columns:** +| Column | Type | Description | +|--------|------|-------------| +| `statement_id` | uuid | Voided statement ID | +| `tenant_id` | uuid | Tenant ID | +| `voided_by_statement_id` | uuid | ID of voiding statement | +| `voided_at` | timestamp | When voided | + +**Primary Key:** (`statement_id`, `tenant_id`) + +--- + +## Common Query Patterns + +### Example 1: Get statements from specific time period + +```json +{ + "table_name": "statements", + "query": { + "select": ["id", "statement_id", "verb_id", "object_type", "timestamp"], + "range": { + "column": "timestamp", + "from": "2026-04-01T00:00:00Z", + "to": "2026-04-07T23:59:59Z" + }, + "order_by": ["timestamp DESC"], + "limit": 50 + } +} +``` + +**SQL Generated:** +```sql +SELECT id, statement_id, verb_id, object_type, timestamp +FROM tenant_schema.store_uuid_xapi_statements +WHERE timestamp BETWEEN '2026-04-01T00:00:00Z' AND '2026-04-07T23:59:59Z' +ORDER BY timestamp DESC +LIMIT 50 +``` + +--- + +### Example 2: Join statements with agents + +Query statements along with agent email information. + +```json +{ + "table_name": "statements", + "query": { + "select": ["s.statement_id", "s.verb_id", "ag.mbox", "ag.object_type", "s.timestamp"], + "joins": [ + { + "table": "agents", + "type": "INNER", + "on": "s.agent_sha = ag.agent_sha" + } + ], + "filters": [ + { + "column": "s.voided", + "operator": "eq", + "value": false + } + ], + "order_by": ["s.timestamp DESC"], + "limit": 100 + } +} +``` + +**SQL Generated:** +```sql +SELECT s.statement_id, s.verb_id, ag.mbox, ag.object_type, s.timestamp +FROM tenant_schema.store_uuid_xapi_statements s +INNER JOIN tenant_schema.store_uuid_agents ag + ON s.agent_sha = ag.agent_sha +WHERE s.voided = false +ORDER BY s.timestamp DESC +LIMIT 100 +``` + +--- + +### Example 3: Count statements per verb + +Group statements by action to see usage patterns. 
+ +```json +{ + "table_name": "statements", + "query": { + "select": ["verb_id"], + "aggregates": [ + { + "function": "COUNT", + "column": "id", + "alias": "count" + } + ], + "group_by": ["verb_id"], + "order_by": ["count DESC"] + } +} +``` + +**SQL Generated:** +```sql +SELECT verb_id, COUNT(id) as count +FROM tenant_schema.store_uuid_xapi_statements +GROUP BY verb_id +ORDER BY count DESC +``` + +--- + +### Example 4: Complex filter with multiple verbs + +Get attempted OR completed statements that were not voided. + +```json +{ + "table_name": "statements", + "query": { + "select": ["id", "verb_id", "timestamp"], + "complex_filter": { + "logic": "AND", + "filters": [ + { + "column": "voided", + "operator": "eq", + "value": false + } + ], + "groups": [ + { + "logic": "OR", + "filters": [ + {"column": "verb_id", "operator": "like", "value": "%attempted%"}, + {"column": "verb_id", "operator": "like", "value": "%completed%"} + ] + } + ] + }, + "order_by": ["timestamp DESC"] + } +} +``` + +**SQL Generated:** +```sql +SELECT id, verb_id, timestamp +FROM tenant_schema.store_uuid_xapi_statements +WHERE voided = false + AND (verb_id LIKE '%attempted%' OR verb_id LIKE '%completed%') +ORDER BY timestamp DESC +``` + +--- + +### Example 5: JSONB Query - Filter by result success (NEW) + +Query statements where the JSONB result column indicates success. Uses the new `json_path` array format. + +```json +{ + "table_name": "statements", + "query": { + "select": ["id", "statement_id", "agent_sha", "verb_id", "timestamp"], + "filters": [ + { + "column": "result", + "json_path": ["success"], + "operator": "eq", + "value": true + } + ], + "order_by": ["timestamp DESC"], + "limit": 50 + } +} +``` + +**SQL Generated:** +```sql +SELECT id, statement_id, agent_sha, verb_id, timestamp +FROM tenant_schema.store_uuid_xapi_statements +WHERE result ->> 'success' = $1 +ORDER BY timestamp DESC +LIMIT 50 +``` + +**Note:** The `json_path` array specifies the path to the JSON key. 
For nested values use: `"json_path": ["nested", "key", "value"]` + +--- + +### Example 6: Complex JSONB Query - Result score and context + +Query statements with successful completions and instructor-led context. + +```json +{ + "table_name": "statements", + "query": { + "select": ["id", "statement_id", "agent_sha", "verb_id"], + "complex_filter": { + "logic": "AND", + "filters": [ + { + "column": "result", + "json_path": ["success"], + "operator": "eq", + "value": true + }, + { + "column": "result", + "json_path": ["score", "scaled"], + "operator": "gte", + "value": 0.8 + } + ], + "groups": [ + { + "logic": "OR", + "filters": [ + { + "column": "context", + "json_path": ["instructor", "name"], + "operator": "ilike", + "value": "%Smith%" + }, + { + "column": "context", + "json_path": ["team", "name"], + "operator": "is_not_null" + } + ] + } + ] + }, + "order_by": ["timestamp DESC"], + "limit": 100 + } +} +``` + +**SQL Generated:** +```sql +SELECT id, statement_id, agent_sha, verb_id +FROM tenant_schema.store_uuid_xapi_statements +WHERE result ->> 'success' = $1 + AND result -> 'score' ->> 'scaled' >= $2 + AND ( + context -> 'instructor' ->> 'name' ILIKE $3 + OR context -> 'team' ->> 'name' IS NOT NULL + ) +ORDER BY timestamp DESC +LIMIT 100 +``` + +--- + +### Example 7: Multi-table JSONB Query + +Join statements with agents and query JSONB result data along with agent information. 
+ +```json +{ + "table_name": "statements", + "query": { + "select": ["ag.mbox", "s.verb_id", "COUNT(s.id) as attempts"], + "joins": [ + { + "table": "agents", + "type": "INNER", + "on": "s.agent_sha = ag.agent_sha" + } + ], + "filters": [ + { + "column": "result", + "json_path": ["success"], + "operator": "eq", + "value": true + } + ], + "group_by": ["ag.mbox", "s.verb_id"], + "having": [ + { + "column": "attempts", + "operator": "gte", + "value": 2 + } + ], + "order_by": ["attempts DESC"] + } +} +``` + +**SQL Generated:** +```sql +SELECT ag.mbox, s.verb_id, COUNT(s.id) as attempts +FROM tenant_schema.store_uuid_xapi_statements s +INNER JOIN tenant_schema.store_uuid_agents ag + ON s.agent_sha = ag.agent_sha +WHERE s.result ->> 'success' = $1 +GROUP BY ag.mbox, s.verb_id +HAVING COUNT(s.id) >= $2 +ORDER BY attempts DESC +``` + +--- + +### Example 8: JSONB Null Checks + +Find statements with missing context data. + +```json +{ + "table_name": "statements", + "query": { + "select": ["id", "statement_id", "verb_id", "timestamp"], + "filters": [ + { + "column": "context", + "json_path": ["instructor"], + "operator": "is_null" + }, + { + "column": "context", + "json_path": ["team"], + "operator": "is_not_null" + } + ], + "order_by": ["timestamp DESC"], + "limit": 50 + } +} +``` + +**SQL Generated:** +```sql +SELECT id, statement_id, verb_id, timestamp +FROM tenant_schema.store_uuid_xapi_statements +WHERE context ->> 'instructor' IS NULL + AND context ->> 'team' IS NOT NULL +ORDER BY timestamp DESC +LIMIT 50 +``` + +--- + +### Example 9: Original Multi-table analysis + +Join statements, agents, and activities to get a comprehensive view. 
+ +```json +{ + "table_name": "statements", + "query": { + "select": ["ag.mbox", "act.type", "COUNT(s.id) as stmt_count"], + "joins": [ + { + "table": "agents", + "type": "INNER", + "on": "s.agent_sha = ag.agent_sha" + }, + { + "table": "activities", + "type": "LEFT", + "on": "s.object_id = act.activity_id" + } + ], + "range": { + "column": "s.timestamp", + "from": "2026-04-01T00:00:00Z", + "to": "2026-04-07T23:59:59Z" + }, + "group_by": ["ag.mbox", "act.type"], + "having": [ + { + "column": "stmt_count", + "operator": "gte", + "value": 3 + } + ], + "order_by": ["stmt_count DESC"] + } +} +``` + +**SQL Generated:** +```sql +SELECT ag.mbox, act.type, COUNT(s.id) as stmt_count +FROM tenant_schema.store_uuid_xapi_statements s +INNER JOIN tenant_schema.store_uuid_agents ag + ON s.agent_sha = ag.agent_sha +LEFT JOIN tenant_schema.store_uuid_activities act + ON s.object_id = act.activity_id +WHERE s.timestamp BETWEEN '2026-04-01T00:00:00Z' AND '2026-04-07T23:59:59Z' +GROUP BY ag.mbox, act.type +HAVING COUNT(s.id) >= $1 +ORDER BY stmt_count DESC +``` + +--- + +## JSONB Query Reference (NEW) + +### Queryable JSONB Columns + +The statements table contains JSONB columns that can be efficiently queried using json_path arrays: + +| Column | Description | Example Paths | +|--------|-------------|----------------| +| `result` | Learning outcome data | `["success"]`, `["score", "scaled"]`, `["duration"]`, `["completion"]` | +| `context` | Contextual information | `["instructor", "name"]`, `["team", "name"]`, `["language"]`, `["revision"]` | +| `object` | Activity/object definition | `["definition", "type"]`, `["definition", "name"]`, `["id"]` | +| `authority` | Authority/system info | `["name"]`, `["mbox"]`, `["openid"]` | +| `attachments` | Attached documents | `["display"]`, `["fileUrl"]`, `["contentType"]` | + +### JSONB Operator Mapping + +| QueryFilter Operator | JSONB SQL | Usage | +|--------|----------|-------| +| `eq` | `->>` (text compare) | `json_path[-1] = value` | +| 
`neq` | `->>` | Not equal comparison | +| `gt`, `gte`, `lt`, `lte` | `->>` | Text comparisons on the extracted value (no numeric cast) | +| `like`, `ilike` | `->>` | Pattern matching | +| `in`, `not_in` | `->>` | Value in list | +| `is_null` | `->>` | Path is missing or its value is JSON null | +| `is_not_null` | `->>` | Path exists and its value is not JSON null | + +### Best Practices for JSONB Queries + +1. **Use json_path array, not raw operators** + - ❌ Don't: `"column": "result->'success'->>'value'"` + - ✅ Do: `"column": "result", "json_path": ["success", "value"]` + +2. **Specific paths perform better** + - Use full paths when possible for consistency + - Ex: `["score", "scaled"]` instead of just `["score"]` + +3. **NULL handling** + - Use `is_not_null` to check that a path exists with a non-null value + - Paths that don't exist return NULL, so `is_null` matches both missing paths and JSON null values + +4. **Type coercion** + - The final key is extracted as text with `->>`, so values are compared as text + - Range operators (`gt`, `gte`, `lt`, `lte`) order lexicographically on that text (e.g. `'10'` sorts before `'9'`), so use them carefully with numeric JSON values + +5. **Deeply nested paths** + - No practical depth limit, but longer paths = slower queries + - Consider database indexing for frequently queried paths + +--- + +## Query Guidelines + +### Do's ✓ + +1. **Use specific columns** - Instead of `SELECT *`, specify needed columns +2. **Filter early** - Use WHERE before aggregation +3. **Index on common filters** - Query indexed columns first +4. **Paginate large results** - Always use LIMIT/OFFSET +5. **Use aliases in joins** - Makes queries more readable +6. **Match UUIDs carefully** - Ensure type compatibility +7. **Use json_path arrays for JSONB** - Separate column from path components (NEW) +8. **Provide all json_path elements** - Don't split JSONB paths between column and json_path + +### Don'ts ✗ + +1. **Avoid SELECT *** on large tables +2. **Don't nest filters too deeply** - Max 10 levels +3. **Don't use LIKE with leading %** - Performance issue +4. **Avoid querying JSONB directly** - Use indexed columns when possible +5. **Don't forget LIMIT** - Prevents returning massive result sets +6. 
**Don't embed JSONB operators in column name** - Use json_path field instead (NEW) + - ❌ Wrong: `"column": "result->'success'->>'value'"` + - ✅ Correct: `"column": "result", "json_path": ["success", "value"]` +7. **Don't mix JSONB operators** - Framework handles all operator syntax automatically + +--- + +## Performance Tips + +1. **Timestamp queries** - Always use `timestamp` not `stored` when possible +2. **Agent lookups** - Use `agent_sha` instead of `mbox` for faster joins +3. **Activity type filtering** - Index on `type` column helps +4. **Verb filtering** - `verb_id` is indexed +5. **Pagination** - Use offset pagination with consistent ordering +6. **JSONB queries** - Index frequently queried JSONB paths using PostgreSQL GIN indexes +7. **Avoid deep nesting** - Keep JSONB paths to 3-4 levels when possible + +--- + +## Common Use Cases + +### Track specific user activity +```json +{ + "table_name": "statements", + "query": { + "select": ["timestamp", "verb_id", "object_type"], + "filters": [ + { + "column": "agent_sha", + "operator": "eq", + "value": "specific_sha_value" + } + ], + "order_by": ["timestamp DESC"], + "limit": 100 + } +} +``` + +### Successful course completions (JSONB) +```json +{ + "table_name": "statements", + "query": { + "select": ["agent_sha", "verb_id", "timestamp"], + "filters": [ + { + "column": "result", + "json_path": ["success"], + "operator": "eq", + "value": true + }, + { + "column": "result", + "json_path": ["completion"], + "operator": "eq", + "value": true + } + ], + "order_by": ["timestamp DESC"] + } +} +``` + +### High-scoring activities (JSONB) +```json +{ + "table_name": "statements", + "query": { + "select": ["agent_sha", "object_id", "verb_id", "timestamp"], + "filters": [ + { + "column": "result", + "json_path": ["score", "scaled"], + "operator": "gte", + "value": 0.9 + } + ], + "order_by": ["timestamp DESC"], + "limit": 50 + } +} +``` + +### Course completion analysis +```json +{ + "table_name": "statements", + "query": { 
+ "select": ["ag.mbox"], + "joins": [ + { + "table": "agents", + "type": "INNER", + "on": "s.agent_sha = ag.agent_sha" + } + ], + "filters": [ + { + "column": "result", + "json_path": ["completion"], + "operator": "eq", + "value": true + } + ], + "aggregates": [ + { + "function": "COUNT", + "column": "id", + "alias": "completions" + } + ], + "group_by": ["ag.mbox"], + "order_by": ["completions DESC"] + } +} +``` + +### Statements with context instructor (JSONB) +```json +{ + "table_name": "statements", + "query": { + "select": ["id", "agent_sha", "verb_id", "timestamp"], + "filters": [ + { + "column": "context", + "json_path": ["instructor", "name"], + "operator": "is_not_null" + } + ], + "order_by": ["timestamp DESC"], + "limit": 100 + } +} +``` + +### Daily submission volume +```json +{ + "table_name": "statements", + "query": { + "select": [], + "aggregates": [ + { + "function": "COUNT", + "column": "id", + "alias": "count" + } + ], + "range": { + "column": "timestamp", + "from": "2026-04-01T00:00:00Z", + "to": "2026-04-07T23:59:59Z" + }, + "group_by": ["DATE(timestamp)"], + "order_by": ["COUNT(id) DESC"] + } +} +``` + +### Agent demographics +```json +{ + "table_name": "agents", + "query": { + "select": ["object_type"], + "aggregates": [ + { + "function": "COUNT", + "column": "id", + "alias": "count" + } + ], + "group_by": ["object_type"], + "order_by": ["count DESC"] + } +} +``` + +### Activity usage +```json +{ + "table_name": "statements", + "query": { + "select": ["act.type"], + "joins": [ + { + "table": "activities", + "type": "LEFT", + "on": "s.object_id = act.activity_id" + } + ], + "filters": [ + { + "column": "s.object_type", + "operator": "eq", + "value": "Activity" + } + ], + "aggregates": [ + { + "function": "COUNT", + "column": "s.id", + "alias": "count" + } + ], + "group_by": ["act.type"], + "order_by": ["count DESC"] + } +} +``` + diff --git a/pkg/database/postgres/repo.go b/pkg/database/postgres/repo.go index 8629200..0d7fda5 100644 --- 
a/pkg/database/postgres/repo.go +++ b/pkg/database/postgres/repo.go @@ -145,6 +145,74 @@ func ValidateColumnName(name string) error { return nil } +// ValidateJSONBPath validates JSONB path expressions +// Supports expressions like: column->'key'->>'value', column->0->>'nested', etc. +func ValidateJSONBPath(path string) error { + path = strings.TrimSpace(path) + + if len(path) == 0 { + return fmt.Errorf("JSONB path cannot be empty") + } + + if len(path) > 512 { // Allow longer paths for JSONB + return fmt.Errorf("JSONB path exceeds maximum length (512 chars): %d", len(path)) + } + + // Check for SQL injection patterns (allow single quotes for JSONB keys, but block double quotes and dangerous SQL) + dangerousPatterns := []string{";", "--", "/*", "*/", "\""} + for _, pattern := range dangerousPatterns { + if strings.Contains(path, pattern) { + return fmt.Errorf("JSONB path contains potentially dangerous characters: %s", path) + } + } + + // Validate structure: must start with valid column name, followed by JSONB operators + // Find the base column name (everything before the first ->) + operatorIndex := strings.Index(path, "->") + if operatorIndex == -1 { + return fmt.Errorf("JSONB path must contain -> or ->> operator: %s", path) + } + + baseName := path[:operatorIndex] + if !validColumnRegex.MatchString(baseName) { + return fmt.Errorf("invalid base column in JSONB path: %s", baseName) + } + + // Rest of the path after base column should contain balanced single quotes and valid JSONB operators + // Check for balanced quotes + singleQuoteCount := strings.Count(path, "'") + if singleQuoteCount%2 != 0 { + return fmt.Errorf("unbalanced quotes in JSONB path: %s", path) + } + + return nil +} + +// IsJSONBPath checks if a column reference uses JSONB operators +func IsJSONBPath(name string) bool { + return strings.Contains(name, "->") || strings.Contains(name, "->>") +} + +// ValidateAndFormatColumn validates a column name or JSONB path and formats it for SQL +// For simple 
columns, it quotes the identifier; for JSONB paths, it returns the path as-is (already safe) +func ValidateAndFormatColumn(name string) (string, error) { + name = strings.TrimSpace(name) + + // Check if it's a JSONB path expression + if IsJSONBPath(name) { + if err := ValidateJSONBPath(name); err != nil { + return "", err + } + return name, nil // Return JSONB path unquoted + } + + // Otherwise validate as regular column name + if err := ValidateColumnName(name); err != nil { + return "", err + } + return pq.QuoteIdentifier(name), nil // Quote regular column names +} + // SplitQualifiedName splits a qualified table name on dots while respecting quoted identifiers func SplitQualifiedName(qualifiedName string) ([]string, error) { parts := make([]string, 0, 2) @@ -234,6 +302,12 @@ var allowedOperators = map[string]bool{ "lt": true, "<": true, "lte": true, "<=": true, "like": true, "ilike": true, "in": true, "not_in": true, "is_null": true, "is_not_null": true, "any": true, + // JSONB operators + "jsonb_contains": true, // @> + "jsonb_contained": true, // <@ + "jsonb_has_key": true, // ? + "jsonb_has_any_key": true, // ?| + "jsonb_has_all_keys": true, // ?& } // ValidateOperator ensures operator is in the whitelist and safe to use @@ -721,7 +795,11 @@ func (postgresDbService *PostgresDbService) ToInterfaceSlice(v interface{}) ([]i // BuildSimpleCondition builds conditions for simple operators (=, !=, <, >, etc.) 
func (postgresDbService *PostgresDbService) BuildSimpleCondition(filter models.QueryFilter, operator string, argCounter int) (string, []interface{}, int) { - condition := fmt.Sprintf("%s %s $%d", pq.QuoteIdentifier(filter.Column), operator, argCounter) + formattedColumn, err := ValidateAndFormatColumn(filter.Column) + if err != nil { + return "", nil, argCounter + } + condition := fmt.Sprintf("%s %s $%d", formattedColumn, operator, argCounter) args := []interface{}{filter.Value} return condition, args, argCounter + 1 } @@ -733,6 +811,11 @@ func (postgresDbService *PostgresDbService) BuildInCondition(filter models.Query return "", nil, argCounter } + formattedColumn, err := ValidateAndFormatColumn(filter.Column) + if err != nil { + return "", nil, argCounter + } + placeholders := make([]string, len(values)) args := make([]interface{}, 0, len(values)) for i, val := range values { @@ -745,36 +828,151 @@ func (postgresDbService *PostgresDbService) BuildInCondition(filter models.Query if useNot { operator = "NOT IN" } - condition := fmt.Sprintf("%s %s (%s)", pq.QuoteIdentifier(filter.Column), operator, strings.Join(placeholders, ", ")) + condition := fmt.Sprintf("%s %s (%s)", formattedColumn, operator, strings.Join(placeholders, ", ")) return condition, args, argCounter } // BuildNullCondition builds conditions for IS NULL/IS NOT NULL operators func (postgresDbService *PostgresDbService) BuildNullCondition(filter models.QueryFilter, useNot bool, argCounter int) (string, []interface{}, int) { + formattedColumn, err := ValidateAndFormatColumn(filter.Column) + if err != nil { + return "", nil, argCounter + } operator := "IS NULL" if useNot { operator = "IS NOT NULL" } - condition := fmt.Sprintf("%s %s", pq.QuoteIdentifier(filter.Column), operator) + condition := fmt.Sprintf("%s %s", formattedColumn, operator) return condition, nil, argCounter } // BuildAnyCondition builds conditions for ANY operator func (postgresDbService *PostgresDbService) BuildAnyCondition(filter 
models.QueryFilter, argCounter int) (string, []interface{}, int) { - condition := fmt.Sprintf("$%d = ANY(%s)", argCounter, pq.QuoteIdentifier(filter.Column)) + formattedColumn, err := ValidateAndFormatColumn(filter.Column) + if err != nil { + return "", nil, argCounter + } + condition := fmt.Sprintf("$%d = ANY(%s)", argCounter, formattedColumn) args := []interface{}{filter.Value} return condition, args, argCounter + 1 } +// BuildJSONBCondition builds conditions for JSONB path queries +// Example: column=["raw_statement"], json_path=["result", "success"], operator="eq", value="true" +// Produces: raw_statement->'result'->>'success' = $1 +func (postgresDbService *PostgresDbService) BuildJSONBCondition(filter models.QueryFilter, argCounter int) (string, []interface{}, int) { + // Validate column name + if err := ValidateColumnName(filter.Column); err != nil { + return "", nil, argCounter + } + + if len(filter.JSONPath) == 0 { + return "", nil, argCounter + } + + // Build JSONB path expression: column->'key1'->'key2'->>'final_key' + quotedCol := pq.QuoteIdentifier(filter.Column) + pathExpr := quotedCol + + // Navigate through the path, using ->> for the last key to extract text + for i, key := range filter.JSONPath { + quotedKey := fmt.Sprintf("'%s'", strings.ReplaceAll(key, "'", "''")) // SQL escape single quotes + if i == len(filter.JSONPath)-1 { + // Last key - use ->> to extract as text for comparison + pathExpr += fmt.Sprintf(" ->> %s", quotedKey) + } else { + // Intermediate keys - use -> to navigate as JSONB + pathExpr += fmt.Sprintf(" -> %s", quotedKey) + } + } + + // Now build the condition using the path expression + operator := strings.ToLower(filter.Operator) + var condition string + var args []interface{} + + switch operator { + case "eq", "=": + condition = fmt.Sprintf("%s = $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "neq", "!=", "<>": + condition = fmt.Sprintf("%s != $%d", pathExpr, argCounter) + args = 
[]interface{}{filter.Value} + argCounter++ + case "gt", ">": + condition = fmt.Sprintf("%s > $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "gte", ">=": + condition = fmt.Sprintf("%s >= $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "lt", "<": + condition = fmt.Sprintf("%s < $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "lte", "<=": + condition = fmt.Sprintf("%s <= $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "like": + condition = fmt.Sprintf("%s LIKE $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "ilike": + condition = fmt.Sprintf("%s ILIKE $%d", pathExpr, argCounter) + args = []interface{}{filter.Value} + argCounter++ + case "in": + values, ok := postgresDbService.ToInterfaceSlice(filter.Value) + if !ok || len(values) == 0 { + return "", nil, argCounter + } + placeholders := make([]string, len(values)) + for i, val := range values { + placeholders[i] = fmt.Sprintf("$%d", argCounter) + args = append(args, val) + argCounter++ + } + condition = fmt.Sprintf("%s IN (%s)", pathExpr, strings.Join(placeholders, ", ")) + case "not_in": + values, ok := postgresDbService.ToInterfaceSlice(filter.Value) + if !ok || len(values) == 0 { + return "", nil, argCounter + } + placeholders := make([]string, len(values)) + for i, val := range values { + placeholders[i] = fmt.Sprintf("$%d", argCounter) + args = append(args, val) + argCounter++ + } + condition = fmt.Sprintf("%s NOT IN (%s)", pathExpr, strings.Join(placeholders, ", ")) + case "is_null": + condition = fmt.Sprintf("%s IS NULL", pathExpr) + case "is_not_null": + condition = fmt.Sprintf("%s IS NOT NULL", pathExpr) + default: + // Unknown operator + return "", nil, argCounter + } + + return condition, args, argCounter +} + func (postgresDbService *PostgresDbService) BuildFilterCondition(filter models.QueryFilter, argCounter 
int) (string, []interface{}, int) { // VALIDATE OPERATOR FIRST - before any SQL string building if err := ValidateOperator(filter.Operator); err != nil { - // Return empty condition on invalid operator - caller should handle this - // or we could return error as fourth return value (future improvement) + // Return empty condition on invalid operator return "", nil, argCounter } - // VALIDATE COLUMN NAME - ensure column is safe from SQL injection + // Check if this is a JSONB path query + if len(filter.JSONPath) > 0 { + return postgresDbService.BuildJSONBCondition(filter, argCounter) + } + + // Regular column validation and processing if err := ValidateColumnName(filter.Column); err != nil { // Return empty condition on invalid column return "", nil, argCounter @@ -2740,10 +2938,12 @@ func (r *PostgresDbService) RemoveManyToManyRelations(relationship *models.Relat // - int: Updated argCounter after consuming parameters // // Example output for one-to-many: -// SELECT orders.* FROM orders WHERE orders.user_id = $1 +// +// SELECT orders.* FROM orders WHERE orders.user_id = $1 // // Example output for many-to-many: -// SELECT t.* FROM products t INNER JOIN order_items j ON t.id = j.product_id WHERE j.order_id = $1 +// +// SELECT t.* FROM products t INNER JOIN order_items j ON t.id = j.product_id WHERE j.order_id = $1 func (r *PostgresDbService) buildRelationshipBaseQuery(relationship *models.RelationshipDefinition, params models.QueryParams, argCounter int) (string, int) { var query strings.Builder diff --git a/pkg/database/postgres/repo_filter_test.go b/pkg/database/postgres/repo_filter_test.go index 494d23e..d83df30 100644 --- a/pkg/database/postgres/repo_filter_test.go +++ b/pkg/database/postgres/repo_filter_test.go @@ -6,6 +6,7 @@ package postgres_test import ( + "strings" "testing" "github.com/aptlogica/go-postgres-rest/pkg/database/postgres" @@ -170,3 +171,179 @@ func TestBuildSelectColumnParts(t *testing.T) { t.Fatalf("expected [\"*\"], got %v", parts) } } + +// 
JSONB Path Support Tests +func TestValidateJSONBPath(t *testing.T) { + testCases := []struct { + name string + path string + shouldErr bool + }{ + // Valid JSONB paths + {name: "simple JSONB arrow", path: "data->'key'", shouldErr: false}, + {name: "JSONB double arrow", path: "data->>'value'", shouldErr: false}, + {name: "nested JSONB path", path: "raw_statement->'result'->>'success'", shouldErr: false}, + {name: "JSONB with array index", path: "data->[0]", shouldErr: false}, + {name: "complex JSONB path", path: "raw_statement->'verb'->>'id'", shouldErr: false}, + + // Invalid JSONB paths + {name: "empty path", path: "", shouldErr: true}, + {name: "path with semicolon", path: "data->'key';DROP TABLE x", shouldErr: true}, + {name: "path with SQL comment", path: "data->'key'--comment", shouldErr: true}, + {name: "path with double quotes", path: "data->\"key\"", shouldErr: true}, + {name: "path with unbalanced quotes", path: "data->'key", shouldErr: true}, + {name: "path too long", path: "data->'" + strings.Repeat("a", 513) + "'", shouldErr: true}, + {name: "no JSONB operator", path: "data", shouldErr: true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := postgres.ValidateJSONBPath(tc.path) + if (err != nil) != tc.shouldErr { + t.Fatalf("ValidateJSONBPath(%q): got error=%v, want error=%v.
Error: %v", tc.path, err != nil, tc.shouldErr, err) + } + }) + } +} + +func TestIsJSONBPath(t *testing.T) { + testCases := []struct { + name string + column string + isJSONB bool + }{ + {name: "regular column", column: "age", isJSONB: false}, + {name: "quoted column", column: "\"complex-name\"", isJSONB: false}, + {name: "JSONB with arrow", column: "data->'key'", isJSONB: true}, + {name: "JSONB with double arrow", column: "data->>'value'", isJSONB: true}, + {name: "nested JSONB", column: "data->'a'->>'b'", isJSONB: true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := postgres.IsJSONBPath(tc.column) + if result != tc.isJSONB { + t.Fatalf("IsJSONBPath(%q): got %v, want %v", tc.column, result, tc.isJSONB) + } + }) + } +} + +func TestValidateAndFormatColumn(t *testing.T) { + testCases := []struct { + name string + column string + shouldErr bool + expectedOut string + }{ + // Regular columns (should be quoted) + {name: "simple column", column: "age", shouldErr: false, expectedOut: "\"age\""}, + {name: "column with underscore", column: "user_id", shouldErr: false, expectedOut: "\"user_id\""}, + + // JSONB paths (should NOT be quoted) + {name: "JSONB arrow", column: "data->'key'", shouldErr: false, expectedOut: "data->'key'"}, + {name: "JSONB double arrow", column: "data->>'value'", shouldErr: false, expectedOut: "data->>'value'"}, + {name: "nested JSONB", column: "raw_statement->'result'->>'success'", shouldErr: false, expectedOut: "raw_statement->'result'->>'success'"}, + {name: "JSONB verb path", column: "raw_statement->'verb'->>'id'", shouldErr: false, expectedOut: "raw_statement->'verb'->>'id'"}, + + // Invalid columns + {name: "column with dash (not JSONB)", column: "bad-col", shouldErr: true, expectedOut: ""}, + {name: "invalid JSONB", column: "data->'key';DROP", shouldErr: true, expectedOut: ""}, + {name: "empty column", column: "", shouldErr: true, expectedOut: ""}, + } + + for _, tc := range testCases { + t.Run(tc.name, 
func(t *testing.T) { + result, err := postgres.ValidateAndFormatColumn(tc.column) + if (err != nil) != tc.shouldErr { + t.Fatalf("ValidateAndFormatColumn(%q): got error=%v, want error=%v", tc.column, err != nil, tc.shouldErr) + } + if !tc.shouldErr && result != tc.expectedOut { + t.Fatalf("ValidateAndFormatColumn(%q): got %q, want %q", tc.column, result, tc.expectedOut) + } + }) + } +} + +func TestBuildSimpleConditionWithJSONB(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Regular column + cond, args, next := svc.BuildSimpleCondition(models.QueryFilter{Column: "age", Operator: "eq", Value: 25}, "=", 1) + expectedCond := "\"age\" = $1" + if cond != expectedCond || len(args) != 1 || args[0] != 25 || next != 2 { + t.Fatalf("regular column: expected %s, got %s", expectedCond, cond) + } + + // JSONB path + cond, args, next = svc.BuildSimpleCondition(models.QueryFilter{Column: "raw_statement->'result'->>'success'", Operator: "eq", Value: "true"}, "=", 1) + expectedCond = "raw_statement->'result'->>'success' = $1" + if cond != expectedCond || len(args) != 1 || args[0] != "true" || next != 2 { + t.Fatalf("JSONB path: expected %s, got %s", expectedCond, cond) + } + + // JSONB path with greater than operator + cond, args, next = svc.BuildSimpleCondition(models.QueryFilter{Column: "data->'count'->>'value'", Operator: "gt", Value: 100}, ">", 5) + expectedCond = "data->'count'->>'value' > $5" + if cond != expectedCond || len(args) != 1 || args[0] != 100 || next != 6 { + t.Fatalf("JSONB greater than: expected %s, got %s", expectedCond, cond) + } +} + +func TestBuildComplexFilterWithJSONB(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Test complex filter with JSONB paths using json_path arrays + filter := models.ComplexFilter{ + Logic: "AND", + Filters: []models.QueryFilter{ + {Column: "timestamp", Operator: "gte", Value: "2026-04-01T00:00:00Z"}, + {Column: "raw_statement", JSONPath: []string{"result", "success"}, Operator: "eq", Value: "true"}, + }, 
+ Groups: []models.ComplexFilter{ + { + Logic: "OR", + Filters: []models.QueryFilter{ + {Column: "raw_statement", JSONPath: []string{"verb", "id"}, Operator: "eq", Value: "http://adlnet.gov/expapi/verbs/completed"}, + {Column: "raw_statement", JSONPath: []string{"verb", "id"}, Operator: "eq", Value: "http://adlnet.gov/expapi/verbs/passed"}, + }, + }, + }, + } + + cond, args, next := svc.BuildComplexFilter(filter, 1) + + if cond == "" || len(args) == 0 || next <= 1 { + t.Fatalf("BuildComplexFilter with JSONB failed: cond=%q, args=%v, next=%d", cond, args, next) + } + + // Verify the JSONB paths are in the condition + if !strings.Contains(cond, "raw_statement") { + t.Fatalf("BuildComplexFilter output missing raw_statement: %s", cond) + } + + // Verify regular columns are quoted + if !strings.Contains(cond, "\"timestamp\"") { + t.Fatalf("BuildComplexFilter output should have quoted timestamp: %s", cond) + } + + // Verify AND logic is present + if !strings.Contains(cond, "AND") { + t.Fatalf("BuildComplexFilter output missing AND logic: %s", cond) + } + + // Verify OR logic is present + if !strings.Contains(cond, "OR") { + t.Fatalf("BuildComplexFilter output missing OR logic: %s", cond) + } + + // Verify every filter value was bound (1 timestamp + 3 JSONB values) + if len(args) != 4 { + t.Fatalf("expected 4 arguments, got %d", len(args)) + } +} + +// Helper function for string contains check +func contains(s, substr string) bool { + return strings.Contains(s, substr) +} diff --git a/pkg/database/postgres/repo_jsonb_buildquery_test.go b/pkg/database/postgres/repo_jsonb_buildquery_test.go new file mode 100644 index 0000000..656795f --- /dev/null +++ b/pkg/database/postgres/repo_jsonb_buildquery_test.go @@ -0,0 +1,74 @@ +// Copyright (c) 2026 Aptlogica Technologies Private Limited +// SPDX-License-Identifier: MIT +// Websites: https://www.aptlogica.com | https://www.serenibase.com +// Support: support@aptlogica.com | support@serenibase.com + +package postgres_test + +import ( + "strings" + "testing" +
+ postgres "github.com/aptlogica/go-postgres-rest/pkg/database/postgres" + "github.com/aptlogica/go-postgres-rest/pkg/models" +) + +// Test that BuildAdvancedQuery correctly incorporates JSONB paths from complex filters +func TestBuildAdvancedQueryWithJSONB(t *testing.T) { + svc := &postgres.PostgresDbService{} + + limit := 50 + params := models.QueryParams{ + Select: []string{"id", "raw_statement", "timestamp", "stored"}, + Complex: &models.ComplexFilter{ + Logic: "AND", + Filters: []models.QueryFilter{ + {Column: "timestamp", Operator: "gte", Value: "2026-04-01T00:00:00Z"}, + {Column: "raw_statement", JSONPath: []string{"result", "success"}, Operator: "eq", Value: "true"}, + }, + Groups: []models.ComplexFilter{ + { + Logic: "OR", + Filters: []models.QueryFilter{ + {Column: "raw_statement", JSONPath: []string{"verb", "id"}, Operator: "eq", Value: "http://adlnet.gov/expapi/verbs/completed"}, + {Column: "raw_statement", JSONPath: []string{"verb", "id"}, Operator: "eq", Value: "http://adlnet.gov/expapi/verbs/passed"}, + }, + }, + }, + }, + OrderBy: []string{"timestamp desc"}, + Limit: &limit, + } + + query, args := svc.BuildAdvancedQuery("statements", params) + + if !strings.Contains(query, "FROM statements") { + t.Fatalf("expected FROM clause, got query: %s", query) + } + + // Verify JSONB paths are properly built in the query + if !strings.Contains(query, "raw_statement") { + t.Fatalf("expected raw_statement in WHERE, got query: %s", query) + } + + if !strings.Contains(query, "result") || !strings.Contains(query, "success") { + t.Fatalf("expected JSONB path components in query, got: %s", query) + } + + if !strings.Contains(query, "verb") || !strings.Contains(query, "id") { + t.Fatalf("expected JSONB verb/id path in query, got: %s", query) + } + + if !strings.Contains(strings.ToUpper(query), "ORDER BY") { + t.Fatalf("expected ORDER BY in query: %s", query) + } + + if !strings.Contains(query, "LIMIT") { + t.Fatalf("expected LIMIT in query: %s", query) + } + + // 
Limit should be the last argument appended + if len(args) == 0 || args[len(args)-1] != 50 { + t.Fatalf("expected last arg to be limit 50, got args: %v", args) + } +} diff --git a/pkg/database/postgres/repo_jsonb_test.go b/pkg/database/postgres/repo_jsonb_test.go new file mode 100644 index 0000000..cad09d8 --- /dev/null +++ b/pkg/database/postgres/repo_jsonb_test.go @@ -0,0 +1,302 @@ +// Copyright (c) 2026 Aptlogica Technologies Private Limited +// SPDX-License-Identifier: MIT +// Websites: https://www.aptlogica.com | https://www.serenibase.com +// Support: support@aptlogica.com | support@serenibase.com + +package postgres_test + +import ( + "strings" + "testing" + + "github.com/aptlogica/go-postgres-rest/pkg/database/postgres" + "github.com/aptlogica/go-postgres-rest/pkg/models" +) + +// TestBuildJSONBConditionBasic tests basic JSONB path queries +func TestBuildJSONBConditionBasic(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Test: column='raw_statement', json_path=['result', 'success'], operator='eq', value='true' + cond, args, next := svc.BuildJSONBCondition(models.QueryFilter{ + Column: "raw_statement", + JSONPath: []string{"result", "success"}, + Operator: "eq", + Value: "true", + }, 1) + + // Should produce: "raw_statement" -> 'result' ->> 'success' = $1 + if !strings.Contains(cond, "raw_statement") || !strings.Contains(cond, "'result'") || !strings.Contains(cond, "'success'") { + t.Fatalf("unexpected condition: %s", cond) + } + if !strings.Contains(cond, "=") || len(args) != 1 || args[0] != "true" || next != 2 { + t.Fatalf("expected condition with = operator, args=['true'], next=2, got: cond=%s args=%v next=%d", cond, args, next) + } +} + +// TestBuildJSONBConditionDeepPath tests deeply nested JSONB paths +func TestBuildJSONBConditionDeepPath(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Test deeper nesting: column='data', json_path=['user', 'profile', 'email'] + cond, args, _ := svc.BuildJSONBCondition(models.QueryFilter{ + 
Column: "data", + JSONPath: []string{"user", "profile", "email"}, + Operator: "eq", + Value: "user@example.com", + }, 1) + + // Should have multiple -> operators and final ->> + if strings.Count(cond, "->") < 2 { + t.Fatalf("expected multiple -> operators for nested path, got: %s", cond) + } + if len(args) != 1 || args[0] != "user@example.com" { + t.Fatalf("unexpected args: %v", args) + } +} + +// TestBuildJSONBConditionOperators tests different operators with JSONB paths +func TestBuildJSONBConditionOperators(t *testing.T) { + svc := &postgres.PostgresDbService{} + + tests := []struct { + name string + operator string + value interface{} + expectedOp string + expectedArgs int + }{ + {"eq", "eq", "value", "=", 1}, + {"neq", "!=", "value", "!=", 1}, + {"gt", ">", 10, ">", 1}, + {"gte", ">=", 10, ">=", 1}, + {"lt", "<", 10, "<", 1}, + {"lte", "<=", 10, "<=", 1}, + {"like", "like", "%pattern%", "LIKE", 1}, + {"ilike", "ilike", "%Pattern%", "ILIKE", 1}, + {"in", "in", []string{"a", "b", "c"}, "IN", 3}, + {"not_in", "not_in", []int{1, 2, 3}, "NOT IN", 3}, + {"is_null", "is_null", nil, "IS NULL", 0}, + {"is_not_null", "is_not_null", nil, "IS NOT NULL", 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cond, args, _ := svc.BuildJSONBCondition(models.QueryFilter{ + Column: "payload", + JSONPath: []string{"status", "code"}, + Operator: tt.operator, + Value: tt.value, + }, 1) + + if !strings.Contains(cond, tt.expectedOp) { + t.Fatalf("expected operator %s, got: %s", tt.expectedOp, cond) + } + if len(args) != tt.expectedArgs { + t.Fatalf("expected %d args, got %d: %v", tt.expectedArgs, len(args), args) + } + }) + } +} + +// TestBuildJSONBConditionSQLEscaping tests SQL injection prevention +func TestBuildJSONBConditionSQLEscaping(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Test that single quotes in JSON keys are properly escaped + cond, _, _ := svc.BuildJSONBCondition(models.QueryFilter{ + Column: "metadata", + JSONPath: 
[]string{"user's", "name"}, + Operator: "eq", + Value: "test", + }, 1) + + // Single quotes in the JSON key should be escaped (doubled) + if !strings.Contains(cond, "''") { + t.Fatalf("expected single quote escaping in JSON keys, got: %s", cond) + } +} + +// TestBuildComplexFilterWithJSONBFilters tests JSONB path queries in complex filters +func TestBuildComplexFilterWithJSONBFilters(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Test combining JSONB filters with regular filters in a complex filter + complex := models.ComplexFilter{ + Logic: "AND", + Filters: []models.QueryFilter{ + { + Column: "raw_statement", + JSONPath: []string{"result", "success"}, + Operator: "eq", + Value: "true", + }, + { + Column: "raw_statement", + JSONPath: []string{"verb", "id"}, + Operator: "eq", + Value: "http://adlnet.gov/expapi/verbs/completed", + }, + }, + } + + cond, args, _ := svc.BuildComplexFilter(complex, 1) + if cond == "" || len(args) != 2 { + t.Fatalf("unexpected complex filter with JSONB: cond=%s args=%v", cond, args) + } + if !strings.Contains(cond, "AND") { + t.Fatalf("expected AND logic in complex filter, got: %s", cond) + } +} + +// TestBuildComplexFilterWithJSONBGroups tests JSONB paths in complex filter groups +func TestBuildComplexFilterWithJSONBGroups(t *testing.T) { + svc := &postgres.PostgresDbService{} + + // Test: (raw_statement->'verb'->>'id' = 'completed' OR raw_statement->'verb'->>'id' = 'passed') AND timestamp >= '2026-04-01' + complex := models.ComplexFilter{ + Logic: "AND", + Filters: []models.QueryFilter{ + { + Column: "timestamp", + Operator: "gte", + Value: "2026-04-01T00:00:00Z", + }, + }, + Groups: []models.ComplexFilter{ + { + Logic: "OR", + Filters: []models.QueryFilter{ + { + Column: "raw_statement", + JSONPath: []string{"verb", "id"}, + Operator: "eq", + Value: "http://adlnet.gov/expapi/verbs/completed", + }, + { + Column: "raw_statement", + JSONPath: []string{"verb", "id"}, + Operator: "eq", + Value: 
"http://adlnet.gov/expapi/verbs/passed", + }, + }, + }, + }, + } + + cond, args, _ := svc.BuildComplexFilter(complex, 1) + if cond == "" { + t.Fatalf("expected complex filter with JSONB groups, got empty") + } + + // Should have both AND and OR logic + if !strings.Contains(cond, "OR") || !strings.Contains(cond, "AND") { + t.Fatalf("expected both OR and AND logic, got: %s", cond) + } + + // Should have correct number of args (1 timestamp + 2 verb ids) + if len(args) != 3 { + t.Fatalf("expected 3 args, got %d: %v", len(args), args) + } +} + +// TestBuildAdvancedQueryWithJSONBPath tests JSONB in full advanced queries +func TestBuildAdvancedQueryWithJSONBPath(t *testing.T) { + svc := &postgres.PostgresDbService{} + + limit := 50 + params := models.QueryParams{ + Select: []string{"id", "raw_statement", "timestamp"}, + Complex: &models.ComplexFilter{ + Logic: "AND", + Filters: []models.QueryFilter{ + { + Column: "timestamp", + Operator: "gte", + Value: "2026-04-01T00:00:00Z", + }, + { + Column: "raw_statement", + JSONPath: []string{"result", "success"}, + Operator: "eq", + Value: "true", + }, + }, + Groups: []models.ComplexFilter{ + { + Logic: "OR", + Filters: []models.QueryFilter{ + { + Column: "raw_statement", + JSONPath: []string{"verb", "id"}, + Operator: "eq", + Value: "http://adlnet.gov/expapi/verbs/completed", + }, + { + Column: "raw_statement", + JSONPath: []string{"verb", "id"}, + Operator: "eq", + Value: "http://adlnet.gov/expapi/verbs/passed", + }, + }, + }, + }, + }, + OrderBy: []string{"timestamp DESC"}, + Limit: &limit, + } + + query, args := svc.BuildAdvancedQuery("statements", params) + + // Basic structure checks + if !strings.Contains(query, "SELECT") || !strings.Contains(query, "FROM statements") { + t.Fatalf("expected basic SELECT structure, got: %s", query) + } + + // Should contain WHERE clause + if !strings.Contains(query, "WHERE") { + t.Fatalf("expected WHERE clause in query, got: %s", query) + } + + // Should have ORDER BY and LIMIT + if 
!strings.Contains(query, "ORDER BY") || !strings.Contains(query, "LIMIT") { + t.Fatalf("expected ORDER BY and LIMIT, got: %s", query) + } + + // Should have correct number of args (1 timestamp + 3 jsonb values + 1 limit) + if len(args) != 5 { + t.Fatalf("expected 5 args, got %d: %v", len(args), args) + } +} + +// TestBuildJSONBConditionEmptyPath tests edge cases with empty JSONB paths +func TestBuildJSONBConditionEmptyPath(t *testing.T) { + svc := &postgres.PostgresDbService{} + + cond, args, counter := svc.BuildJSONBCondition(models.QueryFilter{ + Column: "data", + JSONPath: []string{}, // Empty path + Operator: "eq", + Value: "test", + }, 1) + + if cond != "" || len(args) != 0 || counter != 1 { + t.Fatalf("expected empty condition for empty JSONPath, got cond=%s args=%v counter=%d", cond, args, counter) + } +} + +// TestBuildJSONBConditionInvalidColumn tests handling of invalid column names +func TestBuildJSONBConditionInvalidColumn(t *testing.T) { + svc := &postgres.PostgresDbService{} + + cond, args, counter := svc.BuildJSONBCondition(models.QueryFilter{ + Column: "1invalid", // Invalid column name + JSONPath: []string{"key"}, + Operator: "eq", + Value: "test", + }, 1) + + if cond != "" || len(args) != 0 || counter != 1 { + t.Fatalf("expected empty condition for invalid column, got cond=%s", cond) + } +} diff --git a/pkg/models/schema.go b/pkg/models/schema.go index c63d6d2..f2d0a8f 100644 --- a/pkg/models/schema.go +++ b/pkg/models/schema.go @@ -41,6 +41,7 @@ type ForeignKey struct { // Enhanced query models type QueryFilter struct { Column string `json:"column"` + JSONPath []string `json:"json_path,omitempty"` // For JSONB path queries: ["result", "success"] Operator string `json:"operator"` Value interface{} `json:"value"` Logic string `json:"logic,omitempty"` // "AND" or "OR" From a874cf0af53e9a2da7e847dbc52bfc090a563497 Mon Sep 17 00:00:00 2001 From: gaurav-gaikwad-12071998 <122888092+gaurav-gaikwad-12071998@users.noreply.github.com> Date: Tue, 14
Apr 2026 19:25:00 +0530 Subject: [PATCH 2/2] Delete CUSTOM_QUERY_GUIDE_V2.md --- CUSTOM_QUERY_GUIDE_V2.md | 868 --------------------------------------- 1 file changed, 868 deletions(-) delete mode 100644 CUSTOM_QUERY_GUIDE_V2.md diff --git a/CUSTOM_QUERY_GUIDE_V2.md b/CUSTOM_QUERY_GUIDE_V2.md deleted file mode 100644 index fd5e1fc..0000000 --- a/CUSTOM_QUERY_GUIDE_V2.md +++ /dev/null @@ -1,868 +0,0 @@ -# Custom Store Query API - xAPI Database Guide - -## Overview - -The Custom Query API allows you to execute complex SQL queries against xAPI Learning Record Store tables. This guide explains the actual table structure, available columns, and how to query them effectively. - ---- - -## Table Structure & Relationships - -### Core Tables - -#### 1. **statements** (table: `xapi_statements`) - -Stores xAPI statements with core activity tracking data. - -**Primary Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `id` | uuid | Primary key | -| `statement_id` | uuid | Unique xAPI statement ID | -| `agent_sha` | varchar(64) | SHA hash of actor/agent | -| `verb_id` | varchar(255) | Verb IRI (e.g., http://adlnet.gov/expapi/verbs/attempted) | -| `object_id` | varchar(255) | ID of object (Activity, Agent, etc.) | -| `object_type` | text | Type of object (Activity, Agent, SubStatement, etc.) 
| -| `registration` | uuid | Activity session registration ID | -| `timestamp` | timestamptz | When activity occurred | -| `stored` | timestamptz | When stored in LRS | -| `voided` | boolean | Whether statement is voided | -| `created_at` | timestamptz | Record creation time | -| `updated_at` | timestamptz | Record update time | - -**JSONB Columns (Queryable):** -- `result` - Score, success, completion, extensions -- `context` - Contextual information, instructor, team -- `object` - Full object definition -- `authority` - Authority/system information -- `attachments` - Attached files - -**Relationships:** -- `agent_sha` → `agents.agent_sha` -- `verb_id` → `verbs.id` -- `object_id` → `activities.activity_id` (when object_type = 'Activity') - ---- - -#### 2. **agents** (table: `agents`) - -Stores actor/agent information (persons and groups). - -**Primary Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `id` | uuid | Primary key | -| `agent_sha` | varchar(64) | Unique SHA hash of agent | -| `name` | jsonb | Agent name | -| `mbox` | varchar(255) | Email address (mailto:user@example.com) | -| `mbox_sha1sum` | varchar(40) | SHA1 of email | -| `openid` | varchar(2048) | OpenID identifier | -| `object_type` | varchar(20) | 'Agent' or 'Group' | -| `created_at` | timestamptz | Creation time | -| `updated_at` | timestamptz | Update time | - -**JSONB Columns:** -- `agent_json` - Full agent object -- `account` - Account with homePage and name - ---- - -#### 3. **activities** (table: `activities`) - -Stores activity/course definitions. 
- -**Primary Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `id` | uuid | Primary key | -| `activity_id` | varchar(2048) | Unique activity IRI | -| `type` | varchar(512) | Activity type (http://adlnet.gov/expapi/activities/course) | -| `name` | jsonb | Activity name in multiple languages | -| `description` | jsonb | Activity description in multiple languages | -| `more_info` | varchar(2048) | URL for more information | -| `created_at` | timestamptz | Creation time | -| `updated_at` | timestamptz | Update time | - -**JSONB Columns:** -- `definition` - Full activity definition including interaction type and components - ---- - -#### 4. **verbs** (table: `verbs`) - -Predefined xAPI verbs. - -**Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `id` | varchar(255) | Verb IRI (PRIMARY KEY) | -| `display` | jsonb | Display names in multiple languages | -| `created_at` | timestamptz | Creation time | -| `updated_at` | timestamptz | Update time | - -**Example verb IDs:** -- `http://adlnet.gov/expapi/verbs/attempted` -- `http://adlnet.gov/expapi/verbs/completed` -- `http://adlnet.gov/expapi/verbs/passed` -- `http://adlnet.gov/expapi/verbs/experienced` - ---- - -#### 5. **states** (table: `xapi_states`) - -Stores learner state documents. 
- -**Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `state_id` | varchar(255) | State identifier | -| `activity_id` | varchar(2048) | Activity IRI | -| `agent_sha` | varchar(64) | Agent SHA | -| `registration` | uuid | Activity session registration | -| `content` | bytea | Binary state document content | -| `content_type` | varchar(255) | MIME type (application/json) | -| `etag` | varchar(64) | Cache validation tag | -| `last_modified` | timestamptz | Last modification time | -| `created_at` | timestamptz | Creation time | -| `updated_at` | timestamptz | Update time | - -**Primary Key:** (`state_id`, `activity_id`, `agent_sha`, `registration`) - ---- - -#### 6. **agent_profiles** (table: `agent_profiles`) - -Agent profile documents. - -**Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `id` | uuid | Primary key | -| `agent_sha` | varchar(64) | Agent SHA | -| `profile_id` | varchar(255) | Profile identifier | -| `content_type` | varchar(255) | MIME type | -| `last_modified` | timestamptz | Last modification | -| `created_at` | timestamptz | Creation time | -| `updated_at` | timestamptz | Update time | - -**JSONB Columns:** -- `profile_data` - Profile content - ---- - -#### 7. **activity_profiles** (table: `activity_profiles`) - -Activity profile documents. - -**Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `id` | uuid | Primary key | -| `activity_id` | varchar(2048) | Activity IRI | -| `profile_id` | varchar(255) | Profile identifier | -| `content_type` | varchar(255) | MIME type | -| `last_modified` | timestamptz | Last modification | -| `created_at` | timestamptz | Creation time | -| `updated_at` | timestamptz | Update time | - -**JSONB Columns:** -- `profile_data` - Profile content - ---- - -#### 8. **voided_statements** (table: `voided_statements`) - -Tracks voided statements. 
- -**Columns:** -| Column | Type | Description | -|--------|------|-------------| -| `statement_id` | uuid | Voided statement ID | -| `tenant_id` | uuid | Tenant ID | -| `voided_by_statement_id` | uuid | ID of voiding statement | -| `voided_at` | timestamp | When voided | - -**Primary Key:** (`statement_id`, `tenant_id`) - ---- - -## Common Query Patterns - -### Example 1: Get statements from specific time period - -```json -{ - "table_name": "statements", - "query": { - "select": ["id", "statement_id", "verb_id", "object_type", "timestamp"], - "range": { - "column": "timestamp", - "from": "2026-04-01T00:00:00Z", - "to": "2026-04-07T23:59:59Z" - }, - "order_by": ["timestamp DESC"], - "limit": 50 - } -} -``` - -**SQL Generated:** -```sql -SELECT id, statement_id, verb_id, object_type, timestamp -FROM tenant_schema.store_uuid_xapi_statements -WHERE timestamp BETWEEN '2026-04-01T00:00:00Z' AND '2026-04-07T23:59:59Z' -ORDER BY timestamp DESC -LIMIT 50 -``` - ---- - -### Example 2: Join statements with agents - -Query statements along with agent email information. - -```json -{ - "table_name": "statements", - "query": { - "select": ["s.statement_id", "s.verb_id", "ag.mbox", "ag.object_type", "s.timestamp"], - "joins": [ - { - "table": "agents", - "type": "INNER", - "on": "s.agent_sha = ag.agent_sha" - } - ], - "filters": [ - { - "column": "s.voided", - "operator": "eq", - "value": false - } - ], - "order_by": ["s.timestamp DESC"], - "limit": 100 - } -} -``` - -**SQL Generated:** -```sql -SELECT s.statement_id, s.verb_id, ag.mbox, ag.object_type, s.timestamp -FROM tenant_schema.store_uuid_xapi_statements s -INNER JOIN tenant_schema.store_uuid_agents ag - ON s.agent_sha = ag.agent_sha -WHERE s.voided = false -ORDER BY s.timestamp DESC -LIMIT 100 -``` - ---- - -### Example 3: Count statements per verb - -Group statements by action to see usage patterns. 
- -```json -{ - "table_name": "statements", - "query": { - "select": ["verb_id"], - "aggregates": [ - { - "function": "COUNT", - "column": "id", - "alias": "count" - } - ], - "group_by": ["verb_id"], - "order_by": ["count DESC"] - } -} -``` - -**SQL Generated:** -```sql -SELECT verb_id, COUNT(id) as count -FROM tenant_schema.store_uuid_xapi_statements -GROUP BY verb_id -ORDER BY count DESC -``` - ---- - -### Example 4: Complex filter with multiple verbs - -Get attempted OR completed statements that were not voided. - -```json -{ - "table_name": "statements", - "query": { - "select": ["id", "verb_id", "timestamp"], - "complex_filter": { - "logic": "AND", - "filters": [ - { - "column": "voided", - "operator": "eq", - "value": false - } - ], - "groups": [ - { - "logic": "OR", - "filters": [ - {"column": "verb_id", "operator": "like", "value": "%attempted%"}, - {"column": "verb_id", "operator": "like", "value": "%completed%"} - ] - } - ] - }, - "order_by": ["timestamp DESC"] - } -} -``` - -**SQL Generated:** -```sql -SELECT id, verb_id, timestamp -FROM tenant_schema.store_uuid_xapi_statements -WHERE voided = false - AND (verb_id LIKE '%attempted%' OR verb_id LIKE '%completed%') -ORDER BY timestamp DESC -``` - ---- - -### Example 5: JSONB Query - Filter by result success (NEW) - -Query statements where the JSONB result column indicates success. Uses the new `json_path` array format. - -```json -{ - "table_name": "statements", - "query": { - "select": ["id", "statement_id", "agent_sha", "verb_id", "timestamp"], - "filters": [ - { - "column": "result", - "json_path": ["success"], - "operator": "eq", - "value": true - } - ], - "order_by": ["timestamp DESC"], - "limit": 50 - } -} -``` - -**SQL Generated:** -```sql -SELECT id, statement_id, agent_sha, verb_id, timestamp -FROM tenant_schema.store_uuid_xapi_statements -WHERE result ->> 'success' = $1 -ORDER BY timestamp DESC -LIMIT 50 -``` - -**Note:** The `json_path` array specifies the path to the JSON key. 
For nested values use: `"json_path": ["nested", "key", "value"]` - ---- - -### Example 6: Complex JSONB Query - Result score and context - -Query statements with successful completions and instructor-led context. - -```json -{ - "table_name": "statements", - "query": { - "select": ["id", "statement_id", "agent_sha", "verb_id"], - "complex_filter": { - "logic": "AND", - "filters": [ - { - "column": "result", - "json_path": ["success"], - "operator": "eq", - "value": true - }, - { - "column": "result", - "json_path": ["score", "scaled"], - "operator": "gte", - "value": 0.8 - } - ], - "groups": [ - { - "logic": "OR", - "filters": [ - { - "column": "context", - "json_path": ["instructor", "name"], - "operator": "ilike", - "value": "%Smith%" - }, - { - "column": "context", - "json_path": ["team", "name"], - "operator": "is_not_null" - } - ] - } - ] - }, - "order_by": ["timestamp DESC"], - "limit": 100 - } -} -``` - -**SQL Generated:** -```sql -SELECT id, statement_id, agent_sha, verb_id -FROM tenant_schema.store_uuid_xapi_statements -WHERE result ->> 'success' = $1 - AND result -> 'score' ->> 'scaled' >= $2 - AND ( - context -> 'instructor' ->> 'name' ILIKE $3 - OR context -> 'team' ->> 'name' IS NOT NULL - ) -ORDER BY timestamp DESC -LIMIT 100 -``` - ---- - -### Example 7: Multi-table JSONB Query - -Join statements with agents and query JSONB result data along with agent information. 
- -```json -{ - "table_name": "statements", - "query": { - "select": ["ag.mbox", "s.verb_id", "COUNT(s.id) as attempts"], - "joins": [ - { - "table": "agents", - "type": "INNER", - "on": "s.agent_sha = ag.agent_sha" - } - ], - "filters": [ - { - "column": "result", - "json_path": ["success"], - "operator": "eq", - "value": true - } - ], - "group_by": ["ag.mbox", "s.verb_id"], - "having": [ - { - "column": "attempts", - "operator": "gte", - "value": 2 - } - ], - "order_by": ["attempts DESC"] - } -} -``` - -**SQL Generated:** -```sql -SELECT ag.mbox, s.verb_id, COUNT(s.id) as attempts -FROM tenant_schema.store_uuid_xapi_statements s -INNER JOIN tenant_schema.store_uuid_agents ag - ON s.agent_sha = ag.agent_sha -WHERE s.result ->> 'success' = $1 -GROUP BY ag.mbox, s.verb_id -HAVING COUNT(s.id) >= $2 -ORDER BY attempts DESC -``` - ---- - -### Example 8: JSONB Null Checks - -Find statements with missing context data. - -```json -{ - "table_name": "statements", - "query": { - "select": ["id", "statement_id", "verb_id", "timestamp"], - "filters": [ - { - "column": "context", - "json_path": ["instructor"], - "operator": "is_null" - }, - { - "column": "context", - "json_path": ["team"], - "operator": "is_not_null" - } - ], - "order_by": ["timestamp DESC"], - "limit": 50 - } -} -``` - -**SQL Generated:** -```sql -SELECT id, statement_id, verb_id, timestamp -FROM tenant_schema.store_uuid_xapi_statements -WHERE context -> 'instructor' IS NULL - AND context -> 'team' IS NOT NULL -ORDER BY timestamp DESC -LIMIT 50 -``` - ---- - -### Example 9: Original Multi-table analysis - -Join statements, agents, and activities to get comprehensive view. 
- -```json -{ - "table_name": "statements", - "query": { - "select": ["ag.mbox", "act.type", "COUNT(s.id) as stmt_count"], - "joins": [ - { - "table": "agents", - "type": "INNER", - "on": "s.agent_sha = ag.agent_sha" - }, - { - "table": "activities", - "type": "LEFT", - "on": "s.object_id = act.activity_id" - } - ], - "range": { - "column": "s.timestamp", - "from": "2026-04-01T00:00:00Z", - "to": "2026-04-07T23:59:59Z" - }, - "group_by": ["ag.mbox", "act.type"], - "having": [ - { - "column": "stmt_count", - "operator": "gte", - "value": 3 - } - ], - "order_by": ["stmt_count DESC"] - } -} -``` - -**SQL Generated:** -```sql -SELECT ag.mbox, act.type, COUNT(s.id) as stmt_count -FROM tenant_schema.store_uuid_xapi_statements s -INNER JOIN tenant_schema.store_uuid_agents ag - ON s.agent_sha = ag.agent_sha -LEFT JOIN tenant_schema.store_uuid_activities act - ON s.object_id = act.activity_id -WHERE s.timestamp BETWEEN '2026-04-01T00:00:00Z' AND '2026-04-07T23:59:59Z' -GROUP BY ag.mbox, act.type -HAVING COUNT(s.id) >= $1 -ORDER BY stmt_count DESC -``` - ---- - -## JSONB Query Reference (NEW) - -### Queryable JSONB Columns - -The statements table contains JSONB columns that can be efficiently queried using json_path arrays: - -| Column | Description | Example Paths | -|--------|-------------|----------------| -| `result` | Learning outcome data | `["success"]`, `["score", "scaled"]`, `["duration"]`, `["completion"]` | -| `context` | Contextual information | `["instructor", "name"]`, `["team", "name"]`, `["language"]`, `["revision"]` | -| `object` | Activity/object definition | `["definition", "type"]`, `["definition", "name"]`, `["id"]` | -| `authority` | Authority/system info | `["name"]`, `["mbox"]`, `["openid"]` | -| `attachments` | Attached documents | `["display"]`, `["fileUrl"]`, `["contentType"]` | - -### JSONB Operator Mapping - -| QueryFilter Operator | JSONB SQL | Usage | -|--------|----------|-------| -| `eq` | `->>` (text compare) | `json_path[-1] = value` | -| 
`neq` | `->>` | Not equal comparison | -| `gt`, `gte`, `lt`, `lte` | `->>` | Numeric/text comparisons | -| `like`, `ilike` | `->>` | Pattern matching | -| `in`, `not_in` | `->>` | Value in list | -| `is_null` | `->` | Path is missing or null | -| `is_not_null` | `->` | Path exists and is not null | - -### Best Practices for JSONB Queries - -1. **Use json_path array, not raw operators** - - ❌ Don't: `"column": "result->'success'->>'value'"` - - ✅ Do: `"column": "result", "json_path": ["success", "value"]` - -2. **Specific paths perform better** - - Use full paths when possible for consistency - - Ex: `["score", "scaled"]` instead of just `["score"]` - -3. **NULL handling** - - Use `is_null` to find rows where a path is missing, and `is_not_null` to check that it exists - - Paths that don't exist return NULL - -4. **Type coercion** - - String comparisons work for all types - - Use numeric operators carefully with JSON values - -5. **Deeply nested paths** - - No practical depth limit, but longer paths = slower queries - - Consider database indexing for frequently queried paths - ---- - -## Query Guidelines - -### Do's ✓ - -1. **Use specific columns** - Instead of `SELECT *`, specify needed columns -2. **Filter early** - Use WHERE before aggregation -3. **Index on common filters** - Query indexed columns first -4. **Paginate large results** - Always use LIMIT/OFFSET -5. **Use aliases in joins** - Makes queries more readable -6. **Match UUIDs carefully** - Ensure type compatibility -7. **Use json_path arrays for JSONB** - Separate column from path components (NEW) -8. **Provide all json_path elements** - Don't split JSONB paths between column and json_path - -### Don'ts ✗ - -1. **Avoid `SELECT *`** on large tables -2. **Don't nest filters too deeply** - Max 10 levels -3. **Don't use LIKE with leading %** - Performance issue -4. **Avoid querying JSONB directly** - Use indexed columns when possible -5. **Don't forget LIMIT** - Prevents returning massive result sets -6. 
**Don't embed JSONB operators in the column name** - Use the `json_path` field instead (NEW) - - ❌ Wrong: `"column": "result->'success'->>'value'"` - - ✅ Correct: `"column": "result", "json_path": ["success", "value"]` -7. **Don't mix JSONB operators** - Framework handles all operator syntax automatically - ---- - -## Performance Tips - -1. **Timestamp queries** - Prefer `timestamp` over `stored` when possible -2. **Agent lookups** - Use `agent_sha` instead of `mbox` for faster joins -3. **Activity type filtering** - An index on the `type` column helps -4. **Verb filtering** - `verb_id` is indexed -5. **Pagination** - Use offset pagination with consistent ordering -6. **JSONB queries** - Index frequently queried JSONB paths using PostgreSQL GIN indexes -7. **Avoid deep nesting** - Keep JSONB paths to 3-4 levels when possible - ---- - -## Common Use Cases - -### Track specific user activity -```json -{ - "table_name": "statements", - "query": { - "select": ["timestamp", "verb_id", "object_type"], - "filters": [ - { - "column": "agent_sha", - "operator": "eq", - "value": "specific_sha_value" - } - ], - "order_by": ["timestamp DESC"], - "limit": 100 - } -} -``` - -### Successful course completions (JSONB) -```json -{ - "table_name": "statements", - "query": { - "select": ["agent_sha", "verb_id", "timestamp"], - "filters": [ - { - "column": "result", - "json_path": ["success"], - "operator": "eq", - "value": true - }, - { - "column": "result", - "json_path": ["completion"], - "operator": "eq", - "value": true - } - ], - "order_by": ["timestamp DESC"] - } -} -``` - -### High-scoring activities (JSONB) -```json -{ - "table_name": "statements", - "query": { - "select": ["agent_sha", "object_id", "verb_id", "timestamp"], - "filters": [ - { - "column": "result", - "json_path": ["score", "scaled"], - "operator": "gte", - "value": 0.9 - } - ], - "order_by": ["timestamp DESC"], - "limit": 50 - } -} -``` - -### Course completion analysis -```json -{ - "table_name": "statements", - "query": { 
- "select": ["ag.mbox"], - "joins": [ - { - "table": "agents", - "type": "INNER", - "on": "s.agent_sha = ag.agent_sha" - } - ], - "filters": [ - { - "column": "result", - "json_path": ["completion"], - "operator": "eq", - "value": true - } - ], - "aggregates": [ - { - "function": "COUNT", - "column": "id", - "alias": "completions" - } - ], - "group_by": ["ag.mbox"], - "order_by": ["completions DESC"] - } -} -``` - -### Statements with context instructor (JSONB) -```json -{ - "table_name": "statements", - "query": { - "select": ["id", "agent_sha", "verb_id", "timestamp"], - "filters": [ - { - "column": "context", - "json_path": ["instructor", "name"], - "operator": "is_not_null" - } - ], - "order_by": ["timestamp DESC"], - "limit": 100 - } -} -``` - -### Daily submission volume -```json -{ - "table_name": "statements", - "query": { - "select": [], - "aggregates": [ - { - "function": "COUNT", - "column": "id", - "alias": "count" - } - ], - "range": { - "column": "timestamp", - "from": "2026-04-01T00:00:00Z", - "to": "2026-04-07T23:59:59Z" - }, - "group_by": ["DATE(timestamp)"], - "order_by": ["COUNT(id) DESC"] - } -} -``` - -### Agent demographics -```json -{ - "table_name": "agents", - "query": { - "select": ["object_type"], - "aggregates": [ - { - "function": "COUNT", - "column": "id", - "alias": "count" - } - ], - "group_by": ["object_type"], - "order_by": ["count DESC"] - } -} -``` - -### Activity usage -```json -{ - "table_name": "statements", - "query": { - "select": ["act.type"], - "joins": [ - { - "table": "activities", - "type": "LEFT", - "on": "s.object_id = act.activity_id" - } - ], - "filters": [ - { - "column": "s.object_type", - "operator": "eq", - "value": "Activity" - } - ], - "aggregates": [ - { - "function": "COUNT", - "column": "s.id", - "alias": "count" - } - ], - "group_by": ["act.type"], - "order_by": ["count DESC"] - } -} -``` -