diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index dcd5741f2b..43c5c2e591 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -3430,6 +3430,23 @@ export const ResendIcon = (props: SVGProps) => ( ) +export const GoogleBigQueryIcon = (props: SVGProps) => ( + + + + + +) + export const GoogleVaultIcon = (props: SVGProps) => ( = { gitlab: GitLabIcon, gmail_v2: GmailIcon, gong: GongIcon, + google_bigquery: GoogleBigQueryIcon, google_books: GoogleBooksIcon, google_calendar_v2: GoogleCalendarIcon, google_docs: GoogleDocsIcon, diff --git a/apps/docs/content/docs/en/tools/google_bigquery.mdx b/apps/docs/content/docs/en/tools/google_bigquery.mdx new file mode 100644 index 0000000000..4218ff9f68 --- /dev/null +++ b/apps/docs/content/docs/en/tools/google_bigquery.mdx @@ -0,0 +1,161 @@ +--- +title: Google BigQuery +description: Query, list, and insert data in Google BigQuery +--- + +import { BlockInfoCard } from "@/components/ui/block-info-card" + + + +## Usage Instructions + +Connect to Google BigQuery to run SQL queries, list datasets and tables, get table metadata, and insert rows. 
+ + + +## Tools + +### `google_bigquery_query` + +Run a SQL query against Google BigQuery and return the results + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `projectId` | string | Yes | Google Cloud project ID | +| `query` | string | Yes | SQL query to execute | +| `useLegacySql` | boolean | No | Whether to use legacy SQL syntax \(default: false\) | +| `maxResults` | number | No | Maximum number of rows to return | +| `defaultDatasetId` | string | No | Default dataset for unqualified table names | +| `location` | string | No | Processing location \(e.g., "US", "EU"\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `columns` | array | Array of column names from the query result | +| `rows` | array | Array of row objects keyed by column name | +| `totalRows` | string | Total number of rows in the complete result set | +| `jobComplete` | boolean | Whether the query completed within the timeout | +| `totalBytesProcessed` | string | Total bytes processed by the query | +| `cacheHit` | boolean | Whether the query result was served from cache | +| `jobReference` | object | Job reference \(useful when jobComplete is false\) | +| ↳ `projectId` | string | Project ID containing the job | +| ↳ `jobId` | string | Unique job identifier | +| ↳ `location` | string | Geographic location of the job | +| `pageToken` | string | Token for fetching additional result pages | + +### `google_bigquery_list_datasets` + +List all datasets in a Google BigQuery project + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `projectId` | string | Yes | Google Cloud project ID | +| `maxResults` | number | No | Maximum number of datasets to return | +| `pageToken` | string | No | Token for pagination | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `datasets` | array | Array of 
dataset objects | +| ↳ `datasetId` | string | Unique dataset identifier | +| ↳ `projectId` | string | Project ID containing this dataset | +| ↳ `friendlyName` | string | Descriptive name for the dataset | +| ↳ `location` | string | Geographic location where the data resides | +| `nextPageToken` | string | Token for fetching next page of results | + +### `google_bigquery_list_tables` + +List all tables in a Google BigQuery dataset + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `projectId` | string | Yes | Google Cloud project ID | +| `datasetId` | string | Yes | BigQuery dataset ID | +| `maxResults` | number | No | Maximum number of tables to return | +| `pageToken` | string | No | Token for pagination | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `tables` | array | Array of table objects | +| ↳ `tableId` | string | Table identifier | +| ↳ `datasetId` | string | Dataset ID containing this table | +| ↳ `projectId` | string | Project ID containing this table | +| ↳ `type` | string | Table type \(TABLE, VIEW, EXTERNAL, etc.\) | +| ↳ `friendlyName` | string | User-friendly name for the table | +| ↳ `creationTime` | string | Time when created, in milliseconds since epoch | +| `totalItems` | number | Total number of tables in the dataset | +| `nextPageToken` | string | Token for fetching next page of results | + +### `google_bigquery_get_table` + +Get metadata and schema for a Google BigQuery table + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `projectId` | string | Yes | Google Cloud project ID | +| `datasetId` | string | Yes | BigQuery dataset ID | +| `tableId` | string | Yes | BigQuery table ID | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `tableId` | string | Table ID | +| `datasetId` | string | Dataset ID | +| `projectId` | string | Project 
ID | +| `type` | string | Table type \(TABLE, VIEW, SNAPSHOT, MATERIALIZED_VIEW, EXTERNAL\) | +| `description` | string | Table description | +| `numRows` | string | Total number of rows | +| `numBytes` | string | Total size in bytes, excluding data in streaming buffer | +| `schema` | array | Array of column definitions | +| ↳ `name` | string | Column name | +| ↳ `type` | string | Data type \(STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD, etc.\) | +| ↳ `mode` | string | Column mode \(NULLABLE, REQUIRED, or REPEATED\) | +| ↳ `description` | string | Column description | +| `creationTime` | string | Table creation time \(milliseconds since epoch\) | +| `lastModifiedTime` | string | Last modification time \(milliseconds since epoch\) | +| `location` | string | Geographic location where the table resides | + +### `google_bigquery_insert_rows` + +Insert rows into a Google BigQuery table using streaming insert + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `projectId` | string | Yes | Google Cloud project ID | +| `datasetId` | string | Yes | BigQuery dataset ID | +| `tableId` | string | Yes | BigQuery table ID | +| `rows` | string | Yes | JSON array of row objects to insert | +| `skipInvalidRows` | boolean | No | Whether to insert valid rows even if some are invalid | +| `ignoreUnknownValues` | boolean | No | Whether to ignore columns not in the table schema | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `insertedRows` | number | Number of rows successfully inserted | +| `errors` | array | Array of per-row insertion errors \(empty if all succeeded\) | +| ↳ `index` | number | Zero-based index of the row that failed | +| ↳ `errors` | array | Error details for this row | +| ↳ `reason` | string | Short error code summarizing the error | +| ↳ `location` | string | Where the error occurred | +| ↳ `message` | string | Human-readable error description | + + diff 
--git a/apps/docs/content/docs/en/tools/meta.json b/apps/docs/content/docs/en/tools/meta.json index 9fc1cc577e..e5afbc1b50 100644 --- a/apps/docs/content/docs/en/tools/meta.json +++ b/apps/docs/content/docs/en/tools/meta.json @@ -37,6 +37,7 @@ "gitlab", "gmail", "gong", + "google_bigquery", "google_books", "google_calendar", "google_docs", diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/credential-selector/components/oauth-required-modal.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/credential-selector/components/oauth-required-modal.tsx index 6cac32e626..6c66ffead6 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/credential-selector/components/oauth-required-modal.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/credential-selector/components/oauth-required-modal.tsx @@ -44,6 +44,7 @@ const SCOPE_DESCRIPTIONS: Record = { 'https://www.googleapis.com/auth/userinfo.profile': 'View basic profile info', 'https://www.googleapis.com/auth/forms.body': 'View and manage Google Forms', 'https://www.googleapis.com/auth/forms.responses.readonly': 'View responses to Google Forms', + 'https://www.googleapis.com/auth/bigquery': 'View and manage data in Google BigQuery', 'https://www.googleapis.com/auth/ediscovery': 'Access Google Vault for eDiscovery', 'https://www.googleapis.com/auth/devstorage.read_only': 'Read files from Google Cloud Storage', 'https://www.googleapis.com/auth/admin.directory.group': 'Manage Google Workspace groups', diff --git a/apps/sim/blocks/blocks/google_bigquery.ts b/apps/sim/blocks/blocks/google_bigquery.ts new file mode 100644 index 0000000000..0ba15dfe56 --- /dev/null +++ b/apps/sim/blocks/blocks/google_bigquery.ts @@ -0,0 +1,256 @@ 
+import { GoogleBigQueryIcon } from '@/components/icons' +import type { BlockConfig } from '@/blocks/types' +import { AuthMode } from '@/blocks/types' + +export const GoogleBigQueryBlock: BlockConfig = { + type: 'google_bigquery', + name: 'Google BigQuery', + description: 'Query, list, and insert data in Google BigQuery', + longDescription: + 'Connect to Google BigQuery to run SQL queries, list datasets and tables, get table metadata, and insert rows.', + docsLink: 'https://docs.sim.ai/tools/google_bigquery', + category: 'tools', + bgColor: '#E0E0E0', + icon: GoogleBigQueryIcon, + authMode: AuthMode.OAuth, + subBlocks: [ + { + id: 'operation', + title: 'Operation', + type: 'dropdown', + options: [ + { label: 'Run Query', id: 'query' }, + { label: 'List Datasets', id: 'list_datasets' }, + { label: 'List Tables', id: 'list_tables' }, + { label: 'Get Table', id: 'get_table' }, + { label: 'Insert Rows', id: 'insert_rows' }, + ], + value: () => 'query', + }, + + { + id: 'credential', + title: 'Google Account', + type: 'oauth-input', + canonicalParamId: 'oauthCredential', + mode: 'basic', + required: true, + serviceId: 'google-bigquery', + requiredScopes: ['https://www.googleapis.com/auth/bigquery'], + placeholder: 'Select Google account', + }, + { + id: 'manualCredential', + title: 'Google Account', + type: 'short-input', + canonicalParamId: 'oauthCredential', + mode: 'advanced', + placeholder: 'Enter credential ID', + required: true, + }, + + { + id: 'projectId', + title: 'Project ID', + type: 'short-input', + placeholder: 'Enter Google Cloud project ID', + required: true, + }, + + { + id: 'query', + title: 'SQL Query', + type: 'long-input', + placeholder: 'SELECT * FROM `project.dataset.table` LIMIT 100', + condition: { field: 'operation', value: 'query' }, + required: { field: 'operation', value: 'query' }, + wandConfig: { + enabled: true, + prompt: `Generate a BigQuery Standard SQL query based on the user's description. 
+The query should: +- Use Standard SQL syntax (not Legacy SQL) +- Be well-formatted and efficient +- Include appropriate LIMIT clauses when applicable + +Examples: +- "get all users" -> SELECT * FROM \`project.dataset.users\` LIMIT 1000 +- "count orders by status" -> SELECT status, COUNT(*) as count FROM \`project.dataset.orders\` GROUP BY status +- "recent events" -> SELECT * FROM \`project.dataset.events\` ORDER BY created_at DESC LIMIT 100 + +Return ONLY the SQL query - no explanations, no quotes, no extra text.`, + placeholder: 'Describe the query you want to run...', + }, + }, + { + id: 'useLegacySql', + title: 'Use Legacy SQL', + type: 'switch', + condition: { field: 'operation', value: 'query' }, + }, + { + id: 'maxResults', + title: 'Max Results', + type: 'short-input', + placeholder: 'Maximum rows to return', + condition: { field: 'operation', value: ['query', 'list_datasets', 'list_tables'] }, + }, + { + id: 'defaultDatasetId', + title: 'Default Dataset', + type: 'short-input', + placeholder: 'Default dataset for unqualified table names', + condition: { field: 'operation', value: 'query' }, + }, + { + id: 'location', + title: 'Location', + type: 'short-input', + placeholder: 'Processing location (e.g., US, EU)', + condition: { field: 'operation', value: 'query' }, + }, + + { + id: 'datasetId', + title: 'Dataset ID', + type: 'short-input', + placeholder: 'Enter BigQuery dataset ID', + condition: { field: 'operation', value: ['list_tables', 'get_table', 'insert_rows'] }, + required: { field: 'operation', value: ['list_tables', 'get_table', 'insert_rows'] }, + }, + + { + id: 'tableId', + title: 'Table ID', + type: 'short-input', + placeholder: 'Enter BigQuery table ID', + condition: { field: 'operation', value: ['get_table', 'insert_rows'] }, + required: { field: 'operation', value: ['get_table', 'insert_rows'] }, + }, + + { + id: 'rows', + title: 'Rows', + type: 'long-input', + placeholder: '[{"column1": "value1", "column2": 42}]', + condition: { field: 
'operation', value: 'insert_rows' }, + required: { field: 'operation', value: 'insert_rows' }, + wandConfig: { + enabled: true, + prompt: `Generate a JSON array of row objects for BigQuery insertion based on the user's description. +Each row should be a JSON object where keys are column names and values match the expected types. + +Examples: +- "3 users" -> [{"name": "Alice", "email": "alice@example.com"}, {"name": "Bob", "email": "bob@example.com"}, {"name": "Charlie", "email": "charlie@example.com"}] +- "order record" -> [{"order_id": "ORD-001", "amount": 99.99, "status": "pending"}] + +Return ONLY the JSON array - no explanations, no wrapping, no extra text.`, + placeholder: 'Describe the rows to insert...', + generationType: 'json-object', + }, + }, + { + id: 'skipInvalidRows', + title: 'Skip Invalid Rows', + type: 'switch', + condition: { field: 'operation', value: 'insert_rows' }, + }, + { + id: 'ignoreUnknownValues', + title: 'Ignore Unknown Values', + type: 'switch', + condition: { field: 'operation', value: 'insert_rows' }, + }, + + { + id: 'pageToken', + title: 'Page Token', + type: 'short-input', + placeholder: 'Pagination token', + condition: { field: 'operation', value: ['list_datasets', 'list_tables'] }, + }, + ], + tools: { + access: [ + 'google_bigquery_query', + 'google_bigquery_list_datasets', + 'google_bigquery_list_tables', + 'google_bigquery_get_table', + 'google_bigquery_insert_rows', + ], + config: { + tool: (params) => { + switch (params.operation) { + case 'query': + return 'google_bigquery_query' + case 'list_datasets': + return 'google_bigquery_list_datasets' + case 'list_tables': + return 'google_bigquery_list_tables' + case 'get_table': + return 'google_bigquery_get_table' + case 'insert_rows': + return 'google_bigquery_insert_rows' + default: + throw new Error(`Invalid Google BigQuery operation: ${params.operation}`) + } + }, + params: (params) => { + const { oauthCredential, rows, maxResults, ...rest } = params + return { + ...rest, + 
oauthCredential, + ...(rows && { rows: typeof rows === 'string' ? rows : JSON.stringify(rows) }), + ...(maxResults !== undefined && maxResults !== '' && { maxResults: Number(maxResults) }), + } + }, + }, + }, + inputs: { + operation: { type: 'string', description: 'Operation to perform' }, + oauthCredential: { type: 'string', description: 'Google BigQuery OAuth credential' }, + projectId: { type: 'string', description: 'Google Cloud project ID' }, + query: { type: 'string', description: 'SQL query to execute' }, + useLegacySql: { type: 'boolean', description: 'Whether to use legacy SQL syntax' }, + maxResults: { type: 'number', description: 'Maximum number of results to return' }, + defaultDatasetId: { + type: 'string', + description: 'Default dataset for unqualified table names', + }, + location: { type: 'string', description: 'Processing location' }, + datasetId: { type: 'string', description: 'BigQuery dataset ID' }, + tableId: { type: 'string', description: 'BigQuery table ID' }, + rows: { type: 'string', description: 'JSON array of row objects to insert' }, + skipInvalidRows: { type: 'boolean', description: 'Whether to skip invalid rows during insert' }, + ignoreUnknownValues: { + type: 'boolean', + description: 'Whether to ignore unknown column values', + }, + pageToken: { type: 'string', description: 'Pagination token' }, + }, + outputs: { + columns: { type: 'json', description: 'Array of column names (query)' }, + rows: { type: 'json', description: 'Array of row objects (query)' }, + totalRows: { type: 'string', description: 'Total number of rows (query)' }, + jobComplete: { type: 'boolean', description: 'Whether the query completed (query)' }, + totalBytesProcessed: { type: 'string', description: 'Bytes processed (query)' }, + cacheHit: { type: 'boolean', description: 'Whether result was cached (query)' }, + jobReference: { type: 'json', description: 'Job reference for incomplete queries (query)' }, + pageToken: { type: 'string', description: 'Token for 
additional result pages (query)' }, + datasets: { type: 'json', description: 'Array of dataset objects (list_datasets)' }, + tables: { type: 'json', description: 'Array of table objects (list_tables)' }, + totalItems: { type: 'number', description: 'Total items count (list_tables)' }, + tableId: { type: 'string', description: 'Table ID (get_table)' }, + datasetId: { type: 'string', description: 'Dataset ID (get_table)' }, + type: { type: 'string', description: 'Table type (get_table)' }, + description: { type: 'string', description: 'Table description (get_table)' }, + numRows: { type: 'string', description: 'Row count (get_table)' }, + numBytes: { type: 'string', description: 'Size in bytes (get_table)' }, + schema: { type: 'json', description: 'Column definitions (get_table)' }, + creationTime: { type: 'string', description: 'Creation time (get_table)' }, + lastModifiedTime: { type: 'string', description: 'Last modified time (get_table)' }, + location: { type: 'string', description: 'Data location (get_table)' }, + insertedRows: { type: 'number', description: 'Rows inserted (insert_rows)' }, + errors: { type: 'json', description: 'Insert errors (insert_rows)' }, + nextPageToken: { type: 'string', description: 'Token for next page of results' }, + }, +} diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 03b9827a77..d9b2affdb3 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -43,6 +43,7 @@ import { GitLabBlock } from '@/blocks/blocks/gitlab' import { GmailBlock, GmailV2Block } from '@/blocks/blocks/gmail' import { GongBlock } from '@/blocks/blocks/gong' import { GoogleSearchBlock } from '@/blocks/blocks/google' +import { GoogleBigQueryBlock } from '@/blocks/blocks/google_bigquery' import { GoogleBooksBlock } from '@/blocks/blocks/google_books' import { GoogleCalendarBlock, GoogleCalendarV2Block } from '@/blocks/blocks/google_calendar' import { GoogleDocsBlock } from '@/blocks/blocks/google_docs' @@ -240,6 
+241,7 @@ export const registry: Record = { google_sheets_v2: GoogleSheetsV2Block, google_slides: GoogleSlidesBlock, google_slides_v2: GoogleSlidesV2Block, + google_bigquery: GoogleBigQueryBlock, google_vault: GoogleVaultBlock, grafana: GrafanaBlock, grain: GrainBlock, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index dcd5741f2b..43c5c2e591 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -3430,6 +3430,23 @@ export const ResendIcon = (props: SVGProps) => ( ) +export const GoogleBigQueryIcon = (props: SVGProps) => ( + + + + + +) + export const GoogleVaultIcon = (props: SVGProps) => ( { + try { + const response = await fetch('https://openidconnect.googleapis.com/v1/userinfo', { + headers: { Authorization: `Bearer ${tokens.accessToken}` }, + }) + if (!response.ok) { + logger.error('Failed to fetch Google user info', { status: response.status }) + throw new Error(`Failed to fetch Google user info: ${response.statusText}`) + } + const profile = await response.json() + const now = new Date() + return { + id: `${profile.sub}-${crypto.randomUUID()}`, + name: profile.name || 'Google User', + email: profile.email, + image: profile.picture || undefined, + emailVerified: profile.email_verified || false, + createdAt: now, + updatedAt: now, + } + } catch (error) { + logger.error('Error in Google getUserInfo', { error }) + throw error + } + }, + }, + { providerId: 'google-vault', clientId: env.GOOGLE_CLIENT_ID as string, diff --git a/apps/sim/lib/oauth/oauth.ts b/apps/sim/lib/oauth/oauth.ts index b890566334..96d9167dbe 100644 --- a/apps/sim/lib/oauth/oauth.ts +++ b/apps/sim/lib/oauth/oauth.ts @@ -8,6 +8,7 @@ import { DropboxIcon, GithubIcon, GmailIcon, + GoogleBigQueryIcon, GoogleCalendarIcon, GoogleDocsIcon, GoogleDriveIcon, @@ -119,6 +120,14 @@ export const OAUTH_PROVIDERS: Record = { baseProviderIcon: GoogleIcon, scopes: ['https://www.googleapis.com/auth/calendar'], }, + 'google-bigquery': { + name: 'Google 
BigQuery', + description: 'Query, list, and insert data in Google BigQuery.', + providerId: 'google-bigquery', + icon: GoogleBigQueryIcon, + baseProviderIcon: GoogleIcon, + scopes: ['https://www.googleapis.com/auth/bigquery'], + }, 'google-vault': { name: 'Google Vault', description: 'Search, export, and manage matters/holds via Google Vault.', diff --git a/apps/sim/lib/oauth/types.ts b/apps/sim/lib/oauth/types.ts index d5114a38bc..70229738ab 100644 --- a/apps/sim/lib/oauth/types.ts +++ b/apps/sim/lib/oauth/types.ts @@ -7,6 +7,7 @@ export type OAuthProvider = | 'google-docs' | 'google-sheets' | 'google-calendar' + | 'google-bigquery' | 'google-vault' | 'google-forms' | 'google-groups' @@ -52,6 +53,7 @@ export type OAuthService = | 'google-docs' | 'google-sheets' | 'google-calendar' + | 'google-bigquery' | 'google-vault' | 'google-forms' | 'google-groups' diff --git a/apps/sim/tools/google_bigquery/get_table.ts b/apps/sim/tools/google_bigquery/get_table.ts new file mode 100644 index 0000000000..50cf0437a8 --- /dev/null +++ b/apps/sim/tools/google_bigquery/get_table.ts @@ -0,0 +1,132 @@ +import type { + GoogleBigQueryGetTableParams, + GoogleBigQueryGetTableResponse, +} from '@/tools/google_bigquery/types' +import type { ToolConfig } from '@/tools/types' + +export const googleBigQueryGetTableTool: ToolConfig< + GoogleBigQueryGetTableParams, + GoogleBigQueryGetTableResponse +> = { + id: 'google_bigquery_get_table', + name: 'BigQuery Get Table', + description: 'Get metadata and schema for a Google BigQuery table', + version: '1.0.0', + + oauth: { + required: true, + provider: 'google-bigquery', + }, + + params: { + accessToken: { + type: 'string', + required: true, + visibility: 'hidden', + description: 'OAuth access token', + }, + projectId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Google Cloud project ID', + }, + datasetId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'BigQuery dataset ID', 
+ }, + tableId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'BigQuery table ID', + }, + }, + + request: { + url: (params) => + `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets/${encodeURIComponent(params.datasetId)}/tables/${encodeURIComponent(params.tableId)}`, + method: 'GET', + headers: (params) => ({ + Authorization: `Bearer ${params.accessToken}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + if (!response.ok) { + const errorMessage = data.error?.message || 'Failed to get BigQuery table' + throw new Error(errorMessage) + } + + const schema = (data.schema?.fields ?? []).map( + (f: { name: string; type: string; mode?: string; description?: string }) => ({ + name: f.name, + type: f.type, + mode: f.mode ?? null, + description: f.description ?? null, + }) + ) + + return { + success: true, + output: { + tableId: data.tableReference?.tableId ?? null, + datasetId: data.tableReference?.datasetId ?? null, + projectId: data.tableReference?.projectId ?? null, + type: data.type ?? null, + description: data.description ?? null, + numRows: data.numRows ?? null, + numBytes: data.numBytes ?? null, + schema, + creationTime: data.creationTime ?? null, + lastModifiedTime: data.lastModifiedTime ?? null, + location: data.location ?? 
null, + }, + } + }, + + outputs: { + tableId: { type: 'string', description: 'Table ID' }, + datasetId: { type: 'string', description: 'Dataset ID' }, + projectId: { type: 'string', description: 'Project ID' }, + type: { + type: 'string', + description: 'Table type (TABLE, VIEW, SNAPSHOT, MATERIALIZED_VIEW, EXTERNAL)', + }, + description: { type: 'string', description: 'Table description', optional: true }, + numRows: { type: 'string', description: 'Total number of rows' }, + numBytes: { + type: 'string', + description: 'Total size in bytes, excluding data in streaming buffer', + }, + schema: { + type: 'array', + description: 'Array of column definitions', + items: { + type: 'object', + properties: { + name: { type: 'string', description: 'Column name' }, + type: { + type: 'string', + description: 'Data type (STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD, etc.)', + }, + mode: { + type: 'string', + description: 'Column mode (NULLABLE, REQUIRED, or REPEATED)', + optional: true, + }, + description: { type: 'string', description: 'Column description', optional: true }, + }, + }, + }, + creationTime: { type: 'string', description: 'Table creation time (milliseconds since epoch)' }, + lastModifiedTime: { + type: 'string', + description: 'Last modification time (milliseconds since epoch)', + }, + location: { type: 'string', description: 'Geographic location where the table resides' }, + }, +} diff --git a/apps/sim/tools/google_bigquery/index.ts b/apps/sim/tools/google_bigquery/index.ts new file mode 100644 index 0000000000..ea1aa73371 --- /dev/null +++ b/apps/sim/tools/google_bigquery/index.ts @@ -0,0 +1,5 @@ +export { googleBigQueryGetTableTool } from '@/tools/google_bigquery/get_table' +export { googleBigQueryInsertRowsTool } from '@/tools/google_bigquery/insert_rows' +export { googleBigQueryListDatasetsTool } from '@/tools/google_bigquery/list_datasets' +export { googleBigQueryListTablesTool } from '@/tools/google_bigquery/list_tables' +export { 
googleBigQueryQueryTool } from '@/tools/google_bigquery/query' diff --git a/apps/sim/tools/google_bigquery/insert_rows.ts b/apps/sim/tools/google_bigquery/insert_rows.ts new file mode 100644 index 0000000000..8f7e03e839 --- /dev/null +++ b/apps/sim/tools/google_bigquery/insert_rows.ts @@ -0,0 +1,174 @@ +import type { + GoogleBigQueryInsertRowsParams, + GoogleBigQueryInsertRowsResponse, +} from '@/tools/google_bigquery/types' +import type { ToolConfig } from '@/tools/types' + +export const googleBigQueryInsertRowsTool: ToolConfig< + GoogleBigQueryInsertRowsParams, + GoogleBigQueryInsertRowsResponse +> = { + id: 'google_bigquery_insert_rows', + name: 'BigQuery Insert Rows', + description: 'Insert rows into a Google BigQuery table using streaming insert', + version: '1.0.0', + + oauth: { + required: true, + provider: 'google-bigquery', + }, + + params: { + accessToken: { + type: 'string', + required: true, + visibility: 'hidden', + description: 'OAuth access token', + }, + projectId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Google Cloud project ID', + }, + datasetId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'BigQuery dataset ID', + }, + tableId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'BigQuery table ID', + }, + rows: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'JSON array of row objects to insert', + }, + skipInvalidRows: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Whether to insert valid rows even if some are invalid', + }, + ignoreUnknownValues: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Whether to ignore columns not in the table schema', + }, + }, + + request: { + url: (params) => + 
`https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets/${encodeURIComponent(params.datasetId)}/tables/${encodeURIComponent(params.tableId)}/insertAll`, + method: 'POST', + headers: (params) => ({ + Authorization: `Bearer ${params.accessToken}`, + 'Content-Type': 'application/json', + }), + body: (params) => { + const parsedRows = typeof params.rows === 'string' ? JSON.parse(params.rows) : params.rows + const rows = (parsedRows as Record[]).map( + (row: Record) => ({ json: row }) + ) + + const body: Record = { rows } + if (params.skipInvalidRows !== undefined) body.skipInvalidRows = params.skipInvalidRows + if (params.ignoreUnknownValues !== undefined) + body.ignoreUnknownValues = params.ignoreUnknownValues + + return body + }, + }, + + transformResponse: async (response: Response, params?: GoogleBigQueryInsertRowsParams) => { + const data = await response.json() + if (!response.ok) { + const errorMessage = data.error?.message || 'Failed to insert rows into BigQuery table' + throw new Error(errorMessage) + } + + const insertErrors = data.insertErrors ?? [] + const errors = insertErrors.map( + (err: { + index: number + errors: Array<{ reason?: string; location?: string; message?: string }> + }) => ({ + index: err.index, + errors: err.errors.map((e) => ({ + reason: e.reason ?? null, + location: e.location ?? null, + message: e.message ?? null, + })), + }) + ) + + let totalRows = 0 + if (params?.rows) { + const parsed = typeof params.rows === 'string' ? JSON.parse(params.rows) : params.rows + totalRows = Array.isArray(parsed) ? parsed.length : 0 + } + + // When insertErrors is empty, all rows succeeded. + // When insertErrors is present and skipInvalidRows is false (default), + // the entire batch is rejected — no rows are inserted. 
+ let insertedRows = 0 + if (insertErrors.length === 0) { + insertedRows = totalRows + } else if (params?.skipInvalidRows) { + const failedIndexes = new Set(insertErrors.map((e: { index: number }) => e.index)) + insertedRows = totalRows - failedIndexes.size + } + + return { + success: true, + output: { + insertedRows, + errors, + }, + } + }, + + outputs: { + insertedRows: { type: 'number', description: 'Number of rows successfully inserted' }, + errors: { + type: 'array', + description: 'Array of per-row insertion errors (empty if all succeeded)', + items: { + type: 'object', + properties: { + index: { type: 'number', description: 'Zero-based index of the row that failed' }, + errors: { + type: 'array', + description: 'Error details for this row', + items: { + type: 'object', + properties: { + reason: { + type: 'string', + description: 'Short error code summarizing the error', + optional: true, + }, + location: { + type: 'string', + description: 'Where the error occurred', + optional: true, + }, + message: { + type: 'string', + description: 'Human-readable error description', + optional: true, + }, + }, + }, + }, + }, + }, + }, + }, +} diff --git a/apps/sim/tools/google_bigquery/list_datasets.ts b/apps/sim/tools/google_bigquery/list_datasets.ts new file mode 100644 index 0000000000..75da30bfe5 --- /dev/null +++ b/apps/sim/tools/google_bigquery/list_datasets.ts @@ -0,0 +1,121 @@ +import type { + GoogleBigQueryListDatasetsParams, + GoogleBigQueryListDatasetsResponse, +} from '@/tools/google_bigquery/types' +import type { ToolConfig } from '@/tools/types' + +export const googleBigQueryListDatasetsTool: ToolConfig< + GoogleBigQueryListDatasetsParams, + GoogleBigQueryListDatasetsResponse +> = { + id: 'google_bigquery_list_datasets', + name: 'BigQuery List Datasets', + description: 'List all datasets in a Google BigQuery project', + version: '1.0.0', + + oauth: { + required: true, + provider: 'google-bigquery', + }, + + params: { + accessToken: { + type: 'string', + 
required: true, + visibility: 'hidden', + description: 'OAuth access token', + }, + projectId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Google Cloud project ID', + }, + maxResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of datasets to return', + }, + pageToken: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Token for pagination', + }, + }, + + request: { + url: (params) => { + const url = new URL( + `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets` + ) + if (params.maxResults !== undefined && params.maxResults !== null) { + const maxResults = Number(params.maxResults) + if (Number.isFinite(maxResults) && maxResults > 0) { + url.searchParams.set('maxResults', String(maxResults)) + } + } + if (params.pageToken) url.searchParams.set('pageToken', params.pageToken) + return url.toString() + }, + method: 'GET', + headers: (params) => ({ + Authorization: `Bearer ${params.accessToken}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + if (!response.ok) { + const errorMessage = data.error?.message || 'Failed to list BigQuery datasets' + throw new Error(errorMessage) + } + + const datasets = (data.datasets ?? []).map( + (ds: { + datasetReference: { datasetId: string; projectId: string } + friendlyName?: string + location?: string + }) => ({ + datasetId: ds.datasetReference.datasetId, + projectId: ds.datasetReference.projectId, + friendlyName: ds.friendlyName ?? null, + location: ds.location ?? null, + }) + ) + + return { + success: true, + output: { + datasets, + nextPageToken: data.nextPageToken ?? 
null, + }, + } + }, + + outputs: { + datasets: { + type: 'array', + description: 'Array of dataset objects', + items: { + type: 'object', + properties: { + datasetId: { type: 'string', description: 'Unique dataset identifier' }, + projectId: { type: 'string', description: 'Project ID containing this dataset' }, + friendlyName: { + type: 'string', + description: 'Descriptive name for the dataset', + optional: true, + }, + location: { type: 'string', description: 'Geographic location where the data resides' }, + }, + }, + }, + nextPageToken: { + type: 'string', + description: 'Token for fetching next page of results', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/google_bigquery/list_tables.ts b/apps/sim/tools/google_bigquery/list_tables.ts new file mode 100644 index 0000000000..1d116a5c1e --- /dev/null +++ b/apps/sim/tools/google_bigquery/list_tables.ts @@ -0,0 +1,142 @@ +import type { + GoogleBigQueryListTablesParams, + GoogleBigQueryListTablesResponse, +} from '@/tools/google_bigquery/types' +import type { ToolConfig } from '@/tools/types' + +export const googleBigQueryListTablesTool: ToolConfig< + GoogleBigQueryListTablesParams, + GoogleBigQueryListTablesResponse +> = { + id: 'google_bigquery_list_tables', + name: 'BigQuery List Tables', + description: 'List all tables in a Google BigQuery dataset', + version: '1.0.0', + + oauth: { + required: true, + provider: 'google-bigquery', + }, + + params: { + accessToken: { + type: 'string', + required: true, + visibility: 'hidden', + description: 'OAuth access token', + }, + projectId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Google Cloud project ID', + }, + datasetId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'BigQuery dataset ID', + }, + maxResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of tables to return', + }, + pageToken: { + type: 'string', + required: false, + 
visibility: 'user-or-llm', + description: 'Token for pagination', + }, + }, + + request: { + url: (params) => { + const url = new URL( + `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets/${encodeURIComponent(params.datasetId)}/tables` + ) + if (params.maxResults !== undefined && params.maxResults !== null) { + const maxResults = Number(params.maxResults) + if (Number.isFinite(maxResults) && maxResults > 0) { + url.searchParams.set('maxResults', String(maxResults)) + } + } + if (params.pageToken) url.searchParams.set('pageToken', params.pageToken) + return url.toString() + }, + method: 'GET', + headers: (params) => ({ + Authorization: `Bearer ${params.accessToken}`, + }), + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + if (!response.ok) { + const errorMessage = data.error?.message || 'Failed to list BigQuery tables' + throw new Error(errorMessage) + } + + const tables = (data.tables ?? []).map( + (t: { + tableReference: { tableId: string; datasetId: string; projectId: string } + type?: string + friendlyName?: string + creationTime?: string + }) => ({ + tableId: t.tableReference.tableId, + datasetId: t.tableReference.datasetId, + projectId: t.tableReference.projectId, + type: t.type ?? null, + friendlyName: t.friendlyName ?? null, + creationTime: t.creationTime ?? null, + }) + ) + + return { + success: true, + output: { + tables, + totalItems: data.totalItems ?? null, + nextPageToken: data.nextPageToken ?? 
null, + }, + } + }, + + outputs: { + tables: { + type: 'array', + description: 'Array of table objects', + items: { + type: 'object', + properties: { + tableId: { type: 'string', description: 'Table identifier' }, + datasetId: { type: 'string', description: 'Dataset ID containing this table' }, + projectId: { type: 'string', description: 'Project ID containing this table' }, + type: { type: 'string', description: 'Table type (TABLE, VIEW, EXTERNAL, etc.)' }, + friendlyName: { + type: 'string', + description: 'User-friendly name for the table', + optional: true, + }, + creationTime: { + type: 'string', + description: 'Time when created, in milliseconds since epoch', + optional: true, + }, + }, + }, + }, + totalItems: { + type: 'number', + description: 'Total number of tables in the dataset', + optional: true, + }, + nextPageToken: { + type: 'string', + description: 'Token for fetching next page of results', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/google_bigquery/query.ts b/apps/sim/tools/google_bigquery/query.ts new file mode 100644 index 0000000000..ad11adc901 --- /dev/null +++ b/apps/sim/tools/google_bigquery/query.ts @@ -0,0 +1,164 @@ +import type { + GoogleBigQueryQueryParams, + GoogleBigQueryQueryResponse, +} from '@/tools/google_bigquery/types' +import type { ToolConfig } from '@/tools/types' + +export const googleBigQueryQueryTool: ToolConfig< + GoogleBigQueryQueryParams, + GoogleBigQueryQueryResponse +> = { + id: 'google_bigquery_query', + name: 'BigQuery Run Query', + description: 'Run a SQL query against Google BigQuery and return the results', + version: '1.0.0', + + oauth: { + required: true, + provider: 'google-bigquery', + }, + + params: { + accessToken: { + type: 'string', + required: true, + visibility: 'hidden', + description: 'OAuth access token', + }, + projectId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Google Cloud project ID', + }, + query: { + type: 'string', + required: true, + 
visibility: 'user-or-llm', + description: 'SQL query to execute', + }, + useLegacySql: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Whether to use legacy SQL syntax (default: false)', + }, + maxResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Maximum number of rows to return', + }, + defaultDatasetId: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Default dataset for unqualified table names', + }, + location: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Processing location (e.g., "US", "EU")', + }, + }, + + request: { + url: (params) => + `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/queries`, + method: 'POST', + headers: (params) => ({ + Authorization: `Bearer ${params.accessToken}`, + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + query: params.query, + useLegacySql: params.useLegacySql ?? false, + } + if (params.maxResults !== undefined) body.maxResults = Number(params.maxResults) + if (params.defaultDatasetId) { + body.defaultDataset = { + projectId: params.projectId, + datasetId: params.defaultDatasetId, + } + } + if (params.location) body.location = params.location + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + if (!response.ok) { + const errorMessage = data.error?.message || 'Failed to execute BigQuery query' + throw new Error(errorMessage) + } + + const columns = (data.schema?.fields ?? []).map((f: { name: string }) => f.name) + const rows = (data.rows ?? []).map((row: { f: Array<{ v: unknown }> }) => { + const obj: Record = {} + row.f.forEach((field, index) => { + obj[columns[index]] = field.v ?? null + }) + return obj + }) + + return { + success: true, + output: { + columns, + rows, + totalRows: data.totalRows ?? 
null, + jobComplete: data.jobComplete ?? false, + totalBytesProcessed: data.totalBytesProcessed ?? null, + cacheHit: data.cacheHit ?? null, + jobReference: data.jobReference ?? null, + pageToken: data.pageToken ?? null, + }, + } + }, + + outputs: { + columns: { + type: 'array', + description: 'Array of column names from the query result', + items: { type: 'string', description: 'Column name' }, + }, + rows: { + type: 'array', + description: 'Array of row objects keyed by column name', + items: { + type: 'object', + description: 'Row with column name/value pairs', + }, + }, + totalRows: { + type: 'string', + description: 'Total number of rows in the complete result set', + optional: true, + }, + jobComplete: { type: 'boolean', description: 'Whether the query completed within the timeout' }, + totalBytesProcessed: { type: 'string', description: 'Total bytes processed by the query' }, + cacheHit: { + type: 'boolean', + description: 'Whether the query result was served from cache', + optional: true, + }, + jobReference: { + type: 'object', + description: 'Job reference (useful when jobComplete is false)', + optional: true, + properties: { + projectId: { type: 'string', description: 'Project ID containing the job' }, + jobId: { type: 'string', description: 'Unique job identifier' }, + location: { type: 'string', description: 'Geographic location of the job' }, + }, + }, + pageToken: { + type: 'string', + description: 'Token for fetching additional result pages', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/google_bigquery/types.ts b/apps/sim/tools/google_bigquery/types.ts new file mode 100644 index 0000000000..c7432e0dea --- /dev/null +++ b/apps/sim/tools/google_bigquery/types.ts @@ -0,0 +1,119 @@ +import type { ToolResponse } from '@/tools/types' + +export interface GoogleBigQueryBaseParams { + accessToken: string + projectId: string +} + +export interface GoogleBigQueryQueryParams extends GoogleBigQueryBaseParams { + query: string + useLegacySql?: 
boolean
  maxResults?: number
  defaultDatasetId?: string
  location?: string
}

// Parameters for google_bigquery_list_datasets (datasets.list).
export interface GoogleBigQueryListDatasetsParams extends GoogleBigQueryBaseParams {
  maxResults?: number
  pageToken?: string
}

// Parameters for google_bigquery_list_tables (tables.list).
export interface GoogleBigQueryListTablesParams extends GoogleBigQueryBaseParams {
  datasetId: string
  maxResults?: number
  pageToken?: string
}

// Parameters for google_bigquery_get_table (tables.get).
export interface GoogleBigQueryGetTableParams extends GoogleBigQueryBaseParams {
  datasetId: string
  tableId: string
}

// Parameters for google_bigquery_insert_rows (tabledata.insertAll).
// rows is a JSON-encoded array of row objects (parsed by the tool).
export interface GoogleBigQueryInsertRowsParams extends GoogleBigQueryBaseParams {
  datasetId: string
  tableId: string
  rows: string
  skipInvalidRows?: boolean
  ignoreUnknownValues?: boolean
}

// Identifies a BigQuery job (returned when a query does not finish
// within the synchronous timeout).
export interface GoogleBigQueryJobReference {
  projectId: string
  jobId: string
  location: string
}

// Output shape of the query tool. Numeric counters (totalRows,
// totalBytesProcessed) are strings, matching the BigQuery API's int64
// encoding.
export interface GoogleBigQueryQueryResponse extends ToolResponse {
  output: {
    columns: string[]
    // NOTE(review): the generic arguments appear stripped by formatting —
    // presumably Record<string, unknown>[]; confirm against the original file.
    rows: Record[]
    totalRows: string | null
    jobComplete: boolean
    totalBytesProcessed: string | null
    cacheHit: boolean | null
    jobReference: GoogleBigQueryJobReference | null
    pageToken: string | null
  }
}

// Output shape of the list-datasets tool.
export interface GoogleBigQueryListDatasetsResponse extends ToolResponse {
  output: {
    datasets: Array<{
      datasetId: string
      projectId: string
      friendlyName: string | null
      location: string | null
    }>
    nextPageToken: string | null
  }
}

// Output shape of the list-tables tool.
export interface GoogleBigQueryListTablesResponse extends ToolResponse {
  output: {
    tables: Array<{
      tableId: string
      datasetId: string
      projectId: string
      type: string | null
      friendlyName: string | null
      creationTime: string | null
    }>
    totalItems: number | null
    nextPageToken: string | null
  }
}

// Output shape of the get-table tool. numRows/numBytes are strings,
// matching the BigQuery API's int64 encoding.
export interface GoogleBigQueryGetTableResponse extends ToolResponse {
  output: {
    tableId: string
    datasetId: string
    projectId: string
    type: string | null
    description: string | null
    numRows: string | null
    numBytes: string | null
    schema: Array<{
      name: string
      type: string
      mode: string |
null + description: string | null + }> + creationTime: string | null + lastModifiedTime: string | null + location: string | null + } +} + +export interface GoogleBigQueryInsertRowsResponse extends ToolResponse { + output: { + insertedRows: number + errors: Array<{ + index: number + errors: Array<{ + reason: string | null + location: string | null + message: string | null + }> + }> + } +} diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index 5a2f5787c7..c14cf1c72d 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -620,6 +620,13 @@ import { gongLookupPhoneTool, } from '@/tools/gong' import { googleSearchTool } from '@/tools/google' +import { + googleBigQueryGetTableTool, + googleBigQueryInsertRowsTool, + googleBigQueryListDatasetsTool, + googleBigQueryListTablesTool, + googleBigQueryQueryTool, +} from '@/tools/google_bigquery' import { googleBooksVolumeDetailsTool, googleBooksVolumeSearchTool } from '@/tools/google_books' import { googleCalendarCreateTool, @@ -3556,6 +3563,11 @@ export const tools: Record = { wordpress_list_users: wordpressListUsersTool, wordpress_get_user: wordpressGetUserTool, wordpress_search_content: wordpressSearchContentTool, + google_bigquery_query: googleBigQueryQueryTool, + google_bigquery_list_datasets: googleBigQueryListDatasetsTool, + google_bigquery_list_tables: googleBigQueryListTablesTool, + google_bigquery_get_table: googleBigQueryGetTableTool, + google_bigquery_insert_rows: googleBigQueryInsertRowsTool, google_vault_create_matters_export: createMattersExportTool, google_vault_list_matters_export: listMattersExportTool, google_vault_create_matters_holds: createMattersHoldsTool,