From 79ca82f3a65deb0342eab07cfb28c02670b3baf4 Mon Sep 17 00:00:00 2001 From: Tom Scanlan Date: Sat, 11 Apr 2026 10:48:15 -0400 Subject: [PATCH] perf(db): add query fingerprint to Prometheus metrics and optimize home page queries - Add fingerprint label to db_query_duration_seconds histogram for identifying slow query shapes - Add skipCount option to ContrailQueryService.find() to avoid unnecessary COUNT(*) scans - Replace ORDER BY RANDOM() in getHomePageFeaturedGroups with two-query ID-then-fetch pattern Addresses consistent 500ms p99 SELECT query duration in production. --- src/contrail/contrail-query.service.spec.ts | 48 +++++++ src/contrail/contrail-query.service.ts | 14 +- src/database/data-source.ts | 3 + src/database/database-metrics.module.ts | 2 +- src/database/database-metrics.service.spec.ts | 131 ++++++++++++++++++ src/database/database-metrics.service.ts | 10 +- .../services/event-query.service.spec.ts | 22 +++ src/event/services/event-query.service.ts | 2 + src/group/group.service.spec.ts | 69 ++++++++- src/group/group.service.ts | 28 ++-- src/test/mocks/mocks.ts | 1 + 11 files changed, 307 insertions(+), 23 deletions(-) create mode 100644 src/database/database-metrics.service.spec.ts diff --git a/src/contrail/contrail-query.service.spec.ts b/src/contrail/contrail-query.service.spec.ts index b1448d02..111602c4 100644 --- a/src/contrail/contrail-query.service.spec.ts +++ b/src/contrail/contrail-query.service.spec.ts @@ -82,6 +82,54 @@ describe('ContrailQueryService', () => { }), ).resolves.not.toThrow(); }); + + it('should skip the COUNT query when skipCount is true', async () => { + const mockRecords = [ + { + uri: 'at://did:plc:a/community.lexicon.calendar.event/1', + did: 'did:plc:a', + }, + { + uri: 'at://did:plc:b/community.lexicon.calendar.event/2', + did: 'did:plc:b', + }, + ]; + mockDataSource.query.mockResolvedValueOnce(mockRecords); + + const result = await service.find('community.lexicon.calendar.event', { + skipCount: true, + limit: 50, + }); + + // Only one query should have been executed (the SELECT, not the COUNT) + expect(mockDataSource.query).toHaveBeenCalledTimes(1); + expect(result.records).toEqual(mockRecords); + expect(result.total).toBe(-1); + }); + + it('should still run COUNT query when skipCount is false', async () => { + mockDataSource.query + .mockResolvedValueOnce([{ total: '5' }]) + .mockResolvedValueOnce([{ uri: 'at://test', did: 'did:plc:a' }]); + + const result = await service.find('community.lexicon.calendar.event', { + skipCount: false, + }); + + expect(mockDataSource.query).toHaveBeenCalledTimes(2); + expect(result.total).toBe(5); + }); + + it('should run COUNT query when skipCount is not specified', async () => { + mockDataSource.query + .mockResolvedValueOnce([{ total: '3' }]) + .mockResolvedValueOnce([]); + + const result = await service.find('community.lexicon.calendar.event'); + + expect(mockDataSource.query).toHaveBeenCalledTimes(2); + expect(result.total).toBe(3); + }); }); describe('findWithGeoFilter', () => { diff --git a/src/contrail/contrail-query.service.ts b/src/contrail/contrail-query.service.ts index 85ec8605..c0809ede 100644 --- a/src/contrail/contrail-query.service.ts +++ b/src/contrail/contrail-query.service.ts @@ -63,6 +63,7 @@ export class ContrailQueryService { orderBy?: string; limit?: number; offset?: number; + skipCount?: boolean; } = {}, ): Promise<{ records: ContrailRecord[]; total: number }> { if (options.orderBy) { @@ -89,11 +90,14 @@ export class ContrailQueryService { const where = sqlParts.length > 0 ? `WHERE ${sqlParts.join(' AND ')}` : ''; - const countResult = await ds.query( - `SELECT count(*) as total FROM ${table} ${where}`, - allParams, - ); - const total = parseInt(countResult[0]?.total ?? '0', 10); + let total = -1; + if (!options.skipCount) { + const countResult = await ds.query( + `SELECT count(*) as total FROM ${table} ${where}`, + allParams, + ); + total = parseInt(countResult[0]?.total ?? '0', 10); + } const orderClause = options.orderBy ? `ORDER BY ${options.orderBy}` : ''; diff --git a/src/database/data-source.ts b/src/database/data-source.ts index 9b492757..3b0267e5 100644 --- a/src/database/data-source.ts +++ b/src/database/data-source.ts @@ -162,6 +162,7 @@ function patchDriverForMetrics(dataSource: DataSource, tenantId: string): void { operation, duration, 'success', + queryFingerprint, ); } @@ -182,6 +183,7 @@ function patchDriverForMetrics(dataSource: DataSource, tenantId: string): void { operation, duration, 'error', + queryFingerprint, ); } @@ -203,6 +205,7 @@ function patchDriverForMetrics(dataSource: DataSource, tenantId: string): void { operation, duration, 'success', + queryFingerprint, ); } span.setAttribute('db.duration_ms', duration); diff --git a/src/database/database-metrics.module.ts b/src/database/database-metrics.module.ts index 0e0943f9..608309e7 100644 --- a/src/database/database-metrics.module.ts +++ b/src/database/database-metrics.module.ts @@ -34,7 +34,7 @@ const databaseMetricsProviders = [ makeHistogramProvider({ name: 'db_query_duration_seconds', help: 'Database query duration in seconds', - labelNames: ['tenant', 'operation', 'status'], + labelNames: ['tenant', 'operation', 'status', 'fingerprint'], buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2.5, 5, 10], }), makeCounterProvider({ diff --git a/src/database/database-metrics.service.spec.ts b/src/database/database-metrics.service.spec.ts new file mode 100644 index 00000000..d1eec11e --- /dev/null +++ b/src/database/database-metrics.service.spec.ts @@ -0,0 +1,131 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { DatabaseMetricsService } from './database-metrics.service'; +import { Histogram, Counter } from 'prom-client'; + +describe('DatabaseMetricsService', () => { + let service: DatabaseMetricsService; + let mockQueryDurationHistogram: jest.Mocked>; + let mockQueriesCounter: jest.Mocked>; + + beforeEach(async () => { + mockQueryDurationHistogram = { + observe: jest.fn(), + } as any; + + mockQueriesCounter = { + inc: jest.fn(), + } as any; + + const module: TestingModule = await Test.createTestingModule({ + providers: [ + DatabaseMetricsService, + { + provide: 'PROM_METRIC_DB_POOL_SIZE', + useValue: { set: jest.fn() }, + }, + { + provide: 'PROM_METRIC_DB_POOL_IDLE', + useValue: { set: jest.fn() }, + }, + { + provide: 'PROM_METRIC_DB_POOL_WAITING', + useValue: { set: jest.fn() }, + }, + { + provide: 'PROM_METRIC_DB_ACTIVE_CONNECTIONS', + useValue: { set: jest.fn() }, + }, + { + provide: 'PROM_METRIC_DB_QUERY_DURATION_SECONDS', + useValue: mockQueryDurationHistogram, + }, + { + provide: 'PROM_METRIC_DB_CONNECTION_ERRORS_TOTAL', + useValue: { inc: jest.fn() }, + }, + { + provide: 'PROM_METRIC_DB_QUERIES_TOTAL', + useValue: mockQueriesCounter, + }, + { + provide: 'PROM_METRIC_DB_CONNECTION_ACQUISITION_DURATION_SECONDS', + useValue: { observe: jest.fn() }, + }, + ], + }).compile(); + + service = module.get(DatabaseMetricsService); + }); + + describe('recordQueryDuration', () => { + it('should pass fingerprint label to histogram observe', () => { + service.recordQueryDuration( + 'tenant1', + 'SELECT', + 50, + 'success', + 'abc123def456', + ); + + expect(mockQueryDurationHistogram.observe).toHaveBeenCalledWith( + { + tenant: 'tenant1', + operation: 'SELECT', + status: 'success', + fingerprint: 'abc123def456', + }, + 0.05, + ); + }); + + it('should default fingerprint to "unknown" when not provided', () => { + service.recordQueryDuration('tenant1', 'INSERT', 100, 'success'); + + expect(mockQueryDurationHistogram.observe).toHaveBeenCalledWith( + { + tenant: 'tenant1', + operation: 'INSERT', + status: 'success', + fingerprint: 'unknown', + }, + 0.1, + ); + }); + + it('should include fingerprint in slow query warning log', () => { + const warnSpy = jest.spyOn(service['logger'], 'warn'); + + service.recordQueryDuration( + 'tenant1', + 'SELECT', + 1500, + 'success', + 'slowquery1234', + ); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('slowquery1234'), + ); + }); + + it('should pass fingerprint with error status', () => { + service.recordQueryDuration( + 'tenant1', + 'UPDATE', + 200, + 'error', + 'errfp123456', + ); + + expect(mockQueryDurationHistogram.observe).toHaveBeenCalledWith( + { + tenant: 'tenant1', + operation: 'UPDATE', + status: 'error', + fingerprint: 'errfp123456', + }, + 0.2, + ); + }); + }); +}); diff --git a/src/database/database-metrics.service.ts b/src/database/database-metrics.service.ts index 70efbf2e..f997edbe 100644 --- a/src/database/database-metrics.service.ts +++ b/src/database/database-metrics.service.ts @@ -123,12 +123,18 @@ export class DatabaseMetricsService implements OnModuleInit { operation: string, durationMs: number, status: 'success' | 'error' = 'success', + fingerprint?: string, ): void { try { const durationSeconds = durationMs / 1000; this.queryDurationHistogram.observe( - { tenant: tenantId, operation, status }, + { + tenant: tenantId, + operation, + status, + fingerprint: fingerprint || 'unknown', + }, durationSeconds, ); @@ -138,7 +144,7 @@ export class DatabaseMetricsService implements OnModuleInit { // Warn on slow queries (> 1 second) if (durationMs > 1000) { this.logger.warn( - `Slow query detected for tenant ${tenantId} (${operation}, ${status}): ${durationMs}ms`, + `Slow query detected for tenant ${tenantId} (${operation}, ${status}, fingerprint=${fingerprint || 'unknown'}): ${durationMs}ms`, ); } } catch (error) { diff --git a/src/event/services/event-query.service.spec.ts b/src/event/services/event-query.service.spec.ts index cb471cb6..fae2cff2 100644 --- a/src/event/services/event-query.service.spec.ts +++ b/src/event/services/event-query.service.spec.ts @@ -780,6 +780,28 @@ describe('EventQueryService', () => { // 3. Result contains at most 4 events expect(result.length).toBeLessThanOrEqual(4); }); + + it('should pass skipCount: true to contrail find (no COUNT query needed for random sampling)', async () => { + const contrailService = service[ + 'contrailQueryService' + ] as jest.Mocked; + const enrichmentService = service[ + 'atprotoEnrichmentService' + ] as jest.Mocked; + + contrailService.find.mockResolvedValueOnce({ + records: [] as any, + total: -1, + }); + enrichmentService.enrichRecords.mockResolvedValueOnce([]); + + await service.getHomePageFeaturedEvents(); + + expect(contrailService.find).toHaveBeenCalledWith( + 'community.lexicon.calendar.event', + expect.objectContaining({ skipCount: true }), + ); + }); }); describe('getHomePageUserNextHostedEvent', () => { diff --git a/src/event/services/event-query.service.ts b/src/event/services/event-query.service.ts index c6a95e34..d5dfb5ba 100644 --- a/src/event/services/event-query.service.ts +++ b/src/event/services/event-query.service.ts @@ -978,6 +978,7 @@ export class EventQueryService { const now = new Date().toISOString(); // Fetch upcoming public events from Contrail (larger window for random sampling) + // skipCount: true — we only need records for random sampling, not the total count const contrailResult = await this.contrailQueryService.find( 'community.lexicon.calendar.event', { @@ -990,6 +991,7 @@ export class EventQueryService { orderBy: `record->>'startsAt' ASC, uri ASC`, limit: 50, offset: 0, + skipCount: true, }, ); diff --git a/src/group/group.service.spec.ts b/src/group/group.service.spec.ts index eb42cc58..634e7ff8 100644 --- a/src/group/group.service.spec.ts +++ b/src/group/group.service.spec.ts @@ -522,21 +522,76 @@ describe('GroupService', () => { }); describe('getHomePageFeaturedGroups', () => { + beforeEach(() => { + // Ensure createQueryBuilder returns the mockRepository (may be corrupted by spies in other tests) + mockRepository.createQueryBuilder.mockReturnThis(); + }); + it('should return featured groups', async () => { - jest - .spyOn(service['groupRepository'], 'find') - .mockResolvedValue([mockGroup]); - jest.spyOn(mockRepository, 'getMany').mockResolvedValue([mockGroup]); + // First getMany returns IDs, second returns full entities + mockRepository.getMany + .mockResolvedValueOnce([{ id: mockGroup.id }]) + .mockResolvedValueOnce([mockGroup]); const result = await service.getHomePageFeaturedGroups(); expect(result).toEqual([mockGroup]); }); + + it('should not use ORDER BY RANDOM() — should fetch IDs first then load full entities', async () => { + // First getMany returns IDs, second returns full entities + const mockGroupIds = [ + { id: 1 }, + { id: 2 }, + { id: 3 }, + { id: 4 }, + { id: 5 }, + { id: 6 }, + { id: 7 }, + { id: 8 }, + ]; + const mockFullGroups = mockGroupIds.slice(0, 5).map((g) => ({ + ...mockGroup, + id: g.id, + })); + + mockRepository.getMany + .mockResolvedValueOnce(mockGroupIds) + .mockResolvedValueOnce(mockFullGroups); + + const result = await service.getHomePageFeaturedGroups(); + + // createQueryBuilder should be called twice: once for IDs, once for full entities + expect(mockRepository.createQueryBuilder).toHaveBeenCalledTimes(2); + + // First call: ID-only query uses select('group.id') + expect(mockRepository.select).toHaveBeenCalledWith('group.id'); + + // Second call: full entity query uses whereInIds + expect(mockRepository.whereInIds).toHaveBeenCalled(); + + // Result should have at most 5 groups + expect(result.length).toBeLessThanOrEqual(5); + }); + + it('should return empty array when no groups match', async () => { + // ID query returns empty + mockRepository.getMany.mockResolvedValueOnce([]); + + const result = await service.getHomePageFeaturedGroups(); + + expect(result).toEqual([]); + // Should only call createQueryBuilder once (ID query) — no need for second query + expect(mockRepository.createQueryBuilder).toHaveBeenCalledTimes(1); + }); }); describe('getHomePageUserCreatedGroups', () => { + beforeEach(() => { + // Ensure createQueryBuilder returns the mockRepository (may be corrupted by spies in other tests) + mockRepository.createQueryBuilder.mockReturnThis(); + }); + it('should return user created groups', async () => { - jest - .spyOn(service['groupRepository'], 'find') - .mockResolvedValue([mockGroup]); + mockRepository.getMany.mockResolvedValueOnce([mockGroup]); const result = await service.getHomePageUserCreatedGroups(mockUser.id); expect(result).toEqual([mockGroup]); }); diff --git a/src/group/group.service.ts b/src/group/group.service.ts index e109ab29..e7cd584b 100644 --- a/src/group/group.service.ts +++ b/src/group/group.service.ts @@ -908,21 +908,33 @@ export class GroupService { async getHomePageFeaturedGroups(): Promise { await this.getTenantSpecificGroupRepository(); - const groups = await this.groupRepository + // Fetch IDs only (lightweight) then shuffle in JS to avoid ORDER BY RANDOM() + // which forces PostgreSQL to load and sort ALL matching rows + const idRows = await this.groupRepository .createQueryBuilder('group') - .select(['group']) - .leftJoinAndSelect('group.groupMembers', 'groupMembers') - .leftJoinAndSelect('group.categories', 'categories') - .leftJoinAndSelect('group.image', 'image') + .select('group.id') .where({ visibility: GroupVisibility.Public, status: GroupStatus.Published, }) - .orderBy('RANDOM()') - .limit(5) .getMany(); - return groups; + if (idRows.length === 0) return []; + + // Shuffle and pick 5 in JS + const selectedIds = idRows + .sort(() => Math.random() - 0.5) + .slice(0, 5) + .map((g) => g.id); + + // Load full entities with relations for only the selected IDs + return this.groupRepository + .createQueryBuilder('group') + .leftJoinAndSelect('group.groupMembers', 'groupMembers') + .leftJoinAndSelect('group.categories', 'categories') + .leftJoinAndSelect('group.image', 'image') + .whereInIds(selectedIds) + .getMany(); } async getHomePageUserCreatedGroups( diff --git a/src/test/mocks/mocks.ts b/src/test/mocks/mocks.ts index 4b7de7d9..58cd3cce 100644 --- a/src/test/mocks/mocks.ts +++ b/src/test/mocks/mocks.ts @@ -153,6 +153,7 @@ export const mockRepository = { merge: jest.fn(), leftJoin: jest.fn().mockReturnThis(), select: jest.fn().mockReturnThis(), + whereInIds: jest.fn().mockReturnThis(), getRawMany: jest.fn(), softDelete: jest.fn(), findAndCount: jest.fn(),