Skip to content

Commit 453f3c1

Browse files
committed
feat: Implement conversation filtering to prevent saving trivial and repeated messages to AI training data.
1 parent f01dec4 commit 453f3c1

File tree

1 file changed

+132
-31
lines changed

1 file changed

+132
-31
lines changed

app/api/ai/route.ts

Lines changed: 132 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,93 @@ function checkRateLimit(ip: string): { allowed: boolean; error?: string } {
8282
return { allowed: true };
8383
}
8484

85+
/** Trimmed messages shorter than this many characters are not saved. */
const MIN_SAVED_MESSAGE_LENGTH = 10;

/** Look-back window used when checking for an identical earlier question. */
const DUPLICATE_WINDOW_MS = 24 * 60 * 60 * 1000;

/** Bare greetings, optionally followed by exclamation marks (input is lowercased first). */
const SIMPLE_GREETING_PATTERN = /^(?:hi|hello|hey|hii+|sup|yo|hai|helo|hllo)!*$/;

/** Bare acknowledgements / chit-chat, optionally followed by exclamation marks. */
const CASUAL_PHRASE_PATTERN =
  /^(?:thanks?|thank you|ok|okay|cool|nice|lol|haha+|yeah|yep|nope?|sure|k)!*$/;

/**
 * Decides whether a conversation turn is worth persisting to `ai_training_data`.
 *
 * Filters are applied in order; the first one that matches rejects the message:
 *  1. simple greetings ("hi", "hello!!", ...),
 *  2. casual chit-chat ("thanks", "ok", "lol", ...),
 *  3. anything shorter than {@link MIN_SAVED_MESSAGE_LENGTH} characters after trimming,
 *  4. an exact repeat of `message` by the same user within the last 24 hours.
 *
 * The duplicate lookup fails open: if the query errors or throws, the function
 * returns `true` so that a database hiccup never causes data to be dropped.
 *
 * @param message - Raw user message as received by the route.
 * @param userId - Id of the requesting user. When missing (anonymous request) the
 *   per-user duplicate lookup is skipped, because there is no user to scope it to
 *   and an equality filter against a null/empty id cannot match a stored row.
 * @returns `true` if the conversation should be saved, `false` if it was filtered out.
 */
async function shouldSaveConversation(
  message: string,
  userId: string | null | undefined,
): Promise<boolean> {
  const trimmedMessage = message.trim().toLowerCase();

  // Filter 1: Simple greetings
  if (SIMPLE_GREETING_PATTERN.test(trimmedMessage)) {
    console.log(`Filtering out simple greeting: "${message}"`);
    return false;
  }

  // Filter 2: Casual chit-chat
  if (CASUAL_PHRASE_PATTERN.test(trimmedMessage)) {
    console.log(`Filtering out casual chit-chat: "${message}"`);
    return false;
  }

  // Filter out very short messages
  if (trimmedMessage.length < MIN_SAVED_MESSAGE_LENGTH) {
    console.log(`Filtering out short message: "${message}" (${trimmedMessage.length} chars)`);
    return false;
  }

  // Anonymous requests cannot be de-duplicated per user; skip the round trip.
  if (!userId) {
    return true;
  }

  // Filter 3: Repeated questions (same user, exact same text, inside the look-back window)
  try {
    const supabase = getSupabaseClient();
    const windowStart = new Date(Date.now() - DUPLICATE_WINDOW_MS).toISOString();

    const { data, error } = await supabase
      .from('ai_training_data')
      .select('query_text')
      .eq('user_id', userId)
      // Compared against the raw message because that is what the insert stores.
      .eq('query_text', message)
      .gte('created_at', windowStart)
      .limit(1);

    if (error) {
      console.error('Error checking for duplicate questions:', error);
      // If there's an error, allow saving to be safe
      return true;
    }

    if (data && data.length > 0) {
      console.log(`Filtering out repeated question: "${message}"`);
      return false;
    }
  } catch (error) {
    console.error('Error in duplicate check:', error);
    // If there's an error, allow saving to be safe
    return true;
  }

  // If none of the filters matched, save the conversation
  return true;
}
171+
85172
// Configure the runtime for this API route
86173
export const runtime = 'nodejs';
87174

@@ -1083,20 +1170,27 @@ export async function POST(request: NextRequest) {
10831170

10841171
// Save to database after stream completes
10851172
try {
1086-
const sessionId = crypto.randomUUID();
1087-
const supabase = getSupabaseClient();
1088-
1089-
await supabase
1090-
.from('ai_training_data')
1091-
.insert({
1092-
user_id: userId,
1093-
session_id: sessionId,
1094-
query_text: message,
1095-
response_text: fullResponse,
1096-
context_type: finalContext
1097-
});
1098-
1099-
console.log(`Streaming conversation saved to database (User: ${userId})`);
1173+
// Check if conversation should be saved
1174+
const shouldSave = await shouldSaveConversation(message, userId);
1175+
1176+
if (shouldSave) {
1177+
const sessionId = crypto.randomUUID();
1178+
const supabase = getSupabaseClient();
1179+
1180+
await supabase
1181+
.from('ai_training_data')
1182+
.insert({
1183+
user_id: userId,
1184+
session_id: sessionId,
1185+
query_text: message,
1186+
response_text: fullResponse,
1187+
context_type: finalContext
1188+
});
1189+
1190+
console.log(`Streaming conversation saved to database (User: ${userId})`);
1191+
} else {
1192+
console.log(`Streaming conversation filtered out (User: ${userId})`);
1193+
}
11001194
} catch (dbError) {
11011195
console.error('Error saving streaming conversation:', dbError);
11021196
}
@@ -1164,24 +1258,31 @@ export async function POST(request: NextRequest) {
11641258

11651259
// Save conversation to database for training and analytics
11661260
try {
1167-
const sessionId = crypto.randomUUID();
1168-
const supabase = getSupabaseClient();
1169-
1170-
const { error: dbError } = await supabase
1171-
.from('ai_training_data')
1172-
.insert({
1173-
user_id: userId || null,
1174-
session_id: sessionId,
1175-
query_text: message,
1176-
response_text: aiResponse,
1177-
context_type: finalContext
1178-
});
1179-
1180-
if (dbError) {
1181-
console.error('Failed to save AI conversation:', dbError);
1261+
// Check if conversation should be saved
1262+
const shouldSave = await shouldSaveConversation(message, userId);
1263+
1264+
if (shouldSave) {
1265+
const sessionId = crypto.randomUUID();
1266+
const supabase = getSupabaseClient();
1267+
1268+
const { error: dbError } = await supabase
1269+
.from('ai_training_data')
1270+
.insert({
1271+
user_id: userId || null,
1272+
session_id: sessionId,
1273+
query_text: message,
1274+
response_text: aiResponse,
1275+
context_type: finalContext
1276+
});
1277+
1278+
if (dbError) {
1279+
console.error('Failed to save AI conversation:', dbError);
1280+
} else {
1281+
const userInfo = userId ? `(User: ${userId})` : '(Anonymous)';
1282+
console.log(`AI conversation saved successfully to database ${userInfo}`);
1283+
}
11821284
} else {
1183-
const userInfo = userId ? `(User: ${userId})` : '(Anonymous)';
1184-
console.log(`AI conversation saved successfully to database ${userInfo}`);
1285+
console.log(`Non-streaming conversation filtered out (User: ${userId})`);
11851286
}
11861287
} catch (dbSaveError) {
11871288
console.error('Error saving to database:', dbSaveError);

0 commit comments

Comments
 (0)