@@ -115,11 +115,123 @@ function buildPaginationClause(opts?: PaginationOptions): { sql: string; params:
115115 return { sql, params } ;
116116}
117117
118+ // ---------------------------------------------------------------------------
119+ // Deduplication: same-origin similarity check before creating new memories
120+ // ---------------------------------------------------------------------------
121+
/**
 * Breaks free text into a set of normalized word tokens for similarity scoring.
 *
 * The text is lower-cased, every character that is not a letter, combining
 * mark, digit or whitespace is replaced by a space, and the result is split on
 * whitespace. Tokens of two characters or fewer are discarded.
 *
 * @param text - Raw title or content string.
 * @returns The distinct tokens; empty when the text holds no token longer than
 *   two characters.
 */
function tokenize(text: string): Set<string> {
  return new Set(
    text
      .toLowerCase()
      // Unicode-aware on purpose: an ASCII-only class ([^a-z0-9\s]) wipes out
      // accented and non-Latin text entirely, which makes unrelated inputs
      // tokenize to the same empty set and compare as identical.
      .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
      .split(/\s+/)
      .filter((w) => w.length > 2),
  );
}
131+
/**
 * Jaccard index of two token sets: |a ∩ b| / |a ∪ b|.
 *
 * @param a - First token set.
 * @param b - Second token set.
 * @returns A value in [0, 1]. Two empty sets score 1; an empty set against a
 *   non-empty one scores 0.
 */
function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
  // By convention two empty sets are treated as identical. Past this guard at
  // least one set is non-empty, so the union below can never be zero.
  if (a.size === 0 && b.size === 0) return 1;

  // Walk the smaller set and probe the larger one; the count is the same either way.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
  let intersection = 0;
  for (const w of smaller) {
    if (larger.has(w)) intersection++;
  }

  return intersection / (a.size + b.size - intersection);
}
141+
/** Minimum Jaccard score between title token sets for a candidate to be considered further. */
const TITLE_SIMILARITY_THRESHOLD = 0.5;

/** Minimum Jaccard score between content token sets for a candidate to count as a duplicate. */
const CONTENT_SIMILARITY_THRESHOLD = 0.4;

/** How far back, in hours, existing memories are searched for duplicates. */
const DEDUP_WINDOW_HOURS = 48;
145+
/**
 * Outcome of a create-with-deduplication operation.
 *
 * NOTE(review): nothing in the visible part of this file produces this type
 * yet; the field descriptions below are inferred from the names — confirm
 * against the caller.
 */
export interface DedupResult {
  /** Whether a new memory was written or the input was folded into an existing one. */
  action: "created" | "merged";
  /** The memory entry resulting from the operation. */
  memory: MemoryEntry;
  /** Presumably the id of the existing memory that absorbed the input when `action` is "merged". */
  mergedInto?: string;
}
151+
/**
 * Looks for a recent, non-deleted memory of the same user and origin that is
 * similar enough to `input` to be treated as a duplicate.
 *
 * Candidates are the 50 most recently created rows newer than
 * `DEDUP_WINDOW_HOURS`. They are checked newest first, and the first one whose
 * title and content Jaccard scores both reach their thresholds is returned.
 *
 * @param db - Database handle used to query the `memories` table.
 * @param userId - Owner whose memories are searched.
 * @param input - The memory about to be created.
 * @returns The first matching row, or `null` when no candidate matches or the
 *   input is too short to compare.
 */
function findSimilarMemory(
  db: Database.Database,
  userId: string,
  input: CreateMemoryInput,
): MemoryRow | null {
  const inputTitleTokens = tokenize(input.title);
  const inputContentTokens = tokenize(input.content);

  // jaccardSimilarity scores two empty sets as 1.0. If the input has no usable
  // tokens, any candidate that is equally token-less would "match" without a
  // single word in common and an unrelated memory would be overwritten.
  // With no evidence to compare, skip deduplication (and the query) entirely.
  if (inputTitleTokens.size === 0 || inputContentTokens.size === 0) {
    return null;
  }

  // Compared as a string against created_at — assumes that column stores
  // ISO-8601 UTC timestamps as produced by toISOString(); TODO confirm.
  const cutoff = new Date(Date.now() - DEDUP_WINDOW_HOURS * 60 * 60 * 1000).toISOString();

  const candidates = db
    .prepare(
      `SELECT ${COLUMNS}
       FROM memories
       WHERE user_id = ? AND origin = ? AND deleted_at IS NULL AND created_at > ?
       ORDER BY created_at DESC
       LIMIT 50`,
    )
    .all(userId, input.origin, cutoff) as MemoryRow[];

  for (const candidate of candidates) {
    // Title is the cheap gate; content is only tokenized for title matches.
    const titleSim = jaccardSimilarity(inputTitleTokens, tokenize(candidate.title));
    if (titleSim < TITLE_SIMILARITY_THRESHOLD) continue;

    const contentSim = jaccardSimilarity(inputContentTokens, tokenize(candidate.content));
    if (contentSim >= CONTENT_SIMILARITY_THRESHOLD) {
      return candidate;
    }
  }

  return null;
}
186+
/**
 * Combines the tags already stored on a memory with a new list of tags.
 *
 * @param existingRaw - Stored tags value, decoded with `safeJsonArray`.
 * @param newTags - Tags supplied with the incoming memory.
 * @returns Existing tags followed by any new tags not already present, each
 *   value appearing once at the position of its first occurrence.
 */
function mergeTags(existingRaw: string, newTags: string[]): string[] {
  const unique = new Set(safeJsonArray(existingRaw));
  for (const tag of newTags) {
    unique.add(tag);
  }
  return Array.from(unique);
}
192+
118193export function createMemory (
119194 db : Database . Database ,
120195 userId : string ,
121196 input : CreateMemoryInput ,
122197) : MemoryEntry {
198+ const similar = findSimilarMemory ( db , userId , input ) ;
199+
200+ if ( similar ) {
201+ const now = new Date ( ) . toISOString ( ) ;
202+ const mergedTags = mergeTags ( similar . tags , input . tags ) ;
203+ const tagsJson = JSON . stringify ( mergedTags ) ;
204+
205+ const maxVersion = db
206+ . prepare ( `SELECT COALESCE(MAX(version_number), 0) as max_ver FROM memory_versions WHERE memory_id = ?` )
207+ . get ( similar . id ) as { max_ver : number } ;
208+
209+ db . prepare (
210+ `INSERT INTO memory_versions (id, memory_id, user_id, title, content, tags, memory_type, origin, allowed_vendors, version_number, created_at)
211+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` ,
212+ ) . run (
213+ randomUUID ( ) , similar . id , userId , similar . title , similar . content ,
214+ similar . tags , similar . memory_type ?? "semantic" , similar . origin , similar . allowed_vendors ,
215+ maxVersion . max_ver + 1 , now ,
216+ ) ;
217+
218+ db . prepare (
219+ `UPDATE memories SET title = ?, content = ?, tags = ?, updated_at = ? WHERE id = ? AND user_id = ?` ,
220+ ) . run ( input . title , input . content , tagsJson , now , similar . id , userId ) ;
221+
222+ return {
223+ id : similar . id ,
224+ title : input . title ,
225+ content : input . content ,
226+ tags : mergedTags ,
227+ origin : input . origin ,
228+ allowed_vendors : safeJsonArray ( similar . allowed_vendors ) ,
229+ memory_type : ( similar . memory_type as MemoryType ) ?? "semantic" ,
230+ created_at : similar . created_at ,
231+ updated_at : now ,
232+ } ;
233+ }
234+
123235 const id = randomUUID ( ) ;
124236 const now = new Date ( ) . toISOString ( ) ;
125237 const tagsJson = JSON . stringify ( input . tags ) ;
0 commit comments