From 64fcdfaa0ff3dc7e58cc645435dad607b937e71c Mon Sep 17 00:00:00 2001 From: Michael Vetter Date: Fri, 27 Mar 2026 23:07:42 +0100 Subject: [PATCH 1/3] refactor(database): enforce unique archive_id for reliable deduplication Implement a UNIQUE constraint on the archive_id column (XEP-0359 stanza-id) to prevent duplicate messages in the chat log. This addresses problems where the same message could be stored multiple times if received via both MAM and regular. Signed-off-by: Michael Vetter --- src/database.c | 113 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 84 insertions(+), 29 deletions(-) diff --git a/src/database.c b/src/database.c index 60ec5f958..15d7e946f 100644 --- a/src/database.c +++ b/src/database.c @@ -35,9 +35,10 @@ static prof_msg_type_t _get_message_type_type(const char* const type); static prof_enc_t _get_message_enc_type(const char* const encstr); static int _get_db_version(void); static gboolean _migrate_to_v2(void); +static gboolean _migrate_to_v3(void); static gboolean _check_available_space_for_db_migration(char* path_to_db); -static const int latest_version = 2; +static const int latest_version = 3; static char* _db_strdup(const char* str) @@ -117,7 +118,7 @@ log_database_init(ProfAccount* account) "`timestamp` TEXT, " "`type` TEXT, " "`stanza_id` TEXT, " - "`archive_id` TEXT, " + "`archive_id` TEXT UNIQUE, " "`encryption` TEXT, " "`marked_read` INTEGER, " "`replace_id` TEXT, " @@ -177,6 +178,10 @@ log_database_init(ProfAccount* account) cons_show_error("Database Initialization Error: Unable to migrate database to version 2. Please, check error logs for details."); goto out; } + if (db_version < 3 && (!_check_available_space_for_db_migration(filename) || !_migrate_to_v3())) { + cons_show_error("Database Initialization Error: Unable to migrate database to version 3. Please, check error logs for details."); + goto out; + } cons_show("Database schema migration was successful."); } @@ -490,31 +495,11 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji } // stanza-id (XEP-0359) doesn't have to be present in the message. - // But if it's duplicated, it's a serious server-side problem, so we better track it. - // Unless it's MAM, in that case it's expected behaviour. - if (message->stanzaid && !message->is_mam) { - auto_sqlite char* duplicate_check_query = sqlite3_mprintf("SELECT 1 FROM `ChatLogs` WHERE (`archive_id` = %Q)", - message->stanzaid); - - if (!duplicate_check_query) { - log_error("Could not allocate memory for SQL duplicate query in log_database_add()"); - return; - } - - sqlite3_stmt* stmt; - - if (SQLITE_OK == sqlite3_prepare_v2(g_chatlog_database, duplicate_check_query, -1, &stmt, NULL)) { - if (sqlite3_step(stmt) == SQLITE_ROW) { - log_error("Duplicate stanza-id found for the message. stanza_id: %s; archive_id: %s; sender: %s; content: %s", message->id, message->stanzaid, from_jid->barejid, message->plain); - cons_show_error("Got a message with duplicate (server-generated) stanza-id from %s.", from_jid->fulljid); - } - sqlite3_finalize(stmt); - } - } + // We use archive_id UNIQUE constraint and INSERT OR IGNORE for deduplication. auto_sqlite char* orig_message_id = original_message_id == -1 ? NULL : sqlite3_mprintf("%d", original_message_id); - auto_sqlite char* query = sqlite3_mprintf("INSERT INTO `ChatLogs` " + auto_sqlite char* query = sqlite3_mprintf("INSERT OR IGNORE INTO `ChatLogs` " "(`from_jid`, `from_resource`, `to_jid`, `to_resource`, " "`message`, `timestamp`, `stanza_id`, `archive_id`, " "`replaces_db_id`, `replace_id`, `type`, `encryption`) " @@ -545,11 +530,6 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji } else { log_error("Unknown SQLite error in _add_to_db()."); } - } else { - int inserted_rows_count = sqlite3_changes(g_chatlog_database); - if (inserted_rows_count < 1) { - log_error("SQLite did not insert message (rows: %d, id: %s, content: %s)", inserted_rows_count, message->id, message->plain); - } } } @@ -656,3 +636,78 @@ _check_available_space_for_db_migration(char* path_to_db) return FALSE; } } + +static gboolean +_migrate_to_v3(void) +{ + char* err_msg = NULL; + + const char* sql_statements[] = { + "BEGIN TRANSACTION", + "CREATE TABLE `ChatLogs_v3_migration` (" + "`id` INTEGER PRIMARY KEY AUTOINCREMENT, " + "`from_jid` TEXT NOT NULL, " + "`to_jid` TEXT NOT NULL, " + "`from_resource` TEXT, " + "`to_resource` TEXT, " + "`message` TEXT, " + "`timestamp` TEXT, " + "`type` TEXT, " + "`stanza_id` TEXT, " + "`archive_id` TEXT UNIQUE, " + "`encryption` TEXT, " + "`marked_read` INTEGER, " + "`replace_id` TEXT, " + "`replaces_db_id` INTEGER, " + "`replaced_by_db_id` INTEGER)", + + "INSERT INTO `ChatLogs_v3_migration` " + "SELECT * FROM `ChatLogs` " + "WHERE `archive_id` IS NULL " + "UNION ALL " + "SELECT * FROM (SELECT * FROM `ChatLogs` WHERE `archive_id` IS NOT NULL GROUP BY `archive_id`)", + + "DROP TABLE `ChatLogs` ", + "ALTER TABLE `ChatLogs_v3_migration` RENAME TO `ChatLogs` ", + + "CREATE INDEX ChatLogs_timestamp_IDX ON `ChatLogs` (`timestamp`)", + "CREATE INDEX ChatLogs_to_from_jid_IDX ON `ChatLogs` (`to_jid`, `from_jid`)", + "CREATE TRIGGER update_corrected_message " + "AFTER INSERT ON ChatLogs " + "FOR EACH ROW " + "WHEN NEW.replaces_db_id IS NOT NULL " + "BEGIN " + "UPDATE ChatLogs " + "SET replaced_by_db_id = NEW.id " + "WHERE id = NEW.replaces_db_id; " + "END;", + + "UPDATE `DbVersion` SET `version` = 3;", + "END TRANSACTION" + }; + + int statements_count = sizeof(sql_statements) / sizeof(sql_statements[0]); + + for (int i = 0; i < statements_count; i++) { + if (SQLITE_OK != sqlite3_exec(g_chatlog_database, sql_statements[i], NULL, 0, &err_msg)) { + log_error("SQLite error in _migrate_to_v3() on statement %d: %s", i, err_msg); + if (err_msg) { + sqlite3_free(err_msg); + err_msg = NULL; + } + goto cleanup; + } + } + + return TRUE; + +cleanup: + if (SQLITE_OK != sqlite3_exec(g_chatlog_database, "ROLLBACK;", NULL, 0, &err_msg)) { + log_error("DB Migration] Unable to ROLLBACK: %s", err_msg); + if (err_msg) { + sqlite3_free(err_msg); + } + } + + return FALSE; +} From 247c44be307c0bf9c80882019e0788252e508178 Mon Sep 17 00:00:00 2001 From: Michael Vetter Date: Fri, 27 Mar 2026 23:57:59 +0100 Subject: [PATCH 2/3] fix(xmpp): verify 'by' attribute of stanza-id and MAM result Implement security checks to ensure the 'archive_id' (XEP-0359) used for database deduplication originates from a trusted source. XEP-0359 Section 3.1: "The 'by' attribute MUST be the XMPP address (JID) of the entity assigning the unique and stable stanza ID." Furthermore, Section 4 (Security Considerations) specifies: "A client SHOULD only trust elements from its own server or from a MUC service it is joined to." XEP-0313 Section 4.1.2: "The 'by' attribute of the element is the JID of the archive being queried." and "If the 'by' attribute is not present, the recipient MUST assume that the results are from their own personal archive." Let _handle_chat verify 'by' attribute matches our bare JID. Let _handle_groupchat verify 'by' attribute matches the MUC s bare JID. Let _handle_mam verify the 'by' attribute matches the outer message 'from' (archive JID). If 'by' is missing then 'from' matches our own bare JID (personal archive). Signed-off-by: Michael Vetter --- src/xmpp/message.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/xmpp/message.c b/src/xmpp/message.c index 76179efc6..130cace03 100644 --- a/src/xmpp/message.c +++ b/src/xmpp/message.c @@ -1067,9 +1067,12 @@ _handle_groupchat(xmpp_stanza_t* const stanza) char* stanzaid = NULL; xmpp_stanza_t* stanzaidst = xmpp_stanza_get_child_by_name_and_ns(stanza, STANZA_NAME_STANZA_ID, STANZA_NS_STABLE_ID); if (stanzaidst) { - stanzaid = (char*)xmpp_stanza_get_attribute(stanzaidst, STANZA_ATTR_ID); - if (stanzaid) { - message->stanzaid = strdup(stanzaid); + const char* by = xmpp_stanza_get_attribute(stanzaidst, "by"); + if (by && (g_strcmp0(by, from_jid->barejid) == 0)) { + stanzaid = (char*)xmpp_stanza_get_attribute(stanzaidst, STANZA_ATTR_ID); + if (stanzaid) { + message->stanzaid = strdup(stanzaid); + } } } @@ -1389,9 +1392,12 @@ _handle_chat(xmpp_stanza_t* const stanza, gboolean is_mam, gboolean is_carbon, c char* stanzaid = NULL; xmpp_stanza_t* stanzaidst = xmpp_stanza_get_child_by_name_and_ns(stanza, STANZA_NAME_STANZA_ID, STANZA_NS_STABLE_ID); if (stanzaidst) { - stanzaid = (char*)xmpp_stanza_get_attribute(stanzaidst, STANZA_ATTR_ID); - if (stanzaid) { - message->stanzaid = strdup(stanzaid); + const char* by = xmpp_stanza_get_attribute(stanzaidst, "by"); + if (by && equals_our_barejid(by)) { + stanzaid = (char*)xmpp_stanza_get_attribute(stanzaidst, STANZA_ATTR_ID); + if (stanzaid) { + message->stanzaid = strdup(stanzaid); + } } } } @@ -1524,6 +1530,20 @@ _handle_mam(xmpp_stanza_t* const stanza) // // same as from XEP-0359 for live messages + const char* by = xmpp_stanza_get_attribute(result, "by"); + const char* from = xmpp_stanza_get_from(stanza); + if (by) { + if (g_strcmp0(by, from) != 0) { + log_warning("MAM result 'by' attribute (%s) does not match 'from' (%s)", by, from); + return TRUE; + } + } else { + if (from && !equals_our_barejid(from)) { + log_warning("MAM result from %s with no 'by' attribute (expected our own JID)", from); + return TRUE; + } + } + const char* result_id = xmpp_stanza_get_id(result); GDateTime* timestamp = stanza_get_delay_from(forwarded, NULL); From c43c9567dfa696112083a87c04dd17db51664bf6 Mon Sep 17 00:00:00 2001 From: Michael Vetter Date: Tue, 31 Mar 2026 13:23:48 +0000 Subject: [PATCH 3/3] refactor(database): use ON CONFLICT for deduplication Replace 'INSERT OR IGNORE' with 'INSERT ... ON CONFLICT(`archive_id`) DO NOTHING RETURNING id'. This is only available since sqlite 3.35.0. So deduplication only happens for `archive_id` and we don't silently ignore other errors or constraints (like not null). We can now detect if an insertion was skipped due to duplication by checking the result of 'RETURNING id'. We don't print out when we don't insert duplicated messages since this will happen often and will be too noisy. So we match the behaviour of what Dino is doing. Signed-off-by: Michael Vetter --- meson.build | 2 +- src/database.c | 33 ++++++++++++++++++++++----------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/meson.build b/meson.build index 8212ff6b0..6ce1d7fb1 100644 --- a/meson.build +++ b/meson.build @@ -92,7 +92,7 @@ endif glib_dep = dependency('glib-2.0', version: '>= 2.62.0') gio_dep = dependency('gio-2.0') curl_dep = dependency('libcurl', version: '>= 7.62.0') -sqlite_dep = dependency('sqlite3', version: '>= 3.22.0') +sqlite_dep = dependency('sqlite3', version: '>= 3.35.0') thread_dep = dependency('threads') math_dep = cc.find_library('m') libstrophe_dep = dependency('libstrophe', version: '>= 0.12.3') diff --git a/src/database.c b/src/database.c index 15d7e946f..54e413109 100644 --- a/src/database.c +++ b/src/database.c @@ -159,7 +159,7 @@ log_database_init(ProfAccount* account) } if (db_version == -1) { - query = "INSERT OR IGNORE INTO `DbVersion` (`version`) VALUES ('2')"; + query = "INSERT INTO `DbVersion` (`version`) VALUES ('2') ON CONFLICT(`version`) DO NOTHING"; if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) { goto out; } @@ -495,15 +495,17 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji } // stanza-id (XEP-0359) doesn't have to be present in the message. - // We use archive_id UNIQUE constraint and INSERT OR IGNORE for deduplication. + // We use archive_id UNIQUE constraint and ON CONFLICT DO NOTHING for deduplication. auto_sqlite char* orig_message_id = original_message_id == -1 ? NULL : sqlite3_mprintf("%d", original_message_id); - auto_sqlite char* query = sqlite3_mprintf("INSERT OR IGNORE INTO `ChatLogs` " + auto_sqlite char* query = sqlite3_mprintf("INSERT INTO `ChatLogs` " "(`from_jid`, `from_resource`, `to_jid`, `to_resource`, " "`message`, `timestamp`, `stanza_id`, `archive_id`, " "`replaces_db_id`, `replace_id`, `type`, `encryption`) " - "VALUES (%Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q)", + "VALUES (%Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q, %Q) " + "ON CONFLICT(`archive_id`) DO NOTHING " + "RETURNING id", from_jid->barejid, from_jid->resourcepart, to_jid->barejid, @@ -523,14 +525,23 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji log_debug("Writing to DB. Query: %s", query); - if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) { - if (err_msg) { - log_error("SQLite error in _add_to_db(): %s", err_msg); - sqlite3_free(err_msg); - } else { - log_error("Unknown SQLite error in _add_to_db()."); - } + sqlite3_stmt* stmt = NULL; + int rc = sqlite3_prepare_v2(g_chatlog_database, query, -1, &stmt, NULL); + if (rc != SQLITE_OK) { + log_error("SQLite error in _add_to_db() (prepare): %s", sqlite3_errmsg(g_chatlog_database)); + return; } + + rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + log_debug("Successfully inserted message into database."); + } else if (rc == SQLITE_DONE) { + log_debug("Message already exists in database (archive_id: %s), skipping.", message->stanzaid); + } else { + log_error("SQLite error in _add_to_db() (step): %s", sqlite3_errmsg(g_chatlog_database)); + } + + sqlite3_finalize(stmt); } static int