palsoftware · pzauner · Jan 5, 2026
diff --git a/.kotlin/errors/errors-1767625645437.log b/.kotlin/errors/errors-1767625645437.log
@@ -0,0 +1,4 @@
+kotlin version: 2.0.21
+error message: The daemon has terminated unexpectedly on startup attempt #1 with error code: 0. The daemon process output:
+    1. Kotlin compile daemon is ready
+
diff --git a/app/build.gradle.kts b/app/build.gradle.kts
@@ -3,6 +3,7 @@ plugins {
     id("org.jetbrains.kotlin.android")
     id("org.jetbrains.kotlin.plugin.compose")
     id("org.jetbrains.kotlin.plugin.serialization") version "2.0.21"
+    id("com.google.devtools.ksp") version "2.0.21-1.0.27"
 }
 
 import java.io.File
@@ -201,14 +202,19 @@ dependencies {
     implementation("androidx.recyclerview:recyclerview:1.3.2")
     // Emoji2 per supporto emoji future-proof
     implementation("androidx.emoji2:emoji2:1.4.0")
-    implementation("androidx.emoji2:emoji2-views:1.4.0")
-    implementation("androidx.emoji2:emoji2-views-helper:1.4.0")
     // Kotlinx Serialization for dictionary optimization
     implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3")
     implementation("org.jetbrains.kotlinx:kotlinx-serialization-cbor:1.6.3")
     // Shizuku for ADB shell access
     implementation("dev.rikka.shizuku:api:13.1.5")
     implementation("dev.rikka.shizuku:provider:13.1.5")
+
+    // Room database for user dictionary and ngrams
+    val roomVersion = "2.6.1"
+    implementation("androidx.room:room-runtime:$roomVersion")
+    implementation("androidx.room:room-ktx:$roomVersion")
+    ksp("androidx.room:room-compiler:$roomVersion")
+
     testImplementation(libs.junit)
     androidTestImplementation(libs.androidx.junit)
     androidTestImplementation(libs.androidx.espresso.core)

diff --git a/app/build.properties b/app/build.properties
@@ -1,4 +1,4 @@
 #Build number and date
-#Sat Jan 03 00:42:14 CET 2026
-buildDate=03 gen 2026
-buildNumber=1819
+#Mon Jan 05 19:22:08 CET 2026
+buildDate=05 gen 2026
+buildNumber=2009
diff --git a/app/src/main/java/it/palsoftware/pastiera/AutoCorrectionCategoryScreen.kt b/app/src/main/java/it/palsoftware/pastiera/AutoCorrectionCategoryScreen.kt
diff --git a/app/src/main/java/it/palsoftware/pastiera/core/suggestions/AutoReplaceController.kt b/app/src/main/java/it/palsoftware/pastiera/core/suggestions/AutoReplaceController.kt
@@ -12,27 +12,12 @@ class AutoReplaceController(
     private val suggestionEngine: SuggestionEngine,
     private val settingsProvider: () -> SuggestionSettings
 ) {
-    // #region agent log
-    private fun debugLog(hypothesisId: String, location: String, message: String, data: Map<String, Any?> = emptyMap()) {
-        try {
-            val logFile = File("/Users/andrea/Desktop/DEV/Pastiera/pastiera/.cursor/debug.log")
-            val logEntry = JSONObject().apply {
-                put("sessionId", "debug-session")
-                put("runId", "run1")
-                put("hypothesisId", hypothesisId)
-                put("location", location)
-                put("message", message)
-                put("timestamp", System.currentTimeMillis())
-                put("data", JSONObject(data))
-            }
-            logFile.appendText(logEntry.toString() + "\n")
-        } catch (e: Exception) {
-            // Ignore log errors
-        }
-    }
-    // #endregion
 
-    data class ReplaceResult(val replaced: Boolean, val committed: Boolean)
+    data class ReplaceResult(
+        val replaced: Boolean,
+        val committed: Boolean,
+        val committedWord: String? = null
+    )
 
     // Track last replacement for undo
     private data class LastReplacement(
@@ -99,7 +84,9 @@ class AutoReplaceController(
         keyCode: Int,
         event: KeyEvent?,
         tracker: CurrentWordTracker,
-        inputConnection: InputConnection?
+        inputConnection: InputConnection?,
+        contextHistory: List<String> = emptyList(),
+        cachedSuggestions: List<SuggestionResult> = emptyList()
     ): ReplaceResult {
         val unicodeChar = event?.unicodeChar ?: 0
         val boundaryChar = when {
@@ -111,12 +98,20 @@ class AutoReplaceController(
 
         val settings = settingsProvider()
         if (!settings.autoReplaceOnSpaceEnter || inputConnection == null) {
+            val wordBefore = tracker.currentWord
+            tracker.onBoundaryReached(boundaryChar, inputConnection)
+            return ReplaceResult(false, unicodeChar != 0, if (wordBefore.isBlank()) null else wordBefore)
+        }
+
+        val word = tracker.currentWord
+        if (word.isBlank()) {
             tracker.onBoundaryReached(boundaryChar, inputConnection)
-            return ReplaceResult(false, unicodeChar != 0)
+            return ReplaceResult(false, unicodeChar != 0, null)
         }
 
         // If cursor is after non-letter/digit and not standard punctuation (e.g., emoji),
         // skip auto-replace to avoid dropping trailing symbols.
+        // Moved here to avoid IPC call when word is already blank.
         val textBefore = inputConnection.getTextBeforeCursor(16, 0)?.toString().orEmpty()
         val lastCharBeforeCursor = textBefore.lastOrNull()
         val allowedPunctuation = it.palsoftware.pastiera.core.Punctuation.BOUNDARY + "-"
@@ -125,38 +120,39 @@ class AutoReplaceController(
             lastCharBeforeCursor !in allowedPunctuation &&
             !lastCharBeforeCursor.isWhitespace()
         ) {
+            val wordBefore = tracker.currentWord
             tracker.onBoundaryReached(boundaryChar, inputConnection)
-            return ReplaceResult(false, unicodeChar != 0)
+            return ReplaceResult(false, unicodeChar != 0, if (wordBefore.isBlank()) null else wordBefore)
         }
-
-        val word = tracker.currentWord
-        // #region agent log
-        val textBeforeReal = inputConnection?.getTextBeforeCursor(16, 0)?.toString().orEmpty()
-        debugLog("C", "AutoReplaceController.handleBoundary:beforeReplace", "handleBoundary called", mapOf(
-            "trackerWord" to word,
-            "trackerWordLength" to word.length,
-            "textBeforeReal" to textBeforeReal,
-            "textBeforeRealLength" to textBeforeReal.length,
-            "keyCode" to keyCode,
-            "boundaryChar" to (boundaryChar?.toString() ?: "null")
-        ))
-        // #endregion
         if (word.isBlank()) {
             tracker.onBoundaryReached(boundaryChar, inputConnection)
-            return ReplaceResult(false, unicodeChar != 0)
+            return ReplaceResult(false, unicodeChar != 0, null)
         }
 
         val apostropheSplit = splitApostropheWord(word)
         val lookupWord = apostropheSplit?.root ?: word
 
-        val suggestions = suggestionEngine.suggest(
-            lookupWord,
-            limit = 1,
-            includeAccentMatching = settings.accentMatching,
-            useKeyboardProximity = settings.useKeyboardProximity,
-            useEditTypeRanking = settings.useEditTypeRanking
-        )
-        val topRaw = suggestions.firstOrNull()
+        // Optimization: Use cached suggestions if they match the current lookup word
+        val topRaw = if (cachedSuggestions.isNotEmpty() && 
+            cachedSuggestions.firstOrNull()?.let { res -> 
+                // Check if the cached suggestion was for the current word (case-insensitive)
+                // Note: normalized comparison is better
+                val normRes = res.candidate.lowercase().take(lookupWord.length)
+                normRes == lookupWord.lowercase() || res.distance > 0
+            } == true) {
+            cachedSuggestions.firstOrNull()
+        } else {
+            // Fallback to synchronous suggest only if cache is empty or doesn't match
+            suggestionEngine.suggest(
+                lookupWord,
+                limit = 1,
+                includeAccentMatching = settings.accentMatching,
+                useKeyboardProximity = settings.useKeyboardProximity,
+                useEditTypeRanking = settings.useEditTypeRanking,
+                contextHistory = contextHistory
+            ).firstOrNull()
+        }
+
         val top = topRaw?.let {
             if (apostropheSplit != null) {
                 val recomposed = recomposeApostropheCandidate(apostropheSplit, it.candidate) ?: return@let null
@@ -188,27 +184,8 @@ class AutoReplaceController(
 
         if (shouldReplace) {
             val replacement = applyCasing(top!!.candidate, word)
-            // #region agent log
-            val textBeforeDelete = inputConnection.getTextBeforeCursor(16, 0)?.toString().orEmpty()
-            debugLog("C", "AutoReplaceController.handleBoundary:beforeDelete", "about to deleteSurroundingText", mapOf(
-                "trackerWord" to word,
-                "trackerWordLength" to word.length,
-                "deleteCount" to word.length,
-                "textBeforeDelete" to textBeforeDelete,
-                "textBeforeDeleteLength" to textBeforeDelete.length,
-                "replacement" to replacement
-            ))
-            // #endregion
             inputConnection.beginBatchEdit()
             inputConnection.deleteSurroundingText(word.length, 0)
-            // #region agent log
-            val textAfterDelete = inputConnection.getTextBeforeCursor(16, 0)?.toString().orEmpty()
-            debugLog("C", "AutoReplaceController.handleBoundary:afterDelete", "deleteSurroundingText completed", mapOf(
-                "textAfterDelete" to textAfterDelete,
-                "textAfterDeleteLength" to textAfterDelete.length,
-                "deletedCount" to word.length
-            ))
-            // #endregion
             inputConnection.commitText(replacement, 1)
             repository.markUsed(replacement)
 
@@ -231,13 +208,14 @@ class AutoReplaceController(
                 }
                 Log.d("AutoReplaceController", "Committed boundary '$boundaryChar', markAutoSpace=${shouldAppendBoundary && boundaryChar == ' '}")
             }
-            return ReplaceResult(true, true)
+            return ReplaceResult(true, true, replacement)
         }
 
         // Clear last replacement if no replacement happened
         lastReplacement = null
+        val wordBefore = tracker.currentWord
         tracker.onBoundaryReached(boundaryChar, inputConnection)
-        return ReplaceResult(false, unicodeChar != 0)
+        return ReplaceResult(false, unicodeChar != 0, if (wordBefore.isBlank()) null else wordBefore)
     }
 
     fun handleBackspaceUndo(

diff --git a/app/src/main/java/it/palsoftware/pastiera/core/suggestions/CorpusImporter.kt b/app/src/main/java/it/palsoftware/pastiera/core/suggestions/CorpusImporter.kt
@@ -0,0 +1,125 @@
+package it.palsoftware.pastiera.core.suggestions
+
+import android.content.Context
+import android.net.Uri
+import android.util.Log
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.withContext
+import java.io.BufferedReader
+import java.io.InputStreamReader
+import java.text.Normalizer
+import java.util.Locale
+
+class CorpusImporter(private val context: Context, private val userStore: UserDictionaryStore) {
+
+    private val tag = "CorpusImporter"
+
+    suspend fun importFromUri(uri: Uri, locale: Locale, onProgress: (Float) -> Unit): Result<Int> = withContext(Dispatchers.IO) {
+        try {
+            val inputStream = context.contentResolver.openInputStream(uri) ?: return@withContext Result.failure(Exception("Failed to open input stream"))
+
+            // Read bytes to detect encoding
+            val bytes = inputStream.readBytes()
+            inputStream.close()
+
+            if (bytes.isEmpty()) return@withContext Result.success(0)
+
+            // Simple encoding detection
+            val charset = when {
+                bytes.size >= 2 && bytes[0] == 0xFF.toByte() && bytes[1] == 0xFE.toByte() -> Charsets.UTF_16LE
+                bytes.size >= 2 && bytes[0] == 0xFE.toByte() && bytes[1] == 0xFF.toByte() -> Charsets.UTF_16BE
+                else -> Charsets.UTF_8
+            }
+
+            val reader = bytes.inputStream().bufferedReader(charset)
+            val totalSize = bytes.size.toLong()
+            var bytesRead = 0L
+            var ngramsAdded = 0
+
+            // Sliding window for ngrams
+            val window = mutableListOf<String>()
+            val maxWindowSize = 3
+
+            // Pre-compile regex for performance
+            val whitespaceRegex = "\\s+".toRegex()
+
+            reader.forEachLine { line ->
+                if (line.isBlank()) return@forEachLine
+
+                // Approximate progress based on line length
+                bytesRead += line.toByteArray(charset).size.toLong() + 2 // +2 for potential newline
+                if (totalSize > 0) {
+                    onProgress((bytesRead.toFloat() / totalSize).coerceAtMost(1.0f))
+                }
+
+                // Process line: tokenize including punctuation as separate tokens
+                val tokens = mutableListOf<String>()
+                line.split(whitespaceRegex).forEach { part ->
+                    val currentWord = StringBuilder()
+                    part.forEach { char ->
+                        if (char.isLetterOrDigit() || char == '\'') {
+                            currentWord.append(char)
+                        } else {
+                            if (currentWord.isNotEmpty()) {
+                                // IMPORTANT: Use cleanWord (preserving umlauts) instead of normalize
+                                tokens.add(currentWord.toString())
+                                currentWord.setLength(0)
+                            }
+                            if (char == ',' || char == '.' || char == '!' || char == '?') {
+                                tokens.add(char.toString())
+                            }
+                        }
+                    }
+                    if (currentWord.isNotEmpty()) {
+                        tokens.add(currentWord.toString())
+                    }
+                }
+
+                for (token in tokens) {
+                    if (token.isEmpty()) continue
+
+                    // 1. Unigram (Dictionary) - only for actual words
+                    if (token.any { it.isLetterOrDigit() }) {
+                        userStore.addWord(context, token, autoPersist = false)
+                    }
+
+                    // Update sliding window
+                    window.add(token)
+                    if (window.size > maxWindowSize) {
+                        window.removeAt(0)
+                    }
+
+                    // 2. NGrams from window (Words and Punctuation)
+                    if (window.size >= 2) {
+                        // Bigram
+                        val bigramContext = listOf(window[window.size - 2])
+                        userStore.addNGram(context, bigramContext, token, autoPersist = false)
+                        ngramsAdded++
+                    }
+
+                    if (window.size >= 3) {
+                        // Trigram
+                        val trigramContext = listOf(window[window.size - 3], window[window.size - 2])
+                        userStore.addNGram(context, trigramContext, token, autoPersist = false)
+                        ngramsAdded++
+                    }
+                }
+            }
+
+            // Persist all changes once at the end
+            userStore.persistManually(context)
+
+            Result.success(ngramsAdded)
+        } catch (e: Exception) {
+            Log.e(tag, "Error importing corpus", e)
+            Result.failure(e)
+        }
+    }
+
+    private fun normalize(word: String, locale: Locale): String {
+        val normalized = Normalizer.normalize(word.lowercase(locale), Normalizer.Form.NFD)
+        val withoutAccents = normalized.replace("\\p{Mn}".toRegex(), "")
+        return withoutAccents.replace("[^\\p{L}]".toRegex(), "")
+    }
+}
+