Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .kotlin/errors/errors-1767625645437.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
kotlin version: 2.0.21
error message: The daemon has terminated unexpectedly on startup attempt #1 with error code: 0. The daemon process output:
1. Kotlin compile daemon is ready

10 changes: 8 additions & 2 deletions app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ plugins {
id("org.jetbrains.kotlin.android")
id("org.jetbrains.kotlin.plugin.compose")
id("org.jetbrains.kotlin.plugin.serialization") version "2.0.21"
id("com.google.devtools.ksp") version "2.0.21-1.0.27"
}

import java.io.File
Expand Down Expand Up @@ -201,14 +202,19 @@ dependencies {
implementation("androidx.recyclerview:recyclerview:1.3.2")
// Emoji2 per supporto emoji future-proof
implementation("androidx.emoji2:emoji2:1.4.0")
implementation("androidx.emoji2:emoji2-views:1.4.0")
implementation("androidx.emoji2:emoji2-views-helper:1.4.0")
// Kotlinx Serialization for dictionary optimization
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3")
implementation("org.jetbrains.kotlinx:kotlinx-serialization-cbor:1.6.3")
// Shizuku for ADB shell access
implementation("dev.rikka.shizuku:api:13.1.5")
implementation("dev.rikka.shizuku:provider:13.1.5")

// Room database for user dictionary and ngrams
val roomVersion = "2.6.1"
implementation("androidx.room:room-runtime:$roomVersion")
implementation("androidx.room:room-ktx:$roomVersion")
ksp("androidx.room:room-compiler:$roomVersion")

testImplementation(libs.junit)
androidTestImplementation(libs.androidx.junit)
androidTestImplementation(libs.androidx.espresso.core)
Expand Down
6 changes: 3 additions & 3 deletions app/build.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Build number and date
#Sat Jan 03 00:42:14 CET 2026
buildDate=03 gen 2026
buildNumber=1819
#Mon Jan 05 19:22:08 CET 2026
buildDate=05 gen 2026
buildNumber=2009

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,12 @@ class AutoReplaceController(
private val suggestionEngine: SuggestionEngine,
private val settingsProvider: () -> SuggestionSettings
) {
// #region agent log
/**
 * Appends one JSON-encoded debug record, followed by a newline, to a fixed log file.
 *
 * The record carries constant "sessionId" and "runId" values, the supplied
 * [hypothesisId], [location] and [message], the current wall-clock time in
 * milliseconds, and [data] wrapped in a nested JSON object.
 *
 * Any [Exception] raised while building or writing the record is swallowed, so
 * this function never propagates such a failure to its caller.
 *
 * NOTE(review): the target is a hard-coded absolute path that looks like a developer
 * workstation directory; presumably it is not writable on a device, in which case
 * every call would silently do nothing — confirm before relying on this log.
 *
 * @param hypothesisId identifier stored under the "hypothesisId" key.
 * @param location free-form call-site label stored under the "location" key.
 * @param message human-readable message stored under the "message" key.
 * @param data extra key/value pairs stored under the "data" key; defaults to an empty map.
 */
private fun debugLog(hypothesisId: String, location: String, message: String, data: Map<String, Any?> = emptyMap()) {
try {
val logFile = File("/Users/andrea/Desktop/DEV/Pastiera/pastiera/.cursor/debug.log")
val logEntry = JSONObject().apply {
put("sessionId", "debug-session")
put("runId", "run1")
put("hypothesisId", hypothesisId)
put("location", location)
put("message", message)
put("timestamp", System.currentTimeMillis())
put("data", JSONObject(data))
}
// One JSON object per line.
logFile.appendText(logEntry.toString() + "\n")
} catch (e: Exception) {
// Ignore log errors: logging is best-effort and must not affect the caller.
}
}
// #endregion

data class ReplaceResult(val replaced: Boolean, val committed: Boolean)
/**
 * Outcome reported after a word-boundary key has been handled.
 *
 * The descriptions below reflect the constructor calls visible in this file.
 *
 * @property replaced true when the typed word was swapped for a suggestion.
 * @property committed true when a replacement was committed; on the non-replacement
 *   paths it is set to whether the key event produced a non-zero unicode character.
 * @property committedWord the replacement text when a replacement happened; otherwise
 *   the tracker's word captured before the boundary was processed, or null when that
 *   word was blank. Defaults to null.
 */
data class ReplaceResult(
val replaced: Boolean,
val committed: Boolean,
val committedWord: String? = null
)

// Track last replacement for undo
private data class LastReplacement(
Expand Down Expand Up @@ -99,7 +84,9 @@ class AutoReplaceController(
keyCode: Int,
event: KeyEvent?,
tracker: CurrentWordTracker,
inputConnection: InputConnection?
inputConnection: InputConnection?,
contextHistory: List<String> = emptyList(),
cachedSuggestions: List<SuggestionResult> = emptyList()
): ReplaceResult {
val unicodeChar = event?.unicodeChar ?: 0
val boundaryChar = when {
Expand All @@ -111,12 +98,20 @@ class AutoReplaceController(

val settings = settingsProvider()
if (!settings.autoReplaceOnSpaceEnter || inputConnection == null) {
val wordBefore = tracker.currentWord
tracker.onBoundaryReached(boundaryChar, inputConnection)
return ReplaceResult(false, unicodeChar != 0, if (wordBefore.isBlank()) null else wordBefore)
}

val word = tracker.currentWord
if (word.isBlank()) {
tracker.onBoundaryReached(boundaryChar, inputConnection)
return ReplaceResult(false, unicodeChar != 0)
return ReplaceResult(false, unicodeChar != 0, null)
}

// If cursor is after non-letter/digit and not standard punctuation (e.g., emoji),
// skip auto-replace to avoid dropping trailing symbols.
// Moved here to avoid IPC call when word is already blank.
val textBefore = inputConnection.getTextBeforeCursor(16, 0)?.toString().orEmpty()
val lastCharBeforeCursor = textBefore.lastOrNull()
val allowedPunctuation = it.palsoftware.pastiera.core.Punctuation.BOUNDARY + "-"
Expand All @@ -125,38 +120,39 @@ class AutoReplaceController(
lastCharBeforeCursor !in allowedPunctuation &&
!lastCharBeforeCursor.isWhitespace()
) {
val wordBefore = tracker.currentWord
tracker.onBoundaryReached(boundaryChar, inputConnection)
return ReplaceResult(false, unicodeChar != 0)
return ReplaceResult(false, unicodeChar != 0, if (wordBefore.isBlank()) null else wordBefore)
}

val word = tracker.currentWord
// #region agent log
val textBeforeReal = inputConnection?.getTextBeforeCursor(16, 0)?.toString().orEmpty()
debugLog("C", "AutoReplaceController.handleBoundary:beforeReplace", "handleBoundary called", mapOf(
"trackerWord" to word,
"trackerWordLength" to word.length,
"textBeforeReal" to textBeforeReal,
"textBeforeRealLength" to textBeforeReal.length,
"keyCode" to keyCode,
"boundaryChar" to (boundaryChar?.toString() ?: "null")
))
// #endregion
if (word.isBlank()) {
tracker.onBoundaryReached(boundaryChar, inputConnection)
return ReplaceResult(false, unicodeChar != 0)
return ReplaceResult(false, unicodeChar != 0, null)
}

val apostropheSplit = splitApostropheWord(word)
val lookupWord = apostropheSplit?.root ?: word

val suggestions = suggestionEngine.suggest(
lookupWord,
limit = 1,
includeAccentMatching = settings.accentMatching,
useKeyboardProximity = settings.useKeyboardProximity,
useEditTypeRanking = settings.useEditTypeRanking
)
val topRaw = suggestions.firstOrNull()
// Optimization: Use cached suggestions if they match the current lookup word
val topRaw = if (cachedSuggestions.isNotEmpty() &&
cachedSuggestions.firstOrNull()?.let { res ->
// Check if the cached suggestion was for the current word (case-insensitive)
// Note: normalized comparison is better
val normRes = res.candidate.lowercase().take(lookupWord.length)
normRes == lookupWord.lowercase() || res.distance > 0
} == true) {
cachedSuggestions.firstOrNull()
} else {
// Fallback to synchronous suggest only if cache is empty or doesn't match
suggestionEngine.suggest(
lookupWord,
limit = 1,
includeAccentMatching = settings.accentMatching,
useKeyboardProximity = settings.useKeyboardProximity,
useEditTypeRanking = settings.useEditTypeRanking,
contextHistory = contextHistory
).firstOrNull()
}

val top = topRaw?.let {
if (apostropheSplit != null) {
val recomposed = recomposeApostropheCandidate(apostropheSplit, it.candidate) ?: return@let null
Expand Down Expand Up @@ -188,27 +184,8 @@ class AutoReplaceController(

if (shouldReplace) {
val replacement = applyCasing(top!!.candidate, word)
// #region agent log
val textBeforeDelete = inputConnection.getTextBeforeCursor(16, 0)?.toString().orEmpty()
debugLog("C", "AutoReplaceController.handleBoundary:beforeDelete", "about to deleteSurroundingText", mapOf(
"trackerWord" to word,
"trackerWordLength" to word.length,
"deleteCount" to word.length,
"textBeforeDelete" to textBeforeDelete,
"textBeforeDeleteLength" to textBeforeDelete.length,
"replacement" to replacement
))
// #endregion
inputConnection.beginBatchEdit()
inputConnection.deleteSurroundingText(word.length, 0)
// #region agent log
val textAfterDelete = inputConnection.getTextBeforeCursor(16, 0)?.toString().orEmpty()
debugLog("C", "AutoReplaceController.handleBoundary:afterDelete", "deleteSurroundingText completed", mapOf(
"textAfterDelete" to textAfterDelete,
"textAfterDeleteLength" to textAfterDelete.length,
"deletedCount" to word.length
))
// #endregion
inputConnection.commitText(replacement, 1)
repository.markUsed(replacement)

Expand All @@ -231,13 +208,14 @@ class AutoReplaceController(
}
Log.d("AutoReplaceController", "Committed boundary '$boundaryChar', markAutoSpace=${shouldAppendBoundary && boundaryChar == ' '}")
}
return ReplaceResult(true, true)
return ReplaceResult(true, true, replacement)
}

// Clear last replacement if no replacement happened
lastReplacement = null
val wordBefore = tracker.currentWord
tracker.onBoundaryReached(boundaryChar, inputConnection)
return ReplaceResult(false, unicodeChar != 0)
return ReplaceResult(false, unicodeChar != 0, if (wordBefore.isBlank()) null else wordBefore)
}

fun handleBackspaceUndo(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package it.palsoftware.pastiera.core.suggestions

import android.content.Context
import android.net.Uri
import android.util.Log
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.io.BufferedReader
import java.io.InputStreamReader
import java.text.Normalizer
import java.util.Locale

/**
 * Imports a plain-text corpus into the user dictionary store.
 *
 * Every word found in the corpus is added as a dictionary entry, and every
 * bigram/trigram (over words and the punctuation marks `,` `.` `!` `?`) is added
 * as an n-gram. All additions are made with `autoPersist = false` and persisted
 * once at the end of the import.
 *
 * @param context context used to resolve the corpus URI and passed through to [userStore].
 * @param userStore destination for the imported words and n-grams.
 */
class CorpusImporter(private val context: Context, private val userStore: UserDictionaryStore) {

    private val tag = "CorpusImporter"

    // Compiled once per importer instead of once per line.
    private val whitespaceRegex = "\\s+".toRegex()

    /**
     * Reads the document behind [uri], tokenizes it line by line and feeds the
     * resulting words and n-grams into the user store. Runs on [Dispatchers.IO].
     *
     * The whole document is loaded into memory before processing.
     *
     * @param uri location of the corpus, opened through the content resolver.
     * @param locale currently not used by this method; kept for interface compatibility.
     * @param onProgress invoked after each non-blank line with an approximate
     *   completion fraction capped at 1.0. It is called from the IO dispatcher.
     * @return success with the number of n-gram additions performed (0 for an empty
     *   document), or failure wrapping the error that aborted the import.
     */
    suspend fun importFromUri(uri: Uri, locale: Locale, onProgress: (Float) -> Unit): Result<Int> = withContext(Dispatchers.IO) {
        try {
            // `use` guarantees the stream is closed even when readBytes() throws.
            val bytes = context.contentResolver.openInputStream(uri)?.use { it.readBytes() }
                ?: return@withContext Result.failure(Exception("Failed to open input stream"))

            if (bytes.isEmpty()) return@withContext Result.success(0)

            val charset = detectCharset(bytes)
            val totalSize = bytes.size.toLong() // > 0 here: empty input returned above
            var bytesRead = 0L
            var ngramsAdded = 0

            // Sliding window holding the most recent tokens (current token last).
            val window = mutableListOf<String>()
            val maxWindowSize = 3

            // forEachLine closes the reader when it finishes.
            bytes.inputStream().bufferedReader(charset).forEachLine { line ->
                if (line.isBlank()) return@forEachLine

                // Approximate progress: encoded line length, +2 for a potential newline.
                bytesRead += line.toByteArray(charset).size.toLong() + 2
                onProgress((bytesRead.toFloat() / totalSize).coerceAtMost(1.0f))

                for (token in tokenize(line)) {
                    // 1. Unigram (dictionary) - only for tokens containing a letter or digit.
                    if (token.any { it.isLetterOrDigit() }) {
                        userStore.addWord(context, token, autoPersist = false)
                    }

                    // The window is not reset between lines, so n-grams span line breaks.
                    window.add(token)
                    if (window.size > maxWindowSize) {
                        window.removeAt(0)
                    }

                    // 2. N-grams ending at the current token (words and punctuation).
                    if (window.size >= 2) {
                        val bigramContext = listOf(window[window.size - 2])
                        userStore.addNGram(context, bigramContext, token, autoPersist = false)
                        ngramsAdded++
                    }

                    if (window.size >= 3) {
                        val trigramContext = listOf(window[window.size - 3], window[window.size - 2])
                        userStore.addNGram(context, trigramContext, token, autoPersist = false)
                        ngramsAdded++
                    }
                }
            }

            // Persist all changes once at the end.
            userStore.persistManually(context)

            Result.success(ngramsAdded)
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation must propagate; it is not an import failure.
            throw e
        } catch (e: Exception) {
            Log.e(tag, "Error importing corpus", e)
            Result.failure(e)
        }
    }

    /**
     * Picks the charset from a leading byte-order mark: FF FE selects UTF-16LE,
     * FE FF selects UTF-16BE, anything else falls back to UTF-8.
     *
     * The BOM itself is not stripped; it decodes to U+FEFF, which [tokenize]
     * discards because it is neither a letter nor a digit.
     */
    private fun detectCharset(bytes: ByteArray) = when {
        bytes.size >= 2 && bytes[0] == 0xFF.toByte() && bytes[1] == 0xFE.toByte() -> Charsets.UTF_16LE
        bytes.size >= 2 && bytes[0] == 0xFE.toByte() && bytes[1] == 0xFF.toByte() -> Charsets.UTF_16BE
        else -> Charsets.UTF_8
    }

    /**
     * Splits [line] into tokens: runs of letters, digits and apostrophes become word
     * tokens, and each `,` `.` `!` `?` becomes its own token. All other characters
     * only act as separators. Returned tokens are never empty.
     */
    private fun tokenize(line: String): List<String> {
        val tokens = mutableListOf<String>()
        val currentWord = StringBuilder()
        for (part in line.split(whitespaceRegex)) {
            for (char in part) {
                if (char.isLetterOrDigit() || char == '\'') {
                    currentWord.append(char)
                    continue
                }
                if (currentWord.isNotEmpty()) {
                    // Words are stored as typed (case and diacritics preserved);
                    // normalize() is deliberately not applied here.
                    tokens.add(currentWord.toString())
                    currentWord.setLength(0)
                }
                if (char == ',' || char == '.' || char == '!' || char == '?') {
                    tokens.add(char.toString())
                }
            }
            // Flush the word that ends at the whitespace boundary.
            if (currentWord.isNotEmpty()) {
                tokens.add(currentWord.toString())
                currentWord.setLength(0)
            }
        }
        return tokens
    }

    /**
     * Lowercases [word] using [locale], strips combining marks after NFD
     * decomposition, then removes every character that is not a letter.
     *
     * Not called anywhere in this class at present.
     */
    private fun normalize(word: String, locale: Locale): String {
        val normalized = Normalizer.normalize(word.lowercase(locale), Normalizer.Form.NFD)
        val withoutAccents = normalized.replace("\\p{Mn}".toRegex(), "")
        return withoutAccents.replace("[^\\p{L}]".toRegex(), "")
    }
}

Loading