Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ class SummaryService @Inject constructor(
@ApplicationContext private val context: Context
) {

private val TAG = "SummaryService"
companion object {
private const val TAG = "SummaryService"
private val WHITESPACE_REGEX = Regex("\\s+")
}

private var modelFile: File? = null
private var isInitialized = false
private var isInitializing = false
Expand Down Expand Up @@ -79,7 +83,7 @@ class SummaryService @Inject constructor(
val selectedContent = selectKeyContent(content, maxWords = 300)
val prompt = buildPrompt(chapterTitle, selectedContent)

Log.d(TAG, "Generating summary (${selectedContent.split(Regex("\\s+")).size} words, ~${(selectedContent.length + prompt.length) / 4 + 200} tokens)")
Log.d(TAG, "Generating summary (${selectedContent.split(WHITESPACE_REGEX).size} words, ~${(selectedContent.length + prompt.length) / 4 + 200} tokens)")

generateWithRetry(prompt, selectedContent, content, onProgress)
}.onFailure { e ->
Expand Down Expand Up @@ -151,7 +155,7 @@ class SummaryService @Inject constructor(
private fun selectKeyContent(content: List<String>, maxWords: Int): String {
if (content.isEmpty()) return ""

val wordsPerParagraph = content.map { it.split(Regex("\\s+")) }
val wordsPerParagraph = content.map { it.split(WHITESPACE_REGEX) }
val totalWords = wordsPerParagraph.sumOf { it.size }
if (totalWords <= maxWords) return content.joinToString("\n\n")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ abstract class BaseJsoupSource(
protected open val preferencesManager: PreferencesManager? = null,
protected open val okHttpClient: okhttp3.OkHttpClient? = null
) : NovelSource {

companion object {
private val MULTIPLE_SLASHES_REGEX = Regex("/+")
}

protected open val userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
protected open val timeout = 15000L

Expand Down Expand Up @@ -73,6 +78,6 @@ abstract class BaseJsoupSource(
path.startsWith("//") -> "https:$path"
path.startsWith("/") -> "$baseUrl$path"
else -> if (path.startsWith(baseUrl)) path else "$baseUrl/$path"
}.replace(Regex("/+"), "/").replace("https:/", "https://").replace("http:/", "http://")
}.replace(MULTIPLE_SLASHES_REGEX, "/").replace("https:/", "https://").replace("http:/", "http://")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ class MangaBatSource @Inject constructor(
override val name = "MangaBat"
override val baseUrl = "https://www.mangabats.com"

companion object {
private val SUMMARY_REGEX = Regex(".*summary: ", RegexOption.IGNORE_CASE)
}

override suspend fun getPopularNovels(page: Int, tags: List<String>): List<ExploreItem> = io {
val url = if (tags.isNotEmpty()) {
val tagSlug = tags.first().lowercase().replace(" ", "-")
Expand Down Expand Up @@ -89,7 +93,7 @@ class MangaBatSource @Inject constructor(
val author = extractAuthor(document)
val summary = document.select("#contentBox, .panel-story-info-description, .story-info-description")
.first()?.text()?.replace("Description :", "")
?.replace(Regex(".*summary: ", RegexOption.IGNORE_CASE), "")?.trim()
?.replace(SUMMARY_REGEX, "")?.trim()

val coverUrl = extractCoverUrl(document)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,25 @@ class NovelFireSource @Inject constructor(
override val name = "NovelFire"
override val baseUrl = "https://novelfire.net"

companion object {
private val BRACKET_NUMBER_REGEX = Regex("^\\[\\d+\\]\\s*")
private val R_NUMBER_REGEX = Regex("^R\\s*\\d+(\\.\\d+)?\\s*")
private val RANK_PREFIX_REGEX = Regex("^Rank\\s*\\d+\\s*", RegexOption.IGNORE_CASE)
private val RANK_REGEX = Regex("RANK\\s+(\\d+)", RegexOption.IGNORE_CASE)
private val RATING_REGEX = Regex("Average score is\\s+([0-9.]+)", RegexOption.IGNORE_CASE)
private val CHAPTERS_COUNT_REGEX = Regex("(\\d+)\\s*Chapters", RegexOption.IGNORE_CASE)
private val TIME_AGO_REGEX = Regex("\\d+\\s+(year|month|day|hour|minute|second)s?\\s+ago.*$")
private val LEADING_NUM_REGEX = Regex("^(\\d+)\\s+(Chapter\\s+\\1.*)")
}

private fun cleanNovelTitle(title: String): String {
var clean = title
// Remove [123] at start
clean = clean.replace(Regex("^\\[\\d+\\]\\s*"), "")
clean = clean.replace(BRACKET_NUMBER_REGEX, "")
// Remove R 14.8 or R 123 at start
clean = clean.replace(Regex("^R\\s*\\d+(\\.\\d+)?\\s*"), "")
clean = clean.replace(R_NUMBER_REGEX, "")
// Remove Rank 123 at start
clean = clean.replace(Regex("^Rank\\s*\\d+\\s*", RegexOption.IGNORE_CASE), "")
clean = clean.replace(RANK_PREFIX_REGEX, "")
return clean.trim()
}

Expand Down Expand Up @@ -155,8 +166,8 @@ class NovelFireSource @Inject constructor(

val infoText = document.text()
val chapterCount = extractChapterCount(infoText)
val rank = Regex("RANK\\s+(\\d+)", RegexOption.IGNORE_CASE).find(infoText)?.groupValues?.get(1)
val rating = Regex("Average score is\\s+([0-9.]+)", RegexOption.IGNORE_CASE).find(infoText)?.groupValues?.get(1)
val rank = RANK_REGEX.find(infoText)?.groupValues?.get(1)
val rating = RATING_REGEX.find(infoText)?.groupValues?.get(1)

val chaptersUrl = getChaptersUrl(url, document)
val firstPageDoc = runCatching { getDocument(chaptersUrl) }.getOrDefault(document)
Expand Down Expand Up @@ -195,8 +206,7 @@ class NovelFireSource @Inject constructor(
}

private fun extractChapterCount(infoText: String): Int {
return Regex("(\\d+)\\s*Chapters", RegexOption.IGNORE_CASE)
.find(infoText)?.groupValues?.get(1)?.toIntOrNull() ?: 0
return CHAPTERS_COUNT_REGEX.find(infoText)?.groupValues?.get(1)?.toIntOrNull() ?: 0
}

private fun getChaptersUrl(url: String, document: org.jsoup.nodes.Document): String {
Expand All @@ -217,9 +227,8 @@ class NovelFireSource @Inject constructor(
element.select(".chapter-title").text().ifBlank { element.text() }
}

var cleanTitle = rawTitle.replace(Regex("\\d+\\s+(year|month|day|hour|minute|second)s?\\s+ago.*$"), "").trim()
val leadingNumRegex = Regex("^(\\d+)\\s+(Chapter\\s+\\1.*)")
leadingNumRegex.find(cleanTitle)?.let { match ->
var cleanTitle = rawTitle.replace(TIME_AGO_REGEX, "").trim()
LEADING_NUM_REGEX.find(cleanTitle)?.let { match ->
cleanTitle = match.groupValues[2]
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ class ReaderViewModel @Inject constructor(
private val preferencesManager: PreferencesManager
) : BaseViewModel<ReaderViewModel.ReaderUiState>(ReaderUiState()) {

companion object {
private val DOUBLE_NEWLINE_REGEX = Regex("""\n\s*\n""")
}

// Current library item ID being read
private var currentLibraryItemId: String? = null

Expand Down Expand Up @@ -613,7 +617,7 @@ class ReaderViewModel @Inject constructor(
if (textBuffer.isEmpty()) return
val joined = textBuffer.joinToString("\n\n")
val formatted = TextUtils.formatChapterText(joined)
val parts = formatted.split(Regex("""\n\s*\n""")).map { it.trim() }.filter { it.isNotBlank() }
val parts = formatted.split(DOUBLE_NEWLINE_REGEX).map { it.trim() }.filter { it.isNotBlank() }
parts.forEach { p -> formattedElements.add(ContentElement.Text(p)) }
textBuffer.clear()
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package io.aatricks.novelscraper.data.repository.source

import org.junit.Test
import kotlin.system.measureTimeMillis

/**
 * Micro-benchmark for title cleaning: compares compiling the three prefix regexes on
 * every call against reusing precompiled instances held in the companion object.
 *
 * Timings are printed to standard output and are not asserted on. The test fails only
 * if the two variants disagree on the cleaned title for one of the sample inputs.
 */
class NovelFireSourceBenchmarkTest {

    /**
     * Baseline variant: constructs a fresh [Regex] for each of the three prefix
     * patterns on every invocation.
     *
     * @param title raw title, possibly starting with a `[123]`, `R 14.8` or `Rank 123` prefix
     * @return the title with matching leading prefixes removed and surrounding whitespace trimmed
     */
    private fun cleanNovelTitleOriginal(title: String): String {
        var clean = title
        // Remove [123] at start
        clean = clean.replace(Regex("^\\[\\d+\\]\\s*"), "")
        // Remove R 14.8 or R 123 at start
        clean = clean.replace(Regex("^R\\s*\\d+(\\.\\d+)?\\s*"), "")
        // Remove Rank 123 at start
        clean = clean.replace(Regex("^Rank\\s*\\d+\\s*", RegexOption.IGNORE_CASE), "")
        return clean.trim()
    }

    /**
     * Optimized variant: applies the same three replacements in the same order, but
     * reuses the precompiled patterns from the companion object.
     *
     * @param title raw title, possibly starting with a `[123]`, `R 14.8` or `Rank 123` prefix
     * @return the title with matching leading prefixes removed and surrounding whitespace trimmed
     */
    private fun cleanNovelTitleOptimized(title: String): String {
        var clean = title
        // Remove [123] at start
        clean = clean.replace(BRACKET_NUMBER_REGEX, "")
        // Remove R 14.8 or R 123 at start
        clean = clean.replace(R_NUMBER_REGEX, "")
        // Remove Rank 123 at start
        clean = clean.replace(RANK_PREFIX_REGEX, "")
        return clean.trim()
    }

    /**
     * Verifies that both variants agree on every sample title, then times each variant
     * over the same workload and prints the elapsed milliseconds and their difference.
     */
    @Test
    fun benchmarkCleanNovelTitle() {
        val titles = listOf(
            "[123] Some Novel Name",
            "R 14.8 Another Novel Name",
            "Rank 42 A Third Novel Name",
            "Just a regular name",
            "Rank 123 [123] Double trouble name",
            "Nothing to replace here 123"
        )

        // A speed-up is only meaningful if the output is unchanged, so compare the two
        // variants on every sample before timing anything.
        for (title in titles) {
            val expected = cleanNovelTitleOriginal(title)
            val actual = cleanNovelTitleOptimized(title)
            check(expected == actual) {
                "Variants disagree for \"$title\": original=\"$expected\", optimized=\"$actual\""
            }
        }

        // Warmup: run both variants before measuring so neither timed loop is the first
        // to execute this code.
        repeat(1000) {
            for (title in titles) {
                cleanNovelTitleOriginal(title)
                cleanNovelTitleOptimized(title)
            }
        }

        val iterations = 100_000
        val timeOriginal = measureTimeMillis {
            repeat(iterations) {
                for (title in titles) {
                    cleanNovelTitleOriginal(title)
                }
            }
        }

        val timeOptimized = measureTimeMillis {
            repeat(iterations) {
                for (title in titles) {
                    cleanNovelTitleOptimized(title)
                }
            }
        }

        println("Baseline - Time to run $iterations iterations: $timeOriginal ms")
        println("Optimized - Time to run $iterations iterations: $timeOptimized ms")
        println("Improvement: ${timeOriginal - timeOptimized} ms")
    }

    companion object {
        // Same patterns as the baseline variant, compiled once per class load.
        private val BRACKET_NUMBER_REGEX = Regex("^\\[\\d+\\]\\s*")
        private val R_NUMBER_REGEX = Regex("^R\\s*\\d+(\\.\\d+)?\\s*")
        private val RANK_PREFIX_REGEX = Regex("^Rank\\s*\\d+\\s*", RegexOption.IGNORE_CASE)
    }
}
50 changes: 50 additions & 0 deletions benchmark_output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
> Task :app:preBuild UP-TO-DATE
> Task :app:preDebugBuild UP-TO-DATE
> Task :app:checkKotlinGradlePluginConfigurationErrors SKIPPED
> Task :app:checkDebugAarMetadata UP-TO-DATE
> Task :app:processDebugNavigationResources UP-TO-DATE
> Task :app:compileDebugNavigationResources UP-TO-DATE
> Task :app:generateDebugResValues UP-TO-DATE
> Task :app:mapDebugSourceSetPaths UP-TO-DATE
> Task :app:generateDebugResources UP-TO-DATE
> Task :app:mergeDebugResources UP-TO-DATE
> Task :app:packageDebugResources UP-TO-DATE
> Task :app:parseDebugLocalResources UP-TO-DATE
> Task :app:createDebugCompatibleScreenManifests UP-TO-DATE
> Task :app:extractDeepLinksDebug UP-TO-DATE
> Task :app:processDebugMainManifest UP-TO-DATE
> Task :app:processDebugManifest UP-TO-DATE
> Task :app:processDebugManifestForPackage UP-TO-DATE
> Task :app:processDebugResources UP-TO-DATE
> Task :app:kspDebugKotlin UP-TO-DATE
> Task :app:compileDebugKotlin UP-TO-DATE
> Task :app:javaPreCompileDebug UP-TO-DATE
> Task :app:compileDebugJavaWithJavac UP-TO-DATE
> Task :app:hiltAggregateDepsDebug UP-TO-DATE
> Task :app:hiltJavaCompileDebug
> Task :app:preDebugUnitTestBuild UP-TO-DATE
> Task :app:processDebugJavaRes
> Task :app:javaPreCompileDebugUnitTest UP-TO-DATE
> Task :app:bundleDebugClassesToCompileJar
> Task :app:transformDebugClassesWithAsm
> Task :app:bundleDebugClassesToRuntimeJar
> Task :app:kspDebugUnitTestKotlin

> Task :app:compileDebugUnitTestKotlin
w: file:///app/app/src/test/java/io/aatricks/novelscraper/data/repository/ContentRepositoryEpubTest.kt:43:19 'fun createTempDir(prefix: String = ..., suffix: String? = ..., directory: File? = ...): File' is deprecated. Avoid creating temporary directories in the default temp location with this function due to too wide permissions on the newly created directory. Use kotlin.io.path.createTempDirectory instead.

> Task :app:compileDebugUnitTestJavaWithJavac NO-SOURCE
> Task :app:hiltAggregateDepsDebugUnitTest
> Task :app:hiltJavaCompileDebugUnitTest NO-SOURCE
> Task :app:processDebugUnitTestJavaRes
> Task :app:transformDebugUnitTestClassesWithAsm

> Task :app:testDebugUnitTest

NovelFireSourceBenchmarkTest > benchmarkCleanNovelTitle STANDARD_OUT
Baseline - Time to run 100000 iterations: 1806 ms

NovelFireSourceBenchmarkTest > benchmarkCleanNovelTitle PASSED

BUILD SUCCESSFUL in 37s
32 actionable tasks: 11 executed, 21 up-to-date
49 changes: 49 additions & 0 deletions benchmark_output_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
> Task :app:preBuild UP-TO-DATE
> Task :app:preDebugBuild UP-TO-DATE
> Task :app:checkKotlinGradlePluginConfigurationErrors SKIPPED
> Task :app:checkDebugAarMetadata UP-TO-DATE
> Task :app:processDebugNavigationResources UP-TO-DATE
> Task :app:compileDebugNavigationResources UP-TO-DATE
> Task :app:generateDebugResValues UP-TO-DATE
> Task :app:mapDebugSourceSetPaths UP-TO-DATE
> Task :app:generateDebugResources UP-TO-DATE
> Task :app:mergeDebugResources UP-TO-DATE
> Task :app:packageDebugResources UP-TO-DATE
> Task :app:parseDebugLocalResources UP-TO-DATE
> Task :app:createDebugCompatibleScreenManifests UP-TO-DATE
> Task :app:extractDeepLinksDebug UP-TO-DATE
> Task :app:processDebugMainManifest UP-TO-DATE
> Task :app:processDebugManifest UP-TO-DATE
> Task :app:processDebugManifestForPackage UP-TO-DATE
> Task :app:processDebugResources UP-TO-DATE
> Task :app:javaPreCompileDebug UP-TO-DATE
> Task :app:preDebugUnitTestBuild UP-TO-DATE
> Task :app:javaPreCompileDebugUnitTest UP-TO-DATE
> Task :app:kspDebugKotlin
> Task :app:compileDebugKotlin
> Task :app:compileDebugJavaWithJavac
> Task :app:hiltAggregateDepsDebug UP-TO-DATE
> Task :app:hiltJavaCompileDebug
> Task :app:processDebugJavaRes
> Task :app:bundleDebugClassesToCompileJar
> Task :app:transformDebugClassesWithAsm
> Task :app:bundleDebugClassesToRuntimeJar
> Task :app:kspDebugUnitTestKotlin
> Task :app:compileDebugUnitTestKotlin
> Task :app:compileDebugUnitTestJavaWithJavac NO-SOURCE
> Task :app:hiltAggregateDepsDebugUnitTest UP-TO-DATE
> Task :app:hiltJavaCompileDebugUnitTest NO-SOURCE
> Task :app:processDebugUnitTestJavaRes UP-TO-DATE
> Task :app:transformDebugUnitTestClassesWithAsm

> Task :app:testDebugUnitTest

NovelFireSourceBenchmarkTest > benchmarkCleanNovelTitle STANDARD_OUT
Baseline - Time to run 100000 iterations: 1788 ms
Optimized - Time to run 100000 iterations: 315 ms
Improvement: 1473 ms

NovelFireSourceBenchmarkTest > benchmarkCleanNovelTitle PASSED

BUILD SUCCESSFUL in 26s
32 actionable tasks: 12 executed, 20 up-to-date
Loading
Loading