diff --git a/app/src/main/java/io/aatricks/novelscraper/data/repository/source/NovelFireSource.kt b/app/src/main/java/io/aatricks/novelscraper/data/repository/source/NovelFireSource.kt index f14b89a..8c9cac1 100644 --- a/app/src/main/java/io/aatricks/novelscraper/data/repository/source/NovelFireSource.kt +++ b/app/src/main/java/io/aatricks/novelscraper/data/repository/source/NovelFireSource.kt @@ -43,6 +43,7 @@ class NovelFireSource @Inject constructor( val document = getDocument(url) val items = mutableListOf() + val seenUrls = mutableSetOf() val bookLinks = document.select("a[href^='/book/']") bookLinks.forEach { link -> @@ -51,15 +52,15 @@ class NovelFireSource @Inject constructor( val href = link.attr("href") if (title.isNotBlank() && !title.equals("Read Now", ignoreCase = true) && !title.contains("Chapter", ignoreCase = true)) { - val parent = link.closest(".novel-item, .item, .book-item") ?: link.parent()?.parent() - val img = parent?.select("img")?.first() - val coverUrl = img?.findImage()?.let { resolveUrl(it) } ?: "" + val absoluteUrl = resolveUrl(href) + if (seenUrls.add(absoluteUrl)) { + val parent = link.closest(".novel-item, .item, .book-item") ?: link.parent()?.parent() + val img = parent?.select("img")?.first() + val coverUrl = img?.findImage()?.let { resolveUrl(it) } ?: "" - val chapterText = parent?.select(".novel-stats, .stats, .chapters")?.text() ?: "" - val chapterCount = extractChapterCount(chapterText) + val chapterText = parent?.select(".novel-stats, .stats, .chapters")?.text() ?: "" + val chapterCount = extractChapterCount(chapterText) - val absoluteUrl = resolveUrl(href) - if (items.none { it.url == absoluteUrl }) { items.add(ExploreItem( title = title, url = absoluteUrl, @@ -112,6 +113,7 @@ class NovelFireSource @Inject constructor( val document = getDocument(fallbackUrl) val items = mutableListOf() + val seenUrls = mutableSetOf() val bookLinks = document.select("a[href^='/book/']") bookLinks.forEach { link -> @@ -120,15 +122,16 @@ class NovelFireSource @Inject constructor( val href = link.attr("href") if (title.isNotBlank() && !title.equals("Read Now", ignoreCase = true) && !title.contains("Chapter", ignoreCase = true)) { - val parent = link.closest(".novel-item, .item, .book-item") ?: link.parent()?.parent() - val img = parent?.select("img")?.first() - val coverUrl = img?.findImage()?.let { resolveUrl(it) } ?: "" val absoluteUrl = resolveUrl(href) - val chapterText = parent?.select(".novel-stats, .stats, .chapters")?.text() ?: "" - val chapterCount = extractChapterCount(chapterText) + if (seenUrls.add(absoluteUrl)) { + val parent = link.closest(".novel-item, .item, .book-item") ?: link.parent()?.parent() + val img = parent?.select("img")?.first() + val coverUrl = img?.findImage()?.let { resolveUrl(it) } ?: "" + + val chapterText = parent?.select(".novel-stats, .stats, .chapters")?.text() ?: "" + val chapterCount = extractChapterCount(chapterText) - if (items.none { it.url == absoluteUrl }) { items.add(ExploreItem( title = title, url = absoluteUrl, diff --git a/app/src/main/java/io/aatricks/novelscraper/ui/viewmodel/ExploreViewModel.kt b/app/src/main/java/io/aatricks/novelscraper/ui/viewmodel/ExploreViewModel.kt index 3302dcd..a8aa60f 100644 --- a/app/src/main/java/io/aatricks/novelscraper/ui/viewmodel/ExploreViewModel.kt +++ b/app/src/main/java/io/aatricks/novelscraper/ui/viewmodel/ExploreViewModel.kt @@ -126,8 +126,9 @@ class ExploreViewModel @Inject constructor( val nextPage = _uiState.value.page + 1 val newItems = fetchItems(nextPage) + val existingUrls = _uiState.value.items.map { it.url }.toSet() val distinctNewItems = newItems.filter { newItem -> - _uiState.value.items.none { it.url == newItem.url } + !existingUrls.contains(newItem.url) } updateState { it.copy( diff --git a/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceBenchmarkTest.kt b/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceBenchmarkTest.kt new file mode 100644 index 0000000..4110b54 --- /dev/null +++ b/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceBenchmarkTest.kt @@ -0,0 +1,91 @@ +package io.aatricks.novelscraper.data.repository.source + +import io.aatricks.novelscraper.data.local.PreferencesManager +import io.aatricks.novelscraper.data.model.ExploreItem +import kotlinx.coroutines.runBlocking +import okhttp3.OkHttpClient +import org.junit.Test +import org.mockito.kotlin.mock +import java.lang.reflect.Method +import kotlin.system.measureTimeMillis + +class NovelFireSourceBenchmarkTest { + + @Test + fun benchmarkSearchNovelsFallback() = runBlocking { + val mockPreferencesManager = mock() + val okHttpClient = OkHttpClient() + val source = NovelFireSource(mockPreferencesManager, okHttpClient) + + // Generate a large HTML document to simulate the O(N^2) issue + val numItems = 2000 + val sb = StringBuilder() + sb.append("") + for (i in 1..numItems) { + sb.append(""" +
+ Novel $i + +
100 Chapters
+
+ """.trimIndent()) + } + // Also add duplicates to see filtering + for (i in 1..500) { + sb.append(""" +
+ Novel $i Duplicate + +
100 Chapters
+
+ """.trimIndent()) + } + sb.append("") + val html = sb.toString() + + val document = org.jsoup.Jsoup.parse(html) + val baseUrl = "https://novelfire.net" + + val time = measureTimeMillis { + val items = mutableListOf() + val seenUrls = mutableSetOf() + val bookLinks = document.select("a[href^='/book/']") + + bookLinks.forEach { link -> + val rawTitle = link.text() + var title = rawTitle + title = title.replace(Regex("^\\[\\d+\\]\\s*"), "") + title = title.replace(Regex("^R\\s*\\d+(\\.\\d+)?\\s*"), "") + title = title.replace(Regex("^Rank\\s*\\d+\\s*", RegexOption.IGNORE_CASE), "") + title = title.trim() + + val href = link.attr("href") + + if (title.isNotBlank() && !title.equals("Read Now", ignoreCase = true) && !title.contains("Chapter", ignoreCase = true)) { + val absoluteUrl = if (href.startsWith("/")) "$baseUrl$href" else href + + if (seenUrls.add(absoluteUrl)) { + val parent = link.closest(".novel-item, .item, .book-item") ?: link.parent()?.parent() + val img = parent?.select("img")?.first() + var coverUrl = img?.attr("data-src")?.ifEmpty { img.attr("src") } ?: "" + if (coverUrl.startsWith("/")) coverUrl = "${baseUrl}${coverUrl}" + + val chapterText = parent?.select(".novel-stats, .stats, .chapters")?.text() ?: "" + val chapterCount = Regex("(\\d+)\\s*Chapters", RegexOption.IGNORE_CASE).find(chapterText)?.groupValues?.get(1)?.toIntOrNull() ?: 0 + + items.add(ExploreItem( + title = title, + url = absoluteUrl, + coverUrl = coverUrl.ifBlank { null }, + source = "NovelFire", + chapterCount = chapterCount + )) + } + } + } + println("Found ${items.size} unique items") + } + + println("Optimized Benchmark completed in ${time}ms") + } +} diff --git a/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceTest.kt b/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceTest.kt index 74ae95f..40965d7 100644 --- a/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceTest.kt +++ b/app/src/test/java/io/aatricks/novelscraper/data/repository/source/NovelFireSourceTest.kt @@ -22,6 +22,7 @@ class NovelFireSourceTest { val document = Jsoup.parse(html) val items = mutableListOf() + val seenUrls = mutableSetOf() val baseUrl = "https://novelfire.net" val name = "NovelFire" @@ -30,15 +31,16 @@ class NovelFireSourceTest { val title = it.text() val href = it.attr("href") if (title.isNotBlank() && !title.equals("Read Now", ignoreCase = true) && !title.contains("Chapter", ignoreCase = true)) { - val parent = it.closest(".novel-item, .item, .book-item") ?: it.parent()?.parent() - val img = parent?.select("img")?.first() - var coverUrl = img?.attr("data-src")?.ifEmpty { img.attr("src") } ?: "" - if (coverUrl.startsWith("/")) coverUrl = "$baseUrl$coverUrl" + val absoluteUrl = "$baseUrl$href" + if (seenUrls.add(absoluteUrl)) { + val parent = it.closest(".novel-item, .item, .book-item") ?: it.parent()?.parent() + val img = parent?.select("img")?.first() + var coverUrl = img?.attr("data-src")?.ifEmpty { img.attr("src") } ?: "" + if (coverUrl.startsWith("/")) coverUrl = "$baseUrl$coverUrl" - if (items.none { item -> item.url == "$baseUrl$href" }) { items.add(ExploreItem( title = title, - url = "$baseUrl$href", + url = absoluteUrl, coverUrl = if (coverUrl.isBlank()) null else coverUrl, source = name )) diff --git a/benchmark_output.txt b/benchmark_output.txt new file mode 100644 index 0000000..b85aa58 --- /dev/null +++ b/benchmark_output.txt @@ -0,0 +1,17 @@ + +FAILURE: Build failed with an exception. + +* What went wrong: +Gradle could not start your build. +> Could not create service of type BuildLifecycleController using ServicesProvider.createBuildLifecycleController(). + > Could not create service of type BuildModelController using VintageBuildControllerProvider.createBuildModelController(). + > Could not create service of type FileHasher using BuildSessionServices.createFileHasher(). + > Cannot lock file hash cache (/app/.gradle/8.13/fileHashes) as it has already been locked by this process. + +* Try: +> Run with --stacktrace option to get the stack trace. +> Run with --info or --debug option to get more log output. +> Run with --scan to get full insights. +> Get more help at https://help.gradle.org. + +BUILD FAILED in 901ms