diff --git a/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt b/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt
index fe7ba74..5b2c193 100644
--- a/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt
+++ b/app/src/main/java/io/aatricks/novelscraper/util/TextUtils.kt
@@ -209,11 +209,96 @@ object TextUtils {
      */
     fun formatText(text: String): String {
         if (text.isEmpty()) return text
-        return text.replace(MULTIPLE_SPACES_REGEX, " ")
-            .replace(LINE_BREAK_REGEX, "\n")
-            .replace(SPACE_PLUS_NEWLINE_REGEX, "\n")
-            .replace(FOUR_PLUS_NEWLINES_REGEX, "\n\n\n")
-            .trim()
+
+        // Four single-purpose scans applied in a fixed order, followed by a trim.
+        // NOTE(review): the first pass turns "spaces + newline" into one space, i.e. it joins
+        // lines; confirm MULTIPLE_SPACES_REGEX matched exactly that and not plain runs of spaces.
+        val joined = replaceSpacesPlusNewline(text, ' ')
+        val unixEndings = replaceWindowsLineEndings(joined)
+        val stripped = replaceSpacesPlusNewline(unixEndings, '\n')
+        return replaceFourPlusNewlines(stripped).trim()
+    }
+
+    /**
+     * Replaces every run of one or more spaces that is immediately followed by '\n' (the run and
+     * the newline together) with [replacementChar]; all other characters are copied unchanged.
+     * Returns [text] itself when nothing matches, so no builder is allocated in that case.
+     */
+    private fun replaceSpacesPlusNewline(text: String, replacementChar: Char): String {
+        val len = text.length
+        var sb: StringBuilder? = null // created lazily on the first match
+        var copiedUpTo = 0 // start of the input that has not been copied into sb yet
+        var i = 0
+        while (i < len) {
+            if (text[i] != ' ') {
+                i++
+                continue
+            }
+            var j = i + 1
+            while (j < len && text[j] == ' ') j++
+            if (j < len && text[j] == '\n') {
+                val out = sb ?: StringBuilder(len).also { sb = it }
+                out.append(text, copiedUpTo, i).append(replacementChar)
+                copiedUpTo = j + 1
+                i = j + 1
+            } else {
+                i = j // spaces not followed by '\n' stay in the pending, uncopied span
+            }
+        }
+        val built = sb ?: return text
+        return built.append(text, copiedUpTo, len).toString()
+    }
+
+    /**
+     * Converts each "\r\n" pair and each lone '\r' to a single '\n'.
+     * Returns [text] itself when it contains no '\r'.
+     */
+    private fun replaceWindowsLineEndings(text: String): String {
+        val firstCr = text.indexOf('\r')
+        if (firstCr < 0) return text
+
+        val len = text.length
+        val sb = StringBuilder(len).append(text, 0, firstCr)
+        var i = firstCr
+        while (i < len) {
+            val c = text[i]
+            if (c == '\r') {
+                sb.append('\n')
+                // Skip the '\n' of a "\r\n" pair so the pair yields one newline, not two.
+                i += if (i + 1 < len && text[i + 1] == '\n') 2 else 1
+            } else {
+                sb.append(c)
+                i++
+            }
+        }
+        return sb.toString()
+    }
+
+    /**
+     * Collapses every run of four or more consecutive '\n' characters to exactly three;
+     * shorter runs are kept as they are. Returns [text] itself when no run is that long.
+     */
+    private fun replaceFourPlusNewlines(text: String): String {
+        val len = text.length
+        var sb: StringBuilder? = null // created lazily on the first over-long run
+        var copiedUpTo = 0 // start of the input that has not been copied into sb yet
+        var i = 0
+        while (i < len) {
+            if (text[i] != '\n') {
+                i++
+                continue
+            }
+            var j = i + 1
+            while (j < len && text[j] == '\n') j++
+            if (j - i >= 4) {
+                val out = sb ?: StringBuilder(len).also { sb = it }
+                out.append(text, copiedUpTo, i).append("\n\n\n")
+                copiedUpTo = j
+            }
+            i = j
+        }
+        val built = sb ?: return text
+        return built.append(text, copiedUpTo, len).toString()
     }
 
     /**
diff --git a/app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt b/app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt
new file mode 100644
index 0000000..977c898
--- /dev/null
+++ b/app/src/test/java/io/aatricks/novelscraper/util/TextUtilsBenchmarkTest.kt
@@ -0,0 +1,39 @@
+package io.aatricks.novelscraper.util
+
+import org.junit.Assert.assertTrue
+import org.junit.Test
+import kotlin.system.measureNanoTime
+
+/**
+ * Rough timing harness for [TextUtils.formatText]: it prints an average per-call time and only
+ * asserts that formatting produced output, so it is not a pass/fail performance gate.
+ */
+class TextUtilsBenchmarkTest {
+
+    @Test
+    fun benchmarkFormatText() {
+        val text = buildString {
+            repeat(1000) {
+                append("This is a test line. \n")
+                append("Another line with Windows endings. \r\n")
+                append("\n\n\n\n\n")
+                append("A normal paragraph with some text and more text. ")
+            }
+        }
+
+        // Untimed calls before the measured loop.
+        repeat(10) { TextUtils.formatText(text) }
+
+        val iterations = 500
+        var formattedChars = 0L
+        // Time the whole loop in nanoseconds; timing each call in whole milliseconds would
+        // round sub-millisecond calls down to 0.
+        val totalNanos = measureNanoTime {
+            repeat(iterations) { formattedChars += TextUtils.formatText(text).length }
+        }
+
+        // Using the results keeps the calls observable and sanity-checks the output.
+        assertTrue(formattedChars > 0)
+        println("Optimized average time: ${totalNanos / 1_000_000.0 / iterations} ms")
+    }
+}