Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,28 @@

```
# JMH version: 1.37
# VM version: JDK 23.0.2, OpenJDK 64-Bit Server VM, 23.0.2+7-jvmci-b01
# VM version: JDK 25.0.1, OpenJDK 64-Bit Server VM, 25.0.1
Benchmark Mode Cnt Score Error Units
ParserBenchmark.ceesvee avgt 10 267.157 ± 2.295 us/op
ParserBenchmark.scalaCsv avgt 10 776.875 ± 3.156 us/op
ParserBenchmark.univocity avgt 10 190.484 ± 0.927 us/op
ParserBenchmark.ceesvee avgt 10 261.357 ± 1.787 us/op
ParserBenchmark.scalaCsv avgt 10 741.778 ± 6.433 us/op
ParserBenchmark.univocity avgt 10 200.482 ± 2.715 us/op
```

```
# JMH version: 1.37
# VM version: JDK 25, OpenJDK 64-Bit Server VM, 25+37-jvmci-b01
Benchmark Mode Cnt Score Error Units
ParserBenchmark.ceesvee avgt 10 197.994 ± 2.344 us/op
ParserBenchmark.scalaCsv avgt 10 776.080 ± 1.457 us/op
ParserBenchmark.univocity avgt 10 208.226 ± 2.501 us/op
```

`benchmark/Jmh/run -i 10 -wi 5 -f 1 -t 2 ceesvee.benchmark.DecoderBenchmark`

```
# JMH version: 1.37
# VM version: JDK 23.0.2, OpenJDK 64-Bit Server VM, 23.0.2+7-jvmci-b01
# VM version: JDK 25.0.1, OpenJDK 64-Bit Server VM, 25.0.1
Benchmark Mode Cnt Score Error Units
DecoderBenchmark.ceesvee avgt 10 0.115 ± 0.001 us/op
DecoderBenchmark.univocity avgt 10 0.011 ± 0.001 us/op
DecoderBenchmark.ceesvee avgt 10 0.110 ± 0.001 us/op
DecoderBenchmark.univocity avgt 10 0.011 ± 0.001 us/op
```
131 changes: 76 additions & 55 deletions modules/core/src/main/scala/ceesvee/CsvParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,19 @@ object CsvParser {

/**
 * Applies the configured trim strategy to `line` and reports whether the
 * resulting text should be ignored under `options`.
 */
def ignoreLine(line: String, options: Options): Boolean =
  ignoreTrimmedLine(options.trim.strip(line), options)

def isBlank = options.skipBlankRows && l.isEmpty
def isComment = options.commentPrefix.filter(_.nonEmpty).exists(l.startsWith(_))
/**
 * Reports whether `line` should be ignored: it is either a blank row or a
 * comment under `options`. No trimming is applied here; `line` is used as given.
 */
private[ceesvee] def ignoreTrimmedLine(line: String, options: Options): Boolean =
  if (isBlank(line, options)) true else isComment(line, options)

isBlank || isComment
/** True only when blank rows are being skipped and `line` is empty. */
private def isBlank(line: String, options: Options): Boolean =
  if (options.skipBlankRows) line.isEmpty else false

/**
 * True when a non-empty comment prefix is configured and `line` starts with it.
 * An empty prefix never matches.
 */
private def isComment(line: String, options: Options): Boolean =
  options.commentPrefix.exists(prefix => prefix.nonEmpty && line.startsWith(prefix))

/**
Expand Down Expand Up @@ -241,11 +249,35 @@ object CsvParser {
(State(leftover, insideQuoteIndex = insideQuoteIndex, previousCarriageReturn = previousCarriageReturn), builder.result())
}

/** A character range of a line: `start` inclusive, `end` exclusive. */
private case class Slice(start: Int, end: Int)
private object Slice {

  /**
   * Concatenates the substrings of `line` described by `slices`, in buffer
   * order, then empties `slices` so the caller can reuse the same buffer.
   */
  @SuppressWarnings(Array(
    "org.wartremover.warts.MutableDataStructures",
    "org.wartremover.warts.NonUnitStatements",
    "org.wartremover.warts.SeqApply",
    "org.wartremover.warts.Var",
    "org.wartremover.warts.While",
  ))
  def slice(slices: mutable.ArrayBuffer[Slice], line: String) = {
    val joined = new mutable.StringBuilder
    val count = slices.length
    var idx = 0
    // Indexed while-loop: walks the buffer without creating an iterator.
    while (idx < count) {
      val part = slices(idx)
      joined.append(line.substring(part.start, part.end))
      idx += 1
    }
    slices.clear()
    joined.result()
  }
}

/**
* Parse a line into a collection of CSV fields.
*/
@SuppressWarnings(Array(
"org.wartremover.warts.MutableDataStructures",
"org.wartremover.warts.NonUnitStatements",
"org.wartremover.warts.Var",
"org.wartremover.warts.While",
))
Expand All @@ -255,69 +287,58 @@ object CsvParser {
)(implicit f: Factory[String, C[String]]): C[String] = {
val fields = f.newBuilder

object ParseLine {

private val slices = mutable.ListBuffer.empty[(Int, Int)]
private var sliceStart = 0
val slices = mutable.ArrayBuffer.empty[Slice]
var sliceStart = 0

private var i = 0
private var insideQuote = false
var i = 0
var insideQuote = false

def run(): Unit = {
while (i < line.length) {
(line(i): @switch) match {

case ',' =>
if (!insideQuote) {
process()
i += 1
sliceStart = i
} else {
i += 1
}

case '"' =>
if (insideQuote && (i + 1) < line.length && line(i + 1) == '"') { // escaped quote
val _ = slices += (sliceStart -> i)
sliceStart = i + 1
i += 2
} else {
i += 1
insideQuote = !insideQuote
}
while (i < line.length) {
(line(i): @switch) match {

case _ =>
i += 1
case ',' =>
if (!insideQuote) {
{
slices.addOne(Slice(sliceStart, i))
fields addOne trimString(options, Slice.slice(slices, line))
}
i += 1
sliceStart = i
} else {
i += 1
}
}

process()
}

private def process(): Unit = {
val sb = new mutable.StringBuilder
(slices += (sliceStart -> i)).foreach { case (start, end) =>
sb append line.substring(start, end)
}
@SuppressWarnings(Array("org.wartremover.warts.ToString"))
val str = sb.toString

val _ = fields += {
// always ignore whitespace around a quoted cell
val trimmed = Options.Trim.True.strip(str)

if (trimmed.length >= 2 && trimmed.headOption.contains('"') && trimmed.lastOption.contains('"')) {
trimmed.substring(1, trimmed.length - 1)
case '"' =>
if (insideQuote && (i + 1) < line.length && line(i + 1) == '"') { // escaped quote
slices.addOne(Slice(sliceStart, i))
sliceStart = i + 1
i += 2
} else {
options.trim.strip(str)
i += 1
insideQuote = !insideQuote
}
}
slices.clear()

case _ =>
i += 1
}
}
ParseLine.run()

{
slices.addOne(Slice(sliceStart, i))
fields addOne trimString(options, Slice.slice(slices, line))
}

fields.result()
}

/**
 * Produces the final value of a raw field.
 *
 * If the field, after `Options.Trim.True.strip`, is at least two characters
 * long and both starts and ends with a double quote, the outer quote pair is
 * removed. Otherwise the original `str` is passed through `options.trim`.
 */
private def trimString(options: Options, str: String) = {
  // always ignore whitespace around a quoted cell
  val stripped = Options.Trim.True.strip(str)
  val last = stripped.length - 1

  // `last >= 1` means at least two characters, so the two quotes are distinct.
  val quoted = last >= 1 && stripped.charAt(0) == '"' && stripped.charAt(last) == '"'

  if (quoted) stripped.substring(1, last)
  else options.trim.strip(str)
}
}
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.11.6
sbt.version=1.11.7