diff --git a/.gitignore b/.gitignore index 5cdd285..2db3b8c 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ .#* .lib .ensime_cache +.idea diff --git a/.travis.yml b/.travis.yml index 310b3ce..5f9f5fe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: scala +sudo: false jdk: - oraclejdk8 script: diff --git a/README.md b/README.md index 5055f26..6ea33b9 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,12 @@ ### Overview -Jawn consists of three parts: - -1. A fast, generic JSON parser -2. A small, somewhat anemic AST +Jawn consists of four parts: + +1. A fast, generic JSON parser (`jawn-parser`) +2. A small, somewhat anemic AST (`jawn-ast`) 3. Support packages which parse to third-party ASTs +4. A few helpful utilities (`jawn-util`) Currently Jawn is competitive with the fastest Java JSON libraries (GSON and Jackson) and in the author's benchmarks it often wins. It @@ -30,7 +31,8 @@ 2014). Given the plethora of really nice JSON libraries for Scala, the -expectation is that you are here for (1) and (3) not (2). +expectation is that you're probably here for `jawn-parser` or a +support package. 
### Quick Start @@ -43,10 +45,10 @@ resolvers += Resolver.sonatypeRepo("releases") // use this if you just want jawn's parser, and will implement your own facade -libraryDependencies += "org.spire-math" %% "jawn-parser" % "0.10.3" +libraryDependencies += "org.spire-math" %% "jawn-parser" % "0.11.0" // use this if you want jawn's parser and also jawn's ast -libraryDependencies += "org.spire-math" %% "jawn-ast" % "0.10.3" +libraryDependencies += "org.spire-math" %% "jawn-ast" % "0.11.0" ``` If you want to use Jawn's parser with another project's AST, see the @@ -54,7 +56,7 @@ you would say: ```scala -libraryDependencies += "org.spire-math" %% "jawn-spray" % "0.10.3" +libraryDependencies += "org.spire-math" %% "jawn-spray" % "0.11.0" ``` There are a few reasons you might want to do this: @@ -133,14 +135,14 @@ Jawn currently supports six external ASTs directly: -| AST | 2.10 | 2.11 | 2.12 | -|-----------|-------|-------|-------| -| Argonaut | 6.1 | 6.1 | | -| Json4s | 3.5.0 | 3.5.0 | 3.5.0 | -| Play-json | 2.4.8 | 2.5.9 | | -| Rojoma | 2.4.3 | 2.4.3 | 2.4.3 | -| Rojoma-v3 | 3.7.0 | 3.7.0 | 3.7.0 | -| Spray | 1.3.2 | 1.3.2 | 1.3.2 | +| AST | 2.10 | 2.11 | 2.12 | +|-----------|--------|--------|-------| +| Argonaut | 6.2 | 6.2 | 6.2 | +| Json4s | 3.5.2 | 3.5.2 | 3.5.2 | +| Play-json | 2.4.11 | 2.5.15 | 2.6.0 | +| Rojoma | 2.4.3 | 2.4.3 | 2.4.3 | +| Rojoma-v3 | 3.7.2 | 3.7.2 | 3.7.2 | +| Spray | 1.3.3 | 1.3.3 | 1.3.3 | Each of these subprojects provides a `Parser` object (an instance of `SupportParser[J]`) that is parameterized on the given project's @@ -165,7 +167,7 @@ ```scala resolvers += Resolver.sonatypeRepo("releases") -libraryDependencies += "org.spire-math" %% jawn-"XYZ" % "0.10.3" +libraryDependencies += "org.spire-math" %% "jawn-XYZ" % "0.11.0" ``` This is an example of how you might use the parser into your code: @@ -189,7 +191,7 @@ ```scala resolvers += Resolver.sonatypeRepo("releases") -libraryDependencies += "org.spire-math" %% "jawn-parser" % "0.10.3" 
+libraryDependencies += "org.spire-math" %% "jawn-parser" % "0.11.0" ``` To support your AST of choice, you'll want to define a `Facade[J]` @@ -422,4 +424,4 @@ All code is available to you under the MIT license, available at http://opensource.org/licenses/mit-license.php. -Copyright Erik Osheim, 2012-2016. +Copyright Erik Osheim, 2012-2017. diff --git a/ast/src/main/scala/jawn/ast/JParser.scala b/ast/src/main/scala/jawn/ast/JParser.scala index 8ab2bc7..704557c 100644 --- a/ast/src/main/scala/jawn/ast/JParser.scala +++ b/ast/src/main/scala/jawn/ast/JParser.scala @@ -15,6 +15,9 @@ def parseFromString(s: String): Try[JValue] = Try(new StringParser[JValue](s).parse) + def parseFromCharSequence(cs: CharSequence): Try[JValue] = + Try(new CharSequenceParser[JValue](cs).parse) + def parseFromPath(path: String): Try[JValue] = parseFromFile(new File(path)) diff --git a/ast/src/main/scala/jawn/ast/JValue.scala b/ast/src/main/scala/jawn/ast/JValue.scala index 361f43b..d09347b 100644 --- a/ast/src/main/scala/jawn/ast/JValue.scala +++ b/ast/src/main/scala/jawn/ast/JValue.scala @@ -204,7 +204,7 @@ case class DeferLong(s: String) extends JNum { - lazy val n: Long = java.lang.Long.parseLong(s) + lazy val n: Long = util.parseLongUnsafe(s) final override def getInt: Option[Int] = Some(n.toInt) final override def getLong: Option[Long] = Some(n) @@ -235,13 +235,13 @@ lazy val n: Double = java.lang.Double.parseDouble(s) final override def getInt: Option[Int] = Some(n.toInt) - final override def getLong: Option[Long] = Some(n.toLong) + final override def getLong: Option[Long] = Some(util.parseLongUnsafe(s)) final override def getDouble: Option[Double] = Some(n) final override def getBigInt: Option[BigInt] = Some(BigDecimal(s).toBigInt) final override def getBigDecimal: Option[BigDecimal] = Some(BigDecimal(s)) final override def asInt: Int = n.toInt - final override def asLong: Long = n.toLong + final override def asLong: Long = util.parseLongUnsafe(s) final override def asDouble: Double 
= n final override def asBigInt: BigInt = BigDecimal(s).toBigInt final override def asBigDecimal: BigDecimal = BigDecimal(s) diff --git a/ast/src/main/scala/jawn/ast/JawnFacade.scala b/ast/src/main/scala/jawn/ast/JawnFacade.scala index 278aa3e..a2d2d71 100644 --- a/ast/src/main/scala/jawn/ast/JawnFacade.scala +++ b/ast/src/main/scala/jawn/ast/JawnFacade.scala @@ -8,14 +8,21 @@ final val jnull = JNull final val jfalse = JFalse final val jtrue = JTrue - final def jnum(s: String) = DeferNum(s) - final def jint(s: String) = DeferLong(s) - final def jstring(s: String) = JString(s) + + final def jnum(s: CharSequence, decIndex: Int, expIndex: Int): JValue = + if (decIndex == -1 && expIndex == -1) { + DeferLong(s.toString) + } else { + DeferNum(s.toString) + } + + final def jstring(s: CharSequence): JValue = + JString(s.toString) final def singleContext(): FContext[JValue] = new FContext[JValue] { var value: JValue = _ - def add(s: String) { value = JString(s) } + def add(s: CharSequence) { value = JString(s.toString) } def add(v: JValue) { value = v } def finish: JValue = value def isObj: Boolean = false @@ -24,7 +31,7 @@ final def arrayContext(): FContext[JValue] = new FContext[JValue] { val vs = mutable.ArrayBuffer.empty[JValue] - def add(s: String) { vs.append(JString(s)) } + def add(s: CharSequence) { vs.append(JString(s.toString)) } def add(v: JValue) { vs.append(v) } def finish: JValue = JArray(vs.toArray) def isObj: Boolean = false @@ -34,8 +41,8 @@ new FContext[JValue] { var key: String = null val vs = mutable.Map.empty[String, JValue] - def add(s: String): Unit = - if (key == null) { key = s } else { vs(key) = JString(s); key = null } + def add(s: CharSequence): Unit = + if (key == null) { key = s.toString } else { vs(key.toString) = JString(s.toString); key = null } def add(v: JValue): Unit = { vs(key) = v; key = null } def finish = JObject(vs) diff --git a/ast/src/test/scala/jawn/ParseCheck.scala b/ast/src/test/scala/jawn/ParseCheck.scala index 75781f4..a5a5aa1 
100644 --- a/ast/src/test/scala/jawn/ParseCheck.scala +++ b/ast/src/test/scala/jawn/ParseCheck.scala @@ -10,6 +10,8 @@ import scala.collection.mutable import scala.util.{Try, Success} + +import jawn.parser.TestUtil import ArbitraryUtil._ @@ -33,7 +35,7 @@ value1 shouldBe value2 value1.## shouldBe value2.## - parser.Util.withTemp(json1) { t => + TestUtil.withTemp(json1) { t => JParser.parseFromFile(t).get shouldBe value2 } } @@ -48,6 +50,17 @@ jstr2 shouldBe jstr1 json2 shouldBe json1 json2.## shouldBe json1.## + } + } + + property("string/charSequence parsing") { + forAll { value: JValue => + val s = CanonicalRenderer.render(value) + val j1 = JParser.parseFromString(s) + val cs = java.nio.CharBuffer.wrap(s.toCharArray) + val j2 = JParser.parseFromCharSequence(cs) + j1 shouldBe j2 + j1.## shouldBe j2.## } } @@ -76,6 +89,15 @@ } ++ checkRight(p.finish()) import AsyncParser.{UnwrapArray, ValueStream, SingleValue} + + property("async multi") { + val data = "[1,2,3][4,5,6]" + val p = AsyncParser[JValue](ValueStream) + val res0 = p.absorb(data) + val res1 = p.finish + //println((res0, res1)) + true + } property("async parsing") { forAll { (v: JValue) => @@ -134,13 +156,13 @@ val s0 = ("x" * (40 * M)) val e0 = q + s0 + q - parser.Util.withTemp(e0) { t => + TestUtil.withTemp(e0) { t => JParser.parseFromFile(t).filter(_ == JString(s0)).isSuccess shouldBe true } val s1 = "\\" * (20 * M) val e1 = q + s1 + s1 + q - parser.Util.withTemp(e1) { t => + TestUtil.withTemp(e1) { t => JParser.parseFromFile(t).filter(_ == JString(s1)).isSuccess shouldBe true } } diff --git a/benchmark/build.sbt b/benchmark/build.sbt index 4310d56..7cb15b1 100644 --- a/benchmark/build.sbt +++ b/benchmark/build.sbt @@ -3,18 +3,18 @@ javaOptions in run += "-Xmx6G" libraryDependencies ++= Seq( - "io.argonaut" %% "argonaut" % "6.1-M6", - "org.json4s" %% "json4s-native" % "3.2.11", - "org.json4s" %% "json4s-jackson" % "3.2.10", - "com.typesafe.play" %% "play-json" % "2.3.0", + "io.argonaut" %% "argonaut" % 
"6.2", + "org.json4s" %% "json4s-native" % "3.5.2", + "org.json4s" %% "json4s-jackson" % "3.5.2", + "com.typesafe.play" %% "play-json" % "2.5.15", "com.rojoma" %% "rojoma-json" % "2.4.3", - "com.rojoma" %% "rojoma-json-v3" % "3.3.0", - "io.spray" %% "spray-json" % "1.3.2", - "org.parboiled" %% "parboiled" % "2.1.0", - "com.fasterxml.jackson.core" % "jackson-annotations" % "2.5.3", - "com.fasterxml.jackson.core" % "jackson-core" % "2.5.3", - "com.fasterxml.jackson.core" % "jackson-databind" % "2.5.3", - "com.google.code.gson" % "gson" % "2.2.4" + "com.rojoma" %% "rojoma-json-v3" % "3.7.2", + "io.spray" %% "spray-json" % "1.3.3", + "org.parboiled" %% "parboiled" % "2.1.4", + "com.fasterxml.jackson.core" % "jackson-annotations" % "2.8.4", + "com.fasterxml.jackson.core" % "jackson-core" % "2.8.4", + "com.fasterxml.jackson.core" % "jackson-databind" % "2.8.4", + "com.google.code.gson" % "gson" % "2.8.1" ) // enable forking in run diff --git a/benchmark/src/main/scala/jawn/JmhBenchmarks.scala b/benchmark/src/main/scala/jawn/JmhBenchmarks.scala index 86108da..bc56f9f 100644 --- a/benchmark/src/main/scala/jawn/JmhBenchmarks.scala +++ b/benchmark/src/main/scala/jawn/JmhBenchmarks.scala @@ -10,7 +10,6 @@ @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.MILLISECONDS) abstract class JmhBenchmarks(name: String) { - val path: String = s"src/main/resources/$name" def load(path: String): String = { @@ -27,6 +26,20 @@ def buffered(path: String): BufferedReader = new BufferedReader(new FileReader(new File(path))) + @Benchmark + def jawnCheckSyntax() = + jawn.Syntax.checkString(load(path)) + + @Benchmark + def jawnParse() = + jawn.ast.JParser.parseFromFile(new File(path)).get + + @Benchmark + def jawnStringParse() = + jawn.ast.JParser.parseFromString(load(path)).get +} + +trait OtherBenchmarks { self: JmhBenchmarks => @Benchmark def json4sJacksonParse() = { import org.json4s._ @@ -65,18 +78,6 @@ def gsonParse() = new com.google.gson.JsonParser().parse(buffered(path)) 
- @Benchmark - def jawnCheckSyntax() = - jawn.Syntax.checkString(load(path)) - - @Benchmark - def jawnParse() = - jawn.ast.JParser.parseFromFile(new File(path)).get - - @Benchmark - def jawnStringParse() = - jawn.ast.JParser.parseFromString(load(path)).get - // don't bother benchmarking jawn + external asts by default // @Benchmark @@ -105,10 +106,15 @@ // } } -class Qux2Bench extends JmhBenchmarks("qux2.json") -class Bla25Bench extends JmhBenchmarks("bla25.json") -class CountriesBench extends JmhBenchmarks("countries.geo.json") -class Ugh10kBench extends JmhBenchmarks("ugh10k.json") +class Qux2Bench extends JmhBenchmarks("qux2.json") with OtherBenchmarks +class Bla25Bench extends JmhBenchmarks("bla25.json") with OtherBenchmarks +class CountriesBench extends JmhBenchmarks("countries.geo.json") with OtherBenchmarks +class Ugh10kBench extends JmhBenchmarks("ugh10k.json") with OtherBenchmarks + +class JawnOnlyQux2Bench extends JmhBenchmarks("qux2.json") +class JawnOnlyBla25Bench extends JmhBenchmarks("bla25.json") +class JawnOnlyCountriesBench extends JmhBenchmarks("countries.geo.json") +class JawnOnlyUgh10kBench extends JmhBenchmarks("ugh10k.json") // // from https://github.com/zemirco/sf-city-lots-json // class CityLotsBench extends JmhBenchmarks("citylots.json") diff --git a/benchmark/src/main/scala/jawn/ParseLongBench.scala b/benchmark/src/main/scala/jawn/ParseLongBench.scala new file mode 100644 index 0000000..97e8e6a --- /dev/null +++ b/benchmark/src/main/scala/jawn/ParseLongBench.scala @@ -0,0 +1,133 @@ +package jawn +package benchmark + +import java.io.{BufferedReader, File, FileInputStream, FileReader} +import java.util.concurrent.TimeUnit +import org.openjdk.jmh.annotations._ +import scala.collection.mutable + +case class Slice(s: String, begin: Int, limit: Int) extends CharSequence { + val length: Int = limit - begin + def charAt(i: Int): Char = s.charAt(begin + i) + def subSequence(start: Int, end: Int): Slice = + Slice(s, begin + start, Math.min(end + 
begin, limit)) + override def toString: String = + s.substring(begin, limit) +} + +@State(Scope.Benchmark) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +class ParseLongBench { + + val longs: Array[Long] = Array( + -1346837161442476189L, -4666345991836441070L, 4868830844043235709L, + 2992690405064579158L, -2017521011608078634L, -3039682866169364757L, + 8997687047891586260L, 5932727796276454607L, 4062739618560250554L, + 8668950167358198490L, -8565613821858118870L, 8049785848575684314L, + -580831266940599830L, -3593199367295538945L, 8374322595267797482L, + 3088261552516619129L, -6879203747403593851L, -1842900848925949857L, + 4484592876047641351L, 5182973278356955602L, -6840392853855436945L, + -4176340556015032222L, -536379174926548619L, 6343722878919863216L, + 1557757008211571405L, -334093799456298669L, 619602023052756397L, + 6904874397154297343L, -4332034907782234995L, -8767842695446545180L, + -6127250063205613011L, 6902212562850963795L, 4778607575334665692L, + 7674074815344809639L, -3834944692798167050L, 7406081418831471202L, + -9126886315356724563L, 8093878176633322645L, 2471547025788214028L, + -5018828829942988155L, -6676531171364391367L, 8189793226936659851L, + 7150026713387306746L, -6065566098373722052L, 3281133763697608570L, + 957103694526079944L, -3009447279791131829L, -1995600795755716697L, + 2361055030313262510L, -4312828282749171343L, 8836216125516165138L, + 5548785979447786253L, 8567551485822958810L, 5931896003625723150L, + 3472058092439106147L, 4363240277904515929L, -2999484068697753019L, + -8285358702782547958L, -2407429647076308777L, 4411565001760018584L, + 792384115860070648L, 3328145302561962294L, -2377559446421434356L, + -7837698939558960516L, -565806101451282875L, -4792610084643070650L, + 2713520205731589923L, -6521104721472605988L, 5037187811345411645L, + 3866939564433764178L, -3851229228204678079L, -8171137274242372558L, + -14713951794749384L, 2061783257002637655L, -7375571393873059570L, + 7402007407273053723L, -5104318069025846447L, 
-8956415532448219980L, + 4904595193891993401L, 5396360181536889307L, -8043917553767343384L, + -3666269817017255250L, -6535587792359353103L, -4553034734642385706L, + -7544140164897268962L, 2468330113904053484L, 5790319365381968237L, + -2734383156062609640L, -4831208471935595172L, 4502079643250626043L, + 4778622151522470246L, 7233054223498326990L, 5833883346008509644L, + -8013495378054295093L, 2944606201054530456L, -8608231828651976245L, + -6957117814546267426L, -4744827311133020624L, 2640030216500286789L, + 8343959867315747844L) + + val strs: Array[CharSequence] = + longs.map(_.toString) + + val seqs: Array[CharSequence] = + longs.map { n => + val prefix = "x" * (n & 63).toInt + val suffix = "y" * ((n * 7) & 63).toInt + val i = prefix.length + val s = n.toString + Slice(prefix + s + suffix, i, s.length + i) + } + + val str: CharSequence = "23948271429443" + + val seq: CharSequence = Slice("weigjewigjwi23948271429443jgewigjweiwjegiwgjiewjgeiwjg", 12, 26) + + def sumJava(css: Array[CharSequence]): Long = { + var sum: Long = 0 + var i = 0 + while (i < css.length) { + sum += java.lang.Long.parseLong(css(i).toString) + i += 1 + } + sum + } + + def sumStd(css: Array[CharSequence]): Long = { + var sum: Long = 0 + var i = 0 + while (i < css.length) { + sum += css(i).toString.toLong + i += 1 + } + sum + } + + def sumSafe(css: Array[CharSequence]): Long = { + var sum: Long = 0 + var i = 0 + while (i < css.length) { + sum += Util.parseLong(css(i)) + i += 1 + } + sum + } + + def sumUnsafe(css: Array[CharSequence]): Long = { + var sum: Long = 0 + var i = 0 + while (i < css.length) { + sum += Util.parseLongUnsafe(css(i)) + i += 1 + } + sum + } + + @Benchmark def stringArrayJava(): Long = sumJava(strs) + @Benchmark def seqArrayJava(): Long = sumJava(seqs) + @Benchmark def stringValueJava(): Long = java.lang.Long.parseLong(str.toString) + @Benchmark def seqValueJava(): Long = java.lang.Long.parseLong(seq.toString) + + @Benchmark def stringArrayStd(): Long = sumStd(strs) + 
@Benchmark def seqArrayStd(): Long = sumStd(seqs) + @Benchmark def stringValueStd(): Long = str.toString.toLong + @Benchmark def seqValueStd(): Long = seq.toString.toLong + + @Benchmark def stringArraySafe(): Long = sumSafe(strs) + @Benchmark def seqArraySafe(): Long = sumSafe(seqs) + @Benchmark def stringValueSafe(): Long = Util.parseLong(str) + @Benchmark def seqValueSafe(): Long = Util.parseLong(seq) + + @Benchmark def stringArrayUnsafe(): Long = sumUnsafe(strs) + @Benchmark def seqArrayUnsafe(): Long = sumUnsafe(seqs) + @Benchmark def stringValueUnsafe(): Long = Util.parseLongUnsafe(str) + @Benchmark def seqValueUnsafe(): Long = Util.parseLongUnsafe(seq) +} diff --git a/build.sbt b/build.sbt index 9dc7b23..c32403e 100644 --- a/build.sbt +++ b/build.sbt @@ -1,21 +1,36 @@ import ReleaseTransformations._ + +lazy val previousJawnVersion = "0.10.4" + +lazy val stableCrossVersions = + Seq("2.10.6", "2.11.11", "2.12.2") + +// we'll support 2.13.0-M1 soon but not yet +lazy val allCrossVersions = + stableCrossVersions + +lazy val benchmarkVersion = + "2.12.2" lazy val jawnSettings = Seq( organization := "org.spire-math", - scalaVersion := "2.11.8", - crossScalaVersions := Seq("2.10.6", "2.11.8", "2.12.0"), + scalaVersion := "2.12.2", + crossScalaVersions := allCrossVersions, + + mimaPreviousArtifacts := Set(organization.value %% moduleName.value % previousJawnVersion), resolvers += Resolver.sonatypeRepo("releases"), - libraryDependencies ++= Seq( - "org.scalatest" %% "scalatest" % "3.0.0" % "test", - "org.scalacheck" %% "scalacheck" % "1.13.4" % "test" - ), - scalacOptions ++= Seq( - //"-Yinline-warnings", - "-deprecation", - "-optimize", - "-unchecked" - ), + + libraryDependencies ++= + "org.scalatest" %% "scalatest" % "3.0.3" % Test :: + "org.scalacheck" %% "scalacheck" % "1.13.5" % Test :: + Nil, + + scalacOptions ++= + "-deprecation" :: + "-optimize" :: + "-unchecked" :: + Nil, licenses += ("MIT", url("http://opensource.org/licenses/MIT")), homepage := 
Some(url("http://github.com/non/jawn")), @@ -26,26 +41,25 @@ publishArtifact in Test := false, pomIncludeRepository := Function.const(false), - publishTo <<= (version).apply { v => + publishTo := { val nexus = "https://oss.sonatype.org/" - if (v.trim.endsWith("SNAPSHOT")) + if (isSnapshot.value) { Some("Snapshots" at nexus + "content/repositories/snapshots") - else + } else { Some("Releases" at nexus + "service/local/staging/deploy/maven2") + } }, - pomExtra := ( - - git@github.com:non/jawn.git - scm:git:git@github.com:non/jawn.git - - - - d_m - Erik Osheim - http://github.com/non/ - - + scmInfo := Some(ScmInfo( + browseUrl = url("https://github.com/non/jawn"), + connection = "scm:git:git@github.com:non/jawn.git" + )), + + developers += Developer( + name = "Erik Osheim", + email = "erik@plastic-idolatry.com", + id = "d_m", + url = url("http://github.com/non/") ), releaseProcess := Seq[ReleaseStep]( @@ -65,7 +79,8 @@ lazy val noPublish = Seq( publish := {}, publishLocal := {}, - publishArtifact := false) + publishArtifact := false, + mimaPreviousArtifacts := Set()) lazy val root = project.in(file(".")) .aggregate(all.map(Project.projectToRef): _*) @@ -81,8 +96,16 @@ .settings(jawnSettings: _*) .disablePlugins(JmhPlugin) +lazy val util = project.in(file("util")) + .dependsOn(parser % "compile->compile;test->test") + .settings(name := "util") + .settings(moduleName := "jawn-util") + .settings(jawnSettings: _*) + .disablePlugins(JmhPlugin) + lazy val ast = project.in(file("ast")) .dependsOn(parser % "compile->compile;test->test") + .dependsOn(util % "compile->compile;test->test") .settings(name := "ast") .settings(moduleName := "jawn-ast") .settings(jawnSettings: _*) @@ -97,38 +120,43 @@ .disablePlugins(JmhPlugin) lazy val supportArgonaut = support("argonaut") - .settings(crossScalaVersions := Seq("2.10.6", "2.11.8")) - .settings(libraryDependencies += "io.argonaut" %% "argonaut" % "6.1") + .settings(crossScalaVersions := stableCrossVersions) + 
.settings(libraryDependencies += "io.argonaut" %% "argonaut" % "6.2") lazy val supportJson4s = support("json4s") - .settings(libraryDependencies += "org.json4s" %% "json4s-ast" % "3.5.0") + .dependsOn(util) + .settings(crossScalaVersions := stableCrossVersions) + .settings(libraryDependencies += "org.json4s" %% "json4s-ast" % "3.5.2") lazy val supportPlay = support("play") - .settings(crossScalaVersions := Seq("2.10.6", "2.11.8")) + .settings(crossScalaVersions := stableCrossVersions) .settings(libraryDependencies += (scalaBinaryVersion.value match { - case "2.10" => "com.typesafe.play" %% "play-json" % "2.4.8" - case _ => "com.typesafe.play" %% "play-json" % "2.5.9" + case "2.10" => "com.typesafe.play" %% "play-json" % "2.4.11" + case "2.11" => "com.typesafe.play" %% "play-json" % "2.5.15" + case _ => "com.typesafe.play" %% "play-json" % "2.6.0" })) lazy val supportRojoma = support("rojoma") - .settings(crossScalaVersions := Seq("2.10.6", "2.11.8", "2.12.0")) + .settings(crossScalaVersions := stableCrossVersions) .settings(libraryDependencies += "com.rojoma" %% "rojoma-json" % "2.4.3") lazy val supportRojomaV3 = support("rojoma-v3") - .settings(libraryDependencies += "com.rojoma" %% "rojoma-json-v3" % "3.7.0") + .settings(crossScalaVersions := stableCrossVersions) + .settings(libraryDependencies += "com.rojoma" %% "rojoma-json-v3" % "3.7.2") lazy val supportSpray = support("spray") + .settings(crossScalaVersions := stableCrossVersions) .settings(resolvers += "spray" at "http://repo.spray.io/") - .settings(libraryDependencies += "io.spray" %% "spray-json" % "1.3.2") + .settings(libraryDependencies += "io.spray" %% "spray-json" % "1.3.3") lazy val benchmark = project.in(file("benchmark")) .dependsOn(all.map(Project.classpathDependency[Project]): _*) .settings(name := "jawn-benchmark") .settings(jawnSettings: _*) - .settings(scalaVersion := "2.11.8") + .settings(scalaVersion := benchmarkVersion) + .settings(crossScalaVersions := Seq(benchmarkVersion)) 
.settings(noPublish: _*) - .settings(crossScalaVersions := Seq("2.11.8")) .enablePlugins(JmhPlugin) lazy val all = - Seq(parser, ast, supportArgonaut, supportJson4s, supportPlay, supportRojoma, supportRojomaV3, supportSpray) + Seq(parser, util, ast, supportArgonaut, supportJson4s, supportPlay, supportRojoma, supportRojomaV3, supportSpray) diff --git a/parser/src/main/scala/jawn/AsyncParser.scala b/parser/src/main/scala/jawn/AsyncParser.scala index 3204bf4..b41229d 100644 --- a/parser/src/main/scala/jawn/AsyncParser.scala +++ b/parser/src/main/scala/jawn/AsyncParser.scala @@ -243,14 +243,16 @@ } } - // every 1M we shift our array back by 1M. + // every 1M we shift our array back to the beginning. protected[this] final def reset(i: Int): Int = { if (offset >= 1048576) { - len -= 1048576 - offset -= 1048576 - pos -= 1048576 - System.arraycopy(data, 1048576, data, 0, len) - i - 1048576 + val diff = offset + curr -= diff + len -= diff + offset = 0 + pos -= diff + System.arraycopy(data, diff, data, 0, len) + i - diff } else { i } @@ -289,7 +291,7 @@ * boundaries. Also, the resulting String is not guaranteed to have length * (k - i). */ - protected[this] final def at(i: Int, k: Int): String = { + protected[this] final def at(i: Int, k: Int): CharSequence = { if (k > len) throw new AsyncException val size = k - i val arr = new Array[Byte](size) diff --git a/parser/src/main/scala/jawn/ByteBufferParser.scala b/parser/src/main/scala/jawn/ByteBufferParser.scala index 6ce82ab..1902b8d 100644 --- a/parser/src/main/scala/jawn/ByteBufferParser.scala +++ b/parser/src/main/scala/jawn/ByteBufferParser.scala @@ -14,11 +14,13 @@ * update its own mutable position fields. 
*/ final class ByteBufferParser[J](src: ByteBuffer) extends SyncParser[J] with ByteBasedParser[J] { - final val start = src.position - final val limit = src.limit - start + private[this] final val start = src.position + private[this] final val limit = src.limit - start - var line = 0 - protected[this] final def newline(i: Int) { line += 1 } + private[this] var lineState = 0 + protected[this] def line(): Int = lineState + + protected[this] final def newline(i: Int) { lineState += 1 } protected[this] final def column(i: Int) = i protected[this] final def close() { src.position(src.limit) } @@ -27,7 +29,7 @@ protected[this] final def byte(i: Int): Byte = src.get(i + start) protected[this] final def at(i: Int): Char = src.get(i + start).toChar - protected[this] final def at(i: Int, k: Int): String = { + protected[this] final def at(i: Int, k: Int): CharSequence = { val len = k - i val arr = new Array[Byte](len) src.position(i + start) diff --git a/parser/src/main/scala/jawn/ChannelParser.scala b/parser/src/main/scala/jawn/ChannelParser.scala index 7fe1886..3c93e74 100644 --- a/parser/src/main/scala/jawn/ChannelParser.scala +++ b/parser/src/main/scala/jawn/ChannelParser.scala @@ -140,7 +140,7 @@ * on unicode boundaries. Also, the resulting String is not * guaranteed to have length (k - i). */ - protected[this] final def at(i: Int, k: Int): String = { + protected[this] final def at(i: Int, k: Int): CharSequence = { val len = k - i if (k > Allsize) { grow() diff --git a/parser/src/main/scala/jawn/CharBasedParser.scala b/parser/src/main/scala/jawn/CharBasedParser.scala index 46e6096..a054e5d 100644 --- a/parser/src/main/scala/jawn/CharBasedParser.scala +++ b/parser/src/main/scala/jawn/CharBasedParser.scala @@ -11,7 +11,9 @@ * * It is simpler than ByteBasedParser. 
*/ -private[jawn] trait CharBasedParser[J] extends Parser[J] { +trait CharBasedParser[J] extends Parser[J] { + + private[this] final val charBuilder = new CharBuilder() /** * See if the string has any escape sequences. If not, return the diff --git a/parser/src/main/scala/jawn/CharBuilder.scala b/parser/src/main/scala/jawn/CharBuilder.scala index c4ba4b7..589437b 100644 --- a/parser/src/main/scala/jawn/CharBuilder.scala +++ b/parser/src/main/scala/jawn/CharBuilder.scala @@ -34,7 +34,7 @@ } } - def extend(s: String): Unit = { + def extend(s: CharSequence): Unit = { val tlen = len + s.length resizeIfNecessary(tlen) var i = 0 diff --git a/parser/src/main/scala/jawn/CharSequenceParser.scala b/parser/src/main/scala/jawn/CharSequenceParser.scala new file mode 100644 index 0000000..c592326 --- /dev/null +++ b/parser/src/main/scala/jawn/CharSequenceParser.scala @@ -0,0 +1,18 @@ +package jawn + +/** + * Lazy character sequence parsing. + * + * This is similar to StringParser, but acts on character sequences. + */ +private[jawn] final class CharSequenceParser[J](cs: CharSequence) extends SyncParser[J] with CharBasedParser[J] { + var line = 0 + final def column(i: Int) = i + final def newline(i: Int) { line += 1 } + final def reset(i: Int): Int = i + final def checkpoint(state: Int, i: Int, stack: List[FContext[J]]): Unit = () + final def at(i: Int): Char = cs.charAt(i) + final def at(i: Int, j: Int): CharSequence = cs.subSequence(i, j) + final def atEof(i: Int) = i == cs.length + final def close() = () +} diff --git a/parser/src/main/scala/jawn/Facade.scala b/parser/src/main/scala/jawn/Facade.scala index ed85f3d..203b68e 100644 --- a/parser/src/main/scala/jawn/Facade.scala +++ b/parser/src/main/scala/jawn/Facade.scala @@ -3,7 +3,7 @@ /** * Facade is a type class that describes how Jawn should construct * JSON AST elements of type J. - * + * * Facade[J] also uses FContext[J] instances, so implementors will * usually want to define both. 
*/ @@ -15,9 +15,8 @@ def jnull(): J def jfalse(): J def jtrue(): J - def jnum(s: String): J - def jint(s: String): J - def jstring(s: String): J + def jnum(s: CharSequence, decIndex: Int, expIndex: Int): J + def jstring(s: CharSequence): J } /** @@ -28,7 +27,7 @@ * cases where the entire JSON document consists of "333.33". */ trait FContext[J] { - def add(s: String): Unit + def add(s: CharSequence): Unit def add(v: J): Unit def finish: J def isObj: Boolean diff --git a/parser/src/main/scala/jawn/MutableFacade.scala b/parser/src/main/scala/jawn/MutableFacade.scala index ac04ac5..8fe5716 100644 --- a/parser/src/main/scala/jawn/MutableFacade.scala +++ b/parser/src/main/scala/jawn/MutableFacade.scala @@ -8,7 +8,7 @@ def singleContext() = new FContext[J] { var value: J = _ - def add(s: String) { value = jstring(s) } + def add(s: CharSequence) { value = jstring(s) } def add(v: J) { value = v } def finish: J = value def isObj: Boolean = false @@ -16,7 +16,7 @@ def arrayContext() = new FContext[J] { val vs = mutable.ArrayBuffer.empty[J] - def add(s: String) { vs.append(jstring(s)) } + def add(s: CharSequence) { vs.append(jstring(s)) } def add(v: J) { vs.append(v) } def finish: J = jarray(vs) def isObj: Boolean = false @@ -25,8 +25,8 @@ def objectContext() = new FContext[J] { var key: String = null val vs = mutable.Map.empty[String, J] - def add(s: String): Unit = - if (key == null) { key = s } else { vs(key) = jstring(s); key = null } + def add(s: CharSequence): Unit = + if (key == null) { key = s.toString } else { vs(key) = jstring(s); key = null } def add(v: J): Unit = { vs(key) = v; key = null } def finish = jobject(vs) diff --git a/parser/src/main/scala/jawn/NullFacade.scala b/parser/src/main/scala/jawn/NullFacade.scala index 620d26a..39d5588 100644 --- a/parser/src/main/scala/jawn/NullFacade.scala +++ b/parser/src/main/scala/jawn/NullFacade.scala @@ -13,7 +13,7 @@ object NullFacade extends Facade[Unit] { case class NullContext(isObj: Boolean) extends FContext[Unit] { 
- def add(s: String): Unit = () + def add(s: CharSequence): Unit = () def add(v: Unit): Unit = () def finish: Unit = () } @@ -25,7 +25,6 @@ def jnull(): Unit = () def jfalse(): Unit = () def jtrue(): Unit = () - def jnum(s: String): Unit = () - def jint(s: String): Unit = () - def jstring(s: String): Unit = () + def jnum(s: CharSequence, decIndex: Int, expIndex: Int): Unit = () + def jstring(s: CharSequence): Unit = () } diff --git a/parser/src/main/scala/jawn/Parser.scala b/parser/src/main/scala/jawn/Parser.scala index 25c753f..1177e91 100644 --- a/parser/src/main/scala/jawn/Parser.scala +++ b/parser/src/main/scala/jawn/Parser.scala @@ -35,8 +35,6 @@ protected[this] final val utf8 = Charset.forName("UTF-8") - protected[this] final val charBuilder = new CharBuilder() - /** * Read the byte/char at 'i' as a Char. * @@ -48,22 +46,12 @@ /** * Read the bytes/chars from 'i' until 'j' as a String. */ - protected[this] def at(i: Int, j: Int): String + protected[this] def at(i: Int, j: Int): CharSequence /** * Return true iff 'i' is at or beyond the end of the input (EOF). */ protected[this] def atEof(i: Int): Boolean - - /** - * Return true iff the byte/char at 'i' is equal to 'c'. - */ - protected[this] final def is(i: Int, c: Char): Boolean = at(i) == c - - /** - * Return true iff the bytes/chars from 'i' until 'j' are equal to 'str'. 
- */ - protected[this] final def is(i: Int, j: Int, str: String): Boolean = at(i, j) == str /** * The reset() method is used to signal that we're working from the @@ -141,7 +129,8 @@ protected[this] final def parseNum(i: Int, ctxt: FContext[J])(implicit facade: Facade[J]): Int = { var j = i var c = at(j) - var dec = false + var decIndex = -1 + var expIndex = -1 if (c == '-') { j += 1 @@ -157,7 +146,7 @@ } if (c == '.') { - dec = true + decIndex = j - i j += 1 c = at(j) if ('0' <= c && c <= '9') { @@ -168,7 +157,7 @@ } if (c == 'e' || c == 'E') { - dec = true + expIndex = j - i j += 1 c = at(j) if (c == '+' || c == '-') { @@ -182,10 +171,7 @@ } } - if (dec) - ctxt.add(facade.jnum(at(i, j))) - else - ctxt.add(facade.jint(at(i, j))) + ctxt.add(facade.jnum(at(i, j), decIndex, expIndex)) j } @@ -206,7 +192,8 @@ protected[this] final def parseNumSlow(i: Int, ctxt: FContext[J])(implicit facade: Facade[J]): Int = { var j = i var c = at(j) - var dec = false + var decIndex = -1 + var expIndex = -1 if (c == '-') { // any valid input will require at least one digit after - @@ -216,7 +203,7 @@ if (c == '0') { j += 1 if (atEof(j)) { - ctxt.add(facade.jint(at(i, j))) + ctxt.add(facade.jnum(at(i, j), decIndex, expIndex)) return j } c = at(j) @@ -224,7 +211,7 @@ while ('0' <= c && c <= '9') { j += 1 if (atEof(j)) { - ctxt.add(facade.jint(at(i, j))) + ctxt.add(facade.jnum(at(i, j), decIndex, expIndex)) return j } c = at(j) @@ -235,14 +222,14 @@ if (c == '.') { // any valid input will require at least one digit after . 
- dec = true + decIndex = j - i j += 1 c = at(j) if ('0' <= c && c <= '9') { while ('0' <= c && c <= '9') { j += 1 if (atEof(j)) { - ctxt.add(facade.jnum(at(i, j))) + ctxt.add(facade.jnum(at(i, j), decIndex, expIndex)) return j } c = at(j) @@ -254,7 +241,7 @@ if (c == 'e' || c == 'E') { // any valid input will require at least one digit after e, e+, etc - dec = true + expIndex = j - i j += 1 c = at(j) if (c == '+' || c == '-') { @@ -265,7 +252,7 @@ while ('0' <= c && c <= '9') { j += 1 if (atEof(j)) { - ctxt.add(facade.jnum(at(i, j))) + ctxt.add(facade.jnum(at(i, j), decIndex, expIndex)) return j } c = at(j) @@ -274,10 +261,8 @@ die(i, "expected digit") } } - if (dec) - ctxt.add(facade.jnum(at(i, j))) - else - ctxt.add(facade.jint(at(i, j))) + + ctxt.add(facade.jnum(at(i, j), decIndex, expIndex)) j } @@ -287,7 +272,7 @@ * NOTE: This is only capable of generating characters from the basic plane. * This is why it can only return Char instead of Int. */ - protected[this] final def descape(s: String): Char = { + protected[this] final def descape(s: CharSequence): Char = { val hc = HexChars var i = 0 var x = 0 @@ -305,21 +290,39 @@ /** * Parse the JSON constant "true". - */ - protected[this] final def parseTrue(i: Int)(implicit facade: Facade[J]) = - if (is(i, i + 4, "true")) facade.jtrue else die(i, "expected true") + * + * Note that this method assumes that the first character has already been checked. + */ + protected[this] final def parseTrue(i: Int)(implicit facade: Facade[J]): J = + if (at(i + 1) == 'r' && at(i + 2) == 'u' && at(i + 3) == 'e') { + facade.jtrue + } else { + die(i, "expected true") + } /** * Parse the JSON constant "false". - */ - protected[this] final def parseFalse(i: Int)(implicit facade: Facade[J]) = - if (is(i, i + 5, "false")) facade.jfalse else die(i, "expected false") + * + * Note that this method assumes that the first character has already been checked. 
+ */ + protected[this] final def parseFalse(i: Int)(implicit facade: Facade[J]): J = + if (at(i + 1) == 'a' && at(i + 2) == 'l' && at(i + 3) == 's' && at(i + 4) == 'e') { + facade.jfalse + } else { + die(i, "expected false") + } /** * Parse the JSON constant "null". - */ - protected[this] final def parseNull(i: Int)(implicit facade: Facade[J]) = - if (is(i, i + 4, "null")) facade.jnull else die(i, "expected null") + * + * Note that this method assumes that the first character has already been checked. + */ + protected[this] final def parseNull(i: Int)(implicit facade: Facade[J]): J = + if (at(i + 1) == 'u' && at(i + 2) == 'l' && at(i + 3) == 'l') { + facade.jnull + } else { + die(i, "expected null") + } /** * Parse and return the next JSON value and the position beyond it. @@ -380,164 +383,97 @@ protected[this] final def rparse(state: Int, j: Int, stack: List[FContext[J]])(implicit facade: Facade[J]): (J, Int) = { val i = reset(j) checkpoint(state, i, stack) - (state: @switch) match { + + val c = at(i) + + if (c == '\n') { + newline(i) + rparse(state, i + 1, stack) + } else if (c == ' ' || c == '\t' || c == '\r') { + rparse(state, i + 1, stack) + } else if (state == DATA) { // we are inside an object or array expecting to see data - case DATA => - (at(i): @switch) match { - case '[' => rparse(ARRBEG, i + 1, facade.arrayContext() :: stack) - case '{' => rparse(OBJBEG, i + 1, facade.objectContext() :: stack) - - case '-' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => - val ctxt = stack.head - val j = parseNum(i, ctxt) - rparse(if (ctxt.isObj) OBJEND else ARREND, j, stack) - - case '"' => - val ctxt = stack.head - val j = parseString(i, ctxt) - rparse(if (ctxt.isObj) OBJEND else ARREND, j, stack) - - case 't' => - val ctxt = stack.head - ctxt.add(parseTrue(i)) - rparse(if (ctxt.isObj) OBJEND else ARREND, i + 4, stack) - - case 'f' => - val ctxt = stack.head - ctxt.add(parseFalse(i)) - rparse(if (ctxt.isObj) OBJEND else ARREND, i + 5, stack) - - case 
'n' => - val ctxt = stack.head - ctxt.add(parseNull(i)) - rparse(if (ctxt.isObj) OBJEND else ARREND, i + 4, stack) - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => - die(i, "expected json value") + if (c == '[') { + rparse(ARRBEG, i + 1, facade.arrayContext() :: stack) + } else if (c == '{') { + rparse(OBJBEG, i + 1, facade.objectContext() :: stack) + } else { + val ctxt = stack.head + + if ((c >= '0' && c <= '9') || c == '-') { + val j = parseNum(i, ctxt) + rparse(if (ctxt.isObj) OBJEND else ARREND, j, stack) + } else if (c == '"') { + val j = parseString(i, ctxt) + rparse(if (ctxt.isObj) OBJEND else ARREND, j, stack) + } else if (c == 't') { + ctxt.add(parseTrue(i)) + rparse(if (ctxt.isObj) OBJEND else ARREND, i + 4, stack) + } else if (c == 'f') { + ctxt.add(parseFalse(i)) + rparse(if (ctxt.isObj) OBJEND else ARREND, i + 5, stack) + } else if (c == 'n') { + ctxt.add(parseNull(i)) + rparse(if (ctxt.isObj) OBJEND else ARREND, i + 4, stack) + } else { + die(i, "expected json value") } - - // we are in an object expecting to see a key - case KEY => - (at(i): @switch) match { - case '"' => - val j = parseString(i, stack.head) - rparse(SEP, j, stack) - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => die(i, "expected \"") + } + } else if ( + (c == ']' && (state == ARREND || state == ARRBEG)) || + (c == '}' && (state == OBJEND || state == OBJBEG)) + ) { + // we are inside an array or object and have seen a key or a closing + // brace, respectively. 
+ if (stack.isEmpty) { + error("invalid stack") + } else { + val ctxt1 = stack.head + val tail = stack.tail + + if (tail.isEmpty) { + (ctxt1.finish, i + 1) + } else { + val ctxt2 = tail.head + ctxt2.add(ctxt1.finish) + rparse(if (ctxt2.isObj) OBJEND else ARREND, i + 1, tail) } - - // we are starting an array, expecting to see data or a closing bracket - case ARRBEG => - (at(i): @switch) match { - case ']' => stack match { - case ctxt1 :: Nil => - (ctxt1.finish, i + 1) - case ctxt1 :: ctxt2 :: tail => - ctxt2.add(ctxt1.finish) - rparse(if (ctxt2.isObj) OBJEND else ARREND, i + 1, ctxt2 :: tail) - case _ => - error("invalid stack") - } - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => rparse(DATA, i, stack) - } - - // we are starting an object, expecting to see a key or a closing brace - case OBJBEG => - (at(i): @switch) match { - case '}' => stack match { - case ctxt1 :: Nil => - (ctxt1.finish, i + 1) - case ctxt1 :: ctxt2 :: tail => - ctxt2.add(ctxt1.finish) - rparse(if (ctxt2.isObj) OBJEND else ARREND, i + 1, ctxt2 :: tail) - case _ => - error("invalid stack") - } - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => rparse(KEY, i, stack) - } - - // we are in an object just after a key, expecting to see a colon - case SEP => - (at(i): @switch) match { - case ':' => rparse(DATA, i + 1, stack) - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => die(i, "expected :") - } - - // we are at a possible stopping point for an array, expecting to see - // either a comma (before more data) or a closing bracket. 
- case ARREND => - (at(i): @switch) match { - case ',' => rparse(DATA, i + 1, stack) - - case ']' => stack match { - case ctxt1 :: Nil => - (ctxt1.finish, i + 1) - case ctxt1 :: ctxt2 :: tail => - ctxt2.add(ctxt1.finish) - rparse(if (ctxt2.isObj) OBJEND else ARREND, i + 1, ctxt2 :: tail) - case _ => - error("invalid stack") - } - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => die(i, "expected ] or ,") - } - - // we are at a possible stopping point for an object, expecting to see - // either a comma (before more data) or a closing brace. - case OBJEND => - (at(i): @switch) match { - case ',' => rparse(KEY, i + 1, stack) - - case '}' => stack match { - case ctxt1 :: Nil => - (ctxt1.finish, i + 1) - case ctxt1 :: ctxt2 :: tail => - ctxt2.add(ctxt1.finish) - rparse(if (ctxt2.isObj) OBJEND else ARREND, i + 1, ctxt2 :: tail) - case _ => - error("invalid stack") - } - - case ' ' => rparse(state, i + 1, stack) - case '\t' => rparse(state, i + 1, stack) - case '\r' => rparse(state, i + 1, stack) - case '\n' => newline(i); rparse(state, i + 1, stack) - - case _ => die(i, "expected } or ,") - } + } + } else if (state == KEY) { + // we are in an object expecting to see a key. + if (c == '"') { + val j = parseString(i, stack.head) + rparse(SEP, j, stack) + } else { + die(i, "expected \"") + } + } else if (state == SEP) { + // we are in an object just after a key, expecting to see a colon. + if (c == ':') { + rparse(DATA, i + 1, stack) + } else { + die(i, "expected :") + } + } else if (state == ARREND) { + // we are in an array, expecting to see a comma (before more data). + if (c == ',') { + rparse(DATA, i + 1, stack) + } else { + die(i, "expected ] or ,") + } + } else if (state == OBJEND) { + // we are in an object, expecting to see a comma (before more data). 
+ if (c == ',') { + rparse(KEY, i + 1, stack) + } else { + die(i, "expected } or ,") + } + } else if (state == ARRBEG) { + // we are starting an array, expecting to see data or a closing bracket. + rparse(DATA, i, stack) + } else { + // we are starting an object, expecting to see a key or a closing brace. + rparse(KEY, i, stack) } } } @@ -550,6 +486,9 @@ def parseFromString[J](s: String)(implicit facade: Facade[J]): Try[J] = Try(new StringParser[J](s).parse) + + def parseFromCharSequence[J](cs: CharSequence)(implicit facade: Facade[J]): Try[J] = + Try(new CharSequenceParser[J](cs).parse) def parseFromPath[J](path: String)(implicit facade: Facade[J]): Try[J] = Try(ChannelParser.fromFile[J](new File(path)).parse) diff --git a/parser/src/main/scala/jawn/SimpleFacade.scala b/parser/src/main/scala/jawn/SimpleFacade.scala index e022cdc..dabec01 100644 --- a/parser/src/main/scala/jawn/SimpleFacade.scala +++ b/parser/src/main/scala/jawn/SimpleFacade.scala @@ -5,7 +5,7 @@ /** * Facade is a type class that describes how Jawn should construct * JSON AST elements of type J. - * + * * Facade[J] also uses FContext[J] instances, so implementors will * usually want to define both. 
*/ @@ -15,7 +15,7 @@ def singleContext() = new FContext[J] { var value: J = _ - def add(s: String) { value = jstring(s) } + def add(s: CharSequence) { value = jstring(s) } def add(v: J) { value = v } def finish: J = value def isObj: Boolean = false @@ -23,7 +23,7 @@ def arrayContext() = new FContext[J] { val vs = mutable.ListBuffer.empty[J] - def add(s: String) { vs += jstring(s) } + def add(s: CharSequence) { vs += jstring(s) } def add(v: J) { vs += v } def finish: J = jarray(vs.toList) def isObj: Boolean = false @@ -32,8 +32,8 @@ def objectContext() = new FContext[J] { var key: String = null var vs = Map.empty[String, J] - def add(s: String): Unit = - if (key == null) { key = s } else { vs = vs.updated(key, jstring(s)); key = null } + def add(s: CharSequence): Unit = + if (key == null) { key = s.toString } else { vs = vs.updated(key, jstring(s)); key = null } def add(v: J): Unit = { vs = vs.updated(key, v); key = null } def finish = jobject(vs) diff --git a/parser/src/main/scala/jawn/StringParser.scala b/parser/src/main/scala/jawn/StringParser.scala index 767fcea..91662fc 100644 --- a/parser/src/main/scala/jawn/StringParser.scala +++ b/parser/src/main/scala/jawn/StringParser.scala @@ -17,9 +17,9 @@ final def column(i: Int) = i final def newline(i: Int) { line += 1 } final def reset(i: Int): Int = i - final def checkpoint(state: Int, i: Int, stack: List[FContext[J]]) {} + final def checkpoint(state: Int, i: Int, stack: List[FContext[J]]): Unit = () final def at(i: Int): Char = s.charAt(i) - final def at(i: Int, j: Int): String = s.substring(i, j) + final def at(i: Int, j: Int): CharSequence = s.substring(i, j) final def atEof(i: Int) = i == s.length final def close() = () } diff --git a/parser/src/test/scala/jawn/ChannelSpec.scala b/parser/src/test/scala/jawn/ChannelSpec.scala index 559eddd..6d5d33a 100644 --- a/parser/src/test/scala/jawn/ChannelSpec.scala +++ b/parser/src/test/scala/jawn/ChannelSpec.scala @@ -14,11 +14,11 @@ val big = q + ("x" * (40 * M)) + q val 
bigEscaped = q + ("\\\\" * (20 * M)) + q - Util.withTemp(big) { t => + TestUtil.withTemp(big) { t => Parser.parseFromFile(t)(NullFacade).isSuccess shouldBe true } - Util.withTemp(bigEscaped) { t => + TestUtil.withTemp(bigEscaped) { t => Parser.parseFromFile(t)(NullFacade).isSuccess shouldBe true } } diff --git a/parser/src/test/scala/jawn/JNumIndexCheck.scala b/parser/src/test/scala/jawn/JNumIndexCheck.scala new file mode 100644 index 0000000..b0b6568 --- /dev/null +++ b/parser/src/test/scala/jawn/JNumIndexCheck.scala @@ -0,0 +1,81 @@ +package jawn +package parser + +import java.nio.ByteBuffer +import org.scalatest.{Matchers, PropSpec} +import org.scalatest.prop.PropertyChecks +import scala.util.Success + +class JNumIndexCheck extends PropSpec with Matchers with PropertyChecks { + object JNumIndexCheckFacade extends Facade[Boolean] { + class JNumIndexCheckContext(val isObj: Boolean) extends FContext[Boolean] { + var failed = false + def add(s: CharSequence): Unit = () + def add(v: Boolean): Unit = { + if (!v) failed = true + } + def finish: Boolean = !failed + } + + val singleContext: FContext[Boolean] = new JNumIndexCheckContext(false) + val arrayContext: FContext[Boolean] = new JNumIndexCheckContext(false) + val objectContext: FContext[Boolean] = new JNumIndexCheckContext(true) + + def jnull(): Boolean = true + def jfalse(): Boolean = true + def jtrue(): Boolean = true + def jnum(s: CharSequence, decIndex: Int, expIndex: Int): Boolean = { + val input = s.toString + val inputDecIndex = input.indexOf('.') + val inputExpIndex = if (input.indexOf('e') == -1) input.indexOf("E") else input.indexOf('e') + + decIndex == inputDecIndex && expIndex == inputExpIndex + } + def jstring(s: CharSequence): Boolean = true + } + + property("jnum provides the correct indices with parseFromString") { + forAll { (value: BigDecimal) => + val json = s"""{ "num": ${value.toString} }""" + Parser.parseFromString(json)(JNumIndexCheckFacade) shouldBe Success(true) + } + } + + property("jnum 
provides the correct indices with parseFromByteBuffer") { + forAll { (value: BigDecimal) => + val json = s"""{ "num": ${value.toString} }""" + val bb = ByteBuffer.wrap(json.getBytes("UTF-8")) + Parser.parseFromByteBuffer(bb)(JNumIndexCheckFacade) shouldBe Success(true) + } + } + + property("jnum provides the correct indices with parseFromFile") { + forAll { (value: BigDecimal) => + val json = s"""{ "num": ${value.toString} }""" + TestUtil.withTemp(json) { t => + Parser.parseFromFile(t)(JNumIndexCheckFacade) shouldBe Success(true) + } + } + } + + property("jnum provides the correct indices at the top level with parseFromString") { + forAll { (value: BigDecimal) => + Parser.parseFromString(value.toString)(JNumIndexCheckFacade) shouldBe Success(true) + } + } + + property("jnum provides the correct indices at the top level with parseFromByteBuffer") { + forAll { (value: BigDecimal) => + val bb = ByteBuffer.wrap(value.toString.getBytes("UTF-8")) + Parser.parseFromByteBuffer(bb)(JNumIndexCheckFacade) shouldBe Success(true) + } + } + + property("jnum provides the correct indices at the top level with parseFromFile") { + forAll { (value: BigDecimal) => + TestUtil.withTemp(value.toString) { t => + Parser.parseFromFile(t)(JNumIndexCheckFacade) shouldBe Success(true) + } + } + } +} diff --git a/parser/src/test/scala/jawn/SyntaxCheck.scala b/parser/src/test/scala/jawn/SyntaxCheck.scala index 6fe3295..fd00c26 100644 --- a/parser/src/test/scala/jawn/SyntaxCheck.scala +++ b/parser/src/test/scala/jawn/SyntaxCheck.scala @@ -62,12 +62,15 @@ import java.nio.ByteBuffer def isValidSyntax(s: String): Boolean = { + val cs = java.nio.CharBuffer.wrap(s.toCharArray) + val r0 = Parser.parseFromCharSequence(cs)(NullFacade).isSuccess val r1 = Parser.parseFromString(s)(NullFacade).isSuccess val bb = ByteBuffer.wrap(s.getBytes("UTF-8")) val r2 = Parser.parseFromByteBuffer(bb)(NullFacade).isSuccess + if (r0 == r1) r1 else sys.error(s"CharSequence/String parsing disagree($r0, $r1): $s") if (r1 == 
r2) r1 else sys.error(s"String/ByteBuffer parsing disagree($r1, $r2): $s") - Util.withTemp(s) { t => + TestUtil.withTemp(s) { t => Parser.parseFromFile(t)(NullFacade).isSuccess } diff --git a/parser/src/test/scala/jawn/TestUtil.scala b/parser/src/test/scala/jawn/TestUtil.scala new file mode 100644 index 0000000..64b8dd5 --- /dev/null +++ b/parser/src/test/scala/jawn/TestUtil.scala @@ -0,0 +1,18 @@ +package jawn +package parser + +import java.io._ + +object TestUtil { + def withTemp[A](s: String)(f: File => A): A = { + val t = File.createTempFile("jawn-syntax", ".json") + val pw = new PrintWriter(t) + pw.println(s) + pw.close() + try { + f(t) + } finally { + t.delete() + } + } +} diff --git a/parser/src/test/scala/jawn/Util.scala b/parser/src/test/scala/jawn/Util.scala deleted file mode 100644 index c503ef5..0000000 --- a/parser/src/test/scala/jawn/Util.scala +++ /dev/null @@ -1,18 +0,0 @@ -package jawn -package parser - -import java.io._ - -object Util { - def withTemp[A](s: String)(f: File => A): A = { - val t = File.createTempFile("jawn-syntax", ".json") - val pw = new PrintWriter(t) - pw.println(s) - pw.close() - try { - f(t) - } finally { - t.delete() - } - } -} diff --git a/project/build.properties b/project/build.properties index a6e117b..64317fd 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.13.8 +sbt.version=0.13.15 diff --git a/project/plugins.sbt b/project/plugins.sbt index dc0eb4d..618876a 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,6 +1,6 @@ -addSbtPlugin("com.eed3si9n" % "sbt-doge" % "0.1.5") -addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0") -addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.0") -addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "0.5.0") - -addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.1.15") +addSbtPlugin("com.eed3si9n" % "sbt-doge" % "0.1.5") +addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.25") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % 
"0.1.14") +addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.1") +addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.5") +addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "1.1") diff --git a/support/argonaut/src/main/scala/Parser.scala b/support/argonaut/src/main/scala/Parser.scala index 886ea8f..0c57e4d 100644 --- a/support/argonaut/src/main/scala/Parser.scala +++ b/support/argonaut/src/main/scala/Parser.scala @@ -10,13 +10,14 @@ def jnull() = Json.jNull def jfalse() = Json.jFalse def jtrue() = Json.jTrue - def jnum(s: String) = Json.jNumber(JsonNumber.unsafeDecimal(s)) - def jint(s: String) = Json.jNumber(JsonNumber.unsafeDecimal(s)) - def jstring(s: String) = Json.jString(s) + + def jnum(s: CharSequence, decIndex: Int, expIndex: Int) = + Json.jNumber(JsonNumber.unsafeDecimal(s.toString)) + def jstring(s: CharSequence) = Json.jString(s.toString) def singleContext() = new FContext[Json] { var value: Json = null - def add(s: String) { value = jstring(s) } + def add(s: CharSequence) { value = jstring(s) } def add(v: Json) { value = v } def finish: Json = value def isObj: Boolean = false @@ -24,7 +25,7 @@ def arrayContext() = new FContext[Json] { val vs = mutable.ListBuffer.empty[Json] - def add(s: String) { vs += jstring(s) } + def add(s: CharSequence) { vs += jstring(s) } def add(v: Json) { vs += v } def finish: Json = Json.jArray(vs.toList) def isObj: Boolean = false @@ -33,8 +34,8 @@ def objectContext() = new FContext[Json] { var key: String = null var vs = JsonObject.empty - def add(s: String): Unit = - if (key == null) { key = s } else { vs = vs + (key, jstring(s)); key = null } + def add(s: CharSequence): Unit = + if (key == null) { key = s.toString } else { vs = vs + (key, jstring(s)); key = null } def add(v: Json): Unit = { vs = vs + (key, v); key = null } def finish = Json.jObject(vs) diff --git a/support/json4s/src/main/scala/Parser.scala b/support/json4s/src/main/scala/Parser.scala index 3369613..e552621 100644 --- 
a/support/json4s/src/main/scala/Parser.scala +++ b/support/json4s/src/main/scala/Parser.scala @@ -4,21 +4,31 @@ import scala.collection.mutable import org.json4s.JsonAST._ -object Parser extends SupportParser[JValue] { +object Parser extends Parser(false, false) + +class Parser(useBigDecimalForDouble: Boolean, useBigIntForLong: Boolean) extends SupportParser[JValue] { implicit val facade: Facade[JValue] = new Facade[JValue] { def jnull() = JNull def jfalse() = JBool(false) def jtrue() = JBool(true) - def jnum(s: String) = JDouble(java.lang.Double.parseDouble(s)) - def jint(s: String) = JInt(java.lang.Integer.parseInt(s)) - def jstring(s: String) = JString(s) + + def jnum(s: CharSequence, decIndex: Int, expIndex: Int) = + if (decIndex == -1 && expIndex == -1) { + if (useBigIntForLong) JInt(BigInt(s.toString)) + else JLong(util.parseLongUnsafe(s)) + } else { + if (useBigDecimalForDouble) JDecimal(BigDecimal(s.toString)) + else JDouble(s.toString.toDouble) + } + + def jstring(s: CharSequence) = JString(s.toString) def singleContext() = new FContext[JValue] { var value: JValue = null - def add(s: String) { value = jstring(s) } + def add(s: CharSequence) { value = jstring(s) } def add(v: JValue) { value = v } def finish: JValue = value def isObj: Boolean = false @@ -27,7 +37,7 @@ def arrayContext() = new FContext[JValue] { val vs = mutable.ListBuffer.empty[JValue] - def add(s: String) { vs += jstring(s) } + def add(s: CharSequence) { vs += jstring(s) } def add(v: JValue) { vs += v } def finish: JValue = JArray(vs.toList) def isObj: Boolean = false @@ -37,8 +47,8 @@ new FContext[JValue] { var key: String = null val vs = mutable.ListBuffer.empty[JField] - def add(s: String): Unit = - if (key == null) key = s + def add(s: CharSequence): Unit = + if (key == null) key = s.toString else { vs += JField(key, jstring(s)); key = null } def add(v: JValue): Unit = { vs += JField(key, v); key = null } diff --git a/support/play/src/main/scala/Parser.scala 
b/support/play/src/main/scala/Parser.scala index a0ad428..1bca206 100644 --- a/support/play/src/main/scala/Parser.scala +++ b/support/play/src/main/scala/Parser.scala @@ -10,9 +10,10 @@ def jnull() = JsNull def jfalse() = JsBoolean(false) def jtrue() = JsBoolean(true) - def jnum(s: String) = JsNumber(BigDecimal(s)) - def jint(s: String) = JsNumber(BigDecimal(s)) - def jstring(s: String) = JsString(s) + + def jnum(s: CharSequence, decIndex: Int, expIndex: Int) = JsNumber(BigDecimal(s.toString)) + def jstring(s: CharSequence) = JsString(s.toString) + def jarray(vs: List[JsValue]) = JsArray(vs) def jobject(vs: Map[String, JsValue]) = JsObject(vs) } diff --git a/support/rojoma/src/main/scala/Parser.scala b/support/rojoma/src/main/scala/Parser.scala index 020b34f..c0725ea 100644 --- a/support/rojoma/src/main/scala/Parser.scala +++ b/support/rojoma/src/main/scala/Parser.scala @@ -10,9 +10,8 @@ def jnull() = JNull def jfalse() = JBoolean.canonicalFalse def jtrue() = JBoolean.canonicalTrue - def jnum(s: String) = JNumber(BigDecimal(s)) - def jint(s: String) = JNumber(BigDecimal(s)) - def jstring(s: String) = JString(s) + def jnum(s: CharSequence, decIndex: Int, expIndex: Int) = JNumber(BigDecimal(s.toString)) + def jstring(s: CharSequence) = JString(s.toString) def jarray(vs: mutable.ArrayBuffer[JValue]) = JArray(vs) def jobject(vs: mutable.Map[String, JValue]) = JObject(vs) } diff --git a/support/rojoma-v3/src/main/scala/Parser.scala b/support/rojoma-v3/src/main/scala/Parser.scala index da9f9fc..c031e71 100644 --- a/support/rojoma-v3/src/main/scala/Parser.scala +++ b/support/rojoma-v3/src/main/scala/Parser.scala @@ -10,9 +10,8 @@ def jnull() = JNull def jfalse() = JBoolean.canonicalFalse def jtrue() = JBoolean.canonicalTrue - def jnum(s: String) = JNumber.unsafeFromString(s) - def jint(s: String) = JNumber.unsafeFromString(s) - def jstring(s: String) = JString(s) + def jnum(s: CharSequence, decIndex: Int, expIndex: Int) = JNumber.unsafeFromString(s.toString) + def 
jstring(s: CharSequence) = JString(s.toString) def jarray(vs: mutable.ArrayBuffer[JValue]) = JArray(vs) def jobject(vs: mutable.Map[String, JValue]) = JObject(vs) } diff --git a/support/spray/src/main/scala/Parser.scala b/support/spray/src/main/scala/Parser.scala index 008a461..2e58966 100644 --- a/support/spray/src/main/scala/Parser.scala +++ b/support/spray/src/main/scala/Parser.scala @@ -9,9 +9,8 @@ def jnull() = JsNull def jfalse() = JsFalse def jtrue() = JsTrue - def jnum(s: String) = JsNumber(s) - def jint(s: String) = JsNumber(s) - def jstring(s: String) = JsString(s) + def jnum(s: CharSequence, decIndex: Int, expIndex: Int) = JsNumber(s.toString) + def jstring(s: CharSequence) = JsString(s.toString) def jarray(vs: List[JsValue]) = JsArray(vs: _*) def jobject(vs: Map[String, JsValue]) = JsObject(vs) } diff --git a/util/src/main/scala/jawn/util/InvalidLong.scala b/util/src/main/scala/jawn/util/InvalidLong.scala new file mode 100644 index 0000000..adffb97 --- /dev/null +++ b/util/src/main/scala/jawn/util/InvalidLong.scala @@ -0,0 +1,7 @@ +package jawn.util + +class InvalidLong(s: String) extends NumberFormatException(s"For input string '$s'") + +object InvalidLong { + def apply(s: String): InvalidLong = new InvalidLong(s) +} diff --git a/util/src/main/scala/jawn/util/Slice.scala b/util/src/main/scala/jawn/util/Slice.scala new file mode 100644 index 0000000..93a8159 --- /dev/null +++ b/util/src/main/scala/jawn/util/Slice.scala @@ -0,0 +1,95 @@ +package jawn.util + +/** + * Character sequence representing a lazily-calculated substring. + * + * This class has three constructors: + * + * - Slice(s) wraps a string, ensuring that future operations + * (e.g. subSequence) will construct slices instead of strings. + * + * - Slice(s, start, limit) is the default, and ensures that: + * + * 1. start >= 0 + * 2. limit >= start + * 3. limit <= s.length + * + * - Slice.unsafe(s, start, limit) is for situations where the above + * bounds-checking has already occurred. 
Only use this if you are + * absolutely sure your arguments satisfy the above invariants. + * + * Slice's subSequence returns another slice. This means that when + * wrapping a very large string, garbage collection on the underlying + * string will not occur until all slices are freed. + * + * Slice's universal equality is only defined with regard to other + * slices. This means comparing a Slice with other CharSequence values + * (including String) will always return false. + * + * Slices are serializable. However! They use the default Java + * serialization layout, which is not that efficient, and could be a + * disaster in cases where a large shared string might be serialized + * many times in different slices. + */ +@SerialVersionUID(1L) +final class Slice private[jawn] (s: String, start: Int, limit: Int) extends CharSequence with Serializable { + + final val length: Int = + limit - start + + def charAt(i: Int): Char = + if (i < 0 || length <= i) throw new StringIndexOutOfBoundsException(s"index out of range: $i") + else s.charAt(start + i) + + def subSequence(i: Int, j: Int): Slice = + Slice(s, start + i, start + j) + + override def toString: String = + s.substring(start, limit) + + override def equals(that: Any): Boolean = + that match { + case t: AnyRef if this eq t => + true + case slice: Slice => + if (length != slice.length) return false + var i: Int = 0 + while (i < length) { + if (charAt(i) != slice.charAt(i)) return false + i += 1 + } + true + case _ => + false + } + + override def hashCode: Int = { + var hash: Int = 0x90decade + var i: Int = start + while (i < limit) { + hash = s.charAt(i) + (hash * 103696301) // prime + i += 1 + } + hash + } +} + +object Slice { + + val Empty: Slice = Slice("", 0, 0) + + def empty: Slice = Empty + + def apply(s: String): Slice = + new Slice(s, 0, s.length) + + def apply(s: String, start: Int, limit: Int): Slice = + if (start < 0 || limit < start || s.length < limit) { + throw new IndexOutOfBoundsException(s"invalid 
slice: start=$start, limit=$limit, length=${s.length}") + } else { + new Slice(s, start, limit) + } + + def unsafe(s: String, start: Int, limit: Int): Slice = + new Slice(s, start, limit) +} diff --git a/util/src/main/scala/jawn/util/package.scala b/util/src/main/scala/jawn/util/package.scala new file mode 100644 index 0000000..08f7ae3 --- /dev/null +++ b/util/src/main/scala/jawn/util/package.scala @@ -0,0 +1,96 @@ +package jawn + +package object util { + + /** + * Parse the given character sequence as a single Long value (64-bit + * signed integer) in decimal (base-10). + * + * Other than "0", leading zeros are not allowed, nor are leading + * plusses. At most one leading minus is allowed. The value "-0" is + * allowed, and is interpreted as 0. + * + * Stated more precisely, accepted values: + * + * - conform to the pattern: -?(0|([1-9][0-9]*)) + * - are within [-9223372036854775808, 9223372036854775807] + * + * This method will throw an `InvalidLong` exception on invalid + * input. + */ + def parseLong(cs: CharSequence): Long = { + + // we store the inverse of the positive sum, to ensure we don't + // incorrectly overflow on Long.MinValue. for positive numbers + // this inverse sum will be inverted before being returned. 
+    var inverseSum: Long = 0L
+    var inverseSign: Long = -1L
+    var i: Int = 0
+
+    if (cs.charAt(0) == '-') {
+      inverseSign = 1L
+      i = 1
+    }
+
+    val len = cs.length
+    val size = len - i
+    if (i >= len) throw InvalidLong(cs.toString)
+    if (size > 19) throw InvalidLong(cs.toString)
+    if (cs.charAt(i) == '0' && size > 1) throw InvalidLong(cs.toString)
+
+    while (i < len) {
+      val digit = cs.charAt(i).toInt - 48
+      if (digit < 0 || 9 < digit) throw InvalidLong(cs.toString)
+      inverseSum = inverseSum * 10L - digit
+      i += 1
+    }
+
+    // detect and throw on overflow
+    if (size == 19 && (inverseSum >= 0 || (inverseSum == Long.MinValue && inverseSign < 0))) {
+      throw InvalidLong(cs.toString)
+    }
+
+    inverseSum * inverseSign
+  }
+
+  /**
+   * Parse the given character sequence as a single Long value (64-bit
+   * signed integer) in decimal (base-10).
+   *
+   * For valid inputs, this method produces the same values as
+   * `parseLong`. However, by avoiding input validation it is up to
+   * 50% faster.
+   *
+   * For inputs which `parseLong` throws an error on,
+   * `parseLongUnsafe` may (or may not) throw an error, or return a
+   * bogus value. This method makes no guarantees about how it handles
+   * invalid input.
+   *
+   * This method should only be used on sequences which have already
+   * been parsed (e.g. by a Jawn parser). When in doubt, use
+   * `parseLong(cs)`, which is still significantly faster than
+   * `java.lang.Long.parseLong(cs.toString)`.
+   */
+  def parseLongUnsafe(cs: CharSequence): Long = {
+
+    // we store the inverse of the positive sum, to ensure we don't
+    // incorrectly overflow on Long.MinValue. for positive numbers
+    // this inverse sum will be inverted before being returned.
+    var inverseSum: Long = 0L
+    var inverseSign: Long = -1L
+    var i: Int = 0
+
+    if (cs.charAt(0) == '-') {
+      inverseSign = 1L
+      i = 1
+    }
+
+    val len = cs.length
+    while (i < len) {
+      inverseSum = inverseSum * 10L - (cs.charAt(i).toInt - 48)
+      i += 1
+    }
+
+    inverseSum * inverseSign
+  }
+}
diff --git a/util/src/test/scala/jawn/util/ParseLongCheck.scala b/util/src/test/scala/jawn/util/ParseLongCheck.scala
new file mode 100644
index 0000000..69c4a0e
--- /dev/null
+++ b/util/src/test/scala/jawn/util/ParseLongCheck.scala
@@ -0,0 +1,78 @@
+package jawn
+package util
+
+import org.scalatest._
+import prop._
+import org.scalacheck._
+
+import scala.util._
+
+class ParseLongCheck extends PropSpec with Matchers with PropertyChecks {
+
+  case class UniformLong(value: Long)
+
+  object UniformLong {
+    implicit val arbitraryUniformLong: Arbitrary[UniformLong] =
+      Arbitrary(Gen.choose(Long.MinValue, Long.MaxValue).map(UniformLong(_)))
+  }
+
+  property("both parsers accept on valid input") {
+    forAll { (n0: UniformLong, prefix: String, suffix: String) =>
+      val n = n0.value
+      val payload = n.toString
+      val s = prefix + payload + suffix
+      val i = prefix.length
+      val cs = s.subSequence(i, payload.length + i)
+      cs.toString shouldBe payload
+      parseLong(cs) shouldBe n
+      parseLongUnsafe(cs) shouldBe n
+    }
+
+    forAll { (s: String) =>
+      Try(parseLong(s)) match {
+        case Success(n) => parseLongUnsafe(s) shouldBe n
+        case Failure(_) => succeed
+      }
+    }
+  }
+
+  property("safe parser fails on invalid input") {
+    forAll { (n: Long, m: Long, suffix: String) =>
+      // NOTE: when parseLong rejects a string we cannot demand that
+      // String#toLong rejects it as well, because the JDK parser is
+      // more lenient (it accepts "+1" and "00", for example). What
+      // must hold in the failure branches below is that parseLong
+      // never rejects the canonical decimal rendering of a Long, i.e.
+      // a string that round-trips unchanged through toLong/toString.
+      val s1 = n.toString + suffix
+      Try(parseLong(s1)) match {
+        case Success(parsed) => parsed shouldBe s1.toLong
+        case Failure(_) => Try(s1.toLong.toString) should not be (Success(s1))
+      }
+
+      val s2 = n.toString + (m & 0x7fffffffffffffffL).toString
+      Try(parseLong(s2)) match {
+        case Success(parsed) => parsed shouldBe s2.toLong
+        case Failure(_) => Try(s2.toLong.toString) should not be (Success(s2))
+      }
+    }
+
+    Try(parseLong("9223372036854775807")) shouldBe Try(Long.MaxValue)
+    Try(parseLong("-9223372036854775808")) shouldBe Try(Long.MinValue)
+    Try(parseLong("-0")) shouldBe Try(0L)
+
+    assert(Try(parseLong("")).isFailure)
+    assert(Try(parseLong("+0")).isFailure)
+    assert(Try(parseLong("00")).isFailure)
+    assert(Try(parseLong("01")).isFailure)
+    assert(Try(parseLong("+1")).isFailure)
+    assert(Try(parseLong("-")).isFailure)
+    assert(Try(parseLong("--1")).isFailure)
+    assert(Try(parseLong("9223372036854775808")).isFailure)
+    assert(Try(parseLong("-9223372036854775809")).isFailure)
+  }
+
+  // NOTE: parseLongUnsafe is not guaranteed to crash, or do anything
+  // predictable, on invalid input, so we don't test this direction.
+  // Its "unsafe" suffix is there for a reason.
+}
diff --git a/util/src/test/scala/jawn/util/SliceCheck.scala b/util/src/test/scala/jawn/util/SliceCheck.scala
new file mode 100644
index 0000000..b56e105
--- /dev/null
+++ b/util/src/test/scala/jawn/util/SliceCheck.scala
@@ -0,0 +1,131 @@
+package jawn
+package util
+
+import org.scalatest._
+import prop._
+import org.scalacheck._
+
+import Arbitrary.arbitrary
+
+import scala.util._
+
+class SliceCheck extends PropSpec with Matchers with PropertyChecks {
+
+  val genSlice: Gen[Slice] = {
+    val g = arbitrary[String]
+    def c(start: Int, end: Int): Gen[Int] =
+      if (end <= start) Gen.const(start)
+      else Gen.choose(start, end)
+    Gen.oneOf(
+      g.map(Slice(_)),
+      for { s <- g; n = s.length; i <- c(0, n) } yield Slice(s, i, n),
+      for { s <- g; n = s.length; j <- c(0, n) } yield Slice(s, 0, j),
+      for { s <- g; n = s.length; i <- c(0, n); j <- c(i, n) } yield Slice(s, i, j))
+  }
+
+  implicit val arbitrarySlice: Arbitrary[Slice] =
+    Arbitrary(genSlice)
+
+  def tryEqual[A](got0: => A, expected0: => A): Unit = {
+    val got = Try(got0)
+    val expected = Try(expected0)
+    got match {
+      case Success(_) => got shouldBe expected
+      case Failure(_) => assert(expected.isFailure)
+    }
+  }
+
+  property("Slice(s, i, j) ~ s.substring(i, j)") {
+    forAll { (s: String, i: Int, j: Int) =>
+      tryEqual(
+        Slice(s, i, j).toString,
+        s.substring(i, j))
+    }
+  }
+
+  property("Slice(s, i, j).charAt(k) ~ s.substring(i, j).charAt(k)") {
+    forAll { (s: String, i: Int, j: Int, k: Int) =>
+      tryEqual(
+        Slice(s, i, j).charAt(k),
+        s.substring(i, j).charAt(k))
+    }
+  }
+
+  property("slice.length >= 0") {
+    forAll { (cs: Slice) =>
+      cs.length should be >= 0
+    }
+  }
+
+  property("slice.charAt(i) ~ slice.toString.charAt(i)") {
+    forAll { (cs: Slice, i: Int) =>
+      tryEqual(
+        cs.charAt(i),
+        cs.toString.charAt(i))
+    }
+  }
+
+  property("Slice(s, i, j).subSequence(k, l) ~ s.substring(i, j).substring(k, l)") {
+    forAll { (s: String, i: Int, j: Int, k: Int, l: Int) =>
+      tryEqual(
+        Slice(s, i, j).subSequence(k, l).toString,
+        s.substring(i, j).substring(k, l))
+    }
+  }
+
+  property("Slice(s) ~ Slice(s, 0, s.length)") {
+    forAll { (s: String) =>
+      tryEqual(
+        Slice(s).toString,
+        Slice(s, 0, s.length).toString)
+    }
+  }
+
+  property("Slice(s, i, j) => Slice.unsafe(s, i, j)") {
+    forAll { (s: String, i: Int, j: Int) =>
+      Try(Slice(s, i, j).toString) match {
+        case Success(r) => r shouldBe Slice.unsafe(s, i, j).toString
+        case Failure(_) => succeed
+      }
+    }
+  }
+
+  property("x == x") {
+    forAll { (x: Slice) => x shouldBe x }
+  }
+
+  property("(x == y) = (x.toString == y.toString)") {
+    forAll { (x: Slice, y: Slice) =>
+      (x == y) shouldBe (x.toString == y.toString)
+    }
+  }
+
+  property("(x == y) -> (x.## == y.##)") {
+    forAll { (x: Slice, y: Slice) =>
+      if (x == y) x.## shouldBe y.##
+      else (x.## == y.##) shouldBe false
+    }
+  }
+
+  property("x == Slice(x.toString)") {
+    forAll { (x: Slice) =>
+      Slice(x.toString) shouldBe x
+    }
+  }
+
+  property("slice is serializable") {
+    import java.io._
+
+    forAll { (x: Slice) =>
+      val baos = new ByteArrayOutputStream()
+      val oos = new ObjectOutputStream(baos)
+      oos.writeObject(x)
+      oos.close()
+      val bytes = baos.toByteArray
+      val bais = new ByteArrayInputStream(bytes)
+      val ois = new ObjectInputStream(bais)
+      Try(ois.readObject()) shouldBe Try(x)
+      ois.close()
+    }
+  }
+}
diff --git a/version.sbt b/version.sbt
index db3a659..bd11770 100644
--- a/version.sbt
+++ b/version.sbt
@@ -1 +1 @@
-version in ThisBuild := "0.10.4"
\ No newline at end of file
+version in ThisBuild := "0.11.1"