Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support BQ Json arrays and literals #5544

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ ThisBuild / mimaBinaryIssueFilters ++= Seq(
ProblemFilters.exclude[MissingTypesProblem](
"com.spotify.scio.bigquery.types.package$Json$"
),
ProblemFilters.exclude[IncompatibleMethTypeProblem](
"com.spotify.scio.bigquery.types.package#Json.apply"
),
ProblemFilters.exclude[IncompatibleResultTypeProblem](
"com.spotify.scio.bigquery.types.package#Json.parse"
),
// tf-metadata upgrade
ProblemFilters.exclude[Problem](
"org.tensorflow.metadata.v0.*"
Expand Down Expand Up @@ -1030,6 +1036,7 @@ lazy val `scio-google-cloud-platform` = project
libraryDependencies ++= Seq(
// compile
"com.esotericsoftware" % "kryo-shaded" % kryoVersion,
"com.fasterxml.jackson.core" % "jackson-core" % jacksonVersion,
"com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion,
"com.fasterxml.jackson.datatype" % "jackson-datatype-joda" % jacksonVersion,
"com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % jacksonVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,20 @@ object TypedBigQueryIT {
y <- Gen.numChar
} yield Geography(s"POINT($x $y)")
)
// NOTE(review): this is the REMOVED (pre-change) generator kept by the diff
// scrape; the added replacement appears immediately below. It only ever
// produced single-key JSON objects with alphabetic keys/values,
// e.g. {"abc":"xyz"} — no arrays or scalar literals.
implicit val arbJson: Arbitrary[Json] = Arbitrary(
for {
key <- Gen.alphaStr
value <- Gen.alphaStr
} yield Json(s"""{"$key":"$value"}""")
)
// ADDED generator: exercises the widened Json support by producing one of
// several JSON shapes — an object, an array, a string literal, a number
// literal, or a boolean literal — each rendered as its wire (WKT) string.
implicit val arbJson: Arbitrary[Json] = Arbitrary {
import Arbitrary._
import Gen._
Gen
.oneOf(
// single-key object, e.g. {"abc":42}
alphaLowerStr.flatMap(str => arbInt.arbitrary.map(num => s"""{"$str":$num}""")),
// two-element array, e.g. ["abc",42]
alphaLowerStr.flatMap(str => arbInt.arbitrary.map(num => s"""["$str",$num]""")),
// quoted string literal, e.g. "abc"
alphaLowerStr.map(str => s"\"$str\""),
// bare number literal
arbInt.arbitrary.map(_.toString),
// bare boolean literal
arbBool.arbitrary.map(_.toString)
// Gen.const("null"), null json literal is lost, interpreted as missing field
)
.map(wkt => Json(wkt))
}

implicit val arbBigNumeric: Arbitrary[BigNumeric] = Arbitrary {
// Precision: 76.76 (the 77th digit is partial)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,17 @@ object TableRowOps {
}

// Coerces a raw TableRow field value into a Json wrapper.
// NOTE(review): this span is diff residue — the first four cases (Json /
// TableRow / String / throw) are the REMOVED version and the cases after
// them are the ADDED version; as flattened text the later cases are
// unreachable duplicates. Only one case list exists in the real file.
def json(value: AnyRef): Json = value match {
case x: Json => x
case x: TableRow => Json(x)
case x: String => Json(x)
case _ => throw new UnsupportedOperationException("Cannot convert to json: " + value)
case x: Json => x
// literals
case null => Json(null)
case x: java.lang.Number => Json(x)
case x: java.lang.Boolean => Json(x)
case x: java.lang.String => Json(x) // also handles json string
// object
case x: java.util.Map[_, _] => Json(x)
// array
case x: java.util.List[_] => Json(x)
case _ => throw new UnsupportedOperationException("Cannot convert to json: " + value)
}

def bignumeric(value: AnyRef): BigNumeric = value match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ import org.typelevel.scalaccompat.annotation.nowarn

import java.math.MathContext
import java.nio.ByteBuffer
import scala.annotation.StaticAnnotation
import scala.annotation.{unused, StaticAnnotation}
import scala.util.Try

package object types {

/**
Expand Down Expand Up @@ -63,17 +65,29 @@ package object types {
* @param wkt
* Well Known Text formatted string that BigQuery displays for Json
*/
case class Json(wkt: String)
case class Json private (wkt: String)
object Json {
implicit val jsonCoder: Coder[Json] =
Coder.xmap(Coder[String])(new Json(_), _.wkt)

// Use same mapper as the TableRowJsonCoder
// Shared Jackson ObjectMapper for serializing/parsing Json values; Joda and
// java.time modules registered so date fields round-trip as ISO strings
// rather than epoch timestamps.
private lazy val mapper = new ObjectMapper()
.registerModule(new JavaTimeModule())
.registerModule(new JodaModule())
.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS)
.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);

def apply(row: TableRow): Json = Json(mapper.writeValueAsString(row))
def parse(json: Json): TableRow = mapper.readValue(json.wkt, classOf[TableRow])
// force to use the apply(AnyRef)
@unused private def apply(wkt: String): Json = new Json(wkt)

// Builds a Json from an arbitrary value: a String that already parses as
// JSON is taken verbatim as the wire text; anything else (maps, lists,
// numbers, booleans, null, or non-JSON strings) is serialized via Jackson.
// NOTE(review): a non-JSON String falls through to writeValueAsString and
// so becomes a quoted JSON string literal — intentional per the diff.
def apply(value: AnyRef): Json = value match {
case str: String if Try(mapper.readTree(str)).isSuccess =>
// string formatted json vs string literal
new Json(str)
case _ =>
new Json(mapper.writeValueAsString(value))
}
def parse(json: Json): AnyRef = mapper.readValue(json.wkt, classOf[Object])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def parse(json: Json): AnyRef = mapper.readValue(json.wkt, classOf[Object])
def parse(json: Json): AnyRef = json.wkt

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,22 @@ final class ConverterProviderSpec
.retryUntil(_.precision <= Numeric.MaxNumericPrecision)
.map(Numeric.apply)
}
// NOTE(review): REMOVED (pre-change) generator retained by the diff scrape;
// the added replacement follows below. Generated only single-key JSON
// objects, excluding the reserved TableRow key "f".
implicit val arbJson: Arbitrary[Json] = Arbitrary(
for {
// f is a key field from TableRow. It cannot be used as column name
// see https://github.com/apache/beam/issues/33531
key <- Gen.alphaStr.retryUntil(_ != "f")
value <- Gen.alphaStr
} yield Json(s"""{"$key":"$value"}""")
)

// ADDED generator: mirrors the TypedBigQueryIT generator — covers JSON
// objects, arrays, string/number/boolean literals as WKT strings.
implicit val arbJson: Arbitrary[Json] = Arbitrary {
import Arbitrary._
import Gen._
Gen
.oneOf(
// single-key object, e.g. {"abc":42}
alphaLowerStr.flatMap(str => arbInt.arbitrary.map(num => s"""{"$str":$num}""")),
// two-element array, e.g. ["abc",42]
alphaLowerStr.flatMap(str => arbInt.arbitrary.map(num => s"""["$str",$num]""")),
// quoted string literal
alphaLowerStr.map(str => s"\"$str\""),
// bare number literal
arbInt.arbitrary.map(_.toString),
// bare boolean literal
arbBool.arbitrary.map(_.toString)
// Gen.const("null"), null json literal is lost, interpreted as missing field
)
.map(wkt => Json(wkt))
}

implicit val eqByteArrays: Eq[Array[Byte]] = Eq.instance[Array[Byte]](_.toList == _.toList)
implicit val eqByteString: Eq[ByteString] = Eq.instance[ByteString](_ == _)
implicit val eqInstant: Eq[Instant] = Eq.instance[Instant](_ == _)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import org.joda.time.{Instant, LocalDate, LocalDateTime, LocalTime}
import org.scalatest.matchers.should.Matchers
import org.scalatest.flatspec.AnyFlatSpec

import scala.jdk.CollectionConverters._

class ConverterProviderTest extends AnyFlatSpec with Matchers {
import ConverterProviderTest._

Expand All @@ -49,13 +51,84 @@ class ConverterProviderTest extends AnyFlatSpec with Matchers {
}

// Round-trip test for a REQUIRED JSON column: each scoped block checks that
// both the WKT string form and the parsed Java form of a value convert to
// the same Json, and that toTableRow emits the parsed form.
// NOTE(review): the first five statements (wkt/parsed/new TableRow()...)
// are the REMOVED old test body left in by the diff scrape; the scoped
// blocks below are the ADDED version.
it should "handle required json type" in {
val wkt = """{"name":"Alice","age":30}"""
val parsed = new TableRow()
.set("name", "Alice")
.set("age", 30)

RequiredJson.fromTableRow(TableRow("a" -> parsed)) shouldBe RequiredJson(Json(wkt))
BigQueryType.toTableRow[RequiredJson](RequiredJson(Json(wkt))) shouldBe TableRow("a" -> parsed)
// JSON object
{
val wkt =
"""{"name":"Alice","age":30,"job":null,"address":{"street":"Broadway","city":"New York"}}"""
val parsed = Map(
"name" -> "Alice",
"age" -> 30,
"job" -> null,
"address" -> Map(
"street" -> "Broadway",
"city" -> "New York"
).asJava
).asJava

RequiredJson.fromTableRow(TableRow("a" -> wkt)) shouldBe RequiredJson(Json(wkt))
RequiredJson.fromTableRow(TableRow("a" -> parsed)) shouldBe RequiredJson(Json(wkt))
BigQueryType.toTableRow[RequiredJson](RequiredJson(Json(wkt))) shouldBe TableRow(
"a" -> parsed
)
}

// JSON array
{
val wkt = """["Alice",30,null,{"street":"Broadway","city":"New York"}]"""
val parsed = List(
"Alice",
30,
null,
Map(
"street" -> "Broadway",
"city" -> "New York"
).asJava
).asJava

RequiredJson.fromTableRow(TableRow("a" -> wkt)) shouldBe RequiredJson(Json(wkt))
RequiredJson.fromTableRow(TableRow("a" -> parsed)) shouldBe RequiredJson(Json(wkt))
BigQueryType.toTableRow[RequiredJson](RequiredJson(Json(wkt))) shouldBe TableRow(
"a" -> parsed
)
}

// JSON string literal
{
// wkt carries the quotes; parsed is the unquoted runtime String
val wkt = "\"Hello world!\""
val parsed = "Hello world!"

RequiredJson.fromTableRow(TableRow("a" -> wkt)) shouldBe RequiredJson(Json(wkt))
RequiredJson.fromTableRow(TableRow("a" -> parsed)) shouldBe RequiredJson(Json(wkt))
BigQueryType.toTableRow[RequiredJson](RequiredJson(Json(wkt))) shouldBe TableRow(
"a" -> parsed
)
}

// JSON boolean literal
{
val wkt = "false"
val parsed = false

RequiredJson.fromTableRow(TableRow("a" -> wkt)) shouldBe RequiredJson(Json(wkt))
RequiredJson.fromTableRow(TableRow("a" -> parsed)) shouldBe RequiredJson(Json(wkt))
BigQueryType.toTableRow[RequiredJson](RequiredJson(Json(wkt))) shouldBe TableRow(
"a" -> parsed
)
}

// JSON number literal
{
val wkt = "42"
val parsed = 42

RequiredJson.fromTableRow(TableRow("a" -> wkt)) shouldBe RequiredJson(Json(wkt))
RequiredJson.fromTableRow(TableRow("a" -> parsed)) shouldBe RequiredJson(Json(wkt))
BigQueryType.toTableRow[RequiredJson](RequiredJson(Json(wkt))) shouldBe TableRow(
"a" -> parsed
)
}

// JSON null literal is ambiguous with NULLABLE column
}

it should "handle required big numeric type" in {
Expand Down
Loading