Skip to content

Commit 576bb44

Browse files
authored
Gen Examples (#49)
* Avro example with Test * Cleanup unused code * Change intGen to arbitrary * Add TableRow example * Add comments to examples
1 parent 39b399d commit 576bb44

File tree

6 files changed

+370
-0
lines changed

6 files changed

+370
-0
lines changed

build.sbt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,21 @@ lazy val ratatoolScalacheck = project
203203
.enablePlugins(ProtobufPlugin)
204204
.dependsOn(ratatoolCommon % "compile->compile;test->test")
205205

206+
// Examples sub-project rooted at ratatool-examples/. It reuses the shared common and
// no-publish settings, adds the BigQuery API model classes, and builds on the
// ratatoolCommon and ratatoolScalacheck projects.
lazy val ratatoolExamples = (project in file("ratatool-examples"))
  .settings(commonSettings ++ noPublishSettings)
  .settings(
    name := "ratatool-examples",
    libraryDependencies ++= Seq("com.google.apis" % "google-api-services-bigquery" % bigqueryVersion)
  )
  .enablePlugins(ProtobufPlugin, PackPlugin)
  .dependsOn(ratatoolCommon, ratatoolScalacheck)
220+
206221
val root = project.in(file("."))
207222
.settings(commonSettings ++ noPublishSettings)
208223
.aggregate(
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"type": "record",
3+
"name": "ExampleRecord",
4+
"namespace": "com.spotify.ratatool.avro.specific",
5+
"doc": "Record for Examples",
6+
"fields": [
7+
{
8+
"name": "record_id",
9+
"type": "string",
10+
"doc": "Unique record UUID"
11+
},
12+
{
13+
"name": "independent_int_field",
14+
"type": "int"
15+
},
16+
{
17+
"name": "dependent_int_field",
18+
"type": "int",
19+
"doc": "If independent_int_field is 0, then Max Int, otherwise half of independent_int_field"
20+
},
21+
{
22+
"name": "independent_string_field",
23+
"type": "string",
24+
"doc": "Output result field that may sometimes contain error messages"
25+
},
26+
{
27+
"name": "dependent_enum_field",
28+
"type": {"name": "EnumField", "type": "enum", "symbols": ["Success", "Failure"]},
29+
"doc": "Is Failure if independent_string_field is an Exception instead of an output, otherwise Success"
30+
},
31+
{
32+
"name": "nested_record_field",
33+
"type": {
34+
"type": "record",
35+
"name": "NestedExampleRecord",
36+
"namespace": "com.spotify.ratatool.avro.specific",
37+
"doc": "Nested record for Examples",
38+
"fields": [
39+
{
40+
"name": "int_field",
41+
"type": "int"
42+
},
43+
{
44+
"name": "map_field",
45+
"type": {"type": "map", "values": "int"},
46+
"doc": "A map that contains 0-5 values"
47+
}
48+
]
49+
}
50+
},
51+
{
52+
"name": "bounded_double_field",
53+
"type": "double",
54+
"doc": "A double between -1.0 and 1.0"
55+
}
56+
]
57+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"fields": [
3+
{
4+
"mode": "NULLABLE",
5+
"name": "nullable_record",
6+
"type": "RECORD",
7+
"fields": [
8+
{
9+
"mode": "REPEATED",
10+
"name": "repeated_int_field",
11+
"type": "INTEGER",
12+
"description": "List of 3 Integers"
13+
},
14+
{
15+
"mode": "REQUIRED",
16+
"name": "frequency_string_field",
17+
"type": "STRING",
18+
"description": "One of (Foo, Bar, Fizz, Buzz), twice as likely to be Foo or Bar"
19+
}
20+
]
21+
},
22+
{
23+
"mode": "REQUIRED",
24+
"name": "required_record",
25+
"type": "RECORD",
26+
"fields": [
27+
{
28+
"mode": "REQUIRED",
29+
"name": "independent_string_field",
30+
"type": "STRING"
31+
},
32+
{
33+
"mode": "REQUIRED",
34+
"name": "dependent_bytes_field",
35+
"type": "BYTES",
36+
"description": "Byte representation of independent_string_field"
37+
}
38+
]
39+
}
40+
]
41+
}
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Copyright 2018 Spotify AB.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an
12+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
* KIND, either express or implied. See the License for the
14+
* specific language governing permissions and limitations
15+
* under the License.
16+
*/
17+
18+
package com.spotify.ratatool.examples
19+
20+
import java.util
21+
22+
import com.google.api.client.json.JsonObjectParser
23+
import com.google.api.client.json.jackson2.JacksonFactory
24+
import com.google.api.services.bigquery.model.{TableRow, TableSchema}
25+
import com.google.common.base.Charsets
26+
import com.spotify.ratatool.avro.specific.{EnumField, ExampleRecord, NestedExampleRecord}
27+
import com.spotify.ratatool.scalacheck._
28+
import org.apache.avro.util.Utf8
29+
import org.scalacheck.{Arbitrary, Gen}
30+
import scala.collection.JavaConverters._
31+
32+
object ExampleAvroGen {
  /** Arbitrary strings wrapped as Avro [[Utf8]] values; used as map keys below. */
  private val utfGen: Gen[Utf8] = Arbitrary.arbString.arbitrary.map(str => new Utf8(str))

  /** Pairs an arbitrary [[Utf8]] key with an arbitrary `Int` value. */
  private val kvGen: Gen[(Utf8, Int)] =
    utfGen.flatMap(key => Arbitrary.arbInt.arbitrary.map(value => (key, value)))

  /**
   * Generates a Java map built from 5 generated key/value pairs, so it holds at most
   * 5 entries (fewer when generated keys collide).
   */
  private val sizedMapGen: Gen[util.Map[CharSequence, java.lang.Integer]] =
    Gen.mapOfN(5, kvGen).map { entries =>
      val javaMap: util.Map[CharSequence, java.lang.Integer] =
        new util.HashMap[CharSequence, java.lang.Integer]()
      for ((key, value) <- entries) javaMap.put(key, value)
      javaMap
    }

  /** Nested record whose map field is replaced by a value from [[sizedMapGen]]. */
  private val nestedRecordGen: Gen[NestedExampleRecord] =
    specificRecordOf[NestedExampleRecord]
      .amend(sizedMapGen)(nested => mapValue => nested.setMapField(mapValue))

  /** Doubles chosen from the range -1.0 to 1.0. */
  private val boundedDoubleGen: Gen[Double] = Gen.chooseNum(-1.0, 1.0)

  /** Arbitrary integers. */
  private val intGen: Gen[Int] = Arbitrary.arbInt.arbitrary

  /** Alphanumeric text prefixed with a fixed exception marker. */
  private val errorGen: Gen[String] =
    Gen.alphaNumStr.map(message => "Exception: Ratatool Exception. " + message)

  /** Either a plain alphanumeric string or an error message from [[errorGen]]. */
  private val stringGen: Gen[String] = Gen.oneOf(Gen.alphaNumStr, errorGen)

  /**
   * This and dependentEnumFunc are used to produce fields based on criteria.
   *
   * Returns `Int.MaxValue` when `i` is 0, otherwise half of `i`.
   */
  private def dependentIntFunc(i: Int): Int = i match {
    case 0 => Int.MaxValue
    case nonZero => nonZero / 2
  }

  /** Returns `Failure` when `s` starts with "Exception", otherwise `Success`. */
  private def dependentEnumFunc(s: String): EnumField =
    if (s.startsWith("Exception")) EnumField.Failure else EnumField.Success

  /**
   * An example of generating avro data with specific requirements.
   *
   * See [[com.spotify.ratatool.avro.specific.ExampleRecord]] for documentation on field
   * requirements, dependencies, and bounds.
   */
  val exampleRecordGen: Gen[ExampleRecord] =
    specificRecordOf[ExampleRecord]
      .amend(nestedRecordGen)(record => nested => record.setNestedRecordField(nested))
      .amend(boundedDoubleGen)(record => bounded => record.setBoundedDoubleField(bounded))
      .amend(Gen.uuid.map(_.toString))(record => id => record.setRecordId(id))
      // Dependent fields are set according to the schema criteria. Each independent value
      // and the field derived from it are written in the same amend, from a single
      // generated value, so the pair stays consistent within a record.
      .amend(intGen)(record => independent => {
        record.setIndependentIntField(independent)
        record.setDependentIntField(dependentIntFunc(independent))
      })
      .amend(stringGen)(record => text => {
        record.setIndependentStringField(text)
        record.setDependentEnumField(dependentEnumFunc(text))
      })
}
108+
109+
object ExampleTableRowGen {
  /** BigQuery table schema parsed from the `/schema.json` classpath resource. */
  private val tableSchema = {
    val parser = new JsonObjectParser(new JacksonFactory)
    val schemaStream = this.getClass.getResourceAsStream("/schema.json")
    parser.parseAndClose(schemaStream, Charsets.UTF_8, classOf[TableSchema])
  }

  /** One of Foo/Bar (weight 2) or Fizz/Buzz (weight 1). */
  private val freqGen: Gen[String] = Gen.frequency(
    2 -> Gen.oneOf("Foo", "Bar"),
    1 -> Gen.oneOf("Fizz", "Buzz")
  )

  /** A Java list of exactly 3 arbitrary integers. */
  private val intListGen: Gen[java.util.List[Int]] =
    Gen.listOfN(3, Arbitrary.arbInt.arbitrary).map(ints => ints.asJava)

  /**
   * Nested record generator in which one field is derived from the other. Both values come
   * from the same generated string and are set in the same function so they stay consistent.
   */
  private val rrGen: Gen[TableRow] = Arbitrary.arbString.arbitrary.map { text =>
    new TableRow()
      .set("independent_string_field", text)
      .set("dependent_bytes_field", text.getBytes(Charsets.UTF_8))
  }

  /**
   * An example of generating BigQuery table row with specific requirements.
   *
   * See `resources/schema.json` for schema and documentation on requirements.
   */
  val tableRowGen: Gen[TableRow] =
    tableRowOf(tableSchema)
      .amend(rrGen)(row => row.set("required_record"))
      // nullable_record may be absent from a generated row, so tryAmend is used:
      // it fails silently when the record does not exist.
      .tryAmend(intListGen)(row => row.getRecord("nullable_record").set("repeated_int_field"))
      .tryAmend(freqGen)(row => row.getRecord("nullable_record").set("frequency_string_field"))
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright 2018 Spotify AB.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an
12+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
* KIND, either express or implied. See the License for the
14+
* specific language governing permissions and limitations
15+
* under the License.
16+
*/
17+
18+
package com.spotify.ratatool.examples
19+
20+
import java.util.UUID
21+
22+
import com.spotify.ratatool.avro.specific.{EnumField, ExampleRecord}
23+
import org.scalacheck.{Gen, Prop, Properties}
24+
import org.scalacheck.Prop.{AnyOperators, BooleanOperators, forAll}
25+
26+
import scala.collection.JavaConverters._
27+
28+
object ExampleAvroGenTest extends Properties("ExampleAvroGenerator") {
  /** Generator under test. */
  val gen: Gen[ExampleRecord] = ExampleAvroGen.exampleRecordGen

  // The record id must parse as a UUID and print back to the same text.
  property("round trips UUID") = forAll(gen) { record =>
    val id = record.getRecordId.toString
    UUID.fromString(id).toString ?= id
  }

  // The dependent int is Int.MaxValue when the independent int is 0, otherwise half of it.
  property("generates valid dependent int") = forAll(gen) { record =>
    val independent = record.getIndependentIntField
    val dependent = record.getDependentIntField
    val maxWhenZero = independent == 0 && dependent == Int.MaxValue
    val halfOtherwise = independent != 0 && dependent == independent / 2
    (maxWhenZero :| "Max if indep is 0") || (halfOtherwise :| "Half when indep is not 0")
  }

  // The enum is Failure exactly when the string field starts with "Exception".
  property("generates valid dependent enum") = forAll(gen) { record =>
    val isException = record.getIndependentStringField.toString.startsWith("Exception")
    val status = record.getDependentEnumField
    val failureOnException = isException && status == EnumField.Failure
    val successOtherwise = !isException && status == EnumField.Success
    (failureOnException :| "Is Failure on Exception") ||
      (successOtherwise :| "Is Success when non-Exception")
  }

  // The bounded double must lie within [-1.0, 1.0].
  property("double field within bounds") = forAll(gen) { record =>
    val bounded = record.getBoundedDoubleField
    bounded >= -1.0 && bounded <= 1.0
  }

  // The nested map must hold between 0 and 5 entries.
  property("map field size within bounds") = forAll(gen) { record =>
    val entryCount = record.getNestedRecordField.getMapField.asScala.size
    entryCount >= 0 && entryCount <= 5
  }
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright 2018 Spotify AB.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an
12+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
* KIND, either express or implied. See the License for the
14+
* specific language governing permissions and limitations
15+
* under the License.
16+
*/
17+
18+
package com.spotify.ratatool.examples
19+
20+
import com.google.api.services.bigquery.model.TableRow
21+
import com.spotify.ratatool.scalacheck._
22+
import org.scalacheck.{Gen, Properties}
23+
import org.scalacheck.Prop.{AnyOperators, forAll}
24+
25+
26+
object ExampleTableRowGenTest extends Properties("ExampleTableRowGenerator") {
  /** Generator under test. */
  val gen: Gen[TableRow] = ExampleTableRowGen.tableRowGen

  /** Batches of 1000 rows, large enough to compare value frequencies. */
  val listGen: Gen[List[TableRow]] = Gen.listOfN(1000, gen)

  // Across a batch, Foo/Bar must occur more often than Fizz/Buzz. Rows without a
  // nullable_record are skipped.
  property("generates Foo and Bar more frequently than Fizz and Buzz") = forAll(listGen) { l =>
    val stringFields: Seq[String] = l.flatMap { r =>
      Option(r.getRecord("nullable_record"))
        .map(_.get("frequency_string_field").asInstanceOf[String])
    }

    stringFields.count(s => s == "Foo" || s == "Bar") >
      stringFields.count(s => s == "Fizz" || s == "Buzz")
  }

  // dependent_bytes_field must decode back to independent_string_field.
  property("generates valid dependent bytes") = forAll(gen) { r =>
    val s = r.getRecord("required_record").get("independent_string_field").asInstanceOf[String]
    val b = r.getRecord("required_record").get("dependent_bytes_field").asInstanceOf[Array[Byte]]
    // The generator encodes with UTF-8, so decode with UTF-8 explicitly rather than the
    // platform default charset, which would make this property environment-dependent.
    new String(b, java.nio.charset.StandardCharsets.UTF_8) ?= s
  }
}

0 commit comments

Comments
 (0)