Skip to content

Commit dcd06cc

Browse files
committed
json: Add a tutorial
1 parent 390e288 commit dcd06cc

File tree

4 files changed

+286
-0
lines changed

4 files changed

+286
-0
lines changed

src/JSON/AST.dfy

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
module {:options "-functionSyntax:4"} JSON.AST {
22
datatype Decimal =
33
Decimal(n: int, e10: int) // (n) * 10^(e10)
4+
5+
function Int(n: int): Decimal {
6+
Decimal(n, 0)
7+
}
8+
49
datatype JSON =
510
| Null
611
| Bool(b: bool)

src/JSON/Tutorial.dfy

+269
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
// RUN: %dafny -compile:3 -runAllTests:1 "%s"
2+
/// # Using the JSON library
3+
4+
include "API.dfy"
5+
include "ZeroCopy/API.dfy"
6+
7+
/// This library offers two APIs: a high-level one and a low-level one.
8+
///
9+
/// ## High-level API
10+
11+
module {:options "-functionSyntax:4"} JSON.Examples.HighLevel {
12+
import API
13+
import Utils.Unicode
14+
import opened AST
15+
import opened Wrappers
16+
17+
/// The high-level API works with fairly simple ASTs that contain native Dafny
18+
/// strings:
19+
20+
method {:test} Test() {
21+
22+
/// Use `API.Deserialize` to deserialize a byte string.
23+
///
24+
/// For example, here is how to decode the JSON test `"[true]"`. (We need to
25+
/// convert from Dafny's native strings to byte strings because Dafny does not
26+
/// have syntax for byte strings; in a real application, we would be reading and
27+
/// writing raw bytes directly from disk or from the network instead).
28+
29+
var SIMPLE_JS := Unicode.Transcode16To8("[true]");
30+
var SIMPLE_AST := Array([Bool(true)]);
31+
expect API.Deserialize(SIMPLE_JS) == Success(SIMPLE_AST);
32+
33+
/// Here is a larger object, written using a verbatim string (with `@"`). In
34+
/// verbatim strings `""` represents a single double-quote character):
35+
36+
var CITIES_JS := Unicode.Transcode16To8(@"{
37+
""Cities"": [
38+
{
39+
""Name"": ""Boston"",
40+
""Founded"": 1630,
41+
""Population"": 689386,
42+
""Area (km2)"": 4584.2
43+
}, {
44+
""Name"": ""Rome"",
45+
""Founded"": -753,
46+
""Population"": 2.873e6,
47+
""Area (km2)"": 1285
48+
}, {
49+
""Name"": ""Paris"",
50+
""Founded"": null,
51+
""Population"": 2.161e6,
52+
""Area (km2)"": 2383.5
53+
}
54+
]
55+
}");
56+
var CITIES_AST := Object([
57+
("Cities", Array([
58+
Object([
59+
("Name", String("Boston")),
60+
("Founded", Number(Int(1630))),
61+
("Population", Number(Int(689386))),
62+
("Area (km2)", Number(Decimal(45842, -1)))
63+
]),
64+
Object([
65+
("Name", String("Rome")),
66+
("Founded", Number(Int(-753))),
67+
("Population", Number(Decimal(2873, 3))),
68+
("Area (km2)", Number(Int(1285)))
69+
]),
70+
Object([
71+
("Name", String("Paris")),
72+
("Founded", Null),
73+
("Population", Number(Decimal(2161, 3))),
74+
("Area (km2)", Number(Decimal(23835, -1)))
75+
])
76+
]))
77+
]);
78+
expect API.Deserialize(CITIES_JS) == Success(CITIES_AST);
79+
80+
/// Serialization works similarly, with `API.Serialize`. For this first example
81+
/// the generated string matches what we started with exactly:
82+
83+
expect API.Serialize(SIMPLE_AST) == Success(SIMPLE_JS);
84+
85+
/// For more complex object, the generated layout may not be exactly the same; note in particular how the representation of numbers and the whitespace have changed.
86+
87+
expect API.Serialize(CITIES_AST) == Success(Unicode.Transcode16To8(
88+
@"{""Cities"":[{""Name"":""Boston"",""Founded"":1630,""Population"":689386,""Area (km2)"":45842e-1},{""Name"":""Rome"",""Founded"":-753,""Population"":2873e3,""Area (km2)"":1285},{""Name"":""Paris"",""Founded"":null,""Population"":2161e3,""Area (km2)"":23835e-1}]}"
89+
));
90+
91+
/// Additional methods are defined in `API.dfy` to serialize an object into an
92+
/// existing buffer or into an array. Below is the smaller example from the
93+
/// README, as a sanity check:
94+
95+
var CITY_JS := Unicode.Transcode16To8(@"{""Cities"": [{
96+
""Name"": ""Boston"",
97+
""Founded"": 1630,
98+
""Population"": 689386,
99+
""Area (km2)"": 4584.2}]}");
100+
101+
var CITY_AST := Object([("Cities", Array([
102+
Object([
103+
("Name", String("Boston")),
104+
("Founded", Number(Int(1630))),
105+
("Population", Number(Int(689386))),
106+
("Area (km2)", Number(Decimal(45842, -1)))])]))]);
107+
108+
expect API.Deserialize(CITY_JS) == Success(CITY_AST);
109+
110+
expect API.Serialize(CITY_AST) == Success(Unicode.Transcode16To8(
111+
@"{""Cities"":[{""Name"":""Boston"",""Founded"":1630,""Population"":689386,""Area (km2)"":45842e-1}]}"
112+
));
113+
}
114+
}
115+
116+
/// ## Low-level API
117+
///
118+
/// If you care about low-level performance, or about preserving existing
119+
/// formatting as much as possible, you may prefer to use the lower-level API:
120+
121+
module {:options "-functionSyntax:4"} JSON.Examples.LowLevel {
122+
import ZeroCopy.API
123+
import Utils.Unicode
124+
import opened Grammar
125+
import opened Wrappers
126+
127+
/// The low-level API works with ASTs that record all details of formatting and
128+
/// encoding: each node contains pointers to parts of a string, such that
129+
/// concatenating the fields of all nodes reconstructs the serialized value.
130+
131+
method {:test} Test() {
132+
133+
/// The low-level API exposes the same functions and methods as the high-level
134+
/// one, but the type that they consume and produce is `Grammar.JSON` (defined
135+
/// in `Grammar.dfy` as a `Grammar.Value` surrounded by optional whitespace)
136+
/// instead of `AST.JSON` (defined in `AST.dfy`). Since `Grammar.JSON` contains
137+
/// all formatting information, re-serializing an object produces the original
138+
/// value:
139+
140+
var CITIES := Unicode.Transcode16To8(@"{
141+
""Cities"": [
142+
{
143+
""Name"": ""Boston"",
144+
""Founded"": 1630,
145+
""Population"": 689386,
146+
""Area (km2)"": 4600
147+
}, {
148+
""Name"": ""Rome"",
149+
""Founded"": -753,
150+
""Population"": 2.873e6,
151+
""Area (km2)"": 1285
152+
}, {
153+
""Name"": ""Paris"",
154+
""Founded"": null,
155+
""Population"": 2.161e6,
156+
""Area (km2)"": 2383.5
157+
}
158+
]
159+
}");
160+
161+
var deserialized :- expect API.Deserialize(CITIES);
162+
expect API.Serialize(deserialized) == Success(CITIES);
163+
164+
/// Since the formatting is preserved, it is also possible to write
165+
/// minimally-invasive transformations over an AST. For example, let's replace
166+
/// `null` in the object above with `"Unknown"`.
167+
///
168+
/// First, we construct a JSON value for the string `"Unknown"`; this could be
169+
/// done by hand using `View.OfBytes()`, but using `API.Deserialize` is even
170+
/// simpler:
171+
172+
var UNKNOWN :- expect API.Deserialize(Unicode.Transcode16To8(@"""Unknown"""));
173+
174+
/// `UNKNOWN` is of type `Grammar.JSON`, which contains optional whitespace and
175+
/// a `Grammar.Value` under the name `UNKNOWN.t`, which we can use in the
176+
/// replacement:
177+
178+
var without_null := deserialized.(t := ReplaceNull(deserialized.t, UNKNOWN.t));
179+
180+
/// Then, if we reserialize, we see that all formatting (and, in fact, all of
181+
/// the serialization work) has been reused:
182+
183+
expect API.Serialize(without_null) == Success(Unicode.Transcode16To8(@"{
184+
""Cities"": [
185+
{
186+
""Name"": ""Boston"",
187+
""Founded"": 1630,
188+
""Population"": 689386,
189+
""Area (km2)"": 4600
190+
}, {
191+
""Name"": ""Rome"",
192+
""Founded"": -753,
193+
""Population"": 2.873e6,
194+
""Area (km2)"": 1285
195+
}, {
196+
""Name"": ""Paris"",
197+
""Founded"": ""Unknown"",
198+
""Population"": 2.161e6,
199+
""Area (km2)"": 2383.5
200+
}
201+
]
202+
}"));
203+
}
204+
205+
/// All that remains is to write the recursive traversal:
206+
207+
import Seq
208+
209+
function ReplaceNull(js: Value, replacement: Value): Value {
210+
match js
211+
212+
/// Non-recursive cases are untouched:
213+
214+
case Bool(_) => js
215+
case String(_) => js
216+
case Number(_) => js
217+
218+
/// `Null` is replaced with the new `replacement` value:
219+
220+
case Null(_) => replacement
221+
222+
/// … and objects and arrays are traversed recursively (only the data part of is
223+
/// traversed: other fields record information about the formatting of braces,
224+
/// square brackets, and whitespace, and can thus be reused without
225+
/// modifications):
226+
227+
case Object(obj) =>
228+
Object(obj.(data := MapSuffixedSequence(obj.data, (s: Suffixed<jkv, jcomma>) requires s in obj.data =>
229+
s.t.(v := ReplaceNull(s.t.v, replacement)))))
230+
case Array(arr) =>
231+
Array(arr.(data := MapSuffixedSequence(arr.data, (s: Suffixed<Value, jcomma>) requires s in arr.data =>
232+
ReplaceNull(s.t, replacement))))
233+
}
234+
235+
/// Note that well-formedness criteria on the low-level AST are enforced using
236+
/// subset types, which is why we need a bit more work to iterate over the
237+
/// sequences of key-value paris and of values in objects and arrays.
238+
/// Specifically, we need to prove that mapping over these sequences doesn't
239+
/// introduce dangling punctuation (`NoTrailingSuffix`). We package this
240+
/// reasoning into a `MapSuffixedSequence` function:
241+
242+
function MapSuffixedSequence<D, S>(sq: SuffixedSequence<D, S>, fn: Suffixed<D, S> --> D)
243+
: SuffixedSequence<D, S>
244+
requires forall suffixed | suffixed in sq :: fn.requires(suffixed)
245+
{
246+
// BUG(https://github.com/dafny-lang/dafny/issues/2184)
247+
// BUG(https://github.com/dafny-lang/dafny/issues/2690)
248+
var fn' := (sf: Suffixed<D, S>) requires (ghost var in_sq := sf => sf in sq; in_sq(sf)) => sf.(t := fn(sf));
249+
var sq' := Seq.Map(fn', sq);
250+
251+
assert NoTrailingSuffix(sq') by {
252+
forall idx | 0 <= idx < |sq'| ensures sq'[idx].suffix.Empty? <==> idx == |sq'| - 1 {
253+
calc {
254+
sq'[idx].suffix.Empty?;
255+
fn'(sq[idx]).suffix.Empty?;
256+
sq[idx].suffix.Empty?;
257+
idx == |sq| - 1;
258+
idx == |sq'| - 1;
259+
}
260+
}
261+
}
262+
263+
sq'
264+
}
265+
}
266+
267+
/// The examples in this file can be run with `Dafny -compile:4 -runAllTests:1`
268+
/// (the tests produce no output, but their calls to `expect` will be checked
269+
/// dynamically).

src/JSON/Tutorial.dfy.expect

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
Dafny program verifier finished with 3 verified, 0 errors
3+
JSON.Examples.HighLevel.Test: PASSED
4+
JSON.Examples.LowLevel.Test: PASSED

src/JSON/Utils/Unicode.dfy

+8
Original file line numberDiff line numberDiff line change
@@ -143,4 +143,12 @@ module {:options "-functionSyntax:4"} JSON.Utils.Unicode {
143143
function Transcode8To16(s: seq<uint8>): string {
144144
Utf16Encode(Utf8Decode(s))
145145
}
146+
147+
function ASCIIToBytes(s: string): seq<uint8>
148+
// Keep ASCII characters in `s` and discard all other characters
149+
{
150+
seq(|s|, idx requires 0 <= idx < |s| =>
151+
if s[idx] as uint16 < 128 then s[idx] as uint8
152+
else 0 as uint8)
153+
}
146154
}

0 commit comments

Comments
 (0)