-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add context free proto file grammar #2
base: master
Are you sure you want to change the base?
Changes from 5 commits
bb3e77f
39d6cd3
f957bca
0063e9d
0bc288b
01e6468
5d9a700
f7613f6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
(* | ||
Protocol Buffers proto file text grammar | ||
|
||
This document describes a context free protobuf grammar in EBNF format. | ||
It does not differentiate between proto2 and proto3 syntax. | ||
|
||
Some things are allowed in this grammar that are not allowed in proto files. | ||
For example, the "syntax" statement can be used with any string literal. | ||
*) | ||
|
||
any = ? * ?; (* anything matches here, check usages for any specific exceptions *) | ||
whitespace_char = " " | "\n" | "\t" | "\r" | "\v" | "\f" ; | ||
line_comment = "//" , { any - "\n" } , "\n" ; | ||
block_comment = "/*" , { any - "*/" } , "*/" ; | ||
whitespace = { whitespace_char | line_comment | block_comment } ; | ||
|
||
uppercase = | ||
"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | | ||
"I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | | ||
"Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | | ||
"Y" | "Z" | ||
; | ||
|
||
lowercase = | ||
"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | | ||
"i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | | ||
"q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | | ||
"y" | "z" | ||
; | ||
|
||
letter = uppercase | lowercase ; | ||
|
||
decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; | ||
octal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ; | ||
hex_digit = | ||
decimal_digit | | ||
"A" | "B" | "C" | "D" | "E" | "F" | | ||
"a" | "b" | "c" | "d" | "e" | "f" | ||
; | ||
|
||
identifier = letter , { letter | decimal_digit | "_" } ; | ||
full_identifier = identifier , { "." , identifier } ; | ||
|
||
type_name = ["."] , full_identifier ; | ||
|
||
group_identifier = uppercase , { letter | decimal_digit | "_" } ; | ||
|
||
decimal_literal = ("1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9") , { decimal_digit } ; | ||
octal_literal = "0" , { octal_digit } ; | ||
hex_literal = "0" , ("x" | "X") , hex_digit , { hex_digit } ; | ||
int_literal = decimal_literal | octal_literal | hex_literal ; | ||
|
||
decimal_digits = decimal_digit , { decimal_digit } ; | ||
exponent = ("e" | "E") , ["+" | "-"] , decimal_digits ; | ||
float_literal = ((decimal_digits , "." , [decimal_digits] , [exponent]) | (decimal_digits , exponent) | ("." , decimal_digits , [exponent])) | "inf" | "nan" ; | ||
|
||
bool_literal = "true" | "false" ; | ||
|
||
hex_escape = "\" , ("x" | "X") , hex_digit , hex_digit ; | ||
octal_escape = "\" , octal_digit , octal_digit , octal_digit ; | ||
char_escape = "\" , ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\" | "'" | '"' ) ; | ||
char = hex_escape | octal_escape | char_escape | ? /[^\0\n\\]/ ? ; | ||
string_literal = ('"' , { char } , '"') | ("'" , { char } , "'") ; | ||
ObsidianMinor marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
end_statement = ";" ; | ||
|
||
aggregate_literal = "{" , { identifier , ":" , literal } , "}" ; (* whitespace and/or comments separate each field value *) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one is a little tricky. Aggregate values referenced by an option must start with In particular:
|
||
literal = full_identifier | (["-" | "+"] , int_literal) | (["-" | "+"] , float_literal) | string_literal | bool_literal | aggregate_literal ; | ||
|
||
(* after this point, unless specified, concatenated terminals can be separated by whitespace and/or comments, as defined at the top of this file *) | ||
|
||
syntax = "syntax" , "=" , string_literal , end_statement ; | ||
import = "import" , ["weak" | "public"] , string_literal , end_statement ; | ||
package = "package" , full_identifier , end_statement ; | ||
|
||
name_part = identifier | ("(" , full_identifier , ")") ; (* a plain name or a parenthesized extension name *) | ||
option_name = name_part , { "." , name_part } ; (* e.g. foo.bar, (foo.bar).baz, (foo).(bar).(baz) *) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The trailing portions are defined here only as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After checking the parser source code, it appears that an easier way to represent it would be to have an at-least one name_part = identifier | ("(" , full_identifier , ")") ;
option_name = name_part , { "." , name_part } ; If I'm correct then this allows for everything the parser allows, including foo // simple identifiers
foo.bar // full identifiers
(foo).bar // extensions
(foo.bar).baz // extensions with full identifiers
(foo.bar).(baz) // extensions on extensions
(foo).(bar).(baz) // deep extensions on extensions |
||
option = "option" , option_name , "=" , literal , end_statement ; | ||
|
||
short_option = option_name , "=" , literal ; | ||
short_options = short_option , { "," , short_option } ; | ||
|
||
field_label = "optional" | "repeated" | "required" ; | ||
field_type = | ||
"double" | "float" | "int32" | "int64" | "uint32" | "uint64" | | ||
"sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | | ||
"bool" | "string" | "bytes" | type_name | ||
; | ||
group = [field_label] , "group" , group_identifier , "=" , int_literal , ["[" , short_options , "]"] , message_body ; | ||
field = [field_label] , field_type , identifier , "=" , int_literal , ["[" , short_options , "]"] , end_statement ; | ||
|
||
oneof_field = field_type , identifier , "=" , int_literal , ["[" , short_options , "]"] , end_statement ; | ||
oneof = "oneof" , identifier , "{" , { oneof_field | option | end_statement } , "}" ; | ||
|
||
key = | ||
"int32" | "int64" | "uint32" | "uint64" | | ||
"sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | | ||
"bool" | "string" | ||
; | ||
map = "map" , "<" , key , "," , field_type , ">" , identifier , "=" , int_literal , ["[" , short_options , "]"] , end_statement ; | ||
|
||
range = int_literal , ["to" , (int_literal | "max")] ; | ||
ranges = range , { "," , range } ; | ||
extensions = "extensions" , ranges , ["[" , short_options , "]"] , end_statement ; | ||
|
||
field_names = (string_literal | identifier) , { "," , (string_literal | identifier) } ; (* proto2 and proto3 write reserved names as quoted strings; bare identifiers are kept for compatibility *) | ||
reserved = "reserved" , (ranges | field_names) , end_statement ; | ||
|
||
enum_value = identifier , "=" , ["-"] , int_literal , ["[" , short_options , "]"] , end_statement ; | ||
enum = "enum" , identifier , "{" , { option | enum_value | reserved | end_statement } , "}" ; | ||
|
||
message_body = "{" , { field | enum | message | extend | extensions | group | option | oneof | map | reserved | end_statement } , "}" ; | ||
message = "message" , identifier , message_body ; | ||
|
||
extend = "extend" , type_name , "{" , { field | group | end_statement } , "}" ; (* the extended message may be a qualified name, optionally with a leading dot *) | ||
|
||
rpc = "rpc" , identifier , "(" , ["stream"] , type_name , ")" , "returns" , "(" , ["stream"] , type_name , ")" , (("{" , { option | end_statement } , "}") | end_statement) ; (* option already ends with end_statement, so the body is a choice of options and empty statements *) | ||
stream = "stream" , identifier , "(" , type_name , "," , type_name , ")" , (("{" , { option | end_statement } , "}") | end_statement) ; (* option already ends with end_statement, so the body is a choice of options and empty statements *) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While this is indicated in one of the grammars listed on the main docs site, protoc does not actually accept it. So it should be removed (from this grammar and from the docs site). I'm pretty sure this was transcribed from an internal version of protoc to support earlier versions of streaming stubby (the internal RPC framework at Google, pre-cursor to gRPC). Streams are instead defined only with |
||
service = "service" , identifier , "{" , { option | rpc | stream | end_statement } , "}" ; (* body elements may appear in any order and any number of times *) | ||
|
||
file = [syntax] , { import | package | option | message | enum | extend | service | end_statement } ; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most places that accept a
full_identifier
do in fact support a preceding ".", to explicitly indicate an absolute fully-qualified name (e.g. not relative to the current context). This includes references to custom option names.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe this works for custom option names (at least anymore). I have a rule for those cases however where you can have a preceding dot called
type_name
and it's used in areas where preceding dot is allowed.