|
| 1 | +# Grammar for 2to3. This grammar supports Python 2.x and 3.x. |
| 2 | + |
| 3 | +# NOTE WELL: You should also follow all the steps listed at |
| 4 | +# https://devguide.python.org/grammar/ |
| 5 | + |
| 6 | +# Start symbols for the grammar: |
| 7 | +# file_input is a module or sequence of commands read from an input file; |
| 8 | +# single_input is a single interactive statement; |
| 9 | +# eval_input is the input for the eval() and input() functions. |
| 10 | +# NB: compound_stmt in single_input is followed by extra NEWLINE! |
| 11 | +file_input: (NEWLINE | stmt)* ENDMARKER |
| 12 | +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE |
| 13 | +eval_input: testlist NEWLINE* ENDMARKER |
| 14 | + |
| 15 | +decorator: '@' namedexpr_test NEWLINE |
| 16 | +decorators: decorator+ |
| 17 | +decorated: decorators (classdef | funcdef | async_funcdef) |
| 18 | +async_funcdef: ASYNC funcdef |
| 19 | +funcdef: 'def' NAME parameters ['->' test] ':' suite |
| 20 | +parameters: '(' [typedargslist] ')' |
| 21 | + |
| 22 | +# The following definition for typedargslist is equivalent to this set of rules: |
| 23 | +# |
| 24 | +# arguments = argument (',' argument)* |
| 25 | +# argument = tfpdef ['=' test] |
| 26 | +# kwargs = '**' tname [','] |
| 27 | +# args = '*' [tname_star] |
| 28 | +# kwonly_kwargs = (',' argument)* [',' [kwargs]] |
| 29 | +# args_kwonly_kwargs = args kwonly_kwargs | kwargs |
| 30 | +# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] |
| 31 | +# typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs |
| 32 | +# typedargslist = (arguments ',' '/' [',' [typedargslist_no_posonly]]) | (typedargslist_no_posonly) |
| 33 | +# |
| 34 | +# It needs to be fully expanded to allow our LL(1) parser to work on it. |
| 35 | + |
| 36 | +typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [ |
| 37 | + ',' [((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])* |
| 38 | + [',' ['**' tname [',']]] | '**' tname [',']) |
| 39 | + | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])] |
| 40 | + ] | ((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])* |
| 41 | + [',' ['**' tname [',']]] | '**' tname [',']) |
| 42 | + | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) |
| 43 | + |
| 44 | +tname: NAME [':' test] |
| 45 | +tname_star: NAME [':' (test|star_expr)] |
| 46 | +tfpdef: tname | '(' tfplist ')' |
| 47 | +tfplist: tfpdef (',' tfpdef)* [','] |
| 48 | + |
| 49 | +# The following definition for varargslist is equivalent to this set of rules: |
| 50 | +# |
| 51 | +# arguments = argument (',' argument )* |
| 52 | +# argument = vfpdef ['=' test] |
| 53 | +# kwargs = '**' vname [','] |
| 54 | +# args = '*' [vname] |
| 55 | +# kwonly_kwargs = (',' argument )* [',' [kwargs]] |
| 56 | +# args_kwonly_kwargs = args kwonly_kwargs | kwargs |
| 57 | +# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] |
| 58 | +# varargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs |
| 59 | +# varargslist = arguments ',' '/' [',' [(varargslist_no_posonly)]] | (varargslist_no_posonly) |
| 60 | +# |
| 61 | +# It needs to be fully expanded to allow our LL(1) parser to work on it. |
| 62 | + |
| 63 | +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ |
| 64 | + ((vfpdef ['=' test] ',')* ('*' [vname] (',' vname ['=' test])* |
| 65 | + [',' ['**' vname [',']]] | '**' vname [',']) |
| 66 | + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) |
| 67 | + ]] | ((vfpdef ['=' test] ',')* |
| 68 | + ('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]]| '**' vname [',']) |
| 69 | + | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) |
| 70 | + |
| 71 | +vname: NAME |
| 72 | +vfpdef: vname | '(' vfplist ')' |
| 73 | +vfplist: vfpdef (',' vfpdef)* [','] |
| 74 | + |
| 75 | +stmt: simple_stmt | compound_stmt |
| 76 | +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE |
| 77 | +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | |
| 78 | + import_stmt | global_stmt | exec_stmt | assert_stmt) |
| 79 | +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | |
| 80 | + ('=' (yield_expr|testlist_star_expr))*) |
| 81 | +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] |
| 82 | +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] |
| 83 | +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | |
| 84 | + '<<=' | '>>=' | '**=' | '//=') |
| 85 | +# For normal and annotated assignments, additional restrictions are enforced by the interpreter |
| 86 | +print_stmt: 'print' ( [ test (',' test)* [','] ] | |
| 87 | + '>>' test [ (',' test)+ [','] ] ) |
| 88 | +del_stmt: 'del' exprlist |
| 89 | +pass_stmt: 'pass' |
| 90 | +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt |
| 91 | +break_stmt: 'break' |
| 92 | +continue_stmt: 'continue' |
| 93 | +return_stmt: 'return' [testlist_star_expr] |
| 94 | +yield_stmt: yield_expr |
| 95 | +raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] |
| 96 | +import_stmt: import_name | import_from |
| 97 | +import_name: 'import' dotted_as_names |
| 98 | +import_from: ('from' ('.'* dotted_name | '.'+) |
| 99 | + 'import' ('*' | '(' import_as_names ')' | import_as_names)) |
| 100 | +import_as_name: NAME ['as' NAME] |
| 101 | +dotted_as_name: dotted_name ['as' NAME] |
| 102 | +import_as_names: import_as_name (',' import_as_name)* [','] |
| 103 | +dotted_as_names: dotted_as_name (',' dotted_as_name)* |
| 104 | +dotted_name: NAME ('.' NAME)* |
| 105 | +global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* |
| 106 | +exec_stmt: 'exec' expr ['in' test [',' test]] |
| 107 | +assert_stmt: 'assert' test [',' test] |
| 108 | + |
| 109 | +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt |
| 110 | +async_stmt: ASYNC (funcdef | with_stmt | for_stmt) |
| 111 | +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] |
| 112 | +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] |
| 113 | +for_stmt: 'for' exprlist 'in' testlist_star_expr ':' suite ['else' ':' suite] |
| 114 | +try_stmt: ('try' ':' suite |
| 115 | + ((except_clause ':' suite)+ |
| 116 | + ['else' ':' suite] |
| 117 | + ['finally' ':' suite] | |
| 118 | + 'finally' ':' suite)) |
| 119 | +with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite |
| 120 | + |
| 121 | +# NB compile.c makes sure that the default except clause is last |
| 122 | +except_clause: 'except' ['*'] [test [(',' | 'as') test]] |
| 123 | +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT |
| 124 | + |
| 125 | +# Backward compatibility cruft to support: |
| 126 | +# [ x for x in lambda: True, lambda: False if x() ] |
| 127 | +# even while also allowing: |
| 128 | +# lambda x: 5 if x else 2 |
| 129 | +# (But not a mix of the two) |
| 130 | +testlist_safe: old_test [(',' old_test)+ [',']] |
| 131 | +old_test: or_test | old_lambdef |
| 132 | +old_lambdef: 'lambda' [varargslist] ':' old_test |
| 133 | + |
| 134 | +namedexpr_test: asexpr_test [':=' asexpr_test] |
| 135 | + |
| 136 | +# This is not actually a real rule, but since the parser is very |
| 137 | +# limited in how it can handle match/case rules, we insert a |
| 138 | +# virtual case (<expr> as <expr>) as a valid expression. Unless a better |
| 139 | +# approach is found, the only side effect of this seems to be allowing |
| 140 | +# more input to be parsed (which would then fail on the AST). |
| 141 | +asexpr_test: test ['as' test] |
| 142 | + |
| 143 | +test: or_test ['if' or_test 'else' test] | lambdef |
| 144 | +or_test: and_test ('or' and_test)* |
| 145 | +and_test: not_test ('and' not_test)* |
| 146 | +not_test: 'not' not_test | comparison |
| 147 | +comparison: expr (comp_op expr)* |
| 148 | +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' |
| 149 | +star_expr: '*' expr |
| 150 | +expr: xor_expr ('|' xor_expr)* |
| 151 | +xor_expr: and_expr ('^' and_expr)* |
| 152 | +and_expr: shift_expr ('&' shift_expr)* |
| 153 | +shift_expr: arith_expr (('<<'|'>>') arith_expr)* |
| 154 | +arith_expr: term (('+'|'-') term)* |
| 155 | +term: factor (('*'|'@'|'/'|'%'|'//') factor)* |
| 156 | +factor: ('+'|'-'|'~') factor | power |
| 157 | +power: [AWAIT] atom trailer* ['**' factor] |
| 158 | +atom: ('(' [yield_expr|testlist_gexp] ')' | |
| 159 | + '[' [listmaker] ']' | |
| 160 | + '{' [dictsetmaker] '}' | |
| 161 | + '`' testlist1 '`' | |
| 162 | + NAME | NUMBER | STRING+ | '.' '.' '.') |
| 163 | +listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) |
| 164 | +testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) |
| 165 | +lambdef: 'lambda' [varargslist] ':' test |
| 166 | +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME |
| 167 | +subscriptlist: (subscript|star_expr) (',' (subscript|star_expr))* [','] |
| 168 | +subscript: test [':=' test] | [test] ':' [test] [sliceop] |
| 169 | +sliceop: ':' [test] |
| 170 | +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] |
| 171 | +testlist: test (',' test)* [','] |
| 172 | +dictsetmaker: ( ((test ':' asexpr_test | '**' expr) |
| 173 | + (comp_for | (',' (test ':' asexpr_test | '**' expr))* [','])) | |
| 174 | + ((test [':=' test] | star_expr) |
| 175 | + (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) |
| 176 | + |
| 177 | +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite |
| 178 | + |
| 179 | +arglist: argument (',' argument)* [','] |
| 180 | + |
| 181 | +# "test '=' test" is really "keyword '=' test", but we have no such token. |
| 182 | +# These need to be in a single rule to avoid grammar that is ambiguous |
| 183 | +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, |
| 184 | +# we explicitly match '*' here, too, to give it proper precedence. |
| 185 | +# Illegal combinations and orderings are blocked in ast.c: |
| 186 | +# multiple (test comp_for) arguments are blocked; keyword unpackings |
| 187 | +# that precede iterable unpackings are blocked; etc. |
| 188 | +argument: ( test [comp_for] | |
| 189 | + test ':=' test [comp_for] | |
| 190 | + test 'as' test | |
| 191 | + test '=' asexpr_test | |
| 192 | + '**' test | |
| 193 | + '*' test ) |
| 194 | + |
| 195 | +comp_iter: comp_for | comp_if |
| 196 | +comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter] |
| 197 | +comp_if: 'if' old_test [comp_iter] |
| 198 | + |
| 199 | +# As noted above, testlist_safe extends the syntax allowed in list |
| 200 | +# comprehensions and generators. We can't use it indiscriminately in all |
| 201 | +# derivations using a comp_for-like pattern because the testlist_safe derivation |
| 202 | +# contains a comma, which clashes with the trailing comma in arglist. |
| 203 | +# |
| 204 | +# This was an issue because the parser would not follow the correct derivation |
| 205 | +# when parsing syntactically valid Python code. Since testlist_safe was created |
| 206 | +# specifically to handle list comprehensions and generator expressions enclosed |
| 207 | +# in parentheses, it's safe to only use it in those. That avoids the issue; we |
| 208 | +# can parse code like set(x for x in [],). |
| 209 | +# |
| 210 | +# The syntax supported by this set of rules is not valid Python 3 syntax, |
| 211 | +# hence the prefix "old". |
| 212 | +# |
| 213 | +# See https://bugs.python.org/issue27494 |
| 214 | +old_comp_iter: old_comp_for | old_comp_if |
| 215 | +old_comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [old_comp_iter] |
| 216 | +old_comp_if: 'if' old_test [old_comp_iter] |
| 217 | + |
| 218 | +testlist1: test (',' test)* |
| 219 | + |
| 220 | +# not used in grammar, but may appear in "node" passed from Parser to Compiler |
| 221 | +encoding_decl: NAME |
| 222 | + |
| 223 | +yield_expr: 'yield' [yield_arg] |
| 224 | +yield_arg: 'from' test | testlist_star_expr |
| 225 | + |
| 226 | + |
| 227 | +# 3.10 match statement definition |
| 228 | + |
| 229 | +# PS: the real grammar is much more restrictive, but for now, |
| 230 | +# rather than bothering to encode the exact same DSL in an LL(1) |
| 231 | +# parser, we just accept an expression and let the ast.parse() |
| 232 | +# step of the safe mode reject anything that is not valid |
| 233 | +# grammar. |
| 234 | + |
| 235 | +# The real grammar is more restrictive because patterns are a sort |
| 236 | +# of DSL (more advanced than the LHS of assignments, but still a |
| 237 | +# very limited Python subset). They are not really expressions, |
| 238 | +# but that does not matter here: if we can parse them, that is |
| 239 | +# enough to reformat them. |
| 240 | + |
| 241 | +match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT |
| 242 | + |
| 243 | +# This is more permissive than the actual version. For example it |
| 244 | +# accepts `match *something:`, even though single-item starred expressions |
| 245 | +# are forbidden. |
| 246 | +subject_expr: (namedexpr_test|star_expr) (',' (namedexpr_test|star_expr))* [','] |
| 247 | + |
| 248 | +# cases |
| 249 | +case_block: "case" patterns [guard] ':' suite |
| 250 | +guard: 'if' namedexpr_test |
| 251 | +patterns: pattern (',' pattern)* [','] |
| 252 | +pattern: (expr|star_expr) ['as' expr] |
0 commit comments