Skip to content

Commit b3c94ba

Browse files
committed
Use Ragel to distinguish block args from | ops.
1 parent 9da2d85 commit b3c94ba

File tree

3 files changed

+54
-7
lines changed

3 files changed

+54
-7
lines changed

examples/input.seg

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# This is a bunch of statements!
2-
31
{
2+
|a, b|
43
3 + 4
54
}

src/lexer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,10 @@
1111
*/
1212
seg_statementlist_node *seg_parse(char *content, off_t length, seg_options *opts);
1313

14+
/* The initial capacity of the Ragel stack, counted in int entries (not bytes). */
15+
#define RAGEL_INIT_STACK_SIZE 5
16+
17+
/* The number of int entries by which the Ragel stack is grown or shrunk at a time. */
18+
#define RAGEL_STACK_INCR 10
19+
1420
#endif

src/lexer.rl

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <string.h>
44
#include <sys/types.h>
55

6+
#include "lexer.h"
67
#include "ast.h"
78
#include "token.h"
89
#include "segment.h"
@@ -43,16 +44,30 @@ static void report(const char *name, const char *ts, const char *te) {
4344
control = [(){};=.|,%@];
4445
op = [&|+\-*/%^];
4546

46-
nonws = ^whitespace;
47+
nonws = ^whitespace & [^\r\n];
4748
noncontrol = ^control;
4849
nonop = ^op;
50+
nonnumeric = [^0-9];
4951

50-
iboundary = noncontrol & nonop & nonws;
52+
iboundary = nonws & noncontrol & nonnumeric;
5153
imiddle = nonws;
5254

53-
identifier = iboundary imiddle iboundary | iboundary;
55+
identifier = iboundary imiddle* iboundary | iboundary;
5456
symbol = ':' identifier | ':' string;
5557

58+
# Scanner for a block's argument list. `main` enters it with `fcall blockargs`
# from its '|' action, and it hands control back with `fret` on the next '|'.
# Comments, whitespace and line breaks are matched with no action attached.
# Identifiers are passed to CAPTURE(IDENTIFIER) and commas to EMPTY(COMMA).
blockargs := |*
59+
comment;
60+
whitespace;
61+
[\r\n];
62+
63+
identifier => { CAPTURE(IDENTIFIER); };
64+
',' => { EMPTY(COMMA); };
65+
# Closing bar: report it, then return to the scanner that called us.
'|' => {
66+
EMPTY(BAR);
67+
fret;
68+
};
69+
*|;
70+
5671
main := |*
5772
comment;
5873

@@ -71,9 +86,13 @@ static void report(const char *name, const char *ts, const char *te) {
7186
'\n' => { EMPTY(NEWLINE); };
7287
'=' => { EMPTY(ASSIGNMENT); };
7388
'.' => { EMPTY(PERIOD); };
74-
'|' => { EMPTY(BAR); };
7589
',' => { EMPTY(COMMA); };
7690

91+
'|' => {
92+
EMPTY(BAR);
93+
fcall blockargs;
94+
};
95+
7796
identifier '(' => { EMPTY(METHODNAME); };
7897

7998
identifier ':' => { EMPTY(KEYWORD); };
@@ -94,6 +113,26 @@ static void report(const char *name, const char *ts, const char *te) {
94113

95114
whitespace;
96115
*|;
116+
117+
# Stack management
118+
119+
prepush {
    /*
     * Runs before a state is pushed onto `stack` (e.g. by `fcall`). When the
     * stack is full, grow it by RAGEL_STACK_INCR entries so the push that
     * follows stays in bounds.
     */
    if (top >= stack_size) {
        int grown_size = stack_size + RAGEL_STACK_INCR;
        int *grown;

        if (opts->verbose) {
            printf("Growing stack from %d to %d.\n", stack_size, grown_size);
        }

        /* Capacities are counted in entries; realloc wants bytes. */
        grown = realloc(stack, sizeof *stack * grown_size);
        if (grown == NULL) {
            /* The pending push has nowhere to go; stop instead of writing out of bounds. */
            fprintf(stderr, "Unable to grow the lexer stack to %d entries.\n", grown_size);
            exit(EXIT_FAILURE);
        }

        stack = grown;
        /* Record the new capacity so later checks compare against it. */
        stack_size = grown_size;
    }
}

postpop {
    /*
     * Runs after a state is popped off `stack` (e.g. by `fret`). Once a full
     * increment of entries is unused, hand that increment back.
     */
    if (stack_size - top >= RAGEL_STACK_INCR) {
        int shrunk_size = stack_size - RAGEL_STACK_INCR;

        /* Never ask realloc for zero bytes; its result is not portable. */
        if (shrunk_size > 0) {
            int *shrunk;

            if (opts->verbose) {
                printf("Shrinking stack from %d to %d.\n", stack_size, shrunk_size);
            }

            shrunk = realloc(stack, sizeof *stack * shrunk_size);
            /* If shrinking fails the old, larger buffer is still valid: keep it. */
            if (shrunk != NULL) {
                stack = shrunk;
                stack_size = shrunk_size;
            }
        }
    }
}
97136
}%%
98137
/* Syntax Highlighting */
99138

@@ -102,12 +141,15 @@ static void report(const char *name, const char *ts, const char *te) {
102141
seg_statementlist_node *seg_parse(char *content, off_t length, seg_options *opts)
103142
{
104143
/* Variables used by Ragel. */
105-
int cs, act;
144+
int cs, act, top;
106145
char *ts, *te;
107146
char *p = content;
108147
const char *pe = content + length;
109148
const char *eof = pe;
110149

150+
int *stack = malloc(sizeof(int) * RAGEL_INIT_STACK_SIZE);
151+
int stack_size = RAGEL_INIT_STACK_SIZE;
152+
111153
/* Parser state */
112154
int lexer_error = 0;
113155
void *parser = ParseAlloc(malloc);

0 commit comments

Comments
 (0)