-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.mll
130 lines (122 loc) · 3.21 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
{
open Parser
open Printf
exception Eof
exception Err
let brace_depth = ref 0
and comment_depth = ref 0
let incline lexbuf =
let pos = lexbuf.Lexing.lex_curr_p in
lexbuf.Lexing.lex_curr_p <- { pos with
Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
Lexing.pos_bol = pos.Lexing.pos_cnum;
}
exception Lexical_error of string * string * int * int
let handle_lexical_error fn lexbuf =
let p = Lexing.lexeme_start_p lexbuf in
let line = p.Lexing.pos_lnum
and column = p.Lexing.pos_cnum - p.Lexing.pos_bol + 1
and file = p.Lexing.pos_fname
in
try
fn lexbuf
with Lexical_error (msg, "", 0, 0) ->
raise(Lexical_error(msg, file, line, column))
}
let digit = ['0'-'9']
let id = ['a'-'z' 'A'-'Z'] ['a'-'z' 'A'-'Z' '0'-'9' '-']*
let ws = [' ' '\t']
let location =['l']['0'-'9']*
let literal = ['"']['a'-'z' 'A'-'Z' '0'-'9' '-']*['"']
let array = ['[']['0'-'9']*[']']
rule token = parse
| ws { token lexbuf }
| '\n' { incline lexbuf; token lexbuf }
| "==" { EQUALS }
| "!=" { NEQUALS }
| "+" { PLUS }
| "-" { MINUS}
| "^" { TIMES }
| "%" { MODULO }
| "<" { LESSTHAN}
| "(" { LPAREN }
| ")" { RPAREN }
| "{" {LCURLY}
| "}" {RCURLY}
| "[" {LSQBR}
| "]" {RSQBR}
| "." { DOT }
| "," {COMMA}
| ";" { SEQ }
| ":=" { ASSIGN }
| ":" {COLON}
| "true" { TRUE }
| "false" { FALSE }
| "isunset" {ISUNSET}
| "fst" {FST}
| "snd" {SND}
| "skip" { SKIP }
| "if" { IF }
| "then" { THEN }
| "else" { ELSE }
| "fi" {ENDIF}
| "lambda" { LAMBDA }
| "declassify" {DECLASSIFY}
| "<-" { UPDATE }
| "output" {OUTPUT}
| "while" { WHILE}
| "do" { DO }
| "end" {END}
| "call" {CALL}
| "set" {SET}
| "int" { INT }
| "bool" { BOOL }
| "string" { STRING }
| "cond" {COND}
| "func" {FUNC}
| "ref" {REF}
| "_" {UNDERSCORE}
| "low" {LOW}
| "high" {HIGH}
| "top" {TOP}
| "L"|"H" as channel {CHANNEL(channel)}
| "~" { ERASE}
| location as l {LOC(int_of_string (String.sub l 1 ((String.length l)-1)))}
(* | array as arr {ARRAY(int_of_string (String.sub arr 1 ((String.length arr)-2)))} *)
| literal as strlit {LITERAL(strlit)}
| id as v { VAR(v) }
| "*" { DEREF }
| digit+ as n { INTEGER(int_of_string n) }
| eof { EOF }
| "(#"
{ comment_depth := 1;
handle_lexical_error comment lexbuf;
token lexbuf }
| _ as c {
let pos = lexbuf.Lexing.lex_curr_p in
printf "Error at line %d\n" pos.Lexing.pos_lnum;
printf "Unrecognized character: [%c]\n" c;
exit 1
}
and comment = parse
"(#"
{ incr comment_depth; comment lexbuf }
| "#)"
{ decr comment_depth;
if !comment_depth = 0 then () else comment lexbuf }
| "'"
{ skip_char lexbuf ;
comment lexbuf }
| eof
{ raise(Lexical_error("unterminated comment", "", 0, 0)) }
| _
{ comment lexbuf }
and skip_char = parse
| [^ '\\' '\''] "'" (* regular character *)
(* one character and numeric escape sequences *)
| '\\' _ "'"
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
{()}
(* Perilous *)
| "" {()}