@@ -15,6 +15,7 @@ import Data.Text (Text)
15
15
import Data.Text qualified as T
16
16
import Data.Void
17
17
import Data.Word
18
+ import qualified Data.Map.Strict as M
18
19
19
20
import Text.Megaparsec hiding (Label , State )
20
21
import Text.Megaparsec.Char hiding (space , eol )
@@ -25,15 +26,18 @@ import Text.Megaparsec.Debug
25
26
import Err
26
27
import SourceInfo
27
28
import Types.Primitives
29
+ import Types.Source
30
+ import Util (toSnocList )
28
31
29
32
data ParseCtx = ParseCtx
30
33
{ curIndent :: Int -- used Reader-style (i.e. ask/local)
31
34
, canBreak :: Bool -- used Reader-style (i.e. ask/local)
32
35
, prevWhitespace :: Bool -- tracks whether we just consumed whitespace
33
- }
36
+ , sourceIdCounter :: Int
37
+ , curSourceMap :: SourceMaps } -- append to, writer-style
34
38
35
39
initParseCtx :: ParseCtx
36
- initParseCtx = ParseCtx 0 False False
40
+ initParseCtx = ParseCtx 0 False False 0 mempty
37
41
38
42
type Parser = StateT ParseCtx (Parsec Void Text )
39
43
@@ -64,7 +68,7 @@ nextChar = do
64
68
{-# INLINE nextChar #-}
65
69
66
70
anyCaseName :: Lexer SourceName
67
- anyCaseName = label " name" $ lexeme $
71
+ anyCaseName = label " name" $ lexeme LowerName $ -- TODO: distinguish lowercase/uppercase
68
72
checkNotKeyword $ (:) <$> satisfy (\ c -> isLower c || isUpper c) <*>
69
73
(T. unpack <$> takeWhileP Nothing (\ c -> isAlphaNum c || c == ' \' ' || c == ' _' ))
70
74
@@ -121,7 +125,7 @@ keyWordToken = \case
121
125
PassKW -> " pass"
122
126
123
127
keyWord :: KeyWord -> Lexer ()
124
- keyWord kw = lexeme $ try $ string (fromString $ keyWordToken kw)
128
+ keyWord kw = lexeme Keyword $ try $ string (fromString $ keyWordToken kw)
125
129
>> notFollowedBy nameTailChar
126
130
127
131
keyWordSet :: HS. HashSet String
@@ -131,19 +135,19 @@ keyWordStrs :: [String]
131
135
keyWordStrs = map keyWordToken [DefKW .. PassKW ]
132
136
133
137
primName :: Lexer String
134
- primName = lexeme $ try $ char ' %' >> some alphaNumChar
138
+ primName = lexeme MiscLexeme $ try $ char ' %' >> some alphaNumChar
135
139
136
140
charLit :: Lexer Char
137
- charLit = lexeme $ char ' \' ' >> L. charLiteral <* char ' \' '
141
+ charLit = lexeme MiscLexeme $ char ' \' ' >> L. charLiteral <* char ' \' '
138
142
139
143
strLit :: Lexer String
140
- strLit = lexeme $ char ' "' >> manyTill L. charLiteral (char ' "' )
144
+ strLit = lexeme StringLiteralLexeme $ char ' "' >> manyTill L. charLiteral (char ' "' )
141
145
142
146
natLit :: Lexer Word64
143
- natLit = lexeme $ try $ L. decimal <* notFollowedBy (char ' .' )
147
+ natLit = lexeme LiteralLexeme $ try $ L. decimal <* notFollowedBy (char ' .' )
144
148
145
149
doubleLit :: Lexer Double
146
- doubleLit = lexeme $
150
+ doubleLit = lexeme LiteralLexeme $
147
151
try L. float
148
152
<|> try (fromIntegral <$> (L. decimal :: Parser Int ) <* char ' .' )
149
153
<|> try do
@@ -161,22 +165,22 @@ knownSymStrs = HS.fromList
161
165
162
166
-- string must be in `knownSymStrs`
163
167
sym :: Text -> Lexer ()
164
- sym s = lexeme $ try $ string s >> notFollowedBy symChar
168
+ sym s = lexeme Symbol $ try $ string s >> notFollowedBy symChar
165
169
166
170
anySym :: Lexer String
167
- anySym = lexeme $ try $ do
171
+ anySym = lexeme Symbol $ try $ do
168
172
s <- some symChar
169
173
failIf (s `HS.member` knownSymStrs) " "
170
174
return s
171
175
172
176
symName :: Lexer SourceName
173
- symName = label " symbol name" $ lexeme $ try $ do
177
+ symName = label " symbol name" $ lexeme Symbol $ try $ do
174
178
s <- between (char ' (' ) (char ' )' ) $ some symChar
175
179
return $ " (" <> s <> " )"
176
180
177
181
backquoteName :: Lexer SourceName
178
182
backquoteName = label " backquoted name" $
179
- lexeme $ try $ between (char ' `' ) (char ' `' ) anyCaseName
183
+ lexeme Symbol $ try $ between (char ' `' ) (char ' `' ) anyCaseName
180
184
181
185
-- brackets and punctuation
182
186
-- (can't treat as sym because e.g. `((` is two separate lexemes)
@@ -192,7 +196,7 @@ semicolon = charLexeme ';'
192
196
underscore = charLexeme ' _'
193
197
194
198
charLexeme :: Char -> Parser ()
195
- charLexeme c = void $ lexeme $ char c
199
+ charLexeme c = void $ lexeme Symbol $ char c
196
200
197
201
nameTailChar :: Parser Char
198
202
nameTailChar = alphaNumChar <|> char ' \' ' <|> char ' _'
@@ -243,10 +247,10 @@ recordNonWhitespace = modify \ctx -> ctx { prevWhitespace = False }
243
247
{-# INLINE recordNonWhitespace #-}
244
248
245
249
nameString :: Parser String
246
- nameString = lexeme . try $ (:) <$> lowerChar <*> many alphaNumChar
250
+ nameString = lexeme LowerName . try $ (:) <$> lowerChar <*> many alphaNumChar
247
251
248
252
thisNameString :: Text -> Parser ()
249
- thisNameString s = lexeme $ try $ string s >> notFollowedBy alphaNumChar
253
+ thisNameString s = lexeme MiscLexeme $ try $ string s >> notFollowedBy alphaNumChar
250
254
251
255
bracketed :: Parser () -> Parser () -> Parser a -> Parser a
252
256
bracketed left right p = between left right $ mayBreak $ sc >> p
@@ -310,10 +314,37 @@ failIf :: Bool -> String -> Parser ()
310
314
failIf True s = fail s
311
315
failIf False _ = return ()
312
316
313
- lexeme :: Parser a -> Parser a
314
- lexeme p = L. lexeme sc (p <* recordNonWhitespace)
317
+ newSourceId :: Parser SourceId
318
+ newSourceId = do
319
+ c <- gets sourceIdCounter
320
+ modify \ ctx -> ctx { sourceIdCounter = c + 1 }
321
+ return $ SourceId c
322
+
323
+ withSourceMaps :: Parser a -> Parser (SourceMaps , a )
324
+ withSourceMaps cont = do
325
+ smPrev <- gets curSourceMap
326
+ modify \ ctx -> ctx { curSourceMap = mempty }
327
+ result <- cont
328
+ sm <- gets curSourceMap
329
+ modify \ ctx -> ctx { curSourceMap = smPrev }
330
+ return (sm, result)
331
+
332
+ emitSourceMaps :: SourceMaps -> Parser ()
333
+ emitSourceMaps m = modify \ ctx -> ctx { curSourceMap = curSourceMap ctx <> m }
334
+
335
+ lexeme :: LexemeType -> Parser a -> Parser a
336
+ lexeme lexemeType p = do
337
+ start <- getOffset
338
+ ans <- p
339
+ end <- getOffset
340
+ recordNonWhitespace
341
+ sc
342
+ name <- newSourceId
343
+ emitSourceMaps $ mempty
344
+ { lexemeList = toSnocList [name]
345
+ , lexemeInfo = M. singleton name (lexemeType, (start, end)) }
346
+ return ans
315
347
{-# INLINE lexeme #-}
316
348
317
349
symbol :: Text -> Parser ()
318
350
symbol s = void $ L. symbol sc s
319
-
0 commit comments