Fix several lexer bugs (#57)
* Significant refactoring

* Linter check failing
chameco authored and bmcutler committed Dec 6, 2017
1 parent 0d93b78 commit b0f9d5b
Showing 22 changed files with 245 additions and 266 deletions.
11 changes: 5 additions & 6 deletions .travis.yml
@@ -17,21 +17,20 @@ install:
- travis_wait 30 stack --install-ghc --copy-bins install language-python
- travis_wait 30 stack --install-ghc --copy-bins build
- stack install hlint
- cp hlint ~/.local/bin
- gem install danger danger-hlint
script:
- stack test --only-dependencies
- "./hlint '--ignore=Parse error' src"
- "./hlint '--ignore=Parse error' app"
- "./bin/hlint '--ignore=Parse error' src"
- "./bin/hlint '--ignore=Parse error' app"
- danger
deploy:
provider: releases
api_key:
secure: zM6T6ZwD1Tv0Igk9LORh3YZJvLlZHtjEbb6vkAQTicQ82Y/Ls88+67C0S4IkJbCkWqLUkfLkCv2x8aaZ7O6cbGi2xDtSPf+NyWfagvA+E2I6T2USJ93iYNs10Ep/SoAx7NEn6hnEKkNJd/kdFYtx/CvgXeN+PYHRfBD+NF106zoV4b9cF/mgSev/fxXqwT3GhaBJ/EEQ9LtEvgek3vjVWiSchCuubDdklgfAcgSBzGoQQLxyCq/BRF28ekFefKRQKiUNS4WI2btv+VnrP8xOvIPXnMSkFeF++druwPI75LRWYrhZA1ayj/5/NMHnn+TE4BMm5a2bdoLocvRHOwFIeK29oS5CdDttfiwpGHIykZGCoMIehfTJIecXyxezs1K8p0E6PV3eoCGyodXCoRDUXMWVUSaGbWnaIR8XcIOi3Rl6aYYfXWJc0MKiaQYSNQ7X2f4CqDDANqu587FPBQyHwlaEpXbb/bxFR6tvGSwnWgeU+bHseSKMesDz+wzhFHbyA77ls48dlKS2jpm/6qGx2Av1agz/cT7AJjrfuOhj8ZhpSIMtl3p1CfUCw1hh6lm42LrwOTRSBLMW7ErYDD5w4xw1YLQKMZVERGDrwYDVcbjgymZO0kE4kE1eh7KJJFZhsMkVCc/C3hi0TCtgjfiZ6uLKs5NYUUmiD9rCbp5UNeY=
file:
- count
- plagiarism
- diagnostics
- bin/count
- bin/plagiarism
- bin/diagnostics
skip_cleanup: true
on:
tags: true
20 changes: 0 additions & 20 deletions build.sh

This file was deleted.

6 changes: 4 additions & 2 deletions lichen.cabal
@@ -1,5 +1,5 @@
name: lichen
version: 0.3.0
version: 0.3.1
synopsis: Plagiarism Detection and Other Static Analysis
description: Please see README.md
homepage: https://github.com/Submitty/AnalysisTools
@@ -18,6 +18,7 @@ library
, Lichen.Config

, Lichen.Lexer
, Lichen.Lexer.Text
, Lichen.Lexer.C
, Lichen.Lexer.Python
, Lichen.Lexer.Java
@@ -27,12 +28,13 @@ library

, Lichen.Plagiarism.Main
, Lichen.Plagiarism.Config
, Lichen.Plagiarism.Submitty
, Lichen.Plagiarism.Winnow
, Lichen.Plagiarism.Compare
, Lichen.Plagiarism.Walk
, Lichen.Plagiarism.Concatenate
, Lichen.Plagiarism.Highlight
, Lichen.Plagiarism.Shared
, Lichen.Plagiarism.Provided
, Lichen.Plagiarism.AssignmentSettings
, Lichen.Plagiarism.Report
, Lichen.Plagiarism.Render
13 changes: 6 additions & 7 deletions src/Lichen/Count/Config.hs
@@ -9,13 +9,12 @@ import Lichen.Config
import Lichen.Languages
import Lichen.Count.Counters

data Config = Config
{ configFile :: FilePath
, language :: Language
, counter :: Counter
, toCount :: Maybe String
, sourceFiles :: [FilePath]
}
data Config = Config { configFile :: FilePath
, language :: Language
, counter :: Counter
, toCount :: Maybe String
, sourceFiles :: [FilePath]
}
instance FromJSON Config where
parseJSON = withObject "config_count" $ \o -> do
configFile <- fromMaybe (configFile defaultConfig) <$> o .:? "config_file"
9 changes: 4 additions & 5 deletions src/Lichen/Diagnostics/Config.hs
@@ -8,11 +8,10 @@ import Data.Aeson
import Lichen.Config
import Lichen.Languages

data Config = Config
{ configFile :: FilePath
, language :: Language
, sourceFiles :: [FilePath]
}
data Config = Config { configFile :: FilePath
, language :: Language
, sourceFiles :: [FilePath]
}
instance FromJSON Config where
parseJSON = withObject "config_diagnostics" $ \o -> do
configFile <- fromMaybe (configFile defaultConfig) <$> o .:? "config_file"
36 changes: 32 additions & 4 deletions src/Lichen/Languages.hs
@@ -18,6 +18,7 @@ import Lichen.Parser
import qualified Lichen.Lexer.C as C
import qualified Lichen.Lexer.Python as Python
import qualified Lichen.Lexer.Java as Java
import qualified Lichen.Lexer.Text as Text
import qualified Lichen.Parser.Python as Python

-- Configuration for the winnowing algorithm. Token sequences shorter than
@@ -53,16 +54,38 @@ smartRead s = case readMaybe s of Just t -> pure t
Nothing -> throwError . InvalidTokenError $ T.pack s

langDummy :: Language
langDummy = Language [] (WinnowConfig 0 0) (const $ pure ()) (dummy "No valid language specified") (dummy "No valid language specified")
langDummy = Language []
(WinnowConfig 0 0)
(const $ pure ())
(dummy "No valid language specified")
(dummy "No valid language specified")

langC :: Language
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] (WinnowConfig 16 9) (smartRead :: String -> Erring C.Tok) C.lex (dummy "The C tooling does not currently support the requested feature")
langC = Language [".c", ".h", ".cpp", ".cc", ".cxx", ".hpp", ".C", ".H", ".CPP", ".CC", ".CXX", ".CPP"]
(WinnowConfig 16 9)
(smartRead :: String -> Erring C.Tok)
C.lex
(dummy "The C tooling does not currently support the requested feature")

langPython :: Language
langPython = Language [".py"] (WinnowConfig 16 9) (smartRead :: String -> Erring Python.Tok) Python.lex Python.parse
langPython = Language [".py"]
(WinnowConfig 16 9)
(smartRead :: String -> Erring Python.Tok)
Python.lex Python.parse

langJava :: Language
langJava = Language [".java"] (WinnowConfig 16 9) (smartRead :: String -> Erring Java.Tok) Java.lex (dummy "The Java tooling does not currently support the requested feature")
langJava = Language [".java"]
(WinnowConfig 16 9)
(smartRead :: String -> Erring Java.Tok)
Java.lex
(dummy "The Java tooling does not currently support the requested feature")

langText :: Language
langText = Language [".txt", ""]
(WinnowConfig 16 9)
(pure . T.pack)
Text.lex
(dummy "Plain text cannot be parsed, so this feature is unavailable")

languageChoice :: Language -> Maybe String -> Language
languageChoice d Nothing = d
@@ -73,4 +96,9 @@ languageChoice _ (Just "python") = langPython
languageChoice _ (Just "py") = langPython
languageChoice _ (Just "java") = langJava
languageChoice _ (Just "Java") = langJava
languageChoice _ (Just "text") = langText
languageChoice _ (Just "Text") = langText
languageChoice _ (Just "txt") = langText
languageChoice _ (Just "plaintext") = langText
languageChoice _ (Just "plain") = langText
languageChoice _ _ = langDummy
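
With the plain-text language wired into languageChoice, any of the new aliases ("text", "txt", "plaintext", ...) resolves to langText, while unrecognized names still fall through to the supplied default. A minimal sketch of the intended call site (assumed usage, not part of this diff):

-- Resolve a user-supplied language flag, falling back to the given default.
langFromFlag :: Maybe String -> Language
langFromFlag = languageChoice langDummy
-- langFromFlag (Just "txt")  selects langText
-- langFromFlag (Just "lisp") falls back to langDummy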
33 changes: 20 additions & 13 deletions src/Lichen/Lexer.hs
@@ -15,29 +15,36 @@ import Lichen.Error

type Lexer a = FilePath -> BS.ByteString -> Erring [Tagged a]

data TokPos = TokPos
{ startLine :: !Pos
, endLine :: !Pos
, startCol :: !Pos
, endCol :: !Pos
} deriving (Show, Eq, Ord)
data TokPos = TokPos { startLine :: !Pos
, endLine :: !Pos
, startCol :: !Pos
, endCol :: !Pos
} deriving (Show, Eq, Ord)
data Tagged a = Tagged { tdata :: a, tpos :: TokPos } deriving (Show, Eq)
instance Ord a => Ord (Tagged a) where
compare (Tagged x _) (Tagged y _) = compare x y
compare (Tagged x _) (Tagged y _) = compare x y
instance Functor Tagged where
fmap f (Tagged x p) = Tagged (f x) p
instance Show a => ToJSON (Tagged a) where
toJSON (Tagged a p) = object [ "token" .= show a
, "start_line" .= unPos (startLine p)
, "end_line" .= unPos (endLine p)
, "start_col" .= unPos (startCol p)
, "end_col" .= unPos (endCol p)
]
toJSON (Tagged x p) = object [ "token" .= show x
, "start_line" .= unPos (startLine p)
, "end_line" .= unPos (endLine p)
, "start_col" .= unPos (startCol p)
, "end_col" .= unPos (endCol p)
]

wrap :: Foldable t => Parser (t a) -> b -> Parser (Tagged b)
wrap p x = do
pos <- NE.head . statePos <$> getParserState
s <- p
return . Tagged x $ TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos (fromIntegral $ length s))

wrapid :: Foldable t => Parser (t a) -> Parser (Tagged (t a))
wrapid p = do
pos <- NE.head . statePos <$> getParserState
s <- p
return . Tagged s $ TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos (fromIntegral $ length s))

-- Parse a C-style character literal. Ex: 'a', '@'.
charLit :: Parser String
charLit = char '\'' *> manyTill (noneOf ['\'']) (char '\'' <|> (eof >> pure ' '))
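
The new wrapid combinator differs from wrap in that it tags the parser's own result instead of a fixed token value, so the matched text survives into the Tagged value together with its source span. An illustrative rule built on it (a sketch assuming megaparsec's letterChar is in scope; not part of this diff):

-- Tags a run of letters with its position, keeping the text itself.
word :: Parser (Tagged String)
word = wrapid (some letterChar)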
10 changes: 7 additions & 3 deletions src/Lichen/Lexer/C.hs
@@ -30,18 +30,22 @@ data Tok = Auto | Break | Case | Char | Const | Continue | Default | Do
| Comma | Colon | Equal | LeftParen | RightParen | LeftSquare
| RightSquare | Dot | Ampersand | Exclamation | Tilde | Minus | Plus
| Asterisk | Slash | Percent | LessThan | GreaterThan | Caret | Pipe
| Question | Unknown
| Question
| Unknown | Comment
deriving (Show, Read, Eq, Generic)
instance Hashable Tok

sc :: Parser ()
sc = L.space (void spaceChar) (L.skipLineComment "//" <|> L.skipLineComment "#") (L.skipBlockComment "/*" "*/")
sc = void (many spaceChar)

reserved :: String -> Parser String
reserved = try . string

onetoken :: Parser (Tagged Tok)
onetoken = wrap (reserved "auto") Auto
onetoken = wrap (reserved "//" *> manyTill anyChar (char '\r' <|> (head <$> eol))) Comment
<|> wrap (reserved "#" *> manyTill anyChar (char '\r' <|> (head <$> eol))) Comment
<|> wrap (reserved "/*" *> manyTill anyChar (head <$> reserved "*/")) Comment
<|> wrap (reserved "auto") Auto
<|> wrap (reserved "break") Break
<|> wrap (reserved "case") Case
<|> wrap (reserved "char") Char
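
Since sc now consumes only whitespace, comments are no longer discarded silently: the three new rules turn line comments, preprocessor lines, and block comments into Comment tokens, so their positions stay visible to later passes. The top-level lexer presumably interleaves sc and onetoken roughly as below (a sketch only; the actual C.lex is outside this hunk):

-- Skip whitespace between tokens; comments now arrive as Comment tokens.
tokenStream :: Parser [Tagged Tok]
tokenStream = many (sc *> onetoken <* sc)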
23 changes: 23 additions & 0 deletions src/Lichen/Lexer/Text.hs
@@ -0,0 +1,23 @@
{-# LANGUAGE OverloadedStrings #-}

module Lichen.Lexer.Text where

import Control.Applicative
import Control.Monad.Except

import Data.Char
import qualified Data.Text as T

import Text.Megaparsec
import Text.Megaparsec.ByteString

import Lichen.Error
import Lichen.Lexer

sc :: Parser ()
sc = void $ many spaceChar

lex :: Lexer T.Text
lex p d = case runParser (many (sc *> wrapid (some (satisfy (not . isSpace))) <* sc)) p d of
Left e -> throwError $ LexError e
Right t -> return (fmap T.pack <$> t)
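
The new plain-text lexer splits its input on whitespace and tags every run of non-space characters with its position. A usage sketch (file name and input are made up, not part of this diff; the ByteString literal relies on OverloadedStrings):

-- Yields two Tagged T.Text tokens, "two" and "tokens", each carrying its
-- start/end line and column.
sample :: Erring [Tagged T.Text]
sample = Lichen.Lexer.Text.lex "example.txt" "two  tokens\n"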
10 changes: 5 additions & 5 deletions src/Lichen/Plagiarism/Compare.hs
@@ -4,12 +4,12 @@ import Data.List
import qualified Data.Set as Set

import Lichen.Lexer
import Lichen.Plagiarism.Winnow
import Lichen.Plagiarism.Submitty

-- Naively compare two sets of fingerprints to obtain a percent match.
compareFingerprints :: ((Fingerprints, a), (Fingerprints, a)) -> (Double, (Fingerprints, a), (Fingerprints, a))
compareFingerprints :: (([Fingerprint], a), ([Fingerprint], a)) -> (Double, ([Fingerprint], a), ([Fingerprint], a))
compareFingerprints ((al, x), (bl, y)) = (fromIntegral (Set.size is) / fromIntegral (Set.size un), (matching is al, x), (matching is bl, y)) where
matching :: Set.Set Int -> Fingerprints -> Fingerprints
matching :: Set.Set Int -> [Fingerprint] -> [Fingerprint]
matching s = filter (flip Set.member s . tdata)
a = Set.fromList (tdata <$> al)
b = Set.fromList (tdata <$> bl)
@@ -21,9 +21,9 @@ compareFingerprints ((al, x), (bl, y)) = (fromIntegral (Set.size is) / fromInteg
-- another list of tagged past fingerprints, compare each possible pair of
-- fingerprints, returning a list of percent matches associated with the tags
-- of the two fingerprint sets compared.
crossCompare :: [(Fingerprints, a)] -> [(Fingerprints, a)] -> [(Double, (Fingerprints, a), (Fingerprints, a))]
crossCompare :: [([Fingerprint], a)] -> [([Fingerprint], a)] -> [(Double, ([Fingerprint], a), ([Fingerprint], a))]
crossCompare prints past = compareFingerprints <$> (pairs prints ++ oldpairs)
where pairs :: [(Fingerprints, a)] -> [((Fingerprints, a), (Fingerprints, a))]
where pairs :: [([Fingerprint], a)] -> [(([Fingerprint], a), ([Fingerprint], a))]
pairs lst = tails lst >>= subpairs
subpairs [] = []
subpairs (x:xs) = (\y -> (x, y)) <$> xs
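
Going by the names is and un (whose definitions sit in the collapsed lines), the score compareFingerprints returns is presumably intersection-over-union, i.e. the Jaccard index of the two hash sets: shared fingerprint hashes over distinct ones. A stand-alone illustration of that arithmetic with toy numbers (not part of this diff):

import qualified Data.Set as Set

-- Hash sets {1,2,3} and {2,3,4} share 2 of 4 distinct values: score 0.5.
score :: Set.Set Int -> Set.Set Int -> Double
score a b = fromIntegral (Set.size (Set.intersection a b))
          / fromIntegral (Set.size (Set.union a b))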