Skip to content

Commit

Permalink
Merge pull request #25 from Submitty/plagiarism_reports
Browse files Browse the repository at this point in the history
Plagiarism reports and configuration updates
  • Loading branch information
chameco authored Jul 7, 2017
2 parents 5d873f1 + 4f218d1 commit 9b474dc
Show file tree
Hide file tree
Showing 22 changed files with 588 additions and 292 deletions.
6 changes: 5 additions & 1 deletion lichen.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ library
, Lichen.Plagiarism.Walk
, Lichen.Plagiarism.Concatenate
, Lichen.Plagiarism.Highlight
, Lichen.Plagiarism.Report
, Lichen.Plagiarism.AssignmentSettings
, Lichen.Plagiarism.Render
, Lichen.Plagiarism.Render.Index
, Lichen.Plagiarism.Render.Compare
, Lichen.Count.Main
, Lichen.Count.Counters
build-depends: base >= 4.7 && < 5
, containers
, split
Expand All @@ -44,12 +46,14 @@ library
, process
, directory
, filepath
, json
, aeson
, megaparsec
, optparse-applicative
, language-python
, blaze-html
, blaze-markup
, clay
, jmacro
ghc-options: -Wall -Werror -fwarn-incomplete-patterns
default-language: Haskell2010

Expand Down
30 changes: 18 additions & 12 deletions src/Lichen/Config/Count.hs
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings, RecordWildCards #-}

module Lichen.Config.Count where

import Control.Monad.Except
import Data.Maybe
import Data.Aeson

import Lichen.Error
import Lichen.Config
import Lichen.Config.Languages

type Counter = Language -> String -> FilePath -> Erring Integer

counterDummy :: Counter
counterDummy _ _ _ = throwError $ InvocationError "Invalid counting method specified"
import Lichen.Count.Counters

data Config = Config
{ language :: Language
, method :: Counter
{ dataDir :: FilePath
, language :: Language
, counter :: Counter
, toCount :: Maybe String
, sourceFiles :: [FilePath]
}
instance FromJSON Config where
parseJSON = withObject "config_count" $ \o -> do
dataDir <- fromMaybe (dataDir defaultConfig) <$> o .:? "data_dir"
language <- fromMaybe (language defaultConfig) <$> o .:? "language"
counter <- fromMaybe (counter defaultConfig) <$> o .:? "counter"
toCount <- fromMaybe (toCount defaultConfig) <$> o .:? "to_count"
sourceFiles <- fromMaybe (sourceFiles defaultConfig) <$> o .:? "source_files"
return Config{..}

defaultConfig :: Config
defaultConfig = Config { language = langDummy
, method = counterDummy
defaultConfig = Config { dataDir = ".lichen"
, language = langDummy
, counter = counterDummy
, toCount = Nothing
, sourceFiles = []
}
Expand Down
32 changes: 20 additions & 12 deletions src/Lichen/Config/Languages.hs
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
{-# LANGUAGE OverloadedStrings, GADTs #-}
{-# LANGUAGE OverloadedStrings, GADTs, DeriveGeneric, StandaloneDeriving #-}

module Lichen.Config.Languages where

import GHC.Generics

import Data.Hashable
import Data.Aeson
import qualified Data.Text as T

import Control.Monad.Except

import Text.Read (readMaybe)

import Lichen.Error
import Lichen.Lexer
import Lichen.Parser
Expand All @@ -19,7 +25,8 @@ import qualified Lichen.Parser.Python as Python
data WinnowConfig = WinnowConfig
{ signalThreshold :: Int
, noiseThreshold :: Int
}
} deriving Generic
instance FromJSON WinnowConfig

-- Configuration for a given language. Should typically not need to be
-- modified, but can be overwritten in the case of unexpected instructor
Expand All @@ -30,27 +37,28 @@ data Language where
Language :: (Hashable a, Show a) => { exts :: [FilePath]
, lexer :: Lexer a
, winnowConfig :: WinnowConfig
, readToken :: String -> a
, readToken :: String -> Erring a
, parser :: Parser Node
} -> Language
instance FromJSON Language where
parseJSON (String s) = pure $ languageChoice langDummy (Just $ T.unpack s)
parseJSON _ = pure langDummy

dummy :: a -> b -> Erring c
dummy _ _ = throwError $ InvocationError "Specified analysis method is undefined for language"

langDummy :: Language
langDummy = Language [] dummy (WinnowConfig 0 0) (const ()) dummy
smartRead :: Read a => String -> Erring a
smartRead s = case readMaybe s of Just t -> pure t
Nothing -> throwError . InvalidTokenError $ T.pack s

readC :: String -> C.Tok
readC = read
langDummy :: Language
langDummy = Language [] dummy (WinnowConfig 0 0) (const $ pure ()) dummy

langC :: Language
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] C.lex (WinnowConfig 9 5) readC dummy

readPython :: String -> Python.Tok
readPython = read
langC = Language [".c", ".h", ".cpp", ".hpp", ".C", ".H", ".cc"] C.lex (WinnowConfig 9 5) (smartRead :: String -> Erring C.Tok) dummy

langPython :: Language
langPython = Language [".py"] Python.lex (WinnowConfig 9 5) readPython Python.parse
langPython = Language [".py"] Python.lex (WinnowConfig 9 5) (smartRead :: String -> Erring Python.Tok) Python.parse

languageChoice :: Language -> Maybe String -> Language
languageChoice d Nothing = d
Expand Down
20 changes: 20 additions & 0 deletions src/Lichen/Config/Plagiarism.hs
Original file line number Diff line number Diff line change
@@ -1,20 +1,40 @@
{-# LANGUAGE OverloadedStrings, RecordWildCards #-}

module Lichen.Config.Plagiarism where

import Data.Maybe
import Data.Aeson
import qualified Data.Text as T

import Lichen.Config
import Lichen.Config.Languages

data Config = Config
{ dataDir :: FilePath
, concatDir :: FilePath
, highlightDir :: FilePath
, reportDir :: FilePath
, reportTitle :: T.Text
, language :: Language
, sourceDir :: Maybe FilePath
}
instance FromJSON Config where
parseJSON = withObject "config_plagiarism" $ \o -> do
dataDir <- fromMaybe (dataDir defaultConfig) <$> o .:? "data_dir"
concatDir <- fromMaybe (concatDir defaultConfig) <$> o .:? "concat_dir"
highlightDir <- fromMaybe (highlightDir defaultConfig) <$> o .:? "highlight_dir"
reportDir <- fromMaybe (reportDir defaultConfig) <$> o .:? "report_dir"
reportTitle <- fromMaybe (reportTitle defaultConfig) <$> o .:? "report_tkitle"
language <- fromMaybe (language defaultConfig) <$> o .:? "language"
sourceDir <- fromMaybe (sourceDir defaultConfig) <$> o .:? "source_dir"
return Config{..}

defaultConfig :: Config
defaultConfig = Config { dataDir = ".lichen"
, concatDir = "concatenated"
, highlightDir = "highlighted"
, reportDir = "report"
, reportTitle = "Plagiarism Detection"
, language = langDummy
, sourceDir = Nothing
}
Expand Down
49 changes: 49 additions & 0 deletions src/Lichen/Count/Counters.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{-# LANGUAGE OverloadedStrings #-}

module Lichen.Count.Counters where

import Data.Hashable
import Data.Aeson
import qualified Data.Text as T
import qualified Data.ByteString as BS

import Control.Monad.Except

import Lichen.Error
import Lichen.Config.Languages
import Lichen.Lexer
import qualified Lichen.Parser as P

newtype Counter = Counter { runCounter :: Language -> String -> FilePath -> Erring Integer }
instance FromJSON Counter where
parseJSON (String s) = pure $ counterChoice counterDummy (Just $ T.unpack s)
parseJSON _ = pure counterDummy

counterDummy :: Counter
counterDummy = Counter $ \_ _ _ -> throwError $ InvocationError "Invalid counting method specified"

counterToken :: Counter
counterToken = Counter $ \(Language _ l _ readTok _) t p -> do
src <- liftIO $ BS.readFile p
tokens <- l p src
rt <- readTok t
return . fromIntegral . length . filter (hash rt ==) . fmap (hash . tdata) $ tokens

counterNode :: Counter
counterNode = Counter $ \l t p -> do
src <- liftIO $ BS.readFile p
tree <- parser l p src
return $ P.countTag (T.pack t) tree

counterCall :: Counter
counterCall = Counter $ \l t p -> do
src <- liftIO $ BS.readFile p
tree <- parser l p src
return $ P.countCall (T.pack t) tree

counterChoice :: Counter -> Maybe String -> Counter
counterChoice d Nothing = d
counterChoice _ (Just "token") = counterToken
counterChoice _ (Just "node") = counterNode
counterChoice _ (Just "call") = counterCall
counterChoice _ _ = counterDummy
52 changes: 18 additions & 34 deletions src/Lichen/Count/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,62 +3,46 @@
module Lichen.Count.Main where

import System.Directory
import System.FilePath

import Data.Hashable
import Data.Aeson
import Data.Semigroup ((<>))
import qualified Data.Text as T
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BS

import Control.Monad.Reader
import Control.Monad.Except

import Options.Applicative

import Lichen.Util
import Lichen.Error
import Lichen.Config
import Lichen.Config.Languages
import Lichen.Config.Count
import qualified Lichen.Parser as P

countToken :: Language -> String -> FilePath -> Erring Integer
countToken (Language _ l _ readTok _) t p = do
src <- liftIO $ BS.readFile p
tokens <- l p src
return . fromIntegral . length . filter (hash (readTok t) ==) . fmap hash $ tokens

countNode :: Language -> String -> FilePath -> Erring Integer
countNode l t p = do
src <- liftIO $ BS.readFile p
tree <- parser l p src
return $ P.countTag (T.pack t) tree

countCall :: Language -> String -> FilePath -> Erring Integer
countCall l t p = do
src <- liftIO $ BS.readFile p
tree <- parser l p src
return $ P.countCall (T.pack t) tree

dispatchCount :: String -> Language -> String -> FilePath -> Erring Integer
dispatchCount "token" = countToken
dispatchCount "node" = countNode
dispatchCount "call" = countCall
dispatchCount "function" = countCall
dispatchCount _ = counterDummy
import Lichen.Count.Counters

parseOptions :: Config -> Parser Config
parseOptions dc = Config
<$> (languageChoice (language dc) <$> (optional . strOption $ long "language" <> short 'l' <> metavar "LANG" <> help "Language of student code"))
<*> fmap dispatchCount (argument str (metavar "COUNTER"))
<$> strOption (long "data-dir" <> short 'd' <> metavar "DIR" <> showDefault <> value (dataDir dc) <> help "Directory to store internal data")
<*> (languageChoice (language dc) <$> (optional . strOption $ long "language" <> short 'l' <> metavar "LANG" <> help "Language of student code"))
<*> (counterChoice (counter dc) <$> (optional . strOption $ long "counter" <> short 'c' <> metavar "COUNTER" <> help "Counting method"))
<*> optional (argument str (metavar "ELEMENT"))
<*> many (argument str (metavar "SOURCE"))

realMain :: Config -> IO ()
realMain c = do
options <- liftIO $ execParser opts
realMain ic = do
iopts <- liftIO . execParser $ opts ic
mcsrc <- readSafe BS.readFile Nothing (dataDir iopts </> "config_count.json")
options <- case mcsrc of Just csrc -> do
c <- case eitherDecode csrc of Left e -> (printError . JSONDecodingError $ T.pack e) >> pure ic
Right t -> pure t
liftIO . execParser $ opts c
Nothing -> pure iopts
flip runConfigured options $ do
config <- ask
t <- case toCount config of Just t -> return t; Nothing -> throwError $ InvocationError "No countable element specified"
ps <- liftIO . mapM canonicalizePath $ sourceFiles config
counts <- lift $ mapM (method config (language config) t) ps
counts <- lift $ mapM (runCounter (counter config) (language config) t) ps
liftIO . print $ sum counts
where opts = info (helper <*> parseOptions c) (fullDesc <> progDesc "Count occurences of a specific AST node" <> header "lichen-count-node - token counting")
where opts c = info (helper <*> parseOptions c) (fullDesc <> progDesc "Count occurences of a specific AST node" <> header "lichen-count-node - token counting")
2 changes: 2 additions & 0 deletions src/Lichen/Error.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ type Erring = ExceptT LichenError IO

data LichenError = LexError (ParseError (Token BS.ByteString) Dec)
| ParseError T.Text
| InvalidTokenError T.Text
| InvocationError T.Text
| JSONDecodingError T.Text
deriving Show

printError :: LichenError -> IO ()
printError (LexError e) = T.IO.hPutStrLn stderr "Lexer error: " >> putStrLn (parseErrorPretty e)
printError (ParseError t) = T.IO.hPutStrLn stderr ("Parser error: " <> t)
printError (InvalidTokenError t) = T.IO.hPutStrLn stderr ("Invalid token error: " <> t)
printError (InvocationError t) = T.IO.hPutStrLn stderr ("Invocation error: " <> t)
printError (JSONDecodingError t) = T.IO.hPutStrLn stderr ("JSON decoding error: " <> t)
23 changes: 22 additions & 1 deletion src/Lichen/Lexer.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
module Lichen.Lexer where

import Data.Foldable()
import Data.Semigroup ((<>))
import qualified Data.List.NonEmpty as NE
import qualified Data.ByteString as BS

import Text.Megaparsec
Expand All @@ -8,7 +11,25 @@ import qualified Text.Megaparsec.Lexer as L

import Lichen.Error

type Lexer a = FilePath -> BS.ByteString -> Erring [a]
type Lexer a = FilePath -> BS.ByteString -> Erring [Tagged a]

data TokPos = TokPos
{ startLine :: !Pos
, endLine :: !Pos
, startCol :: !Pos
, endCol :: !Pos
} deriving (Show, Eq, Ord)
data Tagged a = Tagged { tdata :: a, tpos :: TokPos } deriving (Show, Eq)
instance Ord a => Ord (Tagged a) where
compare (Tagged x _) (Tagged y _) = compare x y

wrap :: Foldable t => Parser (t a) -> b -> Parser (Tagged b)
wrap p x = do
pos <- NE.head . statePos <$> getParserState
s <- p
--_ <- p
return . Tagged x $ TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos (fromIntegral $ length s))
--return (x, TokPos (sourceLine pos) (sourceLine pos) (sourceColumn pos) (sourceColumn pos <> unsafePos 1))

-- Parse a C-style character literal. Ex: 'a', '@'.
charLit :: Parser Char
Expand Down
Loading

0 comments on commit 9b474dc

Please sign in to comment.