mirror of
https://github.com/IHaskell/IHaskell.git
synced 2025-04-17 11:56:12 +00:00
366 lines
15 KiB
Haskell
366 lines
15 KiB
Haskell
{-# LANGUAGE NoImplicitPrelude, OverloadedStrings #-}
|
|
module IHaskell.Eval.Parser (
|
|
parseString,
|
|
CodeBlock(..),
|
|
StringLoc(..),
|
|
DirectiveType(..),
|
|
LineNumber,
|
|
ColumnNumber,
|
|
ErrMsg,
|
|
splitAtLoc,
|
|
layoutChunks,
|
|
parseDirective,
|
|
getModuleName
|
|
) where
|
|
|
|
-- Hide 'unlines' to use our own 'joinLines' instead.
|
|
import ClassyPrelude hiding (liftIO, unlines)
|
|
|
|
import Data.List (findIndex, maximumBy, maximum, inits)
|
|
import Data.String.Utils (startswith, strip, split)
|
|
import Data.List.Utils (subIndex)
|
|
import Prelude (init, last, head, tail)
|
|
|
|
import Bag
|
|
import ErrUtils hiding (ErrMsg)
|
|
import FastString
|
|
import GHC
|
|
import Lexer
|
|
import OrdList
|
|
import Outputable hiding ((<>))
|
|
import SrcLoc
|
|
import StringBuffer
|
|
|
|
import IHaskell.GHC.HaskellParser
|
|
|
|
-- | A line number in an input string.
|
|
type LineNumber = Int
|
|
|
|
-- | A column number in an input string.
|
|
type ColumnNumber = Int
|
|
|
|
-- | An error message string.
|
|
type ErrMsg = String
|
|
|
|
-- | A location in an input string.
|
|
data StringLoc = Loc LineNumber ColumnNumber deriving (Show, Eq)
|
|
|
|
-- | A block of code to be evaluated.
|
|
-- Each block contains a single element - one declaration, statement,
|
|
-- expression, etc. If parsing of the block failed, the block is instead
|
|
-- a ParseError, which has the error location and error message.
|
|
data CodeBlock
|
|
= Expression String -- ^ A Haskell expression.
|
|
| Declaration String -- ^ A data type or function declaration.
|
|
| Statement String -- ^ A Haskell statement (as if in a `do` block).
|
|
| Import String -- ^ An import statement.
|
|
| TypeSignature String -- ^ A lonely type signature (not above a function declaration).
|
|
| Directive DirectiveType String -- ^ An IHaskell directive.
|
|
| Module String -- ^ A full Haskell module, to be compiled and loaded.
|
|
| ParseError StringLoc ErrMsg -- ^ An error indicating that parsing the code block failed.
|
|
deriving (Show, Eq)
|
|
|
|
-- | Directive types. Each directive is associated with a string in the
|
|
-- directive code block.
|
|
data DirectiveType
|
|
= GetType -- ^ Get the type of an expression via ':type' (or unique prefixes)
|
|
| GetInfo -- ^ Get info about the identifier via ':info' (or unique prefixes)
|
|
| SetExtension -- ^ Enable or disable an extension via ':extension' (or prefixes)
|
|
| HelpForSet -- ^ Provide useful info if people try ':set'.
|
|
| GetHelp -- ^ General help via ':?' or ':help'.
|
|
deriving (Show, Eq)
|
|
|
|
-- | Output from running a parser.
|
|
data ParseOutput a
|
|
= Failure ErrMsg StringLoc -- ^ Parser failed with given error message and location.
|
|
| Success a (String, String) -- ^ Parser succeeded with an output.
|
|
deriving (Eq, Show) -- Auxiliary strings say what part of the
|
|
-- input string was used and what
|
|
-- part is remaining.
|
|
|
|
-- | Parse a string into code blocks.
|
|
parseString :: GhcMonad m => String -> m [CodeBlock]
|
|
parseString codeString = do
|
|
-- Try to parse this as a single module.
|
|
flags <- getSessionDynFlags
|
|
let output = runParser flags fullModule codeString
|
|
case output of
|
|
Success {} -> return [Module codeString]
|
|
Failure {} ->
|
|
-- Split input into chunks based on indentation.
|
|
let chunks = layoutChunks $ dropComments codeString in
|
|
joinFunctions <$> processChunks 1 [] chunks
|
|
where
|
|
parseChunk :: GhcMonad m => String -> LineNumber -> m CodeBlock
|
|
parseChunk chunk line =
|
|
if isDirective chunk
|
|
then return $ parseDirective chunk line
|
|
else parseCodeChunk chunk line
|
|
|
|
processChunks :: GhcMonad m => LineNumber -> [CodeBlock] -> [String] -> m [CodeBlock]
|
|
processChunks line accum remaining =
|
|
case remaining of
|
|
-- If we have no more remaining lines, return the accumulated results.
|
|
[] -> return $ reverse accum
|
|
|
|
-- If we have more remaining, parse the current chunk and recurse.
|
|
chunk:remaining -> do
|
|
block <- parseChunk chunk line
|
|
processChunks (line + nlines chunk) (block : accum) remaining
|
|
|
|
-- Test wither a given chunk is a directive.
|
|
isDirective :: String -> Bool
|
|
isDirective = startswith ":" . strip
|
|
|
|
-- Number of lines in this string.
|
|
nlines :: String -> Int
|
|
nlines = length . lines
|
|
|
|
-- | Parse a single chunk of code, as indicated by the layout of the code.
|
|
parseCodeChunk :: GhcMonad m => String -> LineNumber -> m CodeBlock
|
|
parseCodeChunk code startLine = do
|
|
flags <- getSessionDynFlags
|
|
let
|
|
-- Try each parser in turn.
|
|
rawResults = map (tryParser code) (parsers flags)
|
|
|
|
-- Convert statements into expressions where we can
|
|
results = map (statementToExpression flags) rawResults in
|
|
case successes results of
|
|
-- If none of them succeeded, choose the best error message to
|
|
-- display. Only one of the error messages is actually relevant.
|
|
[] -> return $ bestError $ failures results
|
|
|
|
-- If one of the parsers succeeded
|
|
(result, used, remaining):_ ->
|
|
return $ if not . null . strip $ remaining
|
|
then ParseError (Loc 1 1) $ "Could not parse " ++ code
|
|
else result
|
|
where
|
|
successes :: [ParseOutput a] -> [(a, String, String)]
|
|
successes [] = []
|
|
successes (Success a (used, rem):rest) = (a, used, rem) : successes rest
|
|
successes (_:rest) = successes rest
|
|
|
|
failures :: [ParseOutput a] -> [(ErrMsg, LineNumber, ColumnNumber)]
|
|
failures [] = []
|
|
failures (Failure msg (Loc line col):rest) = (msg, line, col) : failures rest
|
|
failures (_:rest) = failures rest
|
|
|
|
bestError :: [(ErrMsg, LineNumber, ColumnNumber)] -> CodeBlock
|
|
bestError errors = ParseError (Loc line col) msg
|
|
where
|
|
(msg, line, col) = maximumBy compareLoc errors
|
|
compareLoc (_, line1, col1) (_, line2, col2) = compare line1 line2 <> compare col1 col2
|
|
|
|
statementToExpression :: DynFlags -> ParseOutput CodeBlock -> ParseOutput CodeBlock
|
|
statementToExpression flags (Success (Statement stmt) strs) = Success result strs
|
|
where result = if isExpr flags stmt
|
|
then Expression stmt
|
|
else Statement stmt
|
|
statementToExpression _ other = other
|
|
|
|
-- Check whether a string is a valid expression.
|
|
isExpr :: DynFlags -> String -> Bool
|
|
isExpr flags str = case runParser flags fullExpression str of
|
|
Failure {} -> False
|
|
Success {} -> True
|
|
|
|
tryParser :: String -> (String -> CodeBlock, String -> ParseOutput String) -> ParseOutput CodeBlock
|
|
tryParser string (blockType, parser) = case parser string of
|
|
Success res (used, remaining) -> Success (blockType res) (used, remaining)
|
|
Failure err loc -> Failure err loc
|
|
|
|
parsers :: DynFlags -> [(String -> CodeBlock, String -> ParseOutput String)]
|
|
parsers flags =
|
|
[ (Import, unparser toCode partialImport)
|
|
, (TypeSignature, unparser toCode partialTypeSignature)
|
|
, (Declaration, unparser listCode partialDeclaration)
|
|
, (Statement, unparser toCode partialStatement)
|
|
]
|
|
where
|
|
toCode :: Outputable a => a -> String
|
|
toCode = strip . showSDoc flags . ppr
|
|
|
|
listCode :: Outputable a => OrdList a -> String
|
|
listCode = joinLines . map toCode . fromOL
|
|
|
|
unparser :: (a -> String) -> P a -> String -> ParseOutput String
|
|
unparser postprocess parser code =
|
|
case runParser flags parser code of
|
|
Success out strs -> Success (postprocess out) strs
|
|
Failure err loc -> Failure err loc
|
|
|
|
-- | Find consecutive declarations of the same function and join them into
|
|
-- a single declaration. These declarations may also include a type
|
|
-- signature, which is also joined with the subsequent declarations.
|
|
joinFunctions :: [CodeBlock] -> [CodeBlock]
|
|
joinFunctions (Declaration decl : rest) =
|
|
-- Find all declarations having the same name as this one.
|
|
let (decls, other) = havingSameName rest in
|
|
-- Convert them into a single declaration.
|
|
Declaration (joinLines $ map undecl decls) : joinFunctions other
|
|
where
|
|
undecl (Declaration decl) = decl
|
|
undecl _ = error "Expected declaration!"
|
|
|
|
-- Get all declarations with the same name as the first declaration.
|
|
-- The name of a declaration is the first word, which we expect to be
|
|
-- the name of the function.
|
|
havingSameName :: [CodeBlock] -> ([CodeBlock], [CodeBlock])
|
|
havingSameName blocks =
|
|
let name = head $ words decl
|
|
sameName = takeWhile (isNamedDecl name) rest
|
|
others = drop (length sameName) rest in
|
|
(Declaration decl : sameName, others)
|
|
|
|
isNamedDecl :: String -> CodeBlock -> Bool
|
|
isNamedDecl name (Declaration dec) = head (words dec) == name
|
|
isNamedDecl _ _ = False
|
|
|
|
-- Allow a type signature followed by declarations to be joined to the
|
|
-- declarations. Parse the declaration joining separately.
|
|
joinFunctions (TypeSignature sig : Declaration decl : rest) = (Declaration $ sig ++ "\n" ++ joinedDecl):remaining
|
|
where Declaration joinedDecl:remaining = joinFunctions $ Declaration decl : rest
|
|
|
|
joinFunctions (x:xs) = x : joinFunctions xs
|
|
joinFunctions [] = []
|
|
|
|
|
|
-- | Parse a directive of the form :directiveName.
|
|
parseDirective :: String -- ^ Directive string.
|
|
-> Int -- ^ Line number at which the directive appears.
|
|
-> CodeBlock -- ^ Directive code block or a parse error.
|
|
parseDirective (':':directive) line = case find rightDirective directives of
|
|
Just (directiveType, _) -> Directive directiveType arg
|
|
where arg = unwords restLine
|
|
_:restLine = words directive
|
|
Nothing ->
|
|
let directiveStart = case words directive of
|
|
[] -> ""
|
|
first:_ -> first in
|
|
ParseError (Loc line 1) $ "Unknown directive: '" ++ directiveStart ++ "'."
|
|
where
|
|
rightDirective (_, dirname) = case words directive of
|
|
[] -> False
|
|
dir:_ -> dir `elem` tail (inits dirname)
|
|
directives =
|
|
[(GetType, "type")
|
|
,(GetInfo, "info")
|
|
,(SetExtension, "extension")
|
|
,(HelpForSet, "set")
|
|
,(GetHelp, "?")
|
|
,(GetHelp, "help")
|
|
]
|
|
parseDirective _ _ = error "Directive must start with colon!"
|
|
|
|
-- | Run a GHC parser on a string. Return success or failure with
|
|
-- associated information for both.
|
|
runParser :: DynFlags -> P a -> String -> ParseOutput a
|
|
runParser flags parser str =
|
|
-- Create an initial parser state.
|
|
let filename = "<interactive>"
|
|
location = mkRealSrcLoc (mkFastString filename) 1 1
|
|
buffer = stringToStringBuffer str
|
|
parseState = mkPState flags buffer location in
|
|
-- Convert a GHC parser output into our own.
|
|
toParseOut $ unP parser parseState
|
|
where
|
|
toParseOut :: ParseResult a -> ParseOutput a
|
|
toParseOut (PFailed span@(RealSrcSpan realSpan) err) =
|
|
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
|
|
line = srcLocLine $ realSrcSpanStart realSpan
|
|
col = srcLocCol $ realSrcSpanStart realSpan
|
|
in Failure errMsg $ Loc line col
|
|
|
|
toParseOut (PFailed span err) =
|
|
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
|
|
in Failure errMsg $ Loc 0 0
|
|
|
|
toParseOut (POk parseState result) =
|
|
let parseEnd = realSrcSpanStart $ last_loc parseState
|
|
endLine = srcLocLine parseEnd
|
|
endCol = srcLocCol parseEnd
|
|
(before, after) = splitAtLoc endLine endCol str in
|
|
Success result (before, after)
|
|
|
|
-- Convert the bag of errors into an error string.
|
|
printErrorBag bag = joinLines . map show $ bagToList bag
|
|
|
|
-- | Split a string at a given line and column. The column is included in
|
|
-- the second part of the split.
|
|
splitAtLoc :: LineNumber -> ColumnNumber -> String -> (String, String)
|
|
splitAtLoc line col string =
|
|
if line > length (lines string)
|
|
then (string, "")
|
|
else (before, after)
|
|
where
|
|
(beforeLines, afterLines) = splitAt line $ lines string
|
|
theLine = last beforeLines
|
|
(beforeChars, afterChars) = splitAt (col - 1) theLine
|
|
|
|
before = joinLines (init beforeLines) ++ '\n' : beforeChars
|
|
after = joinLines $ afterChars : afterLines
|
|
|
|
-- | Split an input string into chunks based on indentation.
|
|
-- A chunk is a line and all lines immediately following that are indented
|
|
-- beyond the indentation of the first line. This parses Haskell layout
|
|
-- rules properly, and allows using multiline expressions via indentation.
|
|
layoutChunks :: String -> [String]
|
|
layoutChunks string = layoutLines (lines string)
|
|
where
|
|
layoutLines :: [String] -> [String]
|
|
-- Empty string case. If there's no input, output is empty.
|
|
layoutLines [] = []
|
|
|
|
-- Use the indent of the first line to find the end of the first block.
|
|
layoutLines (firstLine:rest) =
|
|
let firstIndent = indentLevel firstLine
|
|
blockEnded line = indentLevel line <= firstIndent in
|
|
case findIndex blockEnded rest of
|
|
-- If the first block doesn't end, return the whole string, since
|
|
-- that just means the block takes up the entire string.
|
|
Nothing -> [string]
|
|
|
|
-- We found the end of the block. Split this bit out and recurse.
|
|
Just idx ->
|
|
joinLines (firstLine:take idx rest) : layoutChunks (joinLines $ drop idx rest)
|
|
|
|
-- Compute indent level of a string as number of leading spaces.
|
|
indentLevel :: String -> Int
|
|
indentLevel (' ':str) = 1 + indentLevel str
|
|
indentLevel _ = 0
|
|
|
|
-- Not the same as 'unlines', due to trailing \n
|
|
joinLines :: [String] -> String
|
|
joinLines = intercalate "\n"
|
|
|
|
-- | Drop comments from Haskell source.
|
|
dropComments :: String -> String
|
|
dropComments = removeOneLineComments . removeMultilineComments
|
|
where
|
|
removeOneLineComments ('-':'-':remaining) = removeOneLineComments (dropWhile (/= '\n') remaining)
|
|
removeOneLineComments (x:xs) = x:removeOneLineComments xs
|
|
removeOneLineComments x = x
|
|
|
|
removeMultilineComments ('{':'-':remaining) =
|
|
case subIndex "-}" remaining of
|
|
Nothing -> ""
|
|
Just idx -> removeMultilineComments $ drop (2 + idx) remaining
|
|
removeMultilineComments (x:xs) = x:removeMultilineComments xs
|
|
removeMultilineComments x = x
|
|
|
|
-- | Parse a module and return the name declared in the 'module X where'
|
|
-- line. That line is required, and if it does not exist, this will error.
|
|
-- Names with periods in them are returned piece y piece.
|
|
getModuleName :: GhcMonad m => String -> m [String]
|
|
getModuleName moduleSrc = do
|
|
flags <- getSessionDynFlags
|
|
let output = runParser flags fullModule moduleSrc
|
|
case output of
|
|
Failure {} -> error "Module parsing failed."
|
|
Success mod _ ->
|
|
case unLoc <$> hsmodName (unLoc mod) of
|
|
Nothing -> error "Module must have a name."
|
|
Just name -> return $ split "." $ moduleNameString name
|