blob: 02eeefd4b7a86d0f825bbbd2004626befea6be9a [file] [log] [blame] [edit]
-- |
-- Module : Minigent.Syntax.Parser
-- Copyright : (c) Data61 2018-2019
-- Commonwealth Science and Research Organisation (CSIRO)
-- ABN 41 687 119 230
-- License : BSD3
--
-- This parser uses the @Earley@ library to make it based on a nice declarative grammar
-- specification without too much noise.
--
-- The algorithm is cubic in the worst case but linear normally. We test for and avoid ambiguity,
-- even though the Earley algorithm can handle ambiguous grammars, so that the parser can have
-- good errors and fast performance.
--
-- It can be imported unqualified or qualified, as there are very few exports.
{-# LANGUAGE RecursiveDo #-}
module Minigent.Syntax.Parser where
import Text.Earley
import Control.Applicative
import Data.Maybe (isJust)
import qualified Minigent.Syntax.Lexer as L
import Minigent.Syntax
import Minigent.Syntax.Utils
import qualified Minigent.Syntax.Utils.Row as Row
-- | The grammar for multiple top-level declarations.
toplevels :: Grammar r (Prod r String L.Token [RawTopLevel])
toplevels = mdo
tl <- toplevel
tls <- rule $ many tl
return tls
-- | The grammar for a single top-level declarations.
toplevel :: Grammar r (Prod r String L.Token RawTopLevel)
toplevel = mdo
let fromLowerIdent (L.LowerIdent n) = Just n
fromLowerIdent _ = Nothing
fromNumeric (L.Numeric n) = Just n
fromNumeric _ = Nothing
reservedTypeNames = ["U8","U16","U32", "U64", "Bool","Unit"]
reservedValueNames = ["Unit","True","False"]
fromUpperType (L.UpperIdent n) | n `notElem` reservedTypeNames
= Just n
fromUpperType _ = Nothing
fromUpperValue (L.UpperIdent n) | n `notElem` reservedValueNames
= Just n
fromUpperValue _ = Nothing
primTy <- rule $ U8 <$ token (L.UpperIdent "U8")
<|> U16 <$ token (L.UpperIdent "U16")
<|> U32 <$ token (L.UpperIdent "U32")
<|> U64 <$ token (L.UpperIdent "U64")
<|> Bool <$ token (L.UpperIdent "Bool")
<|> Unit <$ token (L.UpperIdent "Unit")
<?> "primitive type"
typeVar <- rule $ terminal fromLowerIdent <?> "type variable"
var <- rule $ terminal fromLowerIdent <?> "variable"
fieldName <- rule $ terminal fromLowerIdent <?> "field name"
absTypeName <- rule $ terminal fromUpperType <?> "abstract type"
conName <- rule $ terminal fromUpperValue <?> "Constructor"
takenTag <- rule $ isJust <$> optional (token (L.Keyword L.Take))
<?> "optional taken tag"
fieldEntry <- rule $ Entry <$> fieldName <* token L.Colon <*> ty <*> takenTag
<?> "field entry"
fieldEntries <- rule $ (:) <$> fieldEntry <* token L.Comma <*> fieldEntries
<|> (:[]) <$> fieldEntry
<?> "field entries"
conEntry <- rule $ Entry <$> conName <*> ty <*> takenTag
<?> "constructor entry"
conEntries <- rule $ (:) <$> conEntry <* token L.Bar <*> conEntries
<|> (:[]) <$> conEntry
<?> "constructor entries"
sigil <- rule $ Unboxed <$ token L.Hash
<|> ReadOnly <$ token L.Bang
<|> pure Writable
<?> "sigil"
atomicTy <- rule $ PrimType <$> primTy
<|> TypeVar <$> typeVar
<|> TypeVarBang <$> typeVar <* token (L.Bang)
<|> Record <$> recTy
<* token (L.Open L.Brace)
<*> (Row.fromList <$> fieldEntries)
<* token (L.Close L.Brace)
<*> sigil
<|> Variant <$ token (L.Operator Less)
<*> (Row.fromList <$> conEntries )
<* token (L.Operator Greater)
<|> AbsType <$> absTypeName
<*> sigil
<*> pure []
<|> id <$ token (L.Open L.Paren)
<*> ty
<* token (L.Close L.Paren)
ty' <- rule $ AbsType <$> absTypeName
<*> sigil
<*> some atomicTy
<|> atomicTy
ty <- rule $ Function <$> ty' <* token (L.Arrow) <*> ty
<|> ty'
bool <- rule $ True <$ token (L.UpperIdent "True")
<|> False <$ token (L.UpperIdent "False")
<?> "boolean"
number <- rule $ terminal fromNumeric <?> "number"
literal <- rule $ BoolV <$> bool
<|> IntV <$> number
<|> UnitV <$ token (L.UpperIdent "Unit")
<?> "literal"
types <- rule $ (:) <$> ty <* token (L.Comma) <*> types
<|> (:[]) <$> ty
<|> pure []
fieldInitialiser <- rule $ (,) <$> fieldName <* token (L.Equals) <*> exp
<?> "field initialiser"
fieldInitialisers <- rule $ (:) <$> fieldInitialiser <* token (L.Comma) <*> fieldInitialisers
<|> (:[]) <$> fieldInitialiser
atomExp <- rule $ Literal <$> literal
<|> Var <$> var
<|> TypeApp <$> var
<* token (L.Open L.Square)
<*> types
<* token (L.Close L.Square)
<|> If <$ token (L.Keyword L.If)
<*> exp
<* token (L.Keyword L.Then)
<*> exp
<* token (L.Keyword L.Else)
<*> exp
<* token (L.Keyword L.End)
<|> Let <$ token (L.Keyword L.Let)
<*> var
<* token (L.Equals)
<*> exp
<* token (L.Keyword L.In)
<*> exp
<* token (L.Keyword L.End)
<|> LetBang <$ token (L.Keyword L.Let)
<* token (L.Bang)
<* token (L.Open L.Paren)
<*> many var
<* token (L.Close L.Paren)
<*> var
<* token (L.Equals)
<*> exp
<* token (L.Keyword L.In)
<*> exp
<* token (L.Keyword L.End)
<|> Struct <$ token (L.Open L.Brace)
<*> fieldInitialisers
<* token (L.Close L.Brace)
<|> Case <$ token (L.Keyword L.Case)
<*> exp
<* token (L.Keyword L.Of)
<*> conName
<*> var
<* token (L.Arrow)
<*> exp
<* token (L.Bar)
<*> var
<* token (L.Arrow)
<*> exp
<* token (L.Keyword L.End)
<|> Esac <$ token (L.Keyword L.Case)
<*> exp
<* token (L.Keyword L.Of)
<*> conName
<*> var
<* token (L.Arrow)
<*> exp
<* token (L.Keyword L.End)
<|> Take <$ token (L.Keyword L.Take)
<*> var
<* token (L.Open L.Brace)
<*> fieldName
<* token (L.Equals)
<*> var
<* token (L.Close L.Brace)
<* token (L.Equals)
<*> exp
<* token (L.Keyword L.In)
<*> exp
<* token (L.Keyword L.End)
<|> Put <$ token (L.Keyword L.Put)
<*> exp
<* token (L.Dot)
<*> fieldName
<* token (L.Colon)
<* token (L.Equals)
<*> exp
<* token (L.Keyword L.End)
<|> Member <$> atomExp <* token (L.Dot) <*> fieldName
<|> id <$ token (L.Open L.Paren)
<*> exp
<* token (L.Close L.Paren)
<?> "expression (atomic)"
appExp <- rule $ Apply <$> appExp <*> atomExp
<|> Con <$> conName <*> atomExp
<|> PrimOp Not . pure <$ token (L.Operator Not) <*> atomExp
<|> atomExp
<?> "expression (appExp)"
let binOp e1 o e2 = PrimOp o [e1, e2]
fromOp set (L.Operator o) | o `elem` set = Just o
fromOp _ _ = Nothing
prodExp <- rule $ binOp <$> prodExp <*> terminal (fromOp prodOps) <*> appExp
<|> appExp
<?> "expression (prodExp)"
sumExp <- rule $ binOp <$> sumExp <*> terminal (fromOp sumOps) <*> prodExp
<|> prodExp
<?> "expression (sumExp)"
compExp <- rule $ binOp <$> sumExp <*> terminal (fromOp compOps) <*> sumExp
<|> sumExp
<?> "expression (compExp)"
boolExp <- rule $ binOp <$> boolExp <*> terminal (fromOp boolOps) <*> compExp
<|> compExp
<?> "expression (boolExp)"
exp <- rule $ Sig <$> exp <* token (L.Colon) <*> ty
<|> boolExp <?> "expression"
typeVars <- rule $ (:) <$> typeVar <* token (L.Comma) <*> typeVars
<|> (:[]) <$> typeVar
<|> pure []
-- rec t
recTy <- rule $ Rec <$> (token (L.Keyword L.Rec) *> typeVar)
-- recursive parameter ommitted
<|> pure None
<?> "recursive type"
constraint <- rule $ Share . TypeVar <$ token (L.UpperIdent "Share") <*> typeVar
<|> Drop . TypeVar <$ token (L.UpperIdent "Drop") <*> typeVar
<|> Escape . TypeVar <$ token (L.UpperIdent "Escape") <*> typeVar
constraints <- rule $ (:) <$> constraint <* token (L.Comma) <*> constraints
<|> (:[]) <$> constraint
<|> pure []
polyType <- rule $ Forall <$ token (L.Open L.Square)
<*> typeVars
<* token (L.Close L.Square)
<*> constraints
<* token (L.Dot)
<*> ty
<|> Forall [] [] <$> ty
topLevel <- rule $ TypeSig <$> var
<* token (L.Colon)
<*> polyType
<* token (L.Semi)
<|> Equation <$> var
<*> var
<* token (L.Equals)
<*> exp
<* token (L.Semi)
return topLevel