pi/src/Parser/Parse.idr

472 lines
12 KiB
Idris
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

module Parser.Parse
import Text.Lexer
import Text.Parser
import Data.Vect
import Data.List
import Data.List1
import Data.Maybe
-- eventually start using elaborator representation here, or just a parser
-- exclusive one
import Core.Term
import Core.Misc
%default total
data PiTokenKind
= PTLambda
| PTIdentifier
| PTDot
| PTArrow
| PTPi
| PTSigma
| PTComma
| PTLParen
| PTRParen
| PTColon
| PTLet
| PTDefEq
| PTIn
| PTIgnore
| PTType
| PTTop
| PTStar
| PTTopInd
| PTBot
| PTBotInd
| PTNat
| PTNum
| PTSuc
| PTNatInd
| PTSigInd
| PTId
| PTRefl
| PTJ
Eq PiTokenKind where
(==) PTLambda PTLambda = True
(==) PTIdentifier PTIdentifier = True
(==) PTDot PTDot = True
(==) PTArrow PTArrow = True
(==) PTPi PTPi = True
(==) PTSigma PTSigma = True
(==) PTComma PTComma = True
(==) PTLParen PTLParen = True
(==) PTRParen PTRParen = True
(==) PTColon PTColon = True
(==) PTLet PTLet = True
(==) PTDefEq PTDefEq = True
(==) PTIn PTIn = True
(==) PTIgnore PTIgnore = True
(==) PTType PTType = True
(==) PTTop PTTop = True
(==) PTStar PTStar = True
(==) PTTopInd PTTopInd = True
(==) PTBot PTBot = True
(==) PTBotInd PTBotInd = True
(==) PTNat PTNat = True
(==) PTNum PTNum = True
(==) PTSuc PTSuc = True
(==) PTNatInd PTNatInd = True
(==) PTSigInd PTSigInd = True
(==) PTId PTId = True
(==) PTRefl PTRefl = True
(==) PTJ PTJ = True
(==) _ _ = False
Show PiTokenKind where
show PTLambda = "PTLambda"
show PTIdentifier = "PTIdentifier"
show PTDot = "PTDot"
show PTArrow = "PTArrow"
show PTPi = "PTPi"
show PTSigma = "PTSigma"
show PTComma = "PTComma"
show PTLParen = "PTLParen"
show PTRParen = "PTRParen"
show PTColon = "PTColon"
show PTLet = "PTLet"
show PTDefEq = "PTDefEq"
show PTIn = "PTIn"
show PTIgnore = "PTIgnore"
show PTType = "PTType"
show PTTop = "PTTop"
show PTStar = "PTSTar"
show PTTopInd = "PTTopInd"
show PTBot = "PTBot"
show PTBotInd = "PTBotInd"
show PTNat = "PTNat"
show PTNum = "PTNum "
show PTSuc = "PTSuc"
show PTNatInd = "PTNatInd"
show PTSigInd = "PTSigInd"
show PTId = "PTId"
show PTRefl = "PTRefl"
show PTJ = "PTJ"
PiToken : Type
PiToken = Token PiTokenKind
Show PiToken where
show (Tok kind text) = "Tok kind: " ++ show kind ++ " text: " ++ text
TokenKind PiTokenKind where
TokType PTIdentifier = String
TokType PTNum = Nat
TokType _ = ()
tokValue PTLambda _ = ()
tokValue PTIdentifier s = s
tokValue PTDot _ = ()
tokValue PTArrow _ = ()
tokValue PTPi _ = ()
tokValue PTSigma _ = ()
tokValue PTComma _ = ()
tokValue PTLParen _ = ()
tokValue PTRParen _ = ()
tokValue PTColon _ = ()
tokValue PTLet _ = ()
tokValue PTDefEq _ = ()
tokValue PTIn _ = ()
tokValue PTIgnore _ = ()
tokValue PTType _ = ()
tokValue PTTop _ = ()
tokValue PTStar _ = ()
tokValue PTTopInd _ = ()
tokValue PTBot _ = ()
tokValue PTBotInd _ = ()
tokValue PTNat _ = ()
tokValue PTNum s = cast s
tokValue PTSuc _ = ()
tokValue PTNatInd _ = ()
tokValue PTSigInd _ = ()
tokValue PTId _ = ()
tokValue PTRefl _ = ()
tokValue PTJ _ = ()
ignored : WithBounds PiToken -> Bool
ignored (MkBounded (Tok PTIgnore _) _ _) = True
ignored _ = False
identifier : Lexer
identifier = some (alphaNum <|> oneOf "-_")
keywords : List (String, PiTokenKind)
keywords = [
("in", PTIn),
("let", PTLet),
("suc", PTSuc),
("Type", PTType),
("J", PTJ),
("Id", PTId),
("refl", PTRefl)
]
tokenmap : List (Lexer, PiTokenKind)
tokenmap = [
(exact "-ind", PTTopInd),
(exact "⊥-ind", PTBotInd),
(exact "-ind", PTNatInd),
(exact "Σ-ind", PTSigInd),
(is 'λ', PTLambda),
(is '.', PTDot),
(is '', PTArrow),
(is 'Π', PTPi),
(is '', PTPi),
(is 'Σ', PTSigma),
(is '', PTSigma),
(is ',', PTComma),
(is '(', PTLParen),
(is ')', PTRParen),
(is ':', PTColon),
(is '', PTDefEq),
(is '', PTTop),
(is '', PTBot),
(is '', PTNat),
(is '', PTStar),
(digits, PTNum)
]
piTokenMap : TokenMap PiToken
piTokenMap =
(spaces, Tok PTIgnore) ::
(lineComment (exact "--"), Tok PTIgnore) ::
(blockComment (exact "{-") (exact "-}"), Tok PTIgnore) ::
toTokenMap tokenmap ++
[(identifier, \s =>
case lookup s keywords of
(Just kind) => Tok kind s
Nothing => Tok PTIdentifier s
)
]
lexPi : String -> Maybe (List (WithBounds PiToken))
lexPi str =
case lex piTokenMap str of
(tokens, _, _, "") => Just tokens
_ => Nothing
{- Basic idea, parsing has a list of the identifiers to convert to -}
{- de bruijn indecies, and a Nat to keep track of context size -}
mutual
expr : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
expr n env = tpi n env
<|> tsigma n env
<|> tarr n env
<|> (do
e <- expr1 n env
tapp n env e <|> pure e)
expr1 : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
expr1 n env = ttopind n env
<|> tbotind n env
<|> tsuc n env
<|> tnatind n env
<|> tsigind n env
<|> tid n env
<|> trefl n env
<|> tj n env
<|> (do
t <- term n env
tapp n env t <|> pure t)
term : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
term n env = ttype
<|> ttop
<|> tstar
<|> tbot
<|> tnat
<|> tnum
<|> tpair n env
<|> tlet n env
<|> tlam n env
<|> tvar n env
<|> paren n env
ttype : Grammar () PiToken True (Term n)
ttype = match PTType >> pure TType
ttop : Grammar () PiToken True (Term n)
ttop = match PTTop >> pure TTop
tstar : Grammar () PiToken True (Term n)
tstar = match PTStar >> pure TStar
ttopind : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
ttopind n env = do
match PTTopInd
commit
c <- term n env
st <- term n env
pure (TTopInd c st)
tbot : Grammar () PiToken True (Term n)
tbot = match PTBot >> pure TBot
tbotind : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tbotind n env = do
match PTBotInd
commit
c <- term n env
pure (TBotInd c)
tnat : Grammar () PiToken True (Term n)
tnat = match PTNat >> pure TNat
tnum : Grammar () PiToken True (Term n)
tnum = do
n <- match PTNum
pure (conv n)
where
conv : Nat -> Term n
conv 0 = TZero
conv (S n) = TSuc (conv n)
tsuc : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tsuc n env = do
match PTSuc
commit
TSuc <$> term n env
tnatind : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tnatind n env = do
match PTNatInd
commit
c <- term n env
z <- term n env
s <- term n env
pure (TNatInd c z s)
tsigma : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tsigma n env = do
match PTSigma
commit
match PTLParen
arg <- match PTIdentifier
match PTColon
a <- expr n env
match PTRParen
b <- expr (S n) (arg :: env)
pure (TSigma a (TLam b))
tpair : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tpair n env = do
match PTLParen
x <- expr n env
match PTComma
commit
y <- expr n env
match PTRParen
pure (TPair x y)
tsigind : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tsigind n env = do
match PTSigInd
commit
a <- term n env
b <- term n env
c <- term n env
f <- term n env
pure (TSigInd a b c f)
tid : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tid n env = do
match PTId
commit
ty <- term n env
a <- term n env
b <- term n env
pure (TId ty a b)
trefl : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
trefl n env = do
match PTRefl
commit
ty <- term n env
tr <- term n env
pure (TRefl ty tr)
tj : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tj n env = do
match PTJ
commit
ty <- term n env
a <- term n env
b <- term n env
c <- term n env
d <- term n env
pure (TJ ty a b c d)
tlet : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tlet n env = do
match PTLet
commit
arg <- match PTIdentifier
match PTColon
ty <- expr n env
match PTDefEq
tr <- expr n env
match PTIn
tri <- expr (S n) (arg :: env)
pure (TLet ty tr tri)
tlam : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tlam n env = do
match PTLambda
commit
arg <- match PTIdentifier
match PTDot
e <- expr (S n) (arg :: env)
pure (TLam e)
tpi : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tpi n env = do
match PTPi
commit
match PTLParen
arg <- match PTIdentifier
match PTColon
a <- expr n env
match PTRParen
b <- expr (S n) (arg :: env)
pure (TPi a b)
tarr : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tarr n env = do
l <- expr1 n env
match PTArrow
commit
r <- expr (S n) ("" :: env)
pure (TPi l r)
tapp : (n : Nat) -> Vect n String -> Term n -> Grammar () PiToken True (Term n)
tapp n env e1 = do
e2 <- term n env
tapp1 n env (TApp e1 e2)
tapp1 : (n : Nat) -> Vect n String -> Term n -> Grammar () PiToken False (Term n)
tapp1 n env e = tapp n env e <|> pure e
tvar : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
tvar n env = do
str <- match PTIdentifier
commit
fromMaybe (fail ("'" ++ str ++ "' not in env")) (pure . TVar <$> findIndex (== str) env)
paren : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
paren n env = do
match PTLParen
commit
e <- expr n env
match PTRParen
pure e
definitions : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n)
definitions n env = do
match PTLet
commit
arg <- match PTIdentifier
match PTColon
ty <- expr n env
match PTDefEq
tr <- expr n env
next <- definitions (S n) (arg :: env) <|> pure TStar
pure (TLet ty tr next)
toplevel : Grammar () PiToken True (Term 0)
toplevel = definitions 0 []
parsePi : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n) -> List (WithBounds PiToken) -> Either String (Term n)
parsePi n env parseEntry toks =
case parse parseEntry $ filter (not . ignored) toks of
Right (l, []) => Right l
Right (_, l) => Left ("contains tokens that were not consumed: " ++ show l)
Left e => (Left . show . map getErr) e
where
getErr : ParsingError tok -> String
getErr (Error "Unrecognised input" _) = ""
getErr (Error s _) = s
public export
parse : (n : Nat) -> Vect n String -> Grammar () PiToken True (Term n) -> String -> Either String (Term n)
parse n env parseEntry x =
case lexPi x of
Just toks => parsePi n env parseEntry toks
Nothing => Left "Failed to lex."
public export
parse0 : String -> Either String (Term 0)
parse0 = parse 0 [] (expr 0 [])
public export
parseEnv : (n : Nat) -> Vect n String -> String -> Either String (Term n)
parseEnv n env = parse n env (expr n env)
public export
parsetoplevel : String -> Either String (Term 0)
parsetoplevel = parse 0 [] (toplevel)