pi/src/Parser/Parse.idr

367 lines
9.2 KiB
Idris
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

module Parser.Parse
import Text.Lexer
import Text.Parser
import Data.Vect
import Data.List
import Data.List1
import Data.Maybe
-- eventually start using elaborator representation here, or just a parser
-- exclusive one
import Core.Term
import Core.Misc
%default total
data PiTokenKind
= PTLambda
| PTIdentifier
| PTDot
| PTArrow
| PTPi
| PTSigma
| PTComma
| PTLParen
| PTRParen
| PTColon
| PTLet
| PTDefEq
| PTIn
| PTIgnore
| PTType
| PTTop
| PTStar
| PTTopInd
| PTBot
| PTBotInd
| PTNat
| PTZero
| PTSuc
| PTNatInd
| PTSigInd
Eq PiTokenKind where
(==) PTLambda PTLambda = True
(==) PTIdentifier PTIdentifier = True
(==) PTDot PTDot = True
(==) PTArrow PTArrow = True
(==) PTPi PTPi = True
(==) PTSigma PTSigma = True
(==) PTComma PTComma = True
(==) PTLParen PTLParen = True
(==) PTRParen PTRParen = True
(==) PTColon PTColon = True
(==) PTLet PTLet = True
(==) PTDefEq PTDefEq = True
(==) PTIn PTIn = True
(==) PTIgnore PTIgnore = True
(==) PTType PTType = True
(==) PTTop PTTop = True
(==) PTStar PTStar = True
(==) PTTopInd PTTopInd = True
(==) PTBot PTBot = True
(==) PTBotInd PTBotInd = True
(==) PTNat PTNat = True
(==) PTZero PTZero = True
(==) PTSuc PTSuc = True
(==) PTNatInd PTNatInd = True
(==) PTSigInd PTSigInd = True
(==) _ _ = False
Show PiTokenKind where
show PTLambda = "PTLambda"
show PTIdentifier = "PTIdentifier"
show PTDot = "PTDot"
show PTArrow = "PTArrow"
show PTPi = "PTPi"
show PTSigma = "PTSigma"
show PTComma = "PTComma"
show PTLParen = "PTLParen"
show PTRParen = "PTRParen"
show PTColon = "PTColon"
show PTLet = "PTLet"
show PTDefEq = "PTDefEq"
show PTIn = "PTIn"
show PTIgnore = "PTIgnore"
show PTType = "PTType"
show PTTop = "PTTop"
show PTStar = "PTSTar"
show PTTopInd = "PTTopInd"
show PTBot = "PTBot"
show PTBotInd = "PTBotInd"
show PTNat = "PTNat"
show PTZero = "PTZero"
show PTSuc = "PTSuc"
show PTNatInd = "PTNatInd"
show PTSigInd = "PTSigInd"
PiToken : Type
PiToken = Token PiTokenKind
Show PiToken where
show (Tok kind text) = "Tok kind: " ++ show kind ++ " text: " ++ text
TokenKind PiTokenKind where
TokType PTIdentifier = String
TokType _ = ()
tokValue PTLambda _ = ()
tokValue PTIdentifier s = s
tokValue PTDot _ = ()
tokValue PTArrow _ = ()
tokValue PTPi _ = ()
tokValue PTSigma _ = ()
tokValue PTComma _ = ()
tokValue PTLParen _ = ()
tokValue PTRParen _ = ()
tokValue PTColon _ = ()
tokValue PTLet _ = ()
tokValue PTDefEq _ = ()
tokValue PTIn _ = ()
tokValue PTIgnore _ = ()
tokValue PTType _ = ()
tokValue PTTop _ = ()
tokValue PTStar _ = ()
tokValue PTTopInd _ = ()
tokValue PTBot _ = ()
tokValue PTBotInd _ = ()
tokValue PTNat _ = ()
tokValue PTZero _ = ()
tokValue PTSuc _ = ()
tokValue PTNatInd _ = ()
tokValue PTSigInd _ = ()
ignored : WithBounds PiToken -> Bool
ignored (MkBounded (Tok PTIgnore _) _ _) = True
ignored _ = False
identifier : Lexer
identifier = some (alphaNum <|> symbol)
keywords : List (String, PiTokenKind)
keywords = [
("in", PTIn),
("let", PTLet),
("Type", PTType),
("-ind", PTTopInd),
("⊥-ind", PTBotInd),
("-ind", PTNatInd),
("Σ-ind", PTSigInd),
("λ", PTLambda),
(".", PTDot),
("", PTArrow),
("Π", PTPi),
("Σ", PTSigma),
(",", PTComma),
("(", PTLParen),
(")", PTRParen),
(":", PTColon),
("", PTDefEq),
("", PTTop),
("", PTBot),
("", PTNat),
("", PTStar)
]
piTokenMap : TokenMap PiToken
piTokenMap = (spaces, Tok PTIgnore) ::
[(identifier, \s =>
case lookup s keywords of
(Just kind) => Tok kind s
Nothing => Tok PTIdentifier s
)
]
lexPi : String -> Maybe (List (WithBounds PiToken))
lexPi str =
case lex piTokenMap str of
(tokens, _, _, "") => Just tokens
_ => Nothing
{- Basic idea, parsing has a list of the identifiers to convert to -}
{- de bruijn indecies, and a Nat to keep track of context size -}
mutual
expr : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
expr n env = ttopind n env
<|> tbotind n env
<|> tsuc n env
<|> tnatind n env
<|> tsigind n env
<|> (do
t <- term n env
tapp n env t <|> pure t)
term : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
term n env = ttype
<|> ttop
<|> tstar
<|> tbot
<|> tnat
<|> tzero
<|> tsigma n env
<|> tpair n env
<|> tlet n env
<|> tlam n env
<|> tpi n env
<|> tvar n env
<|> paren n env
ttype : Grammar state PiToken True (Term n)
ttype = match PTType >> pure TType
ttop : Grammar state PiToken True (Term n)
ttop = match PTTop >> pure TTop
tstar : Grammar state PiToken True (Term n)
tstar = match PTStar >> pure TStar
ttopind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
ttopind n env = do
match PTTopInd
commit
c <- term n env
st <- term n env
pure (TTopInd c st)
tbot : Grammar state PiToken True (Term n)
tbot = match PTBot >> pure TBot
tbotind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tbotind n env = do
match PTBotInd
commit
c <- term n env
pure (TBotInd c)
tnat : Grammar state PiToken True (Term n)
tnat = match PTNat >> pure TNat
tzero : Grammar state PiToken True (Term n)
tzero = match PTZero >> pure TZero
tsuc : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tsuc n env = do
match PTSuc
commit
m <- term n env
pure (TSuc m)
tnatind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tnatind n env = do
match PTNatInd
commit
c <- term n env
z <- term n env
s <- term n env
pure (TNatInd c z s)
tsigma : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tsigma n env = do
match PTSigma
commit
match PTLParen
arg <- match PTIdentifier
match PTColon
a <- term n env
match PTRParen
b <- term (S n) (arg :: env)
pure (TSigma a (TLam b))
tpair : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tpair n env = do
match PTLParen
commit
x <- expr n env
match PTComma
y <- expr n env
match PTRParen
pure (TPair x y)
tsigind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tsigind n env = do
match PTSigInd
commit
a <- term n env
b <- term n env
c <- term n env
f <- term n env
pure (TSigInd a b c f)
tlet : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tlet n env = do
match PTLet
commit
arg <- match PTIdentifier
match PTColon
ty <- expr n env
match PTDefEq
tr <- expr n env
match PTIn
tri <- expr (S n) (arg :: env)
pure (TLet ty tr tri)
tlam : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tlam n env = do
match PTLambda
commit
arg <- match PTIdentifier
match PTDot
e <- term (S n) (arg :: env)
pure (TLam e)
tpi : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tpi n env = do
match PTPi
commit
match PTLParen
arg <- match PTIdentifier
match PTColon
a <- term n env
match PTRParen
b <- term (S n) (arg :: env)
pure (TPi a b)
tapp : (n : Nat) -> Vect n String -> Term n -> Grammar state PiToken True (Term n)
tapp n env e1 = do
e2 <- term n env
tapp1 n env (TApp e1 e2)
tapp1 : (n : Nat) -> Vect n String -> Term n -> Grammar state PiToken False (Term n)
tapp1 n env e = tapp n env e <|> pure e
tvar : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
tvar n env = do
str <- match PTIdentifier
fromMaybe (fail ("'" ++ str ++ "' not in env")) (pure . TVar <$> findIndex (== str) env)
paren : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
paren n env = do
match PTLParen
e <- expr n env
match PTRParen
pure e
parsePi : (n : Nat) -> Vect n String -> List (WithBounds PiToken) -> Either String (Term n)
parsePi n env toks =
case parse (expr n env) $ filter (not . ignored) toks of
Right (l, []) => Right l
Right e => Left "contains tokens that were not consumed"
Left e => (Left . show . map getErr) e
where
getErr : ParsingError tok -> String
getErr (Error s _) = s
public export
parse : (n : Nat) -> Vect n String -> String -> Either String (Term n)
parse n env x =
case lexPi x of
Just toks => parsePi n env toks
Nothing => Left "Failed to lex."
public export
parse0 : String -> Either String (Term 0)
parse0 = parse 0 []