pi/src/Parser/Parse.idr

module Parser.Parse

import Text.Lexer
import Text.Parser

import Data.Vect
import Data.List
import Data.List1
import Data.Maybe

-- eventually start using elaborator representation here, or just a parser
-- exclusive one
import Core.Term
import Core.Misc

%default total

data PiTokenKind
  = PTLambda
  | PTIdentifier
  | PTDot
  | PTArrow
  | PTPi
  | PTSigma
  | PTComma
  | PTLParen
  | PTRParen
  | PTColon
  | PTLet
  | PTDefEq
  | PTIn
  | PTIgnore
  | PTType
  | PTTop
  | PTStar
  | PTTopInd
  | PTBot
  | PTBotInd
  | PTNat
  | PTZero
  | PTSuc
  | PTNatInd
  | PTSigInd

Eq PiTokenKind where
  (==) PTLambda     PTLambda     = True
  (==) PTIdentifier PTIdentifier = True
  (==) PTDot        PTDot        = True
  (==) PTArrow      PTArrow      = True
  (==) PTPi         PTPi         = True
  (==) PTSigma      PTSigma      = True
  (==) PTComma      PTComma      = True
  (==) PTLParen     PTLParen     = True
  (==) PTRParen     PTRParen     = True
  (==) PTColon      PTColon      = True
  (==) PTLet        PTLet        = True
  (==) PTDefEq      PTDefEq      = True
  (==) PTIn         PTIn         = True
  (==) PTIgnore     PTIgnore     = True
  (==) PTType       PTType       = True
  (==) PTTop        PTTop        = True
  (==) PTStar       PTStar       = True
  (==) PTTopInd     PTTopInd     = True
  (==) PTBot        PTBot        = True
  (==) PTBotInd     PTBotInd     = True
  (==) PTNat        PTNat        = True
  (==) PTZero       PTZero       = True
  (==) PTSuc        PTSuc        = True
  (==) PTNatInd     PTNatInd     = True
  (==) PTSigInd     PTSigInd     = True
  (==) _            _            = False

Show PiTokenKind where
  show PTLambda     = "PTLambda"
  show PTIdentifier = "PTIdentifier"
  show PTDot        = "PTDot"
  show PTArrow      = "PTArrow"
  show PTPi         = "PTPi"
  show PTSigma      = "PTSigma"
  show PTComma      = "PTComma"
  show PTLParen     = "PTLParen"
  show PTRParen     = "PTRParen"
  show PTColon      = "PTColon"
  show PTLet        = "PTLet"
  show PTDefEq      = "PTDefEq"
  show PTIn         = "PTIn"
  show PTIgnore     = "PTIgnore"
  show PTType       = "PTType"
  show PTTop        = "PTTop"
  show PTStar       = "PTSTar"
  show PTTopInd     = "PTTopInd"
  show PTBot        = "PTBot"
  show PTBotInd     = "PTBotInd"
  show PTNat        = "PTNat"
  show PTZero       = "PTZero"
  show PTSuc        = "PTSuc"
  show PTNatInd     = "PTNatInd"
  show PTSigInd     = "PTSigInd"

PiToken : Type
PiToken = Token PiTokenKind

Show PiToken where
  show (Tok kind text) = "Tok kind: " ++ show kind ++ " text: " ++ text

TokenKind PiTokenKind where
  TokType PTIdentifier = String
  TokType _ = ()

  tokValue PTLambda     _ = ()
  tokValue PTIdentifier s = s
  tokValue PTDot        _ = ()
  tokValue PTArrow      _ = ()
  tokValue PTPi         _ = ()
  tokValue PTSigma      _ = ()
  tokValue PTComma      _ = ()
  tokValue PTLParen     _ = ()
  tokValue PTRParen     _ = ()
  tokValue PTColon      _ = ()
  tokValue PTLet        _ = ()
  tokValue PTDefEq      _ = ()
  tokValue PTIn         _ = ()
  tokValue PTIgnore     _ = ()
  tokValue PTType       _ = ()
  tokValue PTTop        _ = ()
  tokValue PTStar       _ = ()
  tokValue PTTopInd     _ = ()
  tokValue PTBot        _ = ()
  tokValue PTBotInd     _ = ()
  tokValue PTNat        _ = ()
  tokValue PTZero       _ = ()
  tokValue PTSuc        _ = ()
  tokValue PTNatInd     _ = ()
  tokValue PTSigInd     _ = ()

ignored : WithBounds PiToken -> Bool
ignored (MkBounded (Tok PTIgnore _) _ _) = True
ignored _ = False

identifier : Lexer
identifier = some (alphaNum <|> symbol)

keywords : List (String, PiTokenKind)
keywords = [
  ("in",    PTIn),
  ("let",   PTLet),
  ("Type",  PTType),
  ("⊤-ind", PTTopInd),
  ("⊥-ind", PTBotInd),
  ("ℕ-ind", PTNatInd),
  ("Σ-ind", PTSigInd),
  ("λ",     PTLambda),
  (".",     PTDot),
  ("→",     PTArrow),
  ("Π",     PTPi),
  ("Σ",     PTSigma),
  (",",     PTComma),
  ("(",     PTLParen),
  (")",     PTRParen),
  (":",     PTColon),
  ("≔",     PTDefEq),
  ("⊤",     PTTop),
  ("⊥",     PTBot),
  ("ℕ",     PTNat),
  ("★",     PTStar)
]

piTokenMap : TokenMap PiToken
piTokenMap = (spaces, Tok PTIgnore) ::
  [(identifier, \s =>
      case lookup s keywords of
        (Just kind) => Tok kind s
        Nothing => Tok PTIdentifier s
    )
  ]

lexPi : String -> Maybe (List (WithBounds PiToken))
lexPi str =
  case lex piTokenMap str of
    (tokens, _, _, "") => Just tokens
    _ => Nothing

{- Basic idea, parsing has a list of the identifiers to convert to -}
{- de bruijn indecies, and a Nat to keep track of context size -}
mutual
  expr : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  expr n env = ttopind n env
           <|> tbotind n env
           <|> tsuc n env
           <|> tnatind n env
           <|> tsigind n env
           <|> (do
             t <- term n env
             tapp n env t <|> pure t)

  term : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  term n env = ttype
           <|> ttop
           <|> tstar
           <|> tbot
           <|> tnat
           <|> tzero
           <|> tsigma n env
           <|> tpair n env
           <|> tlet n env
           <|> tlam n env
           <|> tpi  n env
           <|> tvar n env
           <|> paren n env

  ttype : Grammar state PiToken True (Term n)
  ttype = match PTType >> pure TType

  ttop : Grammar state PiToken True (Term n)
  ttop = match PTTop >> pure TTop

  tstar : Grammar state PiToken True (Term n)
  tstar = match PTStar >> pure TStar

  ttopind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  ttopind n env = do
    match PTTopInd
    commit
    c  <- term n env
    st <- term n env
    pure (TTopInd c st)

  tbot : Grammar state PiToken True (Term n)
  tbot = match PTBot >> pure TBot

  tbotind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tbotind n env = do
    match PTBotInd
    commit
    c <- term n env
    pure (TBotInd c)

  tnat : Grammar state PiToken True (Term n)
  tnat = match PTNat >> pure TNat

  tzero : Grammar state PiToken True (Term n)
  tzero = match PTZero >> pure TZero

  tsuc : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tsuc n env = do
    match PTSuc
    commit
    m <- term n env
    pure (TSuc m)

  tnatind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tnatind n env = do
    match PTNatInd
    commit
    c <- term n env
    z <- term n env
    s <- term n env
    pure (TNatInd c z s)

  tsigma : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tsigma n env = do
    match PTSigma
    commit
    match PTLParen
    arg <- match PTIdentifier
    match PTColon
    a <- term n env
    match PTRParen
    b <- term (S n) (arg :: env)
    pure (TSigma a (TLam b))

  tpair : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tpair n env = do
    match PTLParen
    commit
    x <- expr n env
    match PTComma
    y <- expr n env
    match PTRParen
    pure (TPair x y)

  tsigind : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tsigind n env = do
    match PTSigInd
    commit
    a <- term n env
    b <- term n env
    c <- term n env
    f <- term n env
    pure (TSigInd a b c f)

  tlet : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tlet n env = do
    match PTLet
    commit
    arg <- match PTIdentifier
    match PTColon
    ty <- expr n env
    match PTDefEq
    tr <- expr n env
    match PTIn
    tri <- expr (S n) (arg :: env)
    pure (TLet ty tr tri)

  tlam : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tlam n env = do
    match PTLambda
    commit
    arg <- match PTIdentifier
    match PTDot
    e <- term (S n) (arg :: env)
    pure (TLam e)

  tpi : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tpi n env = do
    match PTPi
    commit
    match PTLParen
    arg <- match PTIdentifier
    match PTColon
    a <- term n env
    match PTRParen
    b <- term (S n) (arg :: env)
    pure (TPi a b)

  tapp : (n : Nat) -> Vect n String -> Term n -> Grammar state PiToken True (Term n)
  tapp n env e1 = do
    e2 <- term n env
    tapp1 n env (TApp e1 e2)

  tapp1 : (n : Nat) -> Vect n String -> Term n -> Grammar state PiToken False (Term n)
  tapp1 n env e = tapp n env e <|> pure e

  tvar : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  tvar n env = do
    str <- match PTIdentifier
    fromMaybe (fail ("'" ++ str ++ "' not in env")) (pure . TVar <$> findIndex (== str) env)

  paren : (n : Nat) -> Vect n String -> Grammar state PiToken True (Term n)
  paren n env = do
    match PTLParen
    e <- expr n env
    match PTRParen
    pure e

parsePi : (n : Nat) -> Vect n String -> List (WithBounds PiToken) -> Either String (Term n)
parsePi n env toks =
  case parse (expr n env) $ filter (not . ignored) toks of
    Right (l, []) => Right l
    Right e => Left "contains tokens that were not consumed"
    Left e  => (Left . show . map getErr) e
  where
    getErr : ParsingError tok -> String
    getErr (Error s _) = s


public export
parse : (n : Nat) -> Vect n String -> String -> Either String (Term n)
parse n env x =
  case lexPi x of
    Just toks => parsePi n env toks
    Nothing => Left "Failed to lex."

public export
parse0 : String -> Either String (Term 0)
parse0 = parse 0 []