epsilon/src/Language/Epsilon/Parser.hs

76 lines
2.3 KiB
Haskell

{-# LANGUAGE
BlockArguments, ApplicativeDo, OverloadedStrings, LambdaCase,
TypeApplications
#-}
module Language.Epsilon.Parser (parse) where
import Control.Arrow
import Control.Applicative (liftA2)
import Control.Monad
import Data.Char
import Data.Functor
import Data.Text (Text)
import Text.Parsec hiding (parse)
import qualified Text.Parsec as P
import Text.Parsec.Char
import qualified Data.Text as T
import Language.Epsilon.Types
type Parser = Parsec Text ()
parse :: FilePath -> Text -> Either ParseError Value
parse = P.parse (value <* eof)
value :: Parser Value
value = parens do end <- option Nil $ try $ value <* dot
es <- many value
pure $ foldl Snoc end es
<|> Quote <$ quote <*> value
<|> do sign <- option "" $ pure <$> char '-'
ds <- many1 digit
let intPart = sign ++ ds
Float . read . (intPart ++) <$> liftA2 (:) (char '.') (many1 digit)
<|> pure (Int $ read intPart)
<* spaces
<|> Char <$ char '?' <*> strChar '\'' <* spaces
<|> Str . T.pack <$ char '"' <*> many1 (strChar '"') <* char '"' <* spaces
<|> Symbol . T.pack <$> many1 (satisfy symbolChar) <* spaces
where symbolChar c = not (isSpace c) && notElem @[] c "()\"'?"
strChar :: Char -> Parser Char
strChar c = noneOf [c, '\\'] <|> char '\\' *> esc where
esc :: Parser Char
esc = anyChar >>= \case
c' | c' == c -> pure c
'\\' -> pure '\\'
'\n' -> spaces *> strChar c -- skip all following whitespace
'n' -> pure '\n'
'e' -> pure '\ESC'
'b' -> pure '\DEL'
'0' -> pure '\NUL'
't' -> pure '\t'
'x' -> hexChar 2
'u' -> hexChar 4
'U' -> hexChar 8
hexChar :: Int -> Parser Char
hexChar n = chr . parseHex <$> replicateM n hexDigit where
parseHex :: String -> Int
parseHex = foldr (\d n -> parseHexDigit d + n*16) 0
parseHexDigit :: Char -> Int
parseHexDigit = toLower >>> \case
'0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3
'4' -> 4; '5' -> 5; '6' -> 6; '7' -> 7
'8' -> 8; '9' -> 9; 'a' -> 10; 'b' -> 11
'c' -> 12; 'd' -> 13; 'e' -> 14; 'f' -> 15
_ -> 0
parens :: Parser a -> Parser a
parens p = char '(' *> spaces *> p <* char ')' <* spaces
dot, quote :: Parser ()
dot = void $ char '.' <* spaces
quote = void $ char '\'' <* spaces