{-# OPTIONS_GHC -fno-implicit-prelude #-}
-----------------------------------------------------------------------------
-- |
-- Module : Text.Read.Lex
-- Copyright : (c) The University of Glasgow 2002
-- License : BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer : [email protected]
-- Stability : provisional
-- Portability : non-portable (uses Text.ParserCombinators.ReadP)
--
-- The cut-down Haskell lexer, used by Text.Read
--
-----------------------------------------------------------------------------
module Text.Read.Lex
-- lexing types
( Lexeme(..) -- :: *; Show, Eq
-- lexer
, lex -- :: ReadP Lexeme Skips leading spaces
, hsLex -- :: ReadP String
, lexChar -- :: ReadP Char Reads just one char, with H98 escapes
, readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
, readOctP -- :: Num a => ReadP a
, readDecP -- :: Num a => ReadP a
, readHexP -- :: Num a => ReadP a
)
where
import Text.ParserCombinators.ReadP
#ifdef __GLASGOW_HASKELL__
import GHC.Base
import GHC.Num( Num(..), Integer )
import GHC.Show( Show(..) )
#ifndef __HADDOCK__
import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
#endif
import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
toInteger, (^), (^^), infinity, notANumber )
import GHC.List
import GHC.Enum( maxBound )
#else
import Prelude hiding ( lex )
import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
import Data.Ratio( Ratio, (%) )
#endif
#ifdef __HUGS__
import Hugs.Prelude( Ratio(..) )
#endif
import Data.Maybe
import Control.Monad
-- -----------------------------------------------------------------------------
-- Lexing types
-- ^ Haskell lexemes.
data Lexeme
= Char Char -- ^ Character literal
| String String -- ^ String literal, with escapes interpreted
| Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
| Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
| Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
| Int Integer -- ^ Integer literal
| Rat Rational -- ^ Floating point literal
| EOF
deriving (Eq, Show)
-- -----------------------------------------------------------------------------
-- Lexing
lex :: ReadP Lexeme
lex = skipSpaces >> lexToken
hsLex :: ReadP String
-- ^ Haskell lexer: returns the lexed string, rather than the lexeme
hsLex = do skipSpaces
(s,_) <- gather lexToken
return s
lexToken :: ReadP Lexeme
lexToken = lexEOF +++
lexLitChar +++
lexString +++
lexPunc +++
lexSymbol +++
lexId +++
lexNumber
-- ----------------------------------------------------------------------
-- End of file
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
guard (null s)
return EOF
-- ---------------------------------------------------------------------------
-- Single character lexemes
lexPunc :: ReadP Lexeme
lexPunc =
do c <- satisfy isPuncChar
return (Punc [c])
where
isPuncChar c = c `elem` ",;()[]{}`"
-- ----------------------------------------------------------------------
-- Symbols
lexSymbol :: ReadP Lexeme
lexSymbol =
do s <- munch1 isSymbolChar
if s `elem` reserved_ops then
return (Punc s) -- Reserved-ops count as punctuation
else
return (Symbol s)
where
isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
-- ----------------------------------------------------------------------
-- identifiers
lexId :: ReadP Lexeme
lexId = lex_nan <++ lex_id
where
-- NaN and Infinity look like identifiers, so
-- we parse them first.
lex_nan = (string "NaN" >> return (Rat notANumber)) +++
(string "Infinity" >> return (Rat infinity))
lex_id = do c <- satisfy isIdsChar
s <- munch isIdfChar
return (Ident (c:s))
-- Identifiers can start with a '_'
isIdsChar c = isAlpha c || c == '_'
isIdfChar c = isAlphaNum c || c `elem` "_'"
#ifndef __GLASGOW_HASKELL__
infinity, notANumber :: Rational
infinity = 1 :% 0
notANumber = 0 :% 0
#endif
-- ---------------------------------------------------------------------------
-- Lexing character literals
lexLitChar :: ReadP Lexeme
lexLitChar =
do char '\''
(c,esc) <- lexCharE
guard (esc || c /= '\'') -- Eliminate '' possibility
char '\''
return (Char c)
lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; return c }
lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
lexCharE =
do c <- get
if c == '\\'
then do c <- lexEsc; return (c, True)
else do return (c, False)
where
lexEsc =
lexEscChar
+++ lexNumeric
+++ lexCntrlChar
+++ lexAscii
lexEscChar =
do c <- get
case c of
'a' -> return '\a'
'b' -> return '\b'
'f' -> return '\f'
'n' -> return '\n'
'r' -> return '\r'
't' -> return '\t'
'v' -> return '\v'
'\\' -> return '\\'
'\"' -> return '\"'
'\'' -> return '\''
_ -> pfail
lexNumeric =
do base <- lexBaseChar <++ return 10
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
lexCntrlChar =
do char '^'
c <- get
case c of
'@' -> return '\^@'
'A' -> return '\^A'
'B' -> return '\^B'
'C' -> return '\^C'
'D' -> return '\^D'
'E' -> return '\^E'
'F' -> return '\^F'
'G' -> return '\^G'
'H' -> return '\^H'
'I' -> return '\^I'
'J' -> return '\^J'
'K' -> return '\^K'
'L' -> return '\^L'
'M' -> return '\^M'
'N' -> return '\^N'
'O' -> return '\^O'
'P' -> return '\^P'
'Q' -> return '\^Q'
'R' -> return '\^R'
'S' -> return '\^S'
'T' -> return '\^T'
'U' -> return '\^U'
'V' -> return '\^V'
'W' -> return '\^W'
'X' -> return '\^X'
'Y' -> return '\^Y'
'Z' -> return '\^Z'
'[' -> return '\^['
'\\' -> return '\^\'
']' -> return '\^]'
'^' -> return '\^^'
'_' -> return '\^_'
_ -> pfail
lexAscii =
do choice
[ (string "SOH" >> return '\SOH') <++
(string "SO" >> return '\SO')
-- \SO and \SOH need maximal-munch treatment
-- See the Haskell report Sect 2.6
, string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "ENQ" >> return '\ENQ'
, string "ACK" >> return '\ACK'
, string "BEL" >> return '\BEL'
, string "BS" >> return '\BS'
, string "HT" >> return '\HT'
, string "LF" >> return '\LF'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
, string "DC2" >> return '\DC2'
, string "DC3" >> return '\DC3'
, string "DC4" >> return '\DC4'
, string "NAK" >> return '\NAK'
, string "SYN" >> return '\SYN'
, string "ETB" >> return '\ETB'
, string "CAN" >> return '\CAN'
, string "EM" >> return '\EM'
, string "SUB" >> return '\SUB'
, string "ESC" >> return '\ESC'
, string "FS" >> return '\FS'
, string "GS" >> return '\GS'
, string "RS" >> return '\RS'
, string "US" >> return '\US'
, string "SP" >> return '\SP'
, string "DEL" >> return '\DEL'
]
-- ---------------------------------------------------------------------------
-- string literal
lexString :: ReadP Lexeme
lexString =
do char '"'
body id
where
body f =
do (c,esc) <- lexStrItem
if c /= '"' || esc
then body (f.(c:))
else let s = f "" in
return (String s)
lexStrItem = (lexEmpty >> lexStrItem)
+++ lexCharE
lexEmpty =
do char '\\'
c <- get
case c of
'&' -> do return ()
_ | isSpace c -> do skipSpaces; char '\\'; return ()
_ -> do pfail
-- ---------------------------------------------------------------------------
-- Lexing numbers
type Base = Int
type Digits = [Int]
lexNumber :: ReadP Lexeme
lexNumber
= lexHexOct <++ -- First try for hex or octal 0x, 0o etc
-- If that fails, try for a decimal number
lexDecNumber -- Start with ordinary digits
lexHexOct :: ReadP Lexeme
lexHexOct
= do char '0'
base <- lexBaseChar
digits <- lexDigits base
return (Int (val (fromIntegral base) 0 digits))
lexBaseChar :: ReadP Int
-- Lex a single character indicating the base; fail if not there
lexBaseChar = do { c <- get;
case c of
'o' -> return 8
'O' -> return 8
'x' -> return 16
'X' -> return 16
_ -> pfail }
lexDecNumber :: ReadP Lexeme
lexDecNumber =
do xs <- lexDigits 10
mFrac <- lexFrac <++ return Nothing
mExp <- lexExp <++ return Nothing
return (value xs mFrac mExp)
where
value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
-> Lexeme
valueFracExp a Nothing Nothing
= Int a -- 43
valueFracExp a Nothing (Just exp)
| exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
| otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
valueFracExp a (Just fs) mExp
= case mExp of
Nothing -> Rat rat -- 4.3
Just exp -> Rat (valExp rat exp) -- 4.3e-4
where
rat :: Rational
rat = fromInteger a + frac 10 0 1 fs
valExp :: Rational -> Integer -> Rational
valExp rat exp = rat * (10 ^^ exp)
lexFrac :: ReadP (Maybe Digits)
-- Read the fractional part; fail if it doesn't
-- start ".d" where d is a digit
lexFrac = do char '.'
frac <- lexDigits 10
return (Just frac)
lexExp :: ReadP (Maybe Integer)
lexExp = do char 'e' +++ char 'E'
exp <- signedExp +++ lexInteger 10
return (Just exp)
where
signedExp
= do c <- char '-' +++ char '+'
n <- lexInteger 10
return (if c == '-' then -n else n)
lexDigits :: Int -> ReadP Digits
-- Lex a non-empty sequence of digits in specified base
lexDigits base =
do s <- look
xs <- scan s id
guard (not (null xs))
return xs
where
scan (c:cs) f = case valDig base c of
Just n -> do get; scan cs (f.(n:))
Nothing -> do return (f [])
scan [] f = do return (f [])
lexInteger :: Base -> ReadP Integer
lexInteger base =
do xs <- lexDigits base
return (val (fromIntegral base) 0 xs)
val :: Num a => a -> a -> Digits -> a
-- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
val base y [] = y
val base y (x:xs) = y' `seq` val base y' xs
where
y' = y * base + fromIntegral x
frac :: Integral a => a -> a -> a -> Digits -> Ratio a
frac base a b [] = a % b
frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
where
a' = a * base + fromIntegral x
b' = b * base
valDig :: Num a => a -> Char -> Maybe Int
valDig 8 c
| '0' <= c && c <= '7' = Just (ord c - ord '0')
| otherwise = Nothing
valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
| 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
| otherwise = Nothing
valDecDig c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| otherwise = Nothing
-- ----------------------------------------------------------------------
-- other numeric lexing functions
readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
readIntP base isDigit valDigit =
do s <- munch1 isDigit
return (val base 0 (map valDigit s))
readIntP' :: Num a => a -> ReadP a
readIntP' base = readIntP base isDigit valDigit
where
isDigit c = maybe False (const True) (valDig base c)
valDigit c = maybe 0 id (valDig base c)
readOctP, readDecP, readHexP :: Num a => ReadP a
readOctP = readIntP' 8
readDecP = readIntP' 10
readHexP = readIntP' 16
|