module Xml where
import Parse
data Token = StartTag String
| EndTag String
| Comment String
| Text String
deriving Show
parseName :: Parser Char String
parseName = do
x <- letter <|> char '_'
xs <- many (letter <|> char '_' <|> digit)
return $ x:xs
parseTag :: Parser Char Token
parseTag =
between (char '<') (char '>' >> spaces) (sTag <|> eTag)
where
sTag = parseName >>= return . StartTag
eTag = char '/' >> parseName >>= return . EndTag
parseComment :: Parser Char Token
parseComment = do
string "<!--"
cmt <- manyTill anyToken (string "-->")
spaces
return $ Comment cmt
parseText :: Parser Char Token
parseText = many1 (noneof "<") >>= return . Text
lexer :: Parser Char [Token]
lexer = do
spaces
ts <- many (parseTag <|> parseComment <|> parseText)
eoi
return ts
tokenize :: [Char] -> Maybe [Token]
tokenize cs = case parse lexer cs of
Nothing -> Nothing
Just ts -> Just (dropComments ts)
where
dropComments = filter notComment
notComment (Comment _) = False
notComment _ = True
type Tag = String
data Xml = Xml Tag [Xml]
| Txt String
deriving Show
parseXml :: Parser Token Xml
parseXml = (token text >>= return . Txt) <|> do
t <- token start
ns <- many parseXml
sat (isEnd t)
return $ Xml t ns
where
text (Text t) = Just t
text _ = Nothing
start (StartTag t) = Just t
start _ = Nothing
isEnd s (EndTag t) | s == t = True
isEnd _ _ = False
fromString :: String -> Maybe Xml
fromString xs = case tokenize xs of
Just ts -> parse p ts
Nothing -> Nothing
where p = do { xml <- parseXml; eoi; return xml }