module Xml where

import Parse

-- Tokenization

data Token = StartTag String
           | EndTag String
           | Comment String
           | Text String
  deriving Show

parseName :: Parser Char String
parseName = do
  x <- letter <|> char '_'
  xs <- many (letter <|> char '_' <|> digit)
  return $ x:xs

parseTag :: Parser Char Token
parseTag =
    between (char '<') (char '>' >> spaces) (sTag <|> eTag)
  where
    sTag = parseName >>= return . StartTag
    eTag = char '/' >> parseName >>= return . EndTag

parseComment :: Parser Char Token
parseComment = do
  string "<!--"
  cmt <- manyTill anyToken (string "-->")
  many space
  return $ Comment cmt

parseText :: Parser Char Token
parseText = many1 (noneof "<") >>= return . Text

lexer :: Parser Char [Token]
lexer = do
  many space
  ts <- many (parseTag <|> parseComment <|> parseText)
  eoi
  return ts

tokenize :: [Char] -> Maybe [Token]
tokenize cs = case parse lexer cs of
  Nothing -> Nothing
  Just ts -> Just (dropComments ts)
--tokenize = fmap dropComments . parse lexer
  where
    dropComments = filter notComment
    notComment (Comment _) = False
    notComment _ = True


-- Parsing

type Tag = String
data Xml = Xml Tag [Xml]
         | Txt String
  deriving Show

parseXml :: Parser Token Xml
parseXml = (token text >>= return . Txt) <|> do
  t <- token start
  ns <- many parseXml 
  sat (isEnd t)
  return $ Xml t ns
  where
    text (Text t) = Just t
    text _ = Nothing
    start (StartTag t) = Just t
    start _ = Nothing
    isEnd s (EndTag t) | s == t = True
    isEnd _ _ = False

fromString :: String -> Maybe Xml
fromString xs = case tokenize xs of
    Just ts -> parse p ts
    Nothing -> Nothing
  where p = do { xml <- parseXml; eoi; return xml }