{-# LANGUAGE OverloadedStrings #-} module Much.ParseMail (readMail) where import Codec.MIME.Parse import Codec.MIME.QuotedPrintable qualified as QP import Codec.MIME.Type import Control.Applicative import Data.Attoparsec.ByteString.Char8 qualified as A8 import Data.ByteString qualified as BS import Data.ByteString.Char8 qualified as BS8 import Data.ByteString.Lazy qualified as LBS import Data.CaseInsensitive qualified as CI import Data.Char import Data.Text qualified as T import Data.Text.Encoding qualified as T import Data.Text.Lazy qualified as LT import Data.Text.Lazy.Encoding qualified as LT import Network.Email.Header.Parser qualified as P import Network.Email.Header.Types qualified as H import Network.Mail.Mime qualified as M -- TODO eventually we want our completely own Address, i.e. w/o M.Address data Address = Mailbox M.Address | Group T.Text [M.Address] deriving (Show) readMail :: T.Text -> M.Mail readMail = fromMIMEValue . parseMIMEMessage fromMIMEValue :: MIMEValue -> M.Mail fromMIMEValue val0 = let m = foldr f (M.emptyMail $ M.Address Nothing "anonymous@localhost") $ fromMIMEParams $ mime_val_headers val0 in m { M.mailParts = [part val0] } where part val = case mime_val_content val of Single content -> (:[]) $ M.Part -- TODO actually check if we're utf-8 or ascii(?) { M.partType = "text/plain; charset=utf-8" , M.partEncoding = M.QuotedPrintableText , M.partFilename = Nothing , M.partHeaders = [] , M.partContent = LT.encodeUtf8 $ LT.fromStrict content } Multi vals -> concatMap part vals --f :: H.Header -> M.Mail -> M.Mail f (k, v) m = case k of "from" -> m { M.mailFrom = case parseAddress (LBS.toStrict v) of Left msg -> error msg Right Nothing -> M.mailFrom m Right (Just (Mailbox a)) -> a Right (Just (Group _ _)) -> error "cannot use group in from header" } "to" -> m { M.mailTo = mconcat $ map (\case Mailbox a -> [a] Group _ as -> as ) $ either error id $ parseAddresses $ LBS.toStrict v } "cc" -> m { M.mailCc = mconcat $ map (\case Mailbox a -> [a] Group _ as -> as ) $ either error id $ parseAddresses $ LBS.toStrict v } "bcc" -> m { M.mailBcc = mconcat $ map (\case Mailbox a -> [a] Group _ as -> as ) $ either error id $ parseAddresses $ LBS.toStrict v } _ -> m { M.mailHeaders = ( CI.original k , either (const "I am made of stupid") LT.toStrict (LT.decodeUtf8' v) ) : M.mailHeaders m } parseAddress :: BS.ByteString -> Either String (Maybe Address) parseAddress = A8.parseOnly (P.cfws *> (Just <$> address <|> return Nothing) <* A8.endOfInput) parseAddresses :: BS.ByteString -> Either String [Address] parseAddresses = A8.parseOnly (P.cfws *> address `A8.sepBy1` A8.char ',' <* A8.endOfInput) fromMIMEParams :: [MIMEParam] -> H.Headers fromMIMEParams = map $ \(MIMEParam k v) -> (CI.mk $ T.encodeUtf8 $ CI.original k, LT.encodeUtf8 $ LT.fromStrict v) -- TODO we should probably use email-header -- address = mailbox ; one addressee -- / group ; named list address :: A8.Parser Address address = (A8. "address") $ Mailbox <$> mailbox <|> group -- group = phrase ":" [#mailbox] ";" group :: A8.Parser Address group = (A8. "group") $ Group <$> T.intercalate "," <$> phrase <* A8.char ':' <*> mailbox `A8.sepBy` A8.many1 (A8.char ',') <* A8.char ';' -- mailbox = addr-spec ; simple address -- / phrase route-addr ; name & addr-spec mailbox :: A8.Parser M.Address mailbox = (A8. "mailbox") $ M.Address Nothing <$> addrSpec <|> M.Address . Just . T.intercalate " " <$> A8.option [] phrase <*> routeAddr -- route-addr = "<" [route] addr-spec ">" routeAddr :: A8.Parser T.Text routeAddr = (A8. "routeAddr") $ P.cfws *> A8.char '<' *> -- TODO A8.option [] route <*> addrSpec <* A8.char '>' ---- route = 1#("@" domain) ":" ; path-relative --route :: A8.Parser [T.Text] --route = -- (A8. "route") $ -- A8.many1 (A8.char '@' *> domain) <* A8.char ':' -- addr-spec = local-part "@" domain ; global address addrSpec :: A8.Parser T.Text addrSpec = (A8. "addrSpec") $ do a <- localPart b <- T.singleton <$> A8.char '@' c <- domain return $ a <> b <> c -- local-part = word *("." word) ; uninterpreted -- ; case-preserved localPart :: A8.Parser T.Text localPart = (A8. "localPart") $ T.intercalate "." <$> (word `A8.sepBy1` A8.char '.') -- domain = sub-domain *("." sub-domain) domain :: A8.Parser T.Text domain = (A8. "domain") $ T.intercalate "." <$> (subDomain `A8.sepBy1` A8.char '.') -- sub-domain = domain-ref / domain-literal subDomain :: A8.Parser T.Text subDomain = (A8. "subDomain") $ domainRef <|> domainLiteral -- domain-ref = atom ; symbolic reference domainRef :: A8.Parser T.Text domainRef = (A8. "domainRef") $ atom -- atom = 1* atom :: A8.Parser T.Text atom = (A8. "atom") $ P.cfws *> (T.pack <$> A8.many1 (A8.satisfy $ A8.notInClass atomClass)) -- domain-literal = "[" *(dtext / quoted-pair) "]" domainLiteral :: A8.Parser T.Text domainLiteral = (A8. "domainLiteral") $ T.pack <$> (A8.char '[' *> A8.many' (dtext <|> quotedPair) <* A8.char ']') -- dtext = may be folded -- "]", "\" & CR, & including -- linear-white-space> dtext :: A8.Parser Char dtext = (A8. "dtext") $ A8.satisfy (A8.notInClass "[]\\\CR") -- phrase = 1*word phrase :: A8.Parser [T.Text] phrase = (A8. "phrase") $ A8.many1 word -- qtext = , ; => may be folded -- "\" & CR, and including -- linear-white-space> qtext :: A8.Parser Char qtext = (A8. "qtext") $ A8.satisfy (A8.notInClass "\"\\\CR") -- quoted-pair = "\" CHAR quotedPair :: A8.Parser Char quotedPair = (A8. "quotedPair") $ A8.char '\\' *> A8.anyChar -- quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or -- ; quoted chars. quotedString :: A8.Parser T.Text quotedString = (A8. "quotedString") $ T.pack <$> (A8.char '"' *> A8.many' (qtext <|> quotedPair) <* A8.char '"') encodedWord :: A8.Parser T.Text encodedWord = (A8. "encodedWord") $ do _ <- A8.string "=?" _ <- A8.string "utf-8" -- TODO 1. CI, 2. other encodings _ <- A8.string "?Q?" w <- A8.manyTill A8.anyChar (A8.string "?=") -- TODO all of them return $ T.decodeUtf8 $ BS8.pack $ decodeQ $ w where -- RFC 2047, 4.2. The "Q" encoding decodeQ = QP.decode . replace '_' ' ' replace a b = map $ \c -> if c == a then b else c -- word = encoded-word / atom / quoted-string -- ^ TODO what's the correct term for that? word :: A8.Parser T.Text word = (A8. "word") $ encodedWord <|> atom <|> quotedString atomClass :: [Char] atomClass = specialClass ++ spaceClass ++ ctlClass specialClass :: [Char] specialClass = "()<>@,;:\\\".[]" spaceClass :: [Char] spaceClass = " " ctlClass :: [Char] ctlClass = map chr $ [0..31] ++ [127]