diff options
| author | Kierán Meinhardt <kieran.meinhardt@gmail.com> | 2020-09-23 17:44:40 +0200 | 
|---|---|---|
| committer | Kierán Meinhardt <kieran.meinhardt@gmail.com> | 2020-09-23 17:44:40 +0200 | 
| commit | 8e92e6e11d2b3b0bfb5ac9d68f347219493e6380 (patch) | |
| tree | 6484ca42d85ca89475e922f7b45039c116ebbf97 /src/Codec/MIME/Decode.hs | |
| parent | 6a6ad3aecd53ffd89101a0dee2b4ea576d4964d4 (diff) | |
split into library + executables
Diffstat (limited to 'src/Codec/MIME/Decode.hs')
| -rw-r--r-- | src/Codec/MIME/Decode.hs | 76 | 
1 files changed, 76 insertions, 0 deletions
diff --git a/src/Codec/MIME/Decode.hs b/src/Codec/MIME/Decode.hs new file mode 100644 index 0000000..278d6f6 --- /dev/null +++ b/src/Codec/MIME/Decode.hs @@ -0,0 +1,76 @@ +-------------------------------------------------------------------- +-- | +-- Module    : Codec.MIME.Decode +-- Copyright : (c) 2006-2009, Galois, Inc.  +-- License   : BSD3 +-- +-- Maintainer: Sigbjorn Finne <sigbjorn.finne@gmail.com> +-- Stability : provisional +-- Portability: portable +-- +--  +--  +-------------------------------------------------------------------- + +module Codec.MIME.Decode where + +import Data.Char + +import Codec.MIME.QuotedPrintable as QP +import Codec.MIME.Base64 as Base64 + +-- | @decodeBody enc str@ decodes @str@ according to the scheme +-- specified by @enc@. Currently, @base64@ and @quoted-printable@ are +-- the only two encodings supported. If you supply anything else +-- for @enc@, @decodeBody@ returns @str@. +--  +decodeBody :: String -> String -> String +decodeBody enc body = + case map toLower enc of +   "base64"           -> Base64.decodeToString body +   "quoted-printable" -> QP.decode body +   _ -> body + +-- Decoding of RFC 2047's "encoded-words" production +-- (as used in email-headers and some HTTP header cases +-- (AtomPub's Slug: header)) +decodeWord :: String -> Maybe (String, String) +decodeWord str = +  case str of +   '=':'?':xs -> +     case dropLang $ break (\ch -> ch =='?' || ch == '*') xs of +       (cs,_:x:'?':bs) +         | isKnownCharset (map toLower cs) -> +           case toLower x of +             'q' -> decodeQ cs (break (=='?') bs) +             'b' -> decodeB cs (break (=='?') bs) +             _   -> Nothing +       _ -> Nothing +   _ -> Nothing + where +  isKnownCharset cs = cs `elem` ["iso-8859-1", "us-ascii"] + +   -- ignore RFC 2231 extension of permitting a language tag to be supplied +   -- after the charset. +  dropLang (as,'*':bs) = (as,dropWhile (/='?') bs) +  dropLang (as,bs) = (as,bs) + +  decodeQ cset (fs,'?':'=':rs) = Just (fromCharset cset (QP.decode fs),rs) +  decodeQ _ _ = Nothing + +  decodeB cset (fs,'?':'=':rs) = +    Just (fromCharset cset (Base64.decodeToString fs),rs) +  decodeB _ _ = Nothing + +  fromCharset _cset cs = cs + +decodeWords :: String -> String +decodeWords "" = "" +decodeWords (x:xs) + | isSpace x = x : decodeWords xs + | otherwise = +  case decodeWord (x:xs) of +    Nothing -> x : decodeWords xs +    Just (as,bs) -> as ++ decodeWords bs + +  | 
