diff options
Diffstat (limited to 'src/Data')
| -rw-r--r-- | src/Data/Text/WCWidth.hs | 141 | ||||
| -rw-r--r-- | src/Data/WText.hs | 137 |
2 files changed, 278 insertions, 0 deletions
diff --git a/src/Data/Text/WCWidth.hs b/src/Data/Text/WCWidth.hs new file mode 100644 index 0000000..71d59cf --- /dev/null +++ b/src/Data/Text/WCWidth.hs @@ -0,0 +1,141 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Data.Text.WCWidth where + +import Data.Char.WCWidth qualified as WC +import Data.Text (Text) +import Data.Text qualified as T + +-------------------------------------------------------------------------------- +-- primitives +-------------------------------------------------------------------------------- + +-- TODO this breaks when WC.wcwidth returns -1, which happens for +-- non-printable characters like '\n'. + +wcwidthLenient :: Char -> Int +wcwidthLenient c = + case WC.wcwidth c of + -1 -> 0 -- non-printable + w -> w -- 0, 1, or 2 + + +-- | Width-aware split: split on characters satisfying the predicate. +-- The predicate sees the character, not its width. +wsplit :: (Char -> Bool) -> Text -> [Text] +wsplit p = go [] + where + go acc t = + case T.uncons t of + Nothing -> + case acc of + [] -> [] + _ -> [T.pack (reverse acc)] + + Just (c, rest) + | p c -> + let chunk = if null acc then [] else [T.pack (reverse acc)] + in chunk ++ go [] rest + + | otherwise -> + go (c : acc) rest + + + +wsplitAt :: Int -> Text -> (Text, Text) +wsplitAt k = go k mempty + where + go n acc t = + case T.uncons t of + Just (c, rest) + | let w = wcwidthLenient c + , w <= n + -> go (n - w) (c:acc) rest + _ -> + (T.pack (reverse acc), t) + + +-------------------------------------------------------------------------------- +-- low-level +-------------------------------------------------------------------------------- + +wtake :: Int -> Text -> Text +wtake k = fst . wsplitAt k + +wdrop :: Int -> Text -> Text +wdrop k = snd . wsplitAt k + +wlength :: Text -> Int +--wlength = sum . map wcwidthLenient . T.unpack +wlength = T.foldl' (\acc c -> acc + wcwidthLenient c) 0 + + +-------------------------------------------------------------------------------- +-- high-level +-------------------------------------------------------------------------------- + +wchunksOf :: Int -> Text -> [Text] +wchunksOf k w = + case wsplitAt k w of + (a, b) + | b == mempty -> [a] + | otherwise -> a : wchunksOf k b + +wpadRight :: Int -> Text -> Text +wpadRight k w = + let missing = k - wlength w + in if missing <= 0 + then w + else w <> T.replicate missing " " + +--breakOn, splitOn + + + +--wcwidth :: Char -> Int +--wcwidth '\0' = 0 +--wcwidth c = +-- case WC.wcwidth c of +-- 0 -> 0 +-- n -> n +-- +-- +--wcwidth :: Char -> Int +--wcwidth '\0' = 0 +--wcwidth c = max 1 (WC.wcwidth c) + +--length' :: Text -> Int +--length' = Text.foldr ((+) . wcwidth') 0 +-- +--drop' :: Int -> Text -> Text +--drop' k t = +-- if k <= 0 +-- then t +-- else +-- case Text.uncons t of +-- Just (c, t') -> +-- drop' (k - wcwidth' c) t' +-- Nothing -> mempty +-- +--take' :: Int -> Text -> Text +--take' k0 = +-- Text.pack . rec k0 +-- where +-- rec k t = +-- if | Just (c, t') <- Text.uncons t, nc <- wcwidth' c, nc <= k -> +-- c : rec (k - nc) t' +-- +-- | otherwise -> +-- [] +-- +--splitAt' :: Int -> Text -> (Text, Text) +--splitAt' k0 = +-- rec k0 mempty +-- where +-- rec k a t = +-- if | Just (c, t') <- Text.uncons t, nc <- wcwidth' c, nc <= k -> +-- rec (k - nc) (c : a) t' +-- +-- | otherwise -> +-- (Text.pack $ reverse a, t) +-- diff --git a/src/Data/WText.hs b/src/Data/WText.hs new file mode 100644 index 0000000..9624b37 --- /dev/null +++ b/src/Data/WText.hs @@ -0,0 +1,137 @@ +{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeFamilies #-} + +module Data.WText where + +import Prelude hiding (break, drop, map, splitAt, take) +import Prelude qualified + +import Data.Coerce (coerce) +import Data.String (IsString(..)) +import Data.Text (Text) +import Data.Text qualified as T +import Data.Text.WCWidth qualified as WT + +import Data.Sequences +import Data.MonoTraversable + + +newtype WText = WText { unWText :: Text } + deriving (Eq, Ord, Show, IsString, Semigroup, Monoid) + deriving (MonoFoldable, MonoFunctor) via Text + + +type instance Element WText = Char + +instance MonoTraversable WText where + otraverse f (WText t) = WText <$> otraverse f t + +instance MonoPointed WText where + opoint c = WText (T.singleton c) + +instance GrowingAppend WText +--gappend (WText a) (WText b) = WText (a <> b) + +instance IsSequence WText where + fromList = WText . T.pack + --singleton = WText . T.singleton + + --index (WText t) = T.index t + lengthIndex (WText t) = T.length t + + take n (WText t) = WText (T.take n t) + drop n (WText t) = WText (T.drop n t) + splitAt n (WText t) = + let (a,b) = T.splitAt n t + in (WText a, WText b) + +instance SemiSequence WText where + type Index WText = Int + + intersperse c (WText t) = WText (T.intersperse c t) + + reverse (WText t) = WText (T.reverse t) + + cons c (WText t) = WText (T.cons c t) + snoc (WText t) c = WText (T.snoc t c) + + find p (WText t) = find p t + + sortBy cmp (WText t) = + WText (T.pack (sortBy cmp (T.unpack t))) + +instance Textual WText where + words (WText t) = Prelude.map WText (T.words t) + unwords ws = WText (T.unwords (ofoldMap (\(WText t) -> [t]) ws)) + + lines (WText t) = Prelude.map WText (T.lines t) + unlines ws = WText (T.unlines (ofoldMap (\(WText t) -> [t]) ws)) + + toLower (WText t) = WText (T.toLower t) + toUpper (WText t) = WText (T.toUpper t) + toCaseFold (WText t) = WText (T.toCaseFold t) + + +-------------------------------------------------------------------------------- +-- low-level +-------------------------------------------------------------------------------- + +split :: (Char -> Bool) -> WText -> [WText] +split = coerce WT.wsplit + +splitAt :: Int -> WText -> (WText, WText) +splitAt = coerce WT.wsplitAt + +take :: Int -> WText -> WText +take = coerce WT.wtake + +drop :: Int -> WText -> WText +drop = coerce WT.wdrop + +length :: WText -> Int +length = coerce WT.wlength + + +-------------------------------------------------------------------------------- +-- coercible +-------------------------------------------------------------------------------- + +null :: WText -> Bool +null = coerce T.null + +filter :: (Char -> Bool) -> WText -> WText +filter = coerce T.filter + +map :: (Char -> Char) -> WText -> WText +map = coerce T.map + +reverse + , strip + , toUpper + , toLower + :: WText -> WText +reverse = coerce T.reverse +strip = coerce T.strip +toUpper = coerce T.toUpper +toLower = coerce T.toLower + +stripPrefix, stripSuffix :: WText -> WText -> Maybe WText +stripPrefix = coerce T.stripPrefix +stripSuffix = coerce T.stripSuffix + +replace :: WText -> WText -> WText -> WText +replace = coerce T.replace + +intercalate :: WText -> [WText] -> WText +intercalate = coerce T.intercalate + +isPrefixOf + , isSuffixOf + , isInfixOf + :: WText -> WText -> Bool +isPrefixOf = coerce T.isPrefixOf +isSuffixOf = coerce T.isSuffixOf +isInfixOf = coerce T.isInfixOf + |
