summaryrefslogtreecommitdiffstats
path: root/src/Data
diff options
context:
space:
mode:
Diffstat (limited to 'src/Data')
-rw-r--r--src/Data/Text/WCWidth.hs141
-rw-r--r--src/Data/WText.hs137
2 files changed, 278 insertions, 0 deletions
diff --git a/src/Data/Text/WCWidth.hs b/src/Data/Text/WCWidth.hs
new file mode 100644
index 0000000..71d59cf
--- /dev/null
+++ b/src/Data/Text/WCWidth.hs
@@ -0,0 +1,141 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Data.Text.WCWidth where
+
+import Data.Char.WCWidth qualified as WC
+import Data.Text (Text)
+import Data.Text qualified as T
+
+--------------------------------------------------------------------------------
+-- primitives
+--------------------------------------------------------------------------------
+
+-- TODO this breaks when WC.wcwidth returns -1, which happens for
+-- non-printable characters like '\n'.
+
+wcwidthLenient :: Char -> Int
+wcwidthLenient c =
+ case WC.wcwidth c of
+ -1 -> 0 -- non-printable
+ w -> w -- 0, 1, or 2
+
+
+-- | Width-aware split: split on characters satisfying the predicate.
+-- The predicate sees the character, not its width.
+wsplit :: (Char -> Bool) -> Text -> [Text]
+wsplit p = go []
+ where
+ go acc t =
+ case T.uncons t of
+ Nothing ->
+ case acc of
+ [] -> []
+ _ -> [T.pack (reverse acc)]
+
+ Just (c, rest)
+ | p c ->
+ let chunk = if null acc then [] else [T.pack (reverse acc)]
+ in chunk ++ go [] rest
+
+ | otherwise ->
+ go (c : acc) rest
+
+
+
+wsplitAt :: Int -> Text -> (Text, Text)
+wsplitAt k = go k mempty
+ where
+ go n acc t =
+ case T.uncons t of
+ Just (c, rest)
+ | let w = wcwidthLenient c
+ , w <= n
+ -> go (n - w) (c:acc) rest
+ _ ->
+ (T.pack (reverse acc), t)
+
+
+--------------------------------------------------------------------------------
+-- low-level
+--------------------------------------------------------------------------------
+
+wtake :: Int -> Text -> Text
+wtake k = fst . wsplitAt k
+
+wdrop :: Int -> Text -> Text
+wdrop k = snd . wsplitAt k
+
+wlength :: Text -> Int
+--wlength = sum . map wcwidthLenient . T.unpack
+wlength = T.foldl' (\acc c -> acc + wcwidthLenient c) 0
+
+
+--------------------------------------------------------------------------------
+-- high-level
+--------------------------------------------------------------------------------
+
+wchunksOf :: Int -> Text -> [Text]
+wchunksOf k w =
+ case wsplitAt k w of
+ (a, b)
+ | b == mempty -> [a]
+ | otherwise -> a : wchunksOf k b
+
+wpadRight :: Int -> Text -> Text
+wpadRight k w =
+ let missing = k - wlength w
+ in if missing <= 0
+ then w
+ else w <> T.replicate missing " "
+
+--breakOn, splitOn
+
+
+
+--wcwidth :: Char -> Int
+--wcwidth '\0' = 0
+--wcwidth c =
+-- case WC.wcwidth c of
+-- 0 -> 0
+-- n -> n
+--
+--
+--wcwidth :: Char -> Int
+--wcwidth '\0' = 0
+--wcwidth c = max 1 (WC.wcwidth c)
+
+--length' :: Text -> Int
+--length' = Text.foldr ((+) . wcwidth') 0
+--
+--drop' :: Int -> Text -> Text
+--drop' k t =
+-- if k <= 0
+-- then t
+-- else
+-- case Text.uncons t of
+-- Just (c, t') ->
+-- drop' (k - wcwidth' c) t'
+-- Nothing -> mempty
+--
+--take' :: Int -> Text -> Text
+--take' k0 =
+-- Text.pack . rec k0
+-- where
+-- rec k t =
+-- if | Just (c, t') <- Text.uncons t, nc <- wcwidth' c, nc <= k ->
+-- c : rec (k - nc) t'
+--
+-- | otherwise ->
+-- []
+--
+--splitAt' :: Int -> Text -> (Text, Text)
+--splitAt' k0 =
+-- rec k0 mempty
+-- where
+-- rec k a t =
+-- if | Just (c, t') <- Text.uncons t, nc <- wcwidth' c, nc <= k ->
+-- rec (k - nc) (c : a) t'
+--
+-- | otherwise ->
+-- (Text.pack $ reverse a, t)
+--
diff --git a/src/Data/WText.hs b/src/Data/WText.hs
new file mode 100644
index 0000000..9624b37
--- /dev/null
+++ b/src/Data/WText.hs
@@ -0,0 +1,137 @@
+{-# LANGUAGE DerivingVia #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeFamilies #-}
+
+module Data.WText where
+
+import Prelude hiding (break, drop, map, splitAt, take)
+import Prelude qualified
+
+import Data.Coerce (coerce)
+import Data.String (IsString(..))
+import Data.Text (Text)
+import Data.Text qualified as T
+import Data.Text.WCWidth qualified as WT
+
+import Data.Sequences
+import Data.MonoTraversable
+
+
+newtype WText = WText { unWText :: Text }
+ deriving (Eq, Ord, Show, IsString, Semigroup, Monoid)
+ deriving (MonoFoldable, MonoFunctor) via Text
+
+
+type instance Element WText = Char
+
+instance MonoTraversable WText where
+ otraverse f (WText t) = WText <$> otraverse f t
+
+instance MonoPointed WText where
+ opoint c = WText (T.singleton c)
+
+instance GrowingAppend WText
+--gappend (WText a) (WText b) = WText (a <> b)
+
+instance IsSequence WText where
+ fromList = WText . T.pack
+ --singleton = WText . T.singleton
+
+ --index (WText t) = T.index t
+ lengthIndex (WText t) = T.length t
+
+ take n (WText t) = WText (T.take n t)
+ drop n (WText t) = WText (T.drop n t)
+ splitAt n (WText t) =
+ let (a,b) = T.splitAt n t
+ in (WText a, WText b)
+
+instance SemiSequence WText where
+ type Index WText = Int
+
+ intersperse c (WText t) = WText (T.intersperse c t)
+
+ reverse (WText t) = WText (T.reverse t)
+
+ cons c (WText t) = WText (T.cons c t)
+ snoc (WText t) c = WText (T.snoc t c)
+
+ find p (WText t) = find p t
+
+ sortBy cmp (WText t) =
+ WText (T.pack (sortBy cmp (T.unpack t)))
+
+instance Textual WText where
+ words (WText t) = Prelude.map WText (T.words t)
+ unwords ws = WText (T.unwords (ofoldMap (\(WText t) -> [t]) ws))
+
+ lines (WText t) = Prelude.map WText (T.lines t)
+ unlines ws = WText (T.unlines (ofoldMap (\(WText t) -> [t]) ws))
+
+ toLower (WText t) = WText (T.toLower t)
+ toUpper (WText t) = WText (T.toUpper t)
+ toCaseFold (WText t) = WText (T.toCaseFold t)
+
+
+--------------------------------------------------------------------------------
+-- low-level
+--------------------------------------------------------------------------------
+
+split :: (Char -> Bool) -> WText -> [WText]
+split = coerce WT.wsplit
+
+splitAt :: Int -> WText -> (WText, WText)
+splitAt = coerce WT.wsplitAt
+
+take :: Int -> WText -> WText
+take = coerce WT.wtake
+
+drop :: Int -> WText -> WText
+drop = coerce WT.wdrop
+
+length :: WText -> Int
+length = coerce WT.wlength
+
+
+--------------------------------------------------------------------------------
+-- coercible
+--------------------------------------------------------------------------------
+
+null :: WText -> Bool
+null = coerce T.null
+
+filter :: (Char -> Bool) -> WText -> WText
+filter = coerce T.filter
+
+map :: (Char -> Char) -> WText -> WText
+map = coerce T.map
+
+reverse
+ , strip
+ , toUpper
+ , toLower
+ :: WText -> WText
+reverse = coerce T.reverse
+strip = coerce T.strip
+toUpper = coerce T.toUpper
+toLower = coerce T.toLower
+
+stripPrefix, stripSuffix :: WText -> WText -> Maybe WText
+stripPrefix = coerce T.stripPrefix
+stripSuffix = coerce T.stripSuffix
+
+replace :: WText -> WText -> WText -> WText
+replace = coerce T.replace
+
+intercalate :: WText -> [WText] -> WText
+intercalate = coerce T.intercalate
+
+isPrefixOf
+ , isSuffixOf
+ , isInfixOf
+ :: WText -> WText -> Bool
+isPrefixOf = coerce T.isPrefixOf
+isSuffixOf = coerce T.isSuffixOf
+isInfixOf = coerce T.isInfixOf
+