summaryrefslogtreecommitdiffstats
path: root/src/Data/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Data/Text')
-rw-r--r--src/Data/Text/WCWidth.hs141
1 files changed, 141 insertions, 0 deletions
diff --git a/src/Data/Text/WCWidth.hs b/src/Data/Text/WCWidth.hs
new file mode 100644
index 0000000..71d59cf
--- /dev/null
+++ b/src/Data/Text/WCWidth.hs
@@ -0,0 +1,141 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Data.Text.WCWidth where
+
+import Data.Char.WCWidth qualified as WC
+import Data.Text (Text)
+import Data.Text qualified as T
+
+--------------------------------------------------------------------------------
+-- primitives
+--------------------------------------------------------------------------------
+
+-- TODO this breaks when WC.wcwidth returns -1, which happens for
+-- non-printable characters like '\n'.
+
+wcwidthLenient :: Char -> Int
+wcwidthLenient c =
+ case WC.wcwidth c of
+ -1 -> 0 -- non-printable
+ w -> w -- 0, 1, or 2
+
+
+-- | Width-aware split: split on characters satisfying the predicate.
+-- The predicate sees the character, not its width.
+wsplit :: (Char -> Bool) -> Text -> [Text]
+wsplit p = go []
+ where
+ go acc t =
+ case T.uncons t of
+ Nothing ->
+ case acc of
+ [] -> []
+ _ -> [T.pack (reverse acc)]
+
+ Just (c, rest)
+ | p c ->
+ let chunk = if null acc then [] else [T.pack (reverse acc)]
+ in chunk ++ go [] rest
+
+ | otherwise ->
+ go (c : acc) rest
+
+
+
+wsplitAt :: Int -> Text -> (Text, Text)
+wsplitAt k = go k mempty
+ where
+ go n acc t =
+ case T.uncons t of
+ Just (c, rest)
+ | let w = wcwidthLenient c
+ , w <= n
+ -> go (n - w) (c:acc) rest
+ _ ->
+ (T.pack (reverse acc), t)
+
+
+--------------------------------------------------------------------------------
+-- low-level
+--------------------------------------------------------------------------------
+
+wtake :: Int -> Text -> Text
+wtake k = fst . wsplitAt k
+
+wdrop :: Int -> Text -> Text
+wdrop k = snd . wsplitAt k
+
+wlength :: Text -> Int
+--wlength = sum . map wcwidthLenient . T.unpack
+wlength = T.foldl' (\acc c -> acc + wcwidthLenient c) 0
+
+
+--------------------------------------------------------------------------------
+-- high-level
+--------------------------------------------------------------------------------
+
+wchunksOf :: Int -> Text -> [Text]
+wchunksOf k w =
+ case wsplitAt k w of
+ (a, b)
+ | b == mempty -> [a]
+ | otherwise -> a : wchunksOf k b
+
+wpadRight :: Int -> Text -> Text
+wpadRight k w =
+ let missing = k - wlength w
+ in if missing <= 0
+ then w
+ else w <> T.replicate missing " "
+
+--breakOn, splitOn
+
+
+
+--wcwidth :: Char -> Int
+--wcwidth '\0' = 0
+--wcwidth c =
+-- case WC.wcwidth c of
+-- 0 -> 0
+-- n -> n
+--
+--
+--wcwidth :: Char -> Int
+--wcwidth '\0' = 0
+--wcwidth c = max 1 (WC.wcwidth c)
+
+--length' :: Text -> Int
+--length' = Text.foldr ((+) . wcwidth') 0
+--
+--drop' :: Int -> Text -> Text
+--drop' k t =
+-- if k <= 0
+-- then t
+-- else
+-- case Text.uncons t of
+-- Just (c, t') ->
+-- drop' (k - wcwidth' c) t'
+-- Nothing -> mempty
+--
+--take' :: Int -> Text -> Text
+--take' k0 =
+-- Text.pack . rec k0
+-- where
+-- rec k t =
+-- if | Just (c, t') <- Text.uncons t, nc <- wcwidth' c, nc <= k ->
+-- c : rec (k - nc) t'
+--
+-- | otherwise ->
+-- []
+--
+--splitAt' :: Int -> Text -> (Text, Text)
+--splitAt' k0 =
+-- rec k0 mempty
+-- where
+-- rec k a t =
+-- if | Just (c, t') <- Text.uncons t, nc <- wcwidth' c, nc <= k ->
+-- rec (k - nc) (c : a) t'
+--
+-- | otherwise ->
+-- (Text.pack $ reverse a, t)
+--