1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
module TextViewport.Render.Segmentation where
import Data.DList qualified as DL
import Data.HashMap.Strict qualified as HM
import Data.List (minimumBy)
import Data.Ord (comparing)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as V
import Text.Hyphenation qualified as H
import TextViewport.Buffer.Item
import TextViewport.Render.RenderedLine (RenderedLine(..)) -- TODO qualified
applyStrategy :: SegmentStrategy -> Int -> Int -> Text -> Vector RenderedLine
applyStrategy NoSegments width itemIx txt =
let rawLines = T.splitOn "\n" txt
chunks = map (T.take width) rawLines -- crop
offsets = scanOffsetsWithNewlines chunks
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
| (lineIx, (off, chunk)) <- zip [0..] (zip offsets chunks)
]
applyStrategy FixedWidthSegments width itemIx txt =
let rawLines = T.splitOn "\n" txt
(dl, _) = foldl step (DL.empty, 0) rawLines
step (acc, off0) line =
let chunks = chunkFixed width line
offsets = scanOffsetsFrom off0 chunks
offNext = off0 + T.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', offNext)
allChunks = DL.toList dl
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
| (lineIx, (off, chunk)) <- zip [0..] allChunks
]
applyStrategy (HyphenateSegments dict cache0) width itemIx txt =
let rawLines = T.splitOn "\n" txt
-- fold over each physical line, accumulating:
-- * all rendered (offset, chunk) pairs
-- * updated hyphenation cache (unused for now)
-- * running character offset across lines
(dl, _cache1, _) =
foldl segmentOneLine (DL.empty, cache0, 0) rawLines
allChunks = DL.toList dl
in V.fromList
[ RenderedLine { rlText = chunk, rlItemIx = itemIx, rlLineIx = lineIx, rlCharStart = off }
| (lineIx, (off, chunk)) <- zip [0..] allChunks
]
where
-- Segment a single physical line using TeX‑lite hyphenation
--segmentOneLine
-- :: ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int)
-- -> Text
-- -> ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int)
segmentOneLine (acc, cache, off0) line =
let (chunks, cache1) = segmentWithHyphenationTeXLite dict width line cache
offsets = scanOffsetsFrom off0 chunks
offNext = off0 + T.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', cache1, offNext)
--segmentOneLine (acc, cache, off0) line =
-- let chunks = segmentWithHyphenationTeXLite dict width line
-- offsets = scanOffsetsFrom off0 chunks
-- offNext = off0 + T.length line + 1 -- +1 for newline
-- acc' = acc ++ zip offsets chunks
-- in (acc', cache, offNext)
-- | Compute running character offsets for a list of chunks.
scanOffsetsWithNewlines :: [Text] -> [Int]
scanOffsetsWithNewlines = go 0
where
go !_ [] = []
go !o (l:ls) =
let off = o
o' = o + T.length l + 1 -- +1 for newline
in off : go o' ls
-- | Chunk a single line into fixed-width pieces.
chunkFixed :: Int -> Text -> [Text]
chunkFixed w t
| w <= 0 = []
| T.null t = [""]
| otherwise =
let (h, rest) = T.splitAt w t
in h : if T.null rest then [] else chunkFixed w rest
--------------------------------------------------------------------------------
-- Hyphenation-aware segmenting (TeX-lite)
--------------------------------------------------------------------------------
-- Compute offsets starting from a base offset
scanOffsetsFrom :: Int -> [Text] -> [Int]
scanOffsetsFrom start = go start
where
go !_ [] = []
go !o (t:ts) = o : go (o + T.length t) ts
segmentWithHyphenationTeXLite
:: H.Hyphenator
-> Int
-> Text
-> HM.HashMap Text [(Text, Text)]
-> ([Text], HM.HashMap Text [(Text, Text)])
segmentWithHyphenationTeXLite dict width txt cache0 =
go cache0 (T.words txt)
where
go cache [] = ([], cache)
go cache ws =
case lineCandidates dict width cache ws of
([], cache1) ->
let chunks = breakWordSafe width ws
in (chunks, cache1)
(cs, cache1) ->
let (line, rest, _) =
minimumBy (comparing (scoreCandidate width)) cs
(more, cache2) = go cache1 rest
in (line : more, cache2)
-- | Lossless fallback: treat remaining words as one long text and
-- chunk it into width-sized pieces. Never truncates, never drops text.
breakWordSafe :: Int -> [Text] -> [Text]
breakWordSafe width ws =
chunk (T.unwords ws)
where
chunk t
| T.null t = []
| T.length t <= width = [t]
| otherwise =
let (c, r) = T.splitAt width t
in c : chunk r
type Candidate = (Text, [Text], Bool)
lineCandidates
:: H.Hyphenator
-> Int
-> HM.HashMap Text [(Text, Text)]
-> [Text]
-> ([(Text, [Text], Bool)], HM.HashMap Text [(Text, Text)])
lineCandidates dict width cache0 ws0 =
go [] [] cache0 ws0
where
go _ acc cache [] = (acc, cache)
go line acc cache (w:ws) =
let space = if null line then "" else " "
baseTxt = T.unwords line
wholeTxt = baseTxt <> space <> w
wholeLen = T.length wholeTxt
acc1 =
if wholeLen <= width && not (T.null wholeTxt)
then (wholeTxt, ws, False) : acc
else acc
(hyphs, cache1) =
case HM.lookup w cache of
Just hs -> (hs, cache)
Nothing ->
let hs = hyphenateWord dict w
in (hs, HM.insert w hs cache)
hyphCands =
[ (preTxt, suf : ws, True)
| (pre, suf) <- hyphs
, not (T.null pre)
, let preTxt = baseTxt <> space <> pre <> "-"
, T.length preTxt <= width
]
acc2 = hyphCands ++ acc1
in if wholeLen <= width
then go (line ++ [w]) acc2 cache1 ws
else (acc2, cache1)
hyphenateWord :: H.Hyphenator -> Text -> [(Text, Text)]
hyphenateWord dict word =
let parts = H.hyphenate dict (T.unpack word)
in [ ( T.pack (concat (take i parts))
, T.pack (concat (drop i parts))
)
| i <- [1 .. length parts - 1]
]
scoreCandidate :: Int -> Candidate -> Int
scoreCandidate width (line, _, endsWithHyphen) =
let len = T.length line
remSpace = max 0 (width - len)
badness = remSpace * remSpace * remSpace
hyphenPenalty =
if endsWithHyphen then 50 else 0
shortPenalty =
if len < width `div` 2 then 200 else 0
in badness + hyphenPenalty + shortPenalty
|