summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author tv <tv@krebsco.de> 2026-03-06 23:32:59 +0100
committer tv <tv@krebsco.de> 2026-03-06 23:36:06 +0100
commit 18fffd492fe7134ef1cc53e1725d1709ddbde20b (patch)
tree 95a89458b1b7ebc67b02a53394609df5f095bd18
parent 7a9f03584573cbcef94a610e6aafa5d8be89722e (diff)
HyphenateSegments: Hyphenator -> Language
This way we don't need to define bad orphan instances.
-rw-r--r-- src/TextViewport/Buffer/Item.hs 8
-rw-r--r-- src/TextViewport/Render/Segmentation.hs 24
-rw-r--r-- test/Spec.hs 44
3 files changed, 22 insertions, 54 deletions
diff --git a/src/TextViewport/Buffer/Item.hs b/src/TextViewport/Buffer/Item.hs
index b08aaf4..81d18ef 100644
--- a/src/TextViewport/Buffer/Item.hs
+++ b/src/TextViewport/Buffer/Item.hs
@@ -15,13 +15,7 @@ data SegmentStrategy
= NoSegments
| FixedWidthSegments
| HyphenateSegments
- { hsDict :: H.Hyphenator
+ { hsLang :: H.Language
, hsCache :: HM.HashMap Text [(Text, Text)]
}
deriving (Eq, Show)
-
-instance Show H.Hyphenator where
- show _ = "<Hyphenator>"
-
-instance Eq H.Hyphenator where
- a == b = False
diff --git a/src/TextViewport/Render/Segmentation.hs b/src/TextViewport/Render/Segmentation.hs
index 584798e..55971ce 100644
--- a/src/TextViewport/Render/Segmentation.hs
+++ b/src/TextViewport/Render/Segmentation.hs
@@ -38,7 +38,7 @@ applyStrategy FixedWidthSegments width itemIx txt =
| (lineIx, (off, chunk)) <- zip [0..] allChunks
]
-applyStrategy (HyphenateSegments dict cache0) width itemIx txt =
+applyStrategy (HyphenateSegments lang cache0) width itemIx txt =
let rawLines = T.splitOn "\n" txt
-- fold over each physical line, accumulating:
@@ -59,13 +59,13 @@ applyStrategy (HyphenateSegments dict cache0) width itemIx txt =
-- -> Text
-- -> ([(Int, Text)], HM.HashMap Text [(Text, Text)], Int)
segmentOneLine (acc, cache, off0) line =
- let (chunks, cache1) = segmentWithHyphenationTeXLite dict width line cache
+ let (chunks, cache1) = segmentWithHyphenationTeXLite lang width line cache
offsets = scanOffsetsFrom off0 chunks
offNext = off0 + T.length line + 1
acc' = acc `DL.append` DL.fromList (zip offsets chunks)
in (acc', cache1, offNext)
--segmentOneLine (acc, cache, off0) line =
- -- let chunks = segmentWithHyphenationTeXLite dict width line
+ -- let chunks = segmentWithHyphenationTeXLite lang width line
-- offsets = scanOffsetsFrom off0 chunks
-- offNext = off0 + T.length line + 1 -- +1 for newline
-- acc' = acc ++ zip offsets chunks
@@ -102,17 +102,17 @@ scanOffsetsFrom start = go start
go !o (t:ts) = o : go (o + T.length t) ts
segmentWithHyphenationTeXLite
- :: H.Hyphenator
+ :: H.Language
-> Int
-> Text
-> HM.HashMap Text [(Text, Text)]
-> ([Text], HM.HashMap Text [(Text, Text)])
-segmentWithHyphenationTeXLite dict width txt cache0 =
+segmentWithHyphenationTeXLite lang width txt cache0 =
go cache0 (T.words txt)
where
go cache [] = ([], cache)
go cache ws =
- case lineCandidates dict width cache ws of
+ case lineCandidates lang width cache ws of
([], cache1) ->
let chunks = breakWordSafe width ws
in (chunks, cache1)
@@ -138,12 +138,12 @@ breakWordSafe width ws =
type Candidate = (Text, [Text], Bool)
lineCandidates
- :: H.Hyphenator
+ :: H.Language
-> Int
-> HM.HashMap Text [(Text, Text)]
-> [Text]
-> ([(Text, [Text], Bool)], HM.HashMap Text [(Text, Text)])
-lineCandidates dict width cache0 ws0 =
+lineCandidates lang width cache0 ws0 =
go [] [] cache0 ws0
where
go _ acc cache [] = (acc, cache)
@@ -162,7 +162,7 @@ lineCandidates dict width cache0 ws0 =
case HM.lookup w cache of
Just hs -> (hs, cache)
Nothing ->
- let hs = hyphenateWord dict w
+ let hs = hyphenateWord lang w
in (hs, HM.insert w hs cache)
hyphCands =
@@ -178,9 +178,9 @@ lineCandidates dict width cache0 ws0 =
then go (line ++ [w]) acc2 cache1 ws
else (acc2, cache1)
-hyphenateWord :: H.Hyphenator -> Text -> [(Text, Text)]
-hyphenateWord dict word =
- let parts = H.hyphenate dict (T.unpack word)
+hyphenateWord :: H.Language -> Text -> [(Text, Text)]
+hyphenateWord lang word =
+ let parts = H.hyphenate (H.languageHyphenator lang) (T.unpack word)
in [ ( T.pack (concat (take i parts))
, T.pack (concat (drop i parts))
)
diff --git a/test/Spec.hs b/test/Spec.hs
index 3ada0ea..c7c2cbd 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -113,33 +113,7 @@ main = hspec do
describe "Item & SegmentStrategy" do
-
- it "Hyphenator Eq instance should consider identical hyphenators equal" do
- let hy = H.german_1996
- (hy == hy) `shouldBe` True
-
- it "HyphenateSegments with same hyphenator and same cache should be equal" do
- let hy = H.german_1996
- s1 = HyphenateSegments hy HM.empty
- s2 = HyphenateSegments hy HM.empty
- s1 == s2 `shouldBe` True
-
- it "Show Hyphenator should reflect differences between hyphenators" do
- let hy1 = H.german_1996
- hy2 = H.english_US
- show hy1 `shouldNotBe` show hy2
-
- it "Show HyphenateSegments should differ for different hyphenators" do
- let hy1 = H.german_1996
- hy2 = H.english_US
- s1 = HyphenateSegments hy1 HM.empty
- s2 = HyphenateSegments hy2 HM.empty
- show s1 `shouldNotBe` show s2
-
- it "Show HyphenateSegments should not expose internal cache structure" do
- let hy = H.german_1996
- s = HyphenateSegments hy (HM.fromList [("a",[("a","")])])
- show s `shouldNotContain` "fromList"
+ pure ()
describe "Render" do
@@ -256,11 +230,11 @@ main = hspec do
it "renderItem should invalidate cache when segmentation output changes" do
let old = CachedRender
{ crWidth = 5
- , crStrategy = HyphenateSegments H.german_1996 mempty
+ , crStrategy = HyphenateSegments H.German_1996 mempty
, crText = "Schifffahrt"
, crRendered = RenderedItem mempty
}
- itm = Item "Schifffahrt" (HyphenateSegments H.german_1996 mempty)
+ itm = Item "Schifffahrt" (HyphenateSegments H.German_1996 mempty)
new = renderItem 5 0 itm (Just old)
crRendered new `shouldNotBe` crRendered old
@@ -384,8 +358,8 @@ main = hspec do
["abcde","fgh"]
it "hyphenateWord splits German words" do
- let hy = H.german_1996
- hyphenateWord hy "Schifffahrt" `shouldSatisfy` (not . null)
+ let lang = H.German_1996
+ hyphenateWord lang "Schifffahrt" `shouldSatisfy` (not . null)
it "scoreCandidate is deterministic for identical candidates" do
let c = ("abcdefghijk", ["abcdefgh","ijk"], False)
@@ -406,14 +380,14 @@ main = hspec do
scanOffsetsFrom 0 ["abc","def"] `shouldBe` [0,4]
it "HyphenateSegments should return updated cache" do
- let dict = H.german_1996
+ let lang = H.German_1996
cache0 = HM.empty
- _v = applyStrategy (HyphenateSegments dict cache0) 5 0 "Schifffahrt"
+ _v = applyStrategy (HyphenateSegments lang cache0) 5 0 "Schifffahrt"
cache0 `shouldNotBe` (HM.empty :: HM.HashMap Text [(Text, Text)])
it "lineCandidates should preserve candidate order" do
- let dict = H.german_1996
- (cs, _) = lineCandidates dict 10 HM.empty ["a","b","c"]
+ let lang = H.German_1996
+ (cs, _) = lineCandidates lang 10 HM.empty ["a","b","c"]
map (\(t,_,_) -> t) cs `shouldBe` ["a","a b","a b c"]
it "breakWordSafe should not split grapheme clusters" do