@@ -10,14 +10,12 @@ module Text.Fuzzy.Parallel
10
10
import Control.Monad.ST (runST )
11
11
import Control.Parallel.Strategies (Eval , Strategy , evalTraversable ,
12
12
parTraversable , rseq , using )
13
- import Data.Function (on )
14
13
import Data.Monoid.Textual (TextualMonoid )
15
- import Data.Ord (Down (Down ))
16
14
import Data.Vector (Vector , (!) )
17
15
import qualified Data.Vector as V
18
16
-- need to use a stable sort
19
17
import Data.Bifunctor (second )
20
- import qualified Data.Vector.Algorithms.Tim as VA
18
+ import qualified Data.Monoid.Factorial as T
21
19
import Prelude hiding (filter )
22
20
import Text.Fuzzy (Fuzzy (.. ), match )
23
21
@@ -27,22 +25,20 @@ import Text.Fuzzy (Fuzzy (..), match)
27
25
-- 200
28
26
filter :: (TextualMonoid s )
29
27
=> Int -- ^ Chunk size. 1000 works well.
28
+ -> Int -- ^ Max. number of results wanted
30
29
-> s -- ^ Pattern.
31
30
-> [t ] -- ^ The list of values containing the text to search in.
32
31
-> s -- ^ The text to add before each match.
33
32
-> s -- ^ The text to add after each match.
34
33
-> (t -> s ) -- ^ The function to extract the text from the container.
35
34
-> Bool -- ^ Case sensitivity.
36
35
-> [Fuzzy t s ] -- ^ The list of results, sorted, highest score first.
37
- filter chunkSize pattern ts pre post extract caseSen = runST $ do
36
+ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
38
37
let v = (V. mapMaybe id
39
38
(V. map (\ t -> match pattern t pre post extract caseSen) (V. fromList ts)
40
39
`using`
41
40
parVectorChunk chunkSize (evalTraversable forceScore)))
42
- v' <- V. unsafeThaw v
43
- VA. sortBy (compare `on` (Down . score)) v'
44
- v'' <- V. unsafeFreeze v'
45
- return $ V. toList v''
41
+ return $ partialSortByAscScore maxRes (T. length pattern ) v
46
42
47
43
-- | Return all elements of the list that have a fuzzy
48
44
-- match against the pattern. Runs with default settings where
@@ -53,11 +49,12 @@ filter chunkSize pattern ts pre post extract caseSen = runST $ do
53
49
{-# INLINABLE simpleFilter #-}
54
50
simpleFilter :: (TextualMonoid s )
55
51
=> Int -- ^ Chunk size. 1000 works well.
52
+ -> Int -- ^ Max. number of results wanted
56
53
-> s -- ^ Pattern to look for.
57
54
-> [s ] -- ^ List of texts to check.
58
55
-> [s ] -- ^ The ones that match.
59
- simpleFilter chunk pattern xs =
60
- map original $ filter chunk pattern xs mempty mempty id False
56
+ simpleFilter chunk maxRes pattern xs =
57
+ map original $ filter chunk maxRes pattern xs mempty mempty id False
61
58
62
59
--------------------------------------------------------------------------------
63
60
@@ -103,3 +100,35 @@ pairwise :: [a] -> [(a,a)]
103
100
pairwise [] = []
104
101
pairwise [_] = []
105
102
pairwise (x: y: xs) = (x,y) : pairwise (y: xs)
103
+
104
+ -- | A stable partial sort ascending by score. O(N) best case, O(wanted*N) worst case
105
+ --- >>> partialSortByAscScore 3 5 $ V.fromList $ map (\x -> Fuzzy x x (length x)) ["A","B","ABCDE","ABBC"]
106
+ -- [Fuzzy {original = "ABCDE", rendered = "ABCDE", score = 5},Fuzzy {original = "ABBC", rendered = "ABBC", score = 4},Fuzzy {original = "A", rendered = "A", score = 1}]
107
+ partialSortByAscScore :: TextualMonoid s
108
+ => Int -- ^ Number of items needed
109
+ -> Int -- ^ Value of a perfect score
110
+ -> Vector (Fuzzy t s )
111
+ -> [Fuzzy t s ]
112
+ partialSortByAscScore wantedCount perfectScore v = loop 0 (SortState minBound perfectScore 0 ) [] where
113
+ l = V. length v
114
+ loop index st@ SortState {.. } acc
115
+ | foundCount == wantedCount = reverse acc
116
+ | index == l
117
+ = if bestScoreSeen < scoreWanted
118
+ then loop 0 st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound } acc
119
+ else reverse acc
120
+ | otherwise =
121
+ case v! index of
122
+ x | score x == scoreWanted
123
+ -> loop (index+ 1 ) st{foundCount = foundCount+ 1 } (x: acc)
124
+ | score x < scoreWanted && score x > bestScoreSeen
125
+ -> loop (index+ 1 ) st{bestScoreSeen = score x} acc
126
+ | otherwise
127
+ -> loop (index+ 1 ) st acc
128
+
129
+ data SortState a = SortState
130
+ { bestScoreSeen :: ! Int
131
+ , scoreWanted :: ! Int
132
+ , foundCount :: ! Int
133
+ }
134
+ deriving Show
0 commit comments