@@ -10,39 +10,33 @@ module Text.Fuzzy.Parallel
10
10
import Control.Monad.ST (runST )
11
11
import Control.Parallel.Strategies (Eval , Strategy , evalTraversable ,
12
12
parTraversable , rseq , using )
13
- import Data.Function (on )
14
13
import Data.Monoid.Textual (TextualMonoid )
15
- import Data.Ord (Down (Down ))
16
14
import Data.Vector (Vector , (!) )
17
15
import qualified Data.Vector as V
18
16
-- need to use a stable sort
19
17
import Data.Bifunctor (second )
20
- import qualified Data.Vector.Algorithms.Tim as VA
18
+ import Data.Maybe ( fromJust )
21
19
import Prelude hiding (filter )
22
20
import Text.Fuzzy (Fuzzy (.. ), match )
23
21
24
22
-- | The function to filter a list of values by fuzzy search on the text extracted from them.
25
- --
26
- -- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
27
- -- 200
28
23
filter :: (TextualMonoid s )
29
24
=> Int -- ^ Chunk size. 1000 works well.
25
+ -> Int -- ^ Max. number of results wanted
30
26
-> s -- ^ Pattern.
31
27
-> [t ] -- ^ The list of values containing the text to search in.
32
28
-> s -- ^ The text to add before each match.
33
29
-> s -- ^ The text to add after each match.
34
30
-> (t -> s ) -- ^ The function to extract the text from the container.
35
31
-> Bool -- ^ Case sensitivity.
36
32
-> [Fuzzy t s ] -- ^ The list of results, sorted, highest score first.
37
- filter chunkSize pattern ts pre post extract caseSen = runST $ do
38
- let v = ( V. mapMaybe id
33
+ filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
34
+ let v = V. mapMaybe id
39
35
(V. map (\ t -> match pattern t pre post extract caseSen) (V. fromList ts)
40
36
`using`
41
- parVectorChunk chunkSize (evalTraversable forceScore)))
42
- v' <- V. unsafeThaw v
43
- VA. sortBy (compare `on` (Down . score)) v'
44
- v'' <- V. unsafeFreeze v'
45
- return $ V. toList v''
37
+ parVectorChunk chunkSize (evalTraversable forceScore))
38
+ perfectScore = score $ fromJust $ match pattern pattern " " " " id False
39
+ return $ partialSortByAscScore maxRes perfectScore v
46
40
47
41
-- | Return all elements of the list that have a fuzzy
48
42
-- match against the pattern. Runs with default settings where
@@ -53,11 +47,12 @@ filter chunkSize pattern ts pre post extract caseSen = runST $ do
53
47
{-# INLINABLE simpleFilter #-}
54
48
simpleFilter :: (TextualMonoid s )
55
49
=> Int -- ^ Chunk size. 1000 works well.
50
+ -> Int -- ^ Max. number of results wanted
56
51
-> s -- ^ Pattern to look for.
57
52
-> [s ] -- ^ List of texts to check.
58
53
-> [s ] -- ^ The ones that match.
59
- simpleFilter chunk pattern xs =
60
- map original $ filter chunk pattern xs mempty mempty id False
54
+ simpleFilter chunk maxRes pattern xs =
55
+ map original $ filter chunk maxRes pattern xs mempty mempty id False
61
56
62
57
--------------------------------------------------------------------------------
63
58
@@ -82,10 +77,8 @@ parVectorChunk chunkSize st v =
82
77
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
83
78
chunkVector :: Int -> Vector a -> [Vector a ]
84
79
chunkVector chunkSize v = do
85
- let indices = chunkIndices chunkSize (0 ,l)
86
- l = V. length v
87
- [V. fromListN (h- l+ 1 ) [v ! j | j <- [l .. h]]
88
- | (l,h) <- indices]
80
+ let indices = chunkIndices chunkSize (0 ,V. length v)
81
+ [V. slice l (h- l) v | (l,h) <- indices]
89
82
90
83
-- >>> chunkIndices 3 (0,9)
91
84
-- >>> chunkIndices 3 (0,10)
@@ -103,3 +96,34 @@ pairwise :: [a] -> [(a,a)]
103
96
pairwise [] = []
104
97
pairwise [_] = []
105
98
pairwise (x: y: xs) = (x,y) : pairwise (y: xs)
99
+
100
+ -- | A stable partial sort ascending by score. O(N) best case, O(wanted*N) worst case
101
+ partialSortByAscScore :: TextualMonoid s
102
+ => Int -- ^ Number of items needed
103
+ -> Int -- ^ Value of a perfect score
104
+ -> Vector (Fuzzy t s )
105
+ -> [Fuzzy t s ]
106
+ partialSortByAscScore wantedCount perfectScore v = loop 0 (SortState minBound perfectScore 0 ) [] where
107
+ l = V. length v
108
+ loop index st@ SortState {.. } acc
109
+ | foundCount == wantedCount = reverse acc
110
+ | index == l
111
+ -- ProgressCancelledException
112
+ = if bestScoreSeen < scoreWanted
113
+ then loop 0 st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound } acc
114
+ else reverse acc
115
+ | otherwise =
116
+ case v! index of
117
+ x | score x == scoreWanted
118
+ -> loop (index+ 1 ) st{foundCount = foundCount+ 1 } (x: acc)
119
+ | score x < scoreWanted && score x > bestScoreSeen
120
+ -> loop (index+ 1 ) st{bestScoreSeen = score x} acc
121
+ | otherwise
122
+ -> loop (index+ 1 ) st acc
123
+
124
+ data SortState a = SortState
125
+ { bestScoreSeen :: ! Int
126
+ , scoreWanted :: ! Int
127
+ , foundCount :: ! Int
128
+ }
129
+ deriving Show
0 commit comments