Skip to content

Commit 665b80b

Browse files
committed
partial sort of fuzzy filtering results
1 parent c419b37 commit 665b80b

File tree

1 file changed

+39
-10
lines changed

1 file changed

+39
-10
lines changed

ghcide/src/Text/Fuzzy/Parallel.hs

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ module Text.Fuzzy.Parallel
1010
import Control.Monad.ST (runST)
1111
import Control.Parallel.Strategies (Eval, Strategy, evalTraversable,
1212
parTraversable, rseq, using)
13-
import Data.Function (on)
1413
import Data.Monoid.Textual (TextualMonoid)
15-
import Data.Ord (Down (Down))
1614
import Data.Vector (Vector, (!))
1715
import qualified Data.Vector as V
1816
-- need to use a stable sort
1917
import Data.Bifunctor (second)
20-
import qualified Data.Vector.Algorithms.Tim as VA
18+
import qualified Data.Monoid.Factorial as T
2119
import Prelude hiding (filter)
2220
import Text.Fuzzy (Fuzzy (..), match)
2321

@@ -27,22 +25,20 @@ import Text.Fuzzy (Fuzzy (..), match)
2725
-- 200
2826
filter :: (TextualMonoid s)
2927
=> Int -- ^ Chunk size. 1000 works well.
28+
-> Int -- ^ Max. number of results wanted
3029
-> s -- ^ Pattern.
3130
-> [t] -- ^ The list of values containing the text to search in.
3231
-> s -- ^ The text to add before each match.
3332
-> s -- ^ The text to add after each match.
3433
-> (t -> s) -- ^ The function to extract the text from the container.
3534
-> Bool -- ^ Case sensitivity.
3635
-> [Fuzzy t s] -- ^ The list of results, sorted, highest score first.
37-
filter chunkSize pattern ts pre post extract caseSen = runST $ do
36+
filter chunkSize maxRes pattern ts pre post extract caseSen = runST $ do
3837
let v = (V.mapMaybe id
3938
(V.map (\t -> match pattern t pre post extract caseSen) (V.fromList ts)
4039
`using`
4140
parVectorChunk chunkSize (evalTraversable forceScore)))
42-
v' <- V.unsafeThaw v
43-
VA.sortBy (compare `on` (Down . score)) v'
44-
v'' <- V.unsafeFreeze v'
45-
return $ V.toList v''
41+
return $ partialSortByAscScore maxRes (T.length pattern) v
4642

4743
-- | Return all elements of the list that have a fuzzy
4844
-- match against the pattern. Runs with default settings where
@@ -53,11 +49,12 @@ filter chunkSize pattern ts pre post extract caseSen = runST $ do
5349
{-# INLINABLE simpleFilter #-}
5450
simpleFilter :: (TextualMonoid s)
5551
=> Int -- ^ Chunk size. 1000 works well.
52+
-> Int -- ^ Max. number of results wanted
5653
-> s -- ^ Pattern to look for.
5754
-> [s] -- ^ List of texts to check.
5855
-> [s] -- ^ The ones that match.
59-
simpleFilter chunk pattern xs =
60-
map original $ filter chunk pattern xs mempty mempty id False
56+
simpleFilter chunk maxRes pattern xs =
57+
map original $ filter chunk maxRes pattern xs mempty mempty id False
6158

6259
--------------------------------------------------------------------------------
6360

@@ -103,3 +100,35 @@ pairwise :: [a] -> [(a,a)]
103100
pairwise [] = []
104101
pairwise [_] = []
105102
pairwise (x:y:xs) = (x,y) : pairwise (y:xs)
103+
104+
-- | A stable partial sort ascending by score. O(N) best case, O(wanted*N) worst case
105+
--- >>> partialSortByAscScore 3 5 $ V.fromList $ map (\x -> Fuzzy x x (length x)) ["A","B","ABCDE","ABBC"]
106+
-- [Fuzzy {original = "ABCDE", rendered = "ABCDE", score = 5},Fuzzy {original = "ABBC", rendered = "ABBC", score = 4},Fuzzy {original = "A", rendered = "A", score = 1}]
107+
partialSortByAscScore :: TextualMonoid s
108+
=> Int -- ^ Number of items needed
109+
-> Int -- ^ Value of a perfect score
110+
-> Vector (Fuzzy t s)
111+
-> [Fuzzy t s]
112+
partialSortByAscScore wantedCount perfectScore v = loop 0 (SortState minBound perfectScore 0) [] where
113+
l = V.length v
114+
loop index st@SortState{..} acc
115+
| foundCount == wantedCount = reverse acc
116+
| index == l
117+
= if bestScoreSeen < scoreWanted
118+
then loop 0 st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound} acc
119+
else reverse acc
120+
| otherwise =
121+
case v!index of
122+
x | score x == scoreWanted
123+
-> loop (index+1) st{foundCount = foundCount+1} (x:acc)
124+
| score x < scoreWanted && score x > bestScoreSeen
125+
-> loop (index+1) st{bestScoreSeen = score x} acc
126+
| otherwise
127+
-> loop (index+1) st acc
128+
129+
data SortState a = SortState
130+
{ bestScoreSeen :: !Int
131+
, scoreWanted :: !Int
132+
, foundCount :: !Int
133+
}
134+
deriving Show

0 commit comments

Comments
 (0)