Skip to content

Ad hoc stemming #633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions Distribution/Server/Features/Search/ExtractDescriptionTerms.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

module Distribution.Server.Features.Search.ExtractDescriptionTerms (
extractSynopsisTerms,
extractDescriptionTerms
extractDescriptionTerms,
extraStems
) where

import Distribution.Server.Prelude
Expand All @@ -20,18 +21,21 @@ import Documentation.Haddock.Types

import qualified Distribution.Server.Pages.Package.HaddockParse as Haddock (parse)

-- | Given a list of extra stem suffixes (e.g. @[\"ql\",\"db\",\"ml\",\"gl\"]@)
-- and a token, return the token itself plus every variant obtained by
-- stripping one of those suffixes from the *end of the token*.
--
-- E.g. @extraStems [\"ql\",\"db\"] \"postgresql\" == [\"postgresql\",\"postgres\"]@.
--
-- Note this only adds extra possible stems; it never removes the given one.
--
-- BUG FIX: the original used the left section @(x `T.stripSuffix`)@, which is
-- @\\s -> T.stripSuffix x s@ — i.e. it tried to strip the whole *token* as a
-- suffix of the short stem (\"ql\", \"db\", ...), which can essentially never
-- match, so no extra stems were ever produced. 'T.stripSuffix' takes the
-- suffix as its first argument, so the right section @(`T.stripSuffix` x)@
-- is required to strip each listed suffix from the token.
extraStems :: [Text] -> Text -> [Text]
extraStems ss x = x : mapMaybe (`T.stripSuffix` x) ss

extractSynopsisTerms :: Set Text -> String -> [Text]
extractSynopsisTerms stopWords =
NLP.stems NLP.English
-- | Turn a package synopsis into a list of search terms: tokenise, drop
-- punctuation-only tokens, split compound tokens, case-fold, remove stop
-- words, stem (English), and finally widen each stem via 'extraStems' with
-- the supplied extra suffixes. The extra stems only add candidate terms;
-- no term produced by the normal stemmer is removed.
extractSynopsisTerms :: [Text] -> Set Text -> String -> [Text]
extractSynopsisTerms ss stopWords synopsisText =
    concatMap (extraStems ss) stemmedToks
  where
    rawToks     = NLP.tokenize synopsisText
    wordToks    = concatMap splitTok (filter (not . ignoreTok) rawToks)
    foldedToks  = map (T.toCaseFold . T.pack) wordToks
    keptToks    = filter (`Set.notMember` stopWords) foldedToks
    stemmedToks = NLP.stems NLP.English keptToks


ignoreTok :: String -> Bool
-- | A token is ignorable when every character in it is punctuation
-- (vacuously true for the empty token).
ignoreTok :: String -> Bool
ignoreTok tok = all isPunctuation tok

splitTok :: String -> [String]
Expand All @@ -48,9 +52,10 @@ splitTok tok =
(leading, []) -> leading : []


extractDescriptionTerms :: Set Text -> String -> [Text]
extractDescriptionTerms stopWords =
NLP.stems NLP.English
extractDescriptionTerms :: [Text] -> Set Text -> String -> [Text]
extractDescriptionTerms ss stopWords =
concatMap (extraStems ss)
. NLP.stems NLP.English
. filter (`Set.notMember` stopWords)
. map (T.toCaseFold . T.pack)
. maybe
Expand Down Expand Up @@ -98,7 +103,7 @@ main = do
let mostFreq :: [String]
pkgs :: [PackageDescription]
(mostFreq, pkgs) = read pkgsFile

stopWordsFile <- T.readFile "stopwords.txt"
-- wordsFile <- T.readFile "/usr/share/dict/words"
-- let ws = Set.fromList (map T.toLower $ T.lines wordsFile)
Expand All @@ -114,7 +119,7 @@ main = do
sequence_
[ putStrLn $ display (packageName pkg) ++ ": "
++ --intercalate ", "
(description pkg) ++ "\n"
(description pkg) ++ "\n"
++ intercalate ", "
(map T.unpack $ extractDescriptionTerms stopWords (description pkg)) ++ "\n"
| pkg <- pkgs
Expand Down
20 changes: 13 additions & 7 deletions Distribution/Server/Features/Search/PkgSearch.hs
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,15 @@ pkgSearchConfig =
}
where
extractTokens :: PackageDescription -> PkgDocField -> [Text]
extractTokens pkg NameField = extractPackageNameTerms (display $ packageName pkg)
extractTokens pkg SynopsisField = extractSynopsisTerms stopWords (synopsis pkg)
extractTokens pkg DescriptionField = extractDescriptionTerms stopWords (description pkg)
extractTokens pkg NameField = concatMap (extraStems computerStems) $
extractPackageNameTerms (display $ packageName pkg)
extractTokens pkg SynopsisField = extractSynopsisTerms computerStems stopWords (synopsis pkg)
extractTokens pkg DescriptionField = extractDescriptionTerms computerStems stopWords (description pkg)

normaliseQueryToken :: Text -> PkgDocField -> Text
normaliseQueryToken tok =
let tokFold = T.toCaseFold tok
-- we don't need to use extraStems here because the index is inflated by it already.
tokStem = stem English tokFold
in \field -> case field of
NameField -> tokFold
Expand All @@ -79,8 +81,8 @@ defaultSearchRankParameters =
paramFieldWeights,
paramFeatureWeights,
paramFeatureFunctions,
paramResultsetSoftLimit = 200,
paramResultsetHardLimit = 400
paramResultsetSoftLimit = 400,
paramResultsetHardLimit = 800
}
where
paramK1 :: Float
Expand Down Expand Up @@ -114,6 +116,10 @@ stopWords =
"now","how","where","when","up","has","been","about","them","then","see",
"no","do","than","should","out","off","much","if","i","have","also"]

-- | Extra stem suffixes that tend to occur in software package names
-- (e.g. \"postgresql\", \"mongodb\", \"opengl\"), fed to 'extraStems' so
-- that both the full name and the suffix-stripped form are indexed.
computerStems :: [Text]
computerStems = map T.pack suffixes
  where
    suffixes = ["ql", "db", "ml", "gl"]


{-
-------------------
Expand Down Expand Up @@ -146,15 +152,15 @@ main = do
print ("search engine invariant", invariant searchengine)

-- print [ avgFieldLength ctx s | s <- [minBound..maxBound] ]

-- print $ take 100 $ sortBy (flip compare) $ map Set.size $ Map.elems (termMap searchindex)
-- T.putStr $ T.unlines $ Map.keys (termMap searchindex)
-- let SearchEngine{searchIndex=SearchIndex{termMap, termIdMap, docKeyMap, docIdMap}} = searchengine
-- print (Map.size termMap, IntMap.size termIdMap, Map.size docKeyMap, IntMap.size docIdMap)

let loop = do
putStr "search term> "
hFlush stdout
hFlush stdout
t <- getLine
unless (null t) $ do
let terms = stems English
Expand Down