From bac34ee20a367848534e475d4b3ecef2da1baa67 Mon Sep 17 00:00:00 2001
From: Gershom <gershomb@gmail.com>
Date: Sat, 30 Sep 2017 18:46:50 -0400
Subject: [PATCH 1/2] bumping query limits

---
 Distribution/Server/Features/Search/PkgSearch.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Distribution/Server/Features/Search/PkgSearch.hs b/Distribution/Server/Features/Search/PkgSearch.hs
index 0312f7b87..f153c96e7 100644
--- a/Distribution/Server/Features/Search/PkgSearch.hs
+++ b/Distribution/Server/Features/Search/PkgSearch.hs
@@ -79,8 +79,8 @@ defaultSearchRankParameters =
       paramFieldWeights,
       paramFeatureWeights,
       paramFeatureFunctions,
-      paramResultsetSoftLimit = 200,
-      paramResultsetHardLimit = 400
+      paramResultsetSoftLimit = 400,
+      paramResultsetHardLimit = 800
     }
   where
     paramK1 :: Float
@@ -146,7 +146,7 @@ main = do
     print ("search engine invariant", invariant searchengine)
 
 --    print [ avgFieldLength ctx s | s <- [minBound..maxBound] ]
-    
+
 --    print $ take 100 $ sortBy (flip compare) $ map Set.size $ Map.elems (termMap searchindex)
 --    T.putStr $ T.unlines $ Map.keys (termMap searchindex)
 --    let SearchEngine{searchIndex=SearchIndex{termMap, termIdMap, docKeyMap, docIdMap}} = searchengine
@@ -154,7 +154,7 @@ main = do
 
     let loop = do
           putStr "search term> "
-          hFlush stdout 
+          hFlush stdout
           t <- getLine
           unless (null t) $ do
             let terms = stems English

From 59f95714b2e82fede85dc1de28f5043572ae95d8 Mon Sep 17 00:00:00 2001
From: Gershom <gershomb@gmail.com>
Date: Sat, 30 Sep 2017 19:19:10 -0400
Subject: [PATCH 2/2] some extra search tweaks to handle stemmed/inexact
 matches on suffixes that don't occur in normal language but do in software

---
 .../Search/ExtractDescriptionTerms.hs         | 25 +++++++++++--------
 .../Server/Features/Search/PkgSearch.hs       | 12 ++++++---
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs b/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs
index 08916c0bc..65e1d7169 100644
--- a/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs
+++ b/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs
@@ -2,7 +2,8 @@
 
 module Distribution.Server.Features.Search.ExtractDescriptionTerms (
     extractSynopsisTerms,
-    extractDescriptionTerms
+    extractDescriptionTerms,
+    extraStems
   ) where
 
 import Distribution.Server.Prelude
@@ -20,10 +21,13 @@ import Documentation.Haddock.Types
 
 import qualified Distribution.Server.Pages.Package.HaddockParse as Haddock (parse)
 
+extraStems :: [Text] -> Text -> [Text]
+extraStems ss x = x : mapMaybe (x `T.stripSuffix`) ss
 
-extractSynopsisTerms :: Set Text -> String -> [Text]
-extractSynopsisTerms stopWords =
-      NLP.stems NLP.English
+extractSynopsisTerms :: [Text] -> Set Text -> String -> [Text]
+extractSynopsisTerms ss stopWords =
+      concatMap (extraStems ss) --note this adds extra possible stems, it doesn't delete any given one.
+    . NLP.stems NLP.English
     . filter (`Set.notMember` stopWords)
     . map (T.toCaseFold . T.pack)
     . concatMap splitTok
@@ -31,7 +35,7 @@ extractSynopsisTerms stopWords =
     . NLP.tokenize
 
 
-ignoreTok :: String -> Bool  
+ignoreTok :: String -> Bool
 ignoreTok = all isPunctuation
 
 splitTok :: String -> [String]
@@ -48,9 +52,10 @@ splitTok tok =
         (leading, [])         -> leading : []
 
 
-extractDescriptionTerms :: Set Text -> String -> [Text]
-extractDescriptionTerms stopWords =
-      NLP.stems NLP.English
+extractDescriptionTerms :: [Text] -> Set Text -> String -> [Text]
+extractDescriptionTerms ss stopWords =
+      concatMap (extraStems ss)
+    . NLP.stems NLP.English
     . filter (`Set.notMember` stopWords)
     . map (T.toCaseFold . T.pack)
     . maybe
@@ -98,7 +103,7 @@ main = do
     let mostFreq :: [String]
         pkgs     :: [PackageDescription]
         (mostFreq, pkgs) = read pkgsFile
-    
+
     stopWordsFile <- T.readFile "stopwords.txt"
 --    wordsFile <- T.readFile "/usr/share/dict/words"
 --    let ws = Set.fromList (map T.toLower $ T.lines wordsFile)
@@ -114,7 +119,7 @@ main = do
     sequence_
       [ putStrLn $ display (packageName pkg) ++ ": "
                 ++ --intercalate ", "
-                   (description pkg) ++ "\n" 
+                   (description pkg) ++ "\n"
                 ++ intercalate ", "
                    (map T.unpack $ extractDescriptionTerms stopWords (description pkg)) ++ "\n"
       | pkg <- pkgs
diff --git a/Distribution/Server/Features/Search/PkgSearch.hs b/Distribution/Server/Features/Search/PkgSearch.hs
index f153c96e7..6f19cfcfb 100644
--- a/Distribution/Server/Features/Search/PkgSearch.hs
+++ b/Distribution/Server/Features/Search/PkgSearch.hs
@@ -56,13 +56,15 @@ pkgSearchConfig =
   }
   where
     extractTokens :: PackageDescription -> PkgDocField -> [Text]
-    extractTokens pkg NameField        = extractPackageNameTerms           (display $ packageName pkg)
-    extractTokens pkg SynopsisField    = extractSynopsisTerms    stopWords (synopsis    pkg)
-    extractTokens pkg DescriptionField = extractDescriptionTerms stopWords (description pkg)
+    extractTokens pkg NameField        = concatMap (extraStems computerStems) $
+                                         extractPackageNameTerms           (display $ packageName pkg)
+    extractTokens pkg SynopsisField    = extractSynopsisTerms    computerStems stopWords (synopsis    pkg)
+    extractTokens pkg DescriptionField = extractDescriptionTerms computerStems stopWords (description pkg)
 
     normaliseQueryToken :: Text -> PkgDocField -> Text
     normaliseQueryToken tok =
       let tokFold = T.toCaseFold tok
+          -- we don't need to use extraStems here because the index is inflated by it already.
           tokStem = stem English tokFold
        in \field -> case field of
                       NameField        -> tokFold
@@ -114,6 +116,10 @@ stopWords =
      "now","how","where","when","up","has","been","about","them","then","see",
      "no","do","than","should","out","off","much","if","i","have","also"]
 
+-- Extra stems that tend to occur with software packages
+computerStems :: [Text]
+computerStems = map T.pack ["ql","db","ml","gl"]
+
 
 {-
 -------------------