never executed always true always false
    1 -----------------------------------------------------------------------------
    2 -- |
    3 -- Module      :  Distribution.Client.Init.Heuristics
    4 -- Copyright   :  (c) Benedikt Huber 2009
    5 -- License     :  BSD-like
    6 --
    7 -- Maintainer  :  cabal-devel@haskell.org
    8 -- Stability   :  provisional
    9 -- Portability :  portable
   10 --
   11 -- Heuristics for creating initial cabal files.
   12 --
   13 -----------------------------------------------------------------------------
   14 module Distribution.Client.Init.Heuristics (
   15     guessPackageName,
   16     scanForModules,     SourceFileEntry(..),
   17     neededBuildPrograms,
   18     guessMainFileCandidates,
   19     guessAuthorNameMail,
   20     knownCategories,
   21 ) where
   22 
   23 import Prelude ()
   24 import qualified Data.ByteString as BS
   25 import Distribution.Client.Compat.Prelude
   26 import Distribution.Utils.Generic (safeHead, safeTail, safeLast)
   27 
   28 import Distribution.Simple.Setup (Flag(..), flagToMaybe)
   29 import Distribution.Simple.Utils (fromUTF8BS)
   30 import Distribution.ModuleName
   31     ( ModuleName, toFilePath )
   32 import qualified Distribution.Package as P
   33 import qualified Distribution.PackageDescription as PD
   34     ( category, packageDescription )
   35 import Distribution.Client.Utils
   36          ( tryCanonicalizePath )
   37 import Language.Haskell.Extension ( Extension )
   38 
   39 import Distribution.Solver.Types.PackageIndex
   40     ( allPackagesByName )
   41 import Distribution.Solver.Types.SourcePackage
   42     ( srcpkgDescription )
   43 
   44 import Distribution.Client.Types ( SourcePackageDb(..) )
   45 import Data.Char   ( isLower )
   46 import Data.List   ( isInfixOf )
   47 import qualified Data.Set as Set ( fromList, toList )
   48 import System.Directory ( getCurrentDirectory, getDirectoryContents,
   49                           doesDirectoryExist, doesFileExist, getHomeDirectory, )
   50 import Distribution.Compat.Environment ( getEnvironment )
   51 import System.FilePath ( takeExtension, takeBaseName, dropExtension,
   52                          (</>), (<.>), splitDirectories, makeRelative )
   53 
   54 import Distribution.Client.Init.Types     ( InitFlags(..) )
   55 import Distribution.Client.Compat.Process ( readProcessWithExitCode )
   56 
   57 import qualified Distribution.Utils.ShortText as ShortText
   58 
   59 -- | Return a list of candidate main files for this executable: top-level
   60 -- modules including the word 'Main' in the file name. The list is sorted in
   61 -- order of preference, shorter file names are preferred. 'Right's are existing
   62 -- candidates and 'Left's are those that do not yet exist.
   63 guessMainFileCandidates :: InitFlags -> IO [Either FilePath FilePath]
   64 guessMainFileCandidates flags = do
   65   dir <-
   66     maybe getCurrentDirectory return (flagToMaybe $ packageDir flags)
   67   files <- getDirectoryContents dir
   68   let existingCandidates = filter isMain files
   69       -- We always want to give the user at least one default choice.  If either
   70       -- Main.hs or Main.lhs has already been created, then we don't want to
   71       -- suggest the other; however, if neither has been created, then we
   72       -- suggest both.
   73       newCandidates =
   74         if any (`elem` existingCandidates) ["Main.hs", "Main.lhs"]
   75         then []
   76         else ["Main.hs", "Main.lhs"]
   77       candidates =
   78         sortBy (\x y -> comparing (length . either id id) x y
   79                         `mappend` compare x y)
   80                (map Left newCandidates ++ map Right existingCandidates)
   81   return candidates
   82 
   83   where
   84     isMain f =    (isInfixOf "Main" f || isInfixOf  "main" f)
   85                && (isSuffixOf ".hs" f || isSuffixOf ".lhs" f)
   86 
   87 -- | Guess the package name based on the given root directory.
   88 guessPackageName :: FilePath -> IO P.PackageName
   89 guessPackageName = liftM (P.mkPackageName . repair . fromMaybe "" . safeLast . splitDirectories)
   90                  . tryCanonicalizePath
   91   where
   92     -- Treat each span of non-alphanumeric characters as a hyphen. Each
   93     -- hyphenated component of a package name must contain at least one
   94     -- alphabetic character. An arbitrary character ('x') will be prepended if
   95     -- this is not the case for the first component, and subsequent components
   96     -- will simply be run together. For example, "1+2_foo-3" will become
   97     -- "x12-foo3".
   98     repair = repair' ('x' :) id
   99     repair' invalid valid x = case dropWhile (not . isAlphaNum) x of
  100         "" -> repairComponent ""
  101         x' -> let (c, r) = first repairComponent $ break (not . isAlphaNum) x'
  102               in c ++ repairRest r
  103       where
  104         repairComponent c | all isDigit c = invalid c
  105                           | otherwise     = valid c
  106     repairRest = repair' id ('-' :)
  107 
  108 -- |Data type of source files found in the working directory
  109 data SourceFileEntry = SourceFileEntry
  110     { relativeSourcePath :: FilePath
  111     , moduleName         :: ModuleName
  112     , fileExtension      :: String
  113     , imports            :: [ModuleName]
  114     , extensions         :: [Extension]
  115     } deriving Show
  116 
  117 sfToFileName :: FilePath -> SourceFileEntry -> FilePath
  118 sfToFileName projectRoot (SourceFileEntry relPath m ext _ _)
  119   = projectRoot </> relPath </> toFilePath m <.> ext
  120 
  121 -- |Search for source files in the given directory
  122 -- and return pairs of guessed Haskell source path and
  123 -- module names.
  124 scanForModules :: FilePath -> IO [SourceFileEntry]
  125 scanForModules rootDir = scanForModulesIn rootDir rootDir
  126 
  127 scanForModulesIn :: FilePath -> FilePath -> IO [SourceFileEntry]
  128 scanForModulesIn projectRoot srcRoot = scan srcRoot []
  129   where
  130     scan dir hierarchy = do
  131         entries <- getDirectoryContents (projectRoot </> dir)
  132         (files, dirs) <- liftM partitionEithers (traverse (tagIsDir dir) entries)
  133         let modules = catMaybes [ guessModuleName hierarchy file
  134                                 | file <- files
  135                                 , maybe False isUpper (safeHead file) ]
  136         modules' <- traverse (findImportsAndExts projectRoot) modules
  137         recMods <- traverse (scanRecursive dir hierarchy) dirs
  138         return $ concat (modules' : recMods)
  139     tagIsDir parent entry = do
  140         isDir <- doesDirectoryExist (parent </> entry)
  141         return $ (if isDir then Right else Left) entry
  142     guessModuleName hierarchy entry
  143         | takeBaseName entry == "Setup" = Nothing
  144         | ext `elem` sourceExtensions   =
  145             SourceFileEntry <$> pure relRoot <*> modName <*> pure ext <*> pure [] <*> pure []
  146         | otherwise = Nothing
  147       where
  148         relRoot       = makeRelative projectRoot srcRoot
  149         unqualModName = dropExtension entry
  150         modName       = simpleParsec
  151                       $ intercalate "." . reverse $ (unqualModName : hierarchy)
  152         ext           = case takeExtension entry of '.':e -> e; e -> e
  153     scanRecursive parent hierarchy entry
  154       | maybe False isUpper (safeHead entry) = scan (parent </> entry) (entry : hierarchy)
  155       | maybe False isLower (safeHead entry) && not (ignoreDir entry) =
  156           scanForModulesIn projectRoot $ foldl (</>) srcRoot (reverse (entry : hierarchy))
  157       | otherwise = return []
  158     ignoreDir ('.':_)  = True
  159     ignoreDir dir      = dir `elem` ["dist", "_darcs"]
  160 
  161 -- | Read the contents of the handle and parse for Language pragmas
  162 -- and other module names that might be part of this project.
  163 findImportsAndExts :: FilePath -> SourceFileEntry -> IO SourceFileEntry
  164 findImportsAndExts projectRoot sf = do
  165   s <- fromUTF8BS <$> BS.readFile (sfToFileName projectRoot sf)
  166 
  167   let modules = mapMaybe
  168                 ( getModName
  169                 . drop 1
  170                 . filter (not . null)
  171                 . dropWhile (/= "import")
  172                 . words
  173                 )
  174               . filter (not . ("--" `isPrefixOf`)) -- poor man's comment filtering
  175               . lines
  176               $ s
  177 
  178       -- TODO: We should probably make a better attempt at parsing
  179       -- comments above.  Unfortunately we can't use a full-fledged
  180       -- Haskell parser since cabal's dependencies must be kept at a
  181       -- minimum.
  182 
  183       -- A poor man's LANGUAGE pragma parser.
  184       exts = mapMaybe simpleParsec
  185            . concatMap getPragmas
  186            . filter isLANGUAGEPragma
  187            . map fst
  188            . drop 1
  189            . takeWhile (not . null . snd)
  190            . iterate (takeBraces . snd)
  191            $ ("",s)
  192 
  193       takeBraces = break (== '}') . dropWhile (/= '{')
  194 
  195       isLANGUAGEPragma = ("{-# LANGUAGE " `isPrefixOf`)
  196 
  197       getPragmas = map trim . splitCommas . takeWhile (/= '#') . drop 13
  198 
  199       splitCommas "" = []
  200       splitCommas xs = x : splitCommas (drop 1 y)
  201         where (x,y) = break (==',') xs
  202 
  203   return sf { imports    = modules
  204             , extensions = exts
  205             }
  206 
  207  where getModName :: [String] -> Maybe ModuleName
  208        getModName []               = Nothing
  209        getModName ("qualified":ws) = getModName ws
  210        getModName (ms:_)           = simpleParsec ms
  211 
  212 
  213 
  214 -- Unfortunately we cannot use the version exported by Distribution.Simple.Program
  215 knownSuffixHandlers :: [(String,String)]
  216 knownSuffixHandlers =
  217   [ ("gc",     "greencard")
  218   , ("chs",    "chs")
  219   , ("hsc",    "hsc2hs")
  220   , ("x",      "alex")
  221   , ("y",      "happy")
  222   , ("ly",     "happy")
  223   , ("cpphs",  "cpp")
  224   ]
  225 
  226 sourceExtensions :: [String]
  227 sourceExtensions = "hs" : "lhs" : map fst knownSuffixHandlers
  228 
  229 neededBuildPrograms :: [SourceFileEntry] -> [String]
  230 neededBuildPrograms entries =
  231     [ handler
  232     | ext <- nubSet (map fileExtension entries)
  233     , handler <- maybeToList (lookup ext knownSuffixHandlers)
  234     ]
  235 
  236 -- | Guess author and email using darcs and git configuration options. Use
  237 -- the following in decreasing order of preference:
  238 --
  239 -- 1. vcs env vars ($DARCS_EMAIL, $GIT_AUTHOR_*)
  240 -- 2. Local repo configs
  241 -- 3. Global vcs configs
  242 -- 4. The generic $EMAIL
  243 --
  244 -- Name and email are processed separately, so the guess might end up being
  245 -- a name from DARCS_EMAIL and an email from git config.
  246 --
  247 -- Darcs has preference, for tradition's sake.
  248 guessAuthorNameMail :: IO (Flag String, Flag String)
  249 guessAuthorNameMail = fmap authorGuessPure authorGuessIO
  250 
  251 -- Ordered in increasing preference, since Flag-as-monoid is identical to
  252 -- Last.
  253 authorGuessPure :: AuthorGuessIO -> AuthorGuess
  254 authorGuessPure (AuthorGuessIO { authorGuessEnv = env
  255                                , authorGuessLocalDarcs = darcsLocalF
  256                                , authorGuessGlobalDarcs = darcsGlobalF
  257                                , authorGuessLocalGit = gitLocal
  258                                , authorGuessGlobalGit = gitGlobal })
  259     = mconcat
  260         [ emailEnv env
  261         , gitGlobal
  262         , darcsCfg darcsGlobalF
  263         , gitLocal
  264         , darcsCfg darcsLocalF
  265         , gitEnv env
  266         , darcsEnv env
  267         ]
  268 
  269 authorGuessIO :: IO AuthorGuessIO
  270 authorGuessIO = AuthorGuessIO
  271     <$> getEnvironment
  272     <*> (maybeReadFile $ "_darcs" </> "prefs" </> "author")
  273     <*> (maybeReadFile =<< liftM (</> (".darcs" </> "author")) getHomeDirectory)
  274     <*> gitCfg Local
  275     <*> gitCfg Global
  276 
  277 -- Types and functions used for guessing the author are now defined:
  278 
  279 type AuthorGuess   = (Flag String, Flag String)
  280 type Enviro        = [(String, String)]
  281 data GitLoc        = Local | Global
  282 data AuthorGuessIO = AuthorGuessIO {
  283     authorGuessEnv         :: Enviro,         -- ^ Environment lookup table
  284     authorGuessLocalDarcs  :: (Maybe String), -- ^ Contents of local darcs author info
  285     authorGuessGlobalDarcs :: (Maybe String), -- ^ Contents of global darcs author info
  286     authorGuessLocalGit    :: AuthorGuess,   -- ^ Git config --local
  287     authorGuessGlobalGit   :: AuthorGuess    -- ^ Git config --global
  288   }
  289 
  290 darcsEnv :: Enviro -> AuthorGuess
  291 darcsEnv = maybe mempty nameAndMail . lookup "DARCS_EMAIL"
  292 
  293 gitEnv :: Enviro -> AuthorGuess
  294 gitEnv env = (name, mail)
  295   where
  296     name = maybeFlag "GIT_AUTHOR_NAME" env
  297     mail = maybeFlag "GIT_AUTHOR_EMAIL" env
  298 
  299 darcsCfg :: Maybe String -> AuthorGuess
  300 darcsCfg = maybe mempty nameAndMail
  301 
  302 emailEnv :: Enviro -> AuthorGuess
  303 emailEnv env = (mempty, mail)
  304   where
  305     mail = maybeFlag "EMAIL" env
  306 
  307 gitCfg :: GitLoc -> IO AuthorGuess
  308 gitCfg which = do
  309   name <- gitVar which "user.name"
  310   mail <- gitVar which "user.email"
  311   return (name, mail)
  312 
  313 gitVar :: GitLoc -> String -> IO (Flag String)
  314 gitVar which = fmap happyOutput . gitConfigQuery which
  315 
  316 happyOutput :: (ExitCode, a, t) -> Flag a
  317 happyOutput v = case v of
  318   (ExitSuccess, s, _) -> Flag s
  319   _                   -> mempty
  320 
  321 gitConfigQuery :: GitLoc -> String -> IO (ExitCode, String, String)
  322 gitConfigQuery which key =
  323     fmap trim' $ readProcessWithExitCode "git" ["config", w, key] ""
  324   where
  325     w = case which of
  326         Local  -> "--local"
  327         Global -> "--global"
  328     trim' (a, b, c) = (a, trim b, c)
  329 
  330 maybeFlag :: String -> Enviro -> Flag String
  331 maybeFlag k = maybe mempty Flag . lookup k
  332 
  333 -- | Read the first non-comment, non-trivial line of a file, if it exists
  334 maybeReadFile :: String -> IO (Maybe String)
  335 maybeReadFile f = do
  336     exists <- doesFileExist f
  337     if exists
  338         then fmap getFirstLine $ readFile f
  339         else return Nothing
  340   where
  341     getFirstLine content =
  342       let nontrivialLines = dropWhile (\l -> (null l) || ("#" `isPrefixOf` l)) . lines $ content
  343       in case nontrivialLines of
  344            [] -> Nothing
  345            (l:_) -> Just l
  346 
  347 -- |Get list of categories used in Hackage. NOTE: Very slow, needs to be cached
  348 knownCategories :: SourcePackageDb -> [String]
  349 knownCategories (SourcePackageDb sourcePkgIndex _) = nubSet
  350     [ cat | pkg <- maybeToList . safeHead =<< (allPackagesByName sourcePkgIndex)
  351           , let catList = (PD.category . PD.packageDescription . srcpkgDescription) pkg
  352           , cat <- splitString ',' $ ShortText.fromShortText catList
  353     ]
  354 
  355 -- Parse name and email, from darcs pref files or environment variable
  356 nameAndMail :: String -> (Flag String, Flag String)
  357 nameAndMail str
  358   | all isSpace nameOrEmail = mempty
  359   | null erest = (mempty, Flag $ trim nameOrEmail)
  360   | otherwise  = (Flag $ trim nameOrEmail, Flag mail)
  361   where
  362     (nameOrEmail,erest) = break (== '<') str
  363     (mail,_)            = break (== '>') (safeTail erest)
  364 
  365 trim :: String -> String
  366 trim = removeLeadingSpace . reverse . removeLeadingSpace . reverse
  367   where
  368     removeLeadingSpace  = dropWhile isSpace
  369 
  370 -- split string at given character, and remove whitespace
  371 splitString :: Char -> String -> [String]
  372 splitString sep str = go str where
  373     go s = if null s' then [] else tok : go rest where
  374       s' = dropWhile (\c -> c == sep || isSpace c) s
  375       (tok,rest) = break (==sep) s'
  376 
  377 nubSet :: (Ord a) => [a] -> [a]
  378 nubSet = Set.toList . Set.fromList
  379 
  380 {-
  381 test db testProjectRoot = do
  382   putStrLn "Guessed package name"
  383   (guessPackageName >=> print) testProjectRoot
  384   putStrLn "Guessed name and email"
  385   guessAuthorNameMail >>= print
  386 
  387   mods <- scanForModules testProjectRoot
  388 
  389   putStrLn "Guessed modules"
  390   mapM_ print mods
  391   putStrLn "Needed build programs"
  392   print (neededBuildPrograms mods)
  393 
  394   putStrLn "List of known categories"
  395   print $ knownCategories db
  396 -}