never executed always true always false
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving,
3 NamedFieldPuns, BangPatterns #-}
4 {-# OPTIONS_GHC -fno-warn-orphans #-}
5
6 -- | An abstraction to help with re-running actions when files or other
7 -- input values they depend on have changed.
8 --
9 module Distribution.Client.FileMonitor (
10
11 -- * Declaring files to monitor
12 MonitorFilePath(..),
13 MonitorKindFile(..),
14 MonitorKindDir(..),
15 FilePathGlob(..),
16 monitorFile,
17 monitorFileHashed,
18 monitorNonExistentFile,
19 monitorFileExistence,
20 monitorDirectory,
21 monitorNonExistentDirectory,
22 monitorDirectoryExistence,
23 monitorFileOrDirectory,
24 monitorFileGlob,
25 monitorFileGlobExistence,
26 monitorFileSearchPath,
27 monitorFileHashedSearchPath,
28
29 -- * Creating and checking sets of monitored files
30 FileMonitor(..),
31 newFileMonitor,
32 MonitorChanged(..),
33 MonitorChangedReason(..),
34 checkFileMonitorChanged,
35 updateFileMonitor,
36 MonitorTimestamp,
37 beginUpdateFileMonitor,
38
39 -- * Internal
40 MonitorStateFileSet,
41 MonitorStateFile,
42 MonitorStateGlob,
43 ) where
44
45 import Prelude ()
46 import Distribution.Client.Compat.Prelude
47
48 import qualified Data.Map.Strict as Map
49 import qualified Data.ByteString.Lazy as BS
50 import qualified Data.Hashable as Hashable
51
52 import Control.Monad
53 import Control.Monad.Trans (MonadIO, liftIO)
54 import Control.Monad.State (StateT, mapStateT)
55 import qualified Control.Monad.State as State
56 import Control.Monad.Except (ExceptT, runExceptT, withExceptT,
57 throwError)
58 import Control.Exception
59
60 import Distribution.Compat.Time
61 import Distribution.Client.Glob
62 import Distribution.Simple.Utils (handleDoesNotExist, writeFileAtomic)
63 import Distribution.Client.Utils (mergeBy, MergeResult(..))
64 import Distribution.Utils.Structured (structuredDecodeOrFailIO, structuredEncode)
65 import System.FilePath
66 import System.Directory
67 import System.IO
68
69 ------------------------------------------------------------------------------
70 -- Types for specifying files to monitor
71 --
72
73
74 -- | A description of a file (or set of files) to monitor for changes.
75 --
76 -- Where file paths are relative they are relative to a common directory
77 -- (e.g. project root), not necessarily the process current directory.
78 --
79 data MonitorFilePath =
80 MonitorFile {
81 monitorKindFile :: !MonitorKindFile,
82 monitorKindDir :: !MonitorKindDir,
83 monitorPath :: !FilePath
84 }
85 | MonitorFileGlob {
86 monitorKindFile :: !MonitorKindFile,
87 monitorKindDir :: !MonitorKindDir,
88 monitorPathGlob :: !FilePathGlob
89 }
90 deriving (Eq, Show, Generic)
91
92 data MonitorKindFile = FileExists
93 | FileModTime
94 | FileHashed
95 | FileNotExists
96 deriving (Eq, Show, Generic)
97
98 data MonitorKindDir = DirExists
99 | DirModTime
100 | DirNotExists
101 deriving (Eq, Show, Generic)
102
103 instance Binary MonitorFilePath
104 instance Binary MonitorKindFile
105 instance Binary MonitorKindDir
106
107 instance Structured MonitorFilePath
108 instance Structured MonitorKindFile
109 instance Structured MonitorKindDir
110
111 -- | Monitor a single file for changes, based on its modification time.
112 -- The monitored file is considered to have changed if it no longer
113 -- exists or if its modification time has changed.
114 --
115 monitorFile :: FilePath -> MonitorFilePath
116 monitorFile = MonitorFile FileModTime DirNotExists
117
118 -- | Monitor a single file for changes, based on its modification time
119 -- and content hash. The monitored file is considered to have changed if
120 -- it no longer exists or if its modification time and content hash have
121 -- changed.
122 --
123 monitorFileHashed :: FilePath -> MonitorFilePath
124 monitorFileHashed = MonitorFile FileHashed DirNotExists
125
126 -- | Monitor a single non-existent file for changes. The monitored file
127 -- is considered to have changed if it exists.
128 --
129 monitorNonExistentFile :: FilePath -> MonitorFilePath
130 monitorNonExistentFile = MonitorFile FileNotExists DirNotExists
131
132 -- | Monitor a single file for existence only. The monitored file is
133 -- considered to have changed if it no longer exists.
134 --
135 monitorFileExistence :: FilePath -> MonitorFilePath
136 monitorFileExistence = MonitorFile FileExists DirNotExists
137
138 -- | Monitor a single directory for changes, based on its modification
139 -- time. The monitored directory is considered to have changed if it no
140 -- longer exists or if its modification time has changed.
141 --
142 monitorDirectory :: FilePath -> MonitorFilePath
143 monitorDirectory = MonitorFile FileNotExists DirModTime
144
145 -- | Monitor a single non-existent directory for changes. The monitored
146 -- directory is considered to have changed if it exists.
147 --
148 monitorNonExistentDirectory :: FilePath -> MonitorFilePath
149 -- Just an alias for monitorNonExistentFile, since you can't
150 -- tell the difference between a non-existent directory and
151 -- a non-existent file :)
152 monitorNonExistentDirectory = monitorNonExistentFile
153
154 -- | Monitor a single directory for existence. The monitored directory is
155 -- considered to have changed only if it no longer exists.
156 --
157 monitorDirectoryExistence :: FilePath -> MonitorFilePath
158 monitorDirectoryExistence = MonitorFile FileNotExists DirExists
159
160 -- | Monitor a single file or directory for changes, based on its modification
161 -- time. The monitored file is considered to have changed if it no longer
162 -- exists or if its modification time has changed.
163 --
164 monitorFileOrDirectory :: FilePath -> MonitorFilePath
165 monitorFileOrDirectory = MonitorFile FileModTime DirModTime
166
167 -- | Monitor a set of files (or directories) identified by a file glob.
168 -- The monitored glob is considered to have changed if the set of files
169 -- matching the glob changes (i.e. creations or deletions), or for files if the
170 -- modification time and content hash of any matching file has changed.
171 --
172 monitorFileGlob :: FilePathGlob -> MonitorFilePath
173 monitorFileGlob = MonitorFileGlob FileHashed DirExists
174
175 -- | Monitor a set of files (or directories) identified by a file glob for
176 -- existence only. The monitored glob is considered to have changed if the set
177 -- of files matching the glob changes (i.e. creations or deletions).
178 --
179 monitorFileGlobExistence :: FilePathGlob -> MonitorFilePath
180 monitorFileGlobExistence = MonitorFileGlob FileExists DirExists
181
182 -- | Creates a list of files to monitor when you search for a file which
183 -- unsuccessfully looked in @notFoundAtPaths@ before finding it at
184 -- @foundAtPath@.
185 monitorFileSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath]
186 monitorFileSearchPath notFoundAtPaths foundAtPath =
187 monitorFile foundAtPath
188 : map monitorNonExistentFile notFoundAtPaths
189
190 -- | Similar to 'monitorFileSearchPath', but also instructs us to
191 -- monitor the hash of the found file.
192 monitorFileHashedSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath]
193 monitorFileHashedSearchPath notFoundAtPaths foundAtPath =
194 monitorFileHashed foundAtPath
195 : map monitorNonExistentFile notFoundAtPaths
196
197
198 ------------------------------------------------------------------------------
199 -- Implementation types, files status
200 --
201
202 -- | The state necessary to determine whether a set of monitored
203 -- files has changed. It consists of two parts: a set of specific
204 -- files to be monitored (index by their path), and a list of
205 -- globs, which monitor may files at once.
206 data MonitorStateFileSet
207 = MonitorStateFileSet ![MonitorStateFile]
208 ![MonitorStateGlob]
209 -- Morally this is not actually a set but a bag (represented by lists).
210 -- There is no principled reason to use a bag here rather than a set, but
211 -- there is also no particular gain either. That said, we do preserve the
212 -- order of the lists just to reduce confusion (and have predictable I/O
213 -- patterns).
214 deriving (Show, Generic)
215
216 instance Binary MonitorStateFileSet
217 instance Structured MonitorStateFileSet
218
219 type Hash = Int
220
221 -- | The state necessary to determine whether a monitored file has changed.
222 --
223 -- This covers all the cases of 'MonitorFilePath' except for globs which is
224 -- covered separately by 'MonitorStateGlob'.
225 --
226 -- The @Maybe ModTime@ is to cover the case where we already consider the
227 -- file to have changed, either because it had already changed by the time we
228 -- did the snapshot (i.e. too new, changed since start of update process) or it
229 -- no longer exists at all.
230 --
231 data MonitorStateFile = MonitorStateFile !MonitorKindFile !MonitorKindDir
232 !FilePath !MonitorStateFileStatus
233 deriving (Show, Generic)
234
235 data MonitorStateFileStatus
236 = MonitorStateFileExists
237 | MonitorStateFileModTime !ModTime -- ^ cached file mtime
238 | MonitorStateFileHashed !ModTime !Hash -- ^ cached mtime and content hash
239 | MonitorStateDirExists
240 | MonitorStateDirModTime !ModTime -- ^ cached dir mtime
241 | MonitorStateNonExistent
242 | MonitorStateAlreadyChanged
243 deriving (Show, Generic)
244
245 instance Binary MonitorStateFile
246 instance Binary MonitorStateFileStatus
247 instance Structured MonitorStateFile
248 instance Structured MonitorStateFileStatus
249
250 -- | The state necessary to determine whether the files matched by a globbing
251 -- match have changed.
252 --
253 data MonitorStateGlob = MonitorStateGlob !MonitorKindFile !MonitorKindDir
254 !FilePathRoot !MonitorStateGlobRel
255 deriving (Show, Generic)
256
257 data MonitorStateGlobRel
258 = MonitorStateGlobDirs
259 !Glob !FilePathGlobRel
260 !ModTime
261 ![(FilePath, MonitorStateGlobRel)] -- invariant: sorted
262
263 | MonitorStateGlobFiles
264 !Glob
265 !ModTime
266 ![(FilePath, MonitorStateFileStatus)] -- invariant: sorted
267
268 | MonitorStateGlobDirTrailing
269 deriving (Show, Generic)
270
271 instance Binary MonitorStateGlob
272 instance Binary MonitorStateGlobRel
273
274 instance Structured MonitorStateGlob
275 instance Structured MonitorStateGlobRel
276
277 -- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by
278 -- inspecting the state of the file system, and we can go in the reverse
279 -- direction by just forgetting the extra info.
280 --
281 reconstructMonitorFilePaths :: MonitorStateFileSet -> [MonitorFilePath]
282 reconstructMonitorFilePaths (MonitorStateFileSet singlePaths globPaths) =
283 map getSinglePath singlePaths
284 ++ map getGlobPath globPaths
285 where
286 getSinglePath (MonitorStateFile kindfile kinddir filepath _) =
287 MonitorFile kindfile kinddir filepath
288
289 getGlobPath (MonitorStateGlob kindfile kinddir root gstate) =
290 MonitorFileGlob kindfile kinddir $ FilePathGlob root $
291 case gstate of
292 MonitorStateGlobDirs glob globs _ _ -> GlobDir glob globs
293 MonitorStateGlobFiles glob _ _ -> GlobFile glob
294 MonitorStateGlobDirTrailing -> GlobDirTrailing
295
296 ------------------------------------------------------------------------------
297 -- Checking the status of monitored files
298 --
299
300 -- | A monitor for detecting changes to a set of files. It can be used to
301 -- efficiently test if any of a set of files (specified individually or by
302 -- glob patterns) has changed since some snapshot. In addition, it also checks
303 -- for changes in a value (of type @a@), and when there are no changes in
304 -- either it returns a saved value (of type @b@).
305 --
306 -- The main use case looks like this: suppose we have some expensive action
307 -- that depends on certain pure inputs and reads some set of files, and
308 -- produces some pure result. We want to avoid re-running this action when it
309 -- would produce the same result. So we need to monitor the files the action
310 -- looked at, the other pure input values, and we need to cache the result.
311 -- Then at some later point, if the input value didn't change, and none of the
312 -- files changed, then we can re-use the cached result rather than re-running
313 -- the action.
314 --
315 -- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance
316 -- saves state in a disk file, so the file for that has to be specified,
317 -- making sure it is unique. The pattern is to use 'checkFileMonitorChanged'
318 -- to see if there's been any change. If there is, re-run the action, keeping
319 -- track of the files, then use 'updateFileMonitor' to record the current
320 -- set of files to monitor, the current input value for the action, and the
321 -- result of the action.
322 --
323 -- The typical occurrence of this pattern is captured by 'rerunIfChanged'
324 -- and the 'Rebuild' monad. More complicated cases may need to use
325 -- 'checkFileMonitorChanged' and 'updateFileMonitor' directly.
326 --
327 data FileMonitor a b
328 = FileMonitor {
329
330 -- | The file where this 'FileMonitor' should store its state.
331 --
332 fileMonitorCacheFile :: FilePath,
333
334 -- | Compares a new cache key with old one to determine if a
335 -- corresponding cached value is still valid.
336 --
337 -- Typically this is just an equality test, but in some
338 -- circumstances it can make sense to do things like subset
339 -- comparisons.
340 --
341 -- The first arg is the new value, the second is the old cached value.
342 --
343 fileMonitorKeyValid :: a -> a -> Bool,
344
345 -- | When this mode is enabled, if 'checkFileMonitorChanged' returns
346 -- 'MonitoredValueChanged' then we have the guarantee that no files
347 -- changed, that the value change was the only change. In the default
348 -- mode no such guarantee is provided which is slightly faster.
349 --
350 fileMonitorCheckIfOnlyValueChanged :: Bool
351 }
352
353 -- | Define a new file monitor.
354 --
355 -- It's best practice to define file monitor values once, and then use the
356 -- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this
357 -- ensures you get the same types @a@ and @b@ for reading and writing.
358 --
359 -- The path of the file monitor itself must be unique because it keeps state
360 -- on disk and these would clash.
361 --
362 newFileMonitor :: Eq a => FilePath -- ^ The file to cache the state of the
363 -- file monitor. Must be unique.
364 -> FileMonitor a b
365 newFileMonitor path = FileMonitor path (==) False
366
367 -- | The result of 'checkFileMonitorChanged': either the monitored files or
368 -- value changed (and it tells us which it was) or nothing changed and we get
369 -- the cached result.
370 --
371 data MonitorChanged a b =
372 -- | The monitored files and value did not change. The cached result is
373 -- @b@.
374 --
375 -- The set of monitored files is also returned. This is useful
376 -- for composing or nesting 'FileMonitor's.
377 MonitorUnchanged b [MonitorFilePath]
378
379 -- | The monitor found that something changed. The reason is given.
380 --
381 | MonitorChanged (MonitorChangedReason a)
382 deriving Show
383
384 -- | What kind of change 'checkFileMonitorChanged' detected.
385 --
386 data MonitorChangedReason a =
387
388 -- | One of the files changed (existence, file type, mtime or file
389 -- content, depending on the 'MonitorFilePath' in question)
390 MonitoredFileChanged FilePath
391
392 -- | The pure input value changed.
393 --
394 -- The previous cached key value is also returned. This is sometimes
395 -- useful when using a 'fileMonitorKeyValid' function that is not simply
396 -- '(==)', when invalidation can be partial. In such cases it can make
397 -- sense to 'updateFileMonitor' with a key value that's a combination of
398 -- the new and old (e.g. set union).
399 | MonitoredValueChanged a
400
401 -- | There was no saved monitor state, cached value etc. Ie the file
402 -- for the 'FileMonitor' does not exist.
403 | MonitorFirstRun
404
405 -- | There was existing state, but we could not read it. This typically
406 -- happens when the code has changed compared to an existing 'FileMonitor'
407 -- cache file and type of the input value or cached value has changed such
408 -- that we cannot decode the values. This is completely benign as we can
409 -- treat is just as if there were no cache file and re-run.
410 | MonitorCorruptCache
411 deriving (Eq, Show, Functor)
412
413 -- | Test if the input value or files monitored by the 'FileMonitor' have
414 -- changed. If not, return the cached value.
415 --
416 -- See 'FileMonitor' for a full explanation.
417 --
418 checkFileMonitorChanged
419 :: (Binary a, Structured a, Binary b, Structured b)
420 => FileMonitor a b -- ^ cache file path
421 -> FilePath -- ^ root directory
422 -> a -- ^ guard or key value
423 -> IO (MonitorChanged a b) -- ^ did the key or any paths change?
424 checkFileMonitorChanged
425 monitor@FileMonitor { fileMonitorKeyValid,
426 fileMonitorCheckIfOnlyValueChanged }
427 root currentKey =
428
429 -- Consider it a change if the cache file does not exist,
430 -- or we cannot decode it. Sadly ErrorCall can still happen, despite
431 -- using decodeFileOrFail, e.g. Data.Char.chr errors
432
433 handleDoesNotExist (MonitorChanged MonitorFirstRun) $
434 handleErrorCall (MonitorChanged MonitorCorruptCache) $
435 readCacheFile monitor
436 >>= either (\_ -> return (MonitorChanged MonitorCorruptCache))
437 checkStatusCache
438
439 where
440 checkStatusCache (cachedFileStatus, cachedKey, cachedResult) = do
441 change <- checkForChanges
442 case change of
443 Just reason -> return (MonitorChanged reason)
444 Nothing -> return (MonitorUnchanged cachedResult monitorFiles)
445 where monitorFiles = reconstructMonitorFilePaths cachedFileStatus
446 where
447 -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that
448 -- if we return MonitoredValueChanged that only the value changed.
449 -- We do that by checkin for file changes first. Otherwise it makes
450 -- more sense to do the cheaper test first.
451 checkForChanges
452 | fileMonitorCheckIfOnlyValueChanged
453 = checkFileChange cachedFileStatus cachedKey cachedResult
454 `mplusMaybeT`
455 checkValueChange cachedKey
456
457 | otherwise
458 = checkValueChange cachedKey
459 `mplusMaybeT`
460 checkFileChange cachedFileStatus cachedKey cachedResult
461
462 mplusMaybeT :: Monad m => m (Maybe a) -> m (Maybe a) -> m (Maybe a)
463 mplusMaybeT ma mb = do
464 mx <- ma
465 case mx of
466 Nothing -> mb
467 Just x -> return (Just x)
468
469 -- Check if the guard value has changed
470 checkValueChange cachedKey
471 | not (fileMonitorKeyValid currentKey cachedKey)
472 = return (Just (MonitoredValueChanged cachedKey))
473 | otherwise
474 = return Nothing
475
476 -- Check if any file has changed
477 checkFileChange cachedFileStatus cachedKey cachedResult = do
478 res <- probeFileSystem root cachedFileStatus
479 case res of
480 -- Some monitored file has changed
481 Left changedPath ->
482 return (Just (MonitoredFileChanged (normalise changedPath)))
483
484 -- No monitored file has changed
485 Right (cachedFileStatus', cacheStatus) -> do
486
487 -- But we might still want to update the cache
488 whenCacheChanged cacheStatus $
489 rewriteCacheFile monitor cachedFileStatus' cachedKey cachedResult
490
491 return Nothing
492
493 -- | Helper for reading the cache file.
494 --
495 -- This determines the type and format of the binary cache file.
496 --
497 readCacheFile :: (Binary a, Structured a, Binary b, Structured b)
498 => FileMonitor a b
499 -> IO (Either String (MonitorStateFileSet, a, b))
500 readCacheFile FileMonitor {fileMonitorCacheFile} =
501 withBinaryFile fileMonitorCacheFile ReadMode $ \hnd -> do
502 contents <- BS.hGetContents hnd
503 structuredDecodeOrFailIO contents
504
505 -- | Helper for writing the cache file.
506 --
507 -- This determines the type and format of the binary cache file.
508 --
509 rewriteCacheFile :: (Binary a, Structured a, Binary b, Structured b)
510 => FileMonitor a b
511 -> MonitorStateFileSet -> a -> b -> IO ()
512 rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result =
513 writeFileAtomic fileMonitorCacheFile $
514 structuredEncode (fileset, key, result)
515
516 -- | Probe the file system to see if any of the monitored files have changed.
517 --
518 -- It returns Nothing if any file changed, or returns a possibly updated
519 -- file 'MonitorStateFileSet' plus an indicator of whether it actually changed.
520 --
521 -- We may need to update the cache since there may be changes in the filesystem
522 -- state which don't change any of our affected files.
523 --
524 -- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a
525 -- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run
526 -- and find @proj2@ was created, yet contains no files matching @*.cabal@ then
527 -- we want to update the cache despite no changes in our relevant file set.
528 -- Specifically, we should add an mtime for this directory so we can avoid
529 -- re-traversing the directory in future runs.
530 --
531 probeFileSystem :: FilePath -> MonitorStateFileSet
532 -> IO (Either FilePath (MonitorStateFileSet, CacheChanged))
533 probeFileSystem root (MonitorStateFileSet singlePaths globPaths) =
534 runChangedM $ do
535 sequence_
536 [ probeMonitorStateFileStatus root file status
537 | MonitorStateFile _ _ file status <- singlePaths ]
538 -- The glob monitors can require state changes
539 globPaths' <-
540 sequence
541 [ probeMonitorStateGlob root globPath
542 | globPath <- globPaths ]
543 return (MonitorStateFileSet singlePaths globPaths')
544
545
546 -----------------------------------------------
547 -- Monad for checking for file system changes
548 --
549 -- We need to be able to bail out if we detect a change (using ExceptT),
550 -- but if there's no change we need to be able to rebuild the monitor
551 -- state. And we want to optimise that rebuilding by keeping track if
552 -- anything actually changed (using StateT), so that in the typical case
553 -- we can avoid rewriting the state file.
554
555 newtype ChangedM a = ChangedM (StateT CacheChanged (ExceptT FilePath IO) a)
556 deriving (Functor, Applicative, Monad, MonadIO)
557
558 runChangedM :: ChangedM a -> IO (Either FilePath (a, CacheChanged))
559 runChangedM (ChangedM action) =
560 runExceptT $ State.runStateT action CacheUnchanged
561
562 somethingChanged :: FilePath -> ChangedM a
563 somethingChanged path = ChangedM $ throwError path
564
565 cacheChanged :: ChangedM ()
566 cacheChanged = ChangedM $ State.put CacheChanged
567
568 mapChangedFile :: (FilePath -> FilePath) -> ChangedM a -> ChangedM a
569 mapChangedFile adjust (ChangedM a) =
570 ChangedM (mapStateT (withExceptT adjust) a)
571
572 data CacheChanged = CacheChanged | CacheUnchanged
573
574 whenCacheChanged :: Monad m => CacheChanged -> m () -> m ()
575 whenCacheChanged CacheChanged action = action
576 whenCacheChanged CacheUnchanged _ = return ()
577
578 ----------------------
579
580 -- | Probe the file system to see if a single monitored file has changed.
581 --
582 probeMonitorStateFileStatus :: FilePath -> FilePath
583 -> MonitorStateFileStatus
584 -> ChangedM ()
585 probeMonitorStateFileStatus root file status =
586 case status of
587 MonitorStateFileExists ->
588 probeFileExistence root file
589
590 MonitorStateFileModTime mtime ->
591 probeFileModificationTime root file mtime
592
593 MonitorStateFileHashed mtime hash ->
594 probeFileModificationTimeAndHash root file mtime hash
595
596 MonitorStateDirExists ->
597 probeDirExistence root file
598
599 MonitorStateDirModTime mtime ->
600 probeFileModificationTime root file mtime
601
602 MonitorStateNonExistent ->
603 probeFileNonExistence root file
604
605 MonitorStateAlreadyChanged ->
606 somethingChanged file
607
608
609 -- | Probe the file system to see if a monitored file glob has changed.
610 --
611 probeMonitorStateGlob :: FilePath -- ^ root path
612 -> MonitorStateGlob
613 -> ChangedM MonitorStateGlob
614 probeMonitorStateGlob relroot
615 (MonitorStateGlob kindfile kinddir globroot glob) = do
616 root <- liftIO $ getFilePathRootDirectory globroot relroot
617 case globroot of
618 FilePathRelative ->
619 MonitorStateGlob kindfile kinddir globroot <$>
620 probeMonitorStateGlobRel kindfile kinddir root "." glob
621
622 -- for absolute cases, make the changed file we report absolute too
623 _ ->
624 mapChangedFile (root </>) $
625 MonitorStateGlob kindfile kinddir globroot <$>
626 probeMonitorStateGlobRel kindfile kinddir root "" glob
627
628 probeMonitorStateGlobRel :: MonitorKindFile -> MonitorKindDir
629 -> FilePath -- ^ root path
630 -> FilePath -- ^ path of the directory we are
631 -- looking in relative to @root@
632 -> MonitorStateGlobRel
633 -> ChangedM MonitorStateGlobRel
634 probeMonitorStateGlobRel kindfile kinddir root dirName
635 (MonitorStateGlobDirs glob globPath mtime children) = do
636 change <- liftIO $ checkDirectoryModificationTime (root </> dirName) mtime
637 case change of
638 Nothing -> do
639 children' <- sequence
640 [ do fstate' <- probeMonitorStateGlobRel
641 kindfile kinddir root
642 (dirName </> fname) fstate
643 return (fname, fstate')
644 | (fname, fstate) <- children ]
645 return $! MonitorStateGlobDirs glob globPath mtime children'
646
647 Just mtime' -> do
648 -- directory modification time changed:
649 -- a matching subdir may have been added or deleted
650 matches <- filterM (\entry -> let subdir = root </> dirName </> entry
651 in liftIO $ doesDirectoryExist subdir)
652 . filter (matchGlob glob)
653 =<< liftIO (getDirectoryContents (root </> dirName))
654
655 children' <- traverse probeMergeResult $
656 mergeBy (\(path1,_) path2 -> compare path1 path2)
657 children
658 (sort matches)
659 return $! MonitorStateGlobDirs glob globPath mtime' children'
660 -- Note that just because the directory has changed, we don't force
661 -- a cache rewrite with 'cacheChanged' since that has some cost, and
662 -- all we're saving is scanning the directory. But we do rebuild the
663 -- cache with the new mtime', so that if the cache is rewritten for
664 -- some other reason, we'll take advantage of that.
665
666 where
667 probeMergeResult :: MergeResult (FilePath, MonitorStateGlobRel) FilePath
668 -> ChangedM (FilePath, MonitorStateGlobRel)
669
670 -- Only in cached (directory deleted)
671 probeMergeResult (OnlyInLeft (path, fstate)) = do
672 case allMatchingFiles (dirName </> path) fstate of
673 [] -> return (path, fstate)
674 -- Strictly speaking we should be returning 'CacheChanged' above
675 -- as we should prune the now-missing 'MonitorStateGlobRel'. However
676 -- we currently just leave these now-redundant entries in the
677 -- cache as they cost no IO and keeping them allows us to avoid
678 -- rewriting the cache.
679 (file:_) -> somethingChanged file
680
681 -- Only in current filesystem state (directory added)
682 probeMergeResult (OnlyInRight path) = do
683 fstate <- liftIO $ buildMonitorStateGlobRel Nothing Map.empty
684 kindfile kinddir root (dirName </> path) globPath
685 case allMatchingFiles (dirName </> path) fstate of
686 (file:_) -> somethingChanged file
687 -- This is the only case where we use 'cacheChanged' because we can
688 -- have a whole new dir subtree (of unbounded size and cost), so we
689 -- need to save the state of that new subtree in the cache.
690 [] -> cacheChanged >> return (path, fstate)
691
692 -- Found in path
693 probeMergeResult (InBoth (path, fstate) _) = do
694 fstate' <- probeMonitorStateGlobRel kindfile kinddir
695 root (dirName </> path) fstate
696 return (path, fstate')
697
698 -- | Does a 'MonitorStateGlob' have any relevant files within it?
699 allMatchingFiles :: FilePath -> MonitorStateGlobRel -> [FilePath]
700 allMatchingFiles dir (MonitorStateGlobFiles _ _ entries) =
701 [ dir </> fname | (fname, _) <- entries ]
702 allMatchingFiles dir (MonitorStateGlobDirs _ _ _ entries) =
703 [ res
704 | (subdir, fstate) <- entries
705 , res <- allMatchingFiles (dir </> subdir) fstate ]
706 allMatchingFiles dir MonitorStateGlobDirTrailing =
707 [dir]
708
709 probeMonitorStateGlobRel _ _ root dirName
710 (MonitorStateGlobFiles glob mtime children) = do
711 change <- liftIO $ checkDirectoryModificationTime (root </> dirName) mtime
712 mtime' <- case change of
713 Nothing -> return mtime
714 Just mtime' -> do
715 -- directory modification time changed:
716 -- a matching file may have been added or deleted
717 matches <- return . filter (matchGlob glob)
718 =<< liftIO (getDirectoryContents (root </> dirName))
719
720 traverse_ probeMergeResult $
721 mergeBy (\(path1,_) path2 -> compare path1 path2)
722 children
723 (sort matches)
724 return mtime'
725
726 -- Check that none of the children have changed
727 for_ children $ \(file, status) ->
728 probeMonitorStateFileStatus root (dirName </> file) status
729
730
731 return (MonitorStateGlobFiles glob mtime' children)
732 -- Again, we don't force a cache rewite with 'cacheChanged', but we do use
733 -- the new mtime' if any.
734 where
735 probeMergeResult :: MergeResult (FilePath, MonitorStateFileStatus) FilePath
736 -> ChangedM ()
737 probeMergeResult mr = case mr of
738 InBoth _ _ -> return ()
739 -- this is just to be able to accurately report which file changed:
740 OnlyInLeft (path, _) -> somethingChanged (dirName </> path)
741 OnlyInRight path -> somethingChanged (dirName </> path)
742
743 probeMonitorStateGlobRel _ _ _ _ MonitorStateGlobDirTrailing =
744 return MonitorStateGlobDirTrailing
745
746 ------------------------------------------------------------------------------
747
748 -- | Update the input value and the set of files monitored by the
749 -- 'FileMonitor', plus the cached value that may be returned in future.
750 --
751 -- This takes a snapshot of the state of the monitored files right now, so
752 -- 'checkFileMonitorChanged' will look for file system changes relative to
753 -- this snapshot.
754 --
755 -- This is typically done once the action has been completed successfully and
756 -- we have the action's result and we know what files it looked at. See
757 -- 'FileMonitor' for a full explanation.
758 --
759 -- If we do take the snapshot after the action has completed then we have a
760 -- problem. The problem is that files might have changed /while/ the action was
761 -- running but /after/ the action read them. If we take the snapshot after the
762 -- action completes then we will miss these changes. The solution is to record
763 -- a timestamp before beginning execution of the action and then we make the
764 -- conservative assumption that any file that has changed since then has
765 -- already changed, ie the file monitor state for these files will be such that
766 -- 'checkFileMonitorChanged' will report that they have changed.
767 --
768 -- So if you do use 'updateFileMonitor' after the action (so you can discover
769 -- the files used rather than predicting them in advance) then use
770 -- 'beginUpdateFileMonitor' to get a timestamp and pass that. Alternatively,
771 -- if you take the snapshot in advance of the action, or you're not monitoring
772 -- any files then you can use @Nothing@ for the timestamp parameter.
773 --
774 updateFileMonitor
775 :: (Binary a, Structured a, Binary b, Structured b)
776 => FileMonitor a b -- ^ cache file path
777 -> FilePath -- ^ root directory
778 -> Maybe MonitorTimestamp -- ^ timestamp when the update action started
779 -> [MonitorFilePath] -- ^ files of interest relative to root
780 -> a -- ^ the current key value
781 -> b -- ^ the current result value
782 -> IO ()
783 updateFileMonitor monitor root startTime monitorFiles
784 cachedKey cachedResult = do
785 hashcache <- readCacheFileHashes monitor
786 msfs <- buildMonitorStateFileSet startTime hashcache root monitorFiles
787 rewriteCacheFile monitor msfs cachedKey cachedResult
788
789 -- | A timestamp to help with the problem of file changes during actions.
790 -- See 'updateFileMonitor' for details.
791 --
792 newtype MonitorTimestamp = MonitorTimestamp ModTime
793
794 -- | Record a timestamp at the beginning of an action, and when the action
795 -- completes call 'updateFileMonitor' passing it the timestamp.
796 -- See 'updateFileMonitor' for details.
797 --
798 beginUpdateFileMonitor :: IO MonitorTimestamp
799 beginUpdateFileMonitor = MonitorTimestamp <$> getCurTime
800
801 -- | Take the snapshot of the monitored files. That is, given the
802 -- specification of the set of files we need to monitor, inspect the state
803 -- of the file system now and collect the information we'll need later to
804 -- determine if anything has changed.
805 --
806 buildMonitorStateFileSet :: Maybe MonitorTimestamp -- ^ optional: timestamp
807 -- of the start of the action
808 -> FileHashCache -- ^ existing file hashes
809 -> FilePath -- ^ root directory
810 -> [MonitorFilePath] -- ^ patterns of interest
811 -- relative to root
812 -> IO MonitorStateFileSet
813 buildMonitorStateFileSet mstartTime hashcache root =
814 go [] []
815 where
816 go :: [MonitorStateFile] -> [MonitorStateGlob]
817 -> [MonitorFilePath] -> IO MonitorStateFileSet
818 go !singlePaths !globPaths [] =
819 return (MonitorStateFileSet (reverse singlePaths) (reverse globPaths))
820
821 go !singlePaths !globPaths
822 (MonitorFile kindfile kinddir path : monitors) = do
823 monitorState <- MonitorStateFile kindfile kinddir path
824 <$> buildMonitorStateFile mstartTime hashcache
825 kindfile kinddir root path
826 go (monitorState : singlePaths) globPaths monitors
827
828 go !singlePaths !globPaths
829 (MonitorFileGlob kindfile kinddir globPath : monitors) = do
830 monitorState <- buildMonitorStateGlob mstartTime hashcache
831 kindfile kinddir root globPath
832 go singlePaths (monitorState : globPaths) monitors
833
834
835 buildMonitorStateFile :: Maybe MonitorTimestamp -- ^ start time of update
836 -> FileHashCache -- ^ existing file hashes
837 -> MonitorKindFile -> MonitorKindDir
838 -> FilePath -- ^ the root directory
839 -> FilePath
840 -> IO MonitorStateFileStatus
841 buildMonitorStateFile mstartTime hashcache kindfile kinddir root path = do
842 let abspath = root </> path
843 isFile <- doesFileExist abspath
844 isDir <- doesDirectoryExist abspath
845 case (isFile, kindfile, isDir, kinddir) of
846 (_, FileNotExists, _, DirNotExists) ->
847 -- we don't need to care if it exists now, since we check at probe time
848 return MonitorStateNonExistent
849
850 (False, _, False, _) ->
851 return MonitorStateAlreadyChanged
852
853 (True, FileExists, _, _) ->
854 return MonitorStateFileExists
855
856 (True, FileModTime, _, _) ->
857 handleIOException MonitorStateAlreadyChanged $ do
858 mtime <- getModTime abspath
859 if changedDuringUpdate mstartTime mtime
860 then return MonitorStateAlreadyChanged
861 else return (MonitorStateFileModTime mtime)
862
863 (True, FileHashed, _, _) ->
864 handleIOException MonitorStateAlreadyChanged $ do
865 mtime <- getModTime abspath
866 if changedDuringUpdate mstartTime mtime
867 then return MonitorStateAlreadyChanged
868 else do hash <- getFileHash hashcache abspath abspath mtime
869 return (MonitorStateFileHashed mtime hash)
870
871 (_, _, True, DirExists) ->
872 return MonitorStateDirExists
873
874 (_, _, True, DirModTime) ->
875 handleIOException MonitorStateAlreadyChanged $ do
876 mtime <- getModTime abspath
877 if changedDuringUpdate mstartTime mtime
878 then return MonitorStateAlreadyChanged
879 else return (MonitorStateDirModTime mtime)
880
881 (False, _, True, DirNotExists) -> return MonitorStateAlreadyChanged
882 (True, FileNotExists, False, _) -> return MonitorStateAlreadyChanged
883
884 -- | If we have a timestamp for the beginning of the update, then any file
885 -- mtime later than this means that it changed during the update and we ought
886 -- to consider the file as already changed.
887 --
888 changedDuringUpdate :: Maybe MonitorTimestamp -> ModTime -> Bool
889 changedDuringUpdate (Just (MonitorTimestamp startTime)) mtime
890 = mtime > startTime
891 changedDuringUpdate _ _ = False
892
893 -- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case
894 -- of a file glob.
895 --
896 -- This gets used both by 'buildMonitorStateFileSet' when we're taking the
897 -- file system snapshot, but also by 'probeGlobStatus' as part of checking
898 -- the monitored (globed) files for changes when we find a whole new subtree.
899 --
900 buildMonitorStateGlob :: Maybe MonitorTimestamp -- ^ start time of update
901 -> FileHashCache -- ^ existing file hashes
902 -> MonitorKindFile -> MonitorKindDir
903 -> FilePath -- ^ the root directory
904 -> FilePathGlob -- ^ the matching glob
905 -> IO MonitorStateGlob
906 buildMonitorStateGlob mstartTime hashcache kindfile kinddir relroot
907 (FilePathGlob globroot globPath) = do
908 root <- liftIO $ getFilePathRootDirectory globroot relroot
909 MonitorStateGlob kindfile kinddir globroot <$>
910 buildMonitorStateGlobRel
911 mstartTime hashcache kindfile kinddir root "." globPath
912
913 buildMonitorStateGlobRel :: Maybe MonitorTimestamp -- ^ start time of update
914 -> FileHashCache -- ^ existing file hashes
915 -> MonitorKindFile -> MonitorKindDir
916 -> FilePath -- ^ the root directory
917 -> FilePath -- ^ directory we are examining
918 -- relative to the root
919 -> FilePathGlobRel -- ^ the matching glob
920 -> IO MonitorStateGlobRel
921 buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root
922 dir globPath = do
923 let absdir = root </> dir
924 dirEntries <- getDirectoryContents absdir
925 dirMTime <- getModTime absdir
926 case globPath of
927 GlobDir glob globPath' -> do
928 subdirs <- filterM (\subdir -> doesDirectoryExist (absdir </> subdir))
929 $ filter (matchGlob glob) dirEntries
930 subdirStates <-
931 for (sort subdirs) $ \subdir -> do
932 fstate <- buildMonitorStateGlobRel
933 mstartTime hashcache kindfile kinddir root
934 (dir </> subdir) globPath'
935 return (subdir, fstate)
936 return $! MonitorStateGlobDirs glob globPath' dirMTime subdirStates
937
938 GlobFile glob -> do
939 let files = filter (matchGlob glob) dirEntries
940 filesStates <-
941 for (sort files) $ \file -> do
942 fstate <- buildMonitorStateFile
943 mstartTime hashcache kindfile kinddir root
944 (dir </> file)
945 return (file, fstate)
946 return $! MonitorStateGlobFiles glob dirMTime filesStates
947
948 GlobDirTrailing ->
949 return MonitorStateGlobDirTrailing
950
951
952 -- | We really want to avoid re-hashing files all the time. We already make
953 -- the assumption that if a file mtime has not changed then we don't need to
954 -- bother checking if the content hash has changed. We can apply the same
955 -- assumption when updating the file monitor state. In the typical case of
956 -- updating a file monitor the set of files is the same or largely the same so
957 -- we can grab the previously known content hashes with their corresponding
958 -- mtimes.
959 --
960 type FileHashCache = Map FilePath (ModTime, Hash)
961
962 -- | We declare it a cache hit if the mtime of a file is the same as before.
963 --
964 lookupFileHashCache :: FileHashCache -> FilePath -> ModTime -> Maybe Hash
965 lookupFileHashCache hashcache file mtime = do
966 (mtime', hash) <- Map.lookup file hashcache
967 guard (mtime' == mtime)
968 return hash
969
970 -- | Either get it from the cache or go read the file
971 getFileHash :: FileHashCache -> FilePath -> FilePath -> ModTime -> IO Hash
972 getFileHash hashcache relfile absfile mtime =
973 case lookupFileHashCache hashcache relfile mtime of
974 Just hash -> return hash
975 Nothing -> readFileHash absfile
976
977 -- | Build a 'FileHashCache' from the previous 'MonitorStateFileSet'. While
978 -- in principle we could preserve the structure of the previous state, given
979 -- that the set of files to monitor can change then it's simpler just to throw
980 -- away the structure and use a finite map.
981 --
982 readCacheFileHashes :: (Binary a, Structured a, Binary b, Structured b)
983 => FileMonitor a b -> IO FileHashCache
984 readCacheFileHashes monitor =
985 handleDoesNotExist Map.empty $
986 handleErrorCall Map.empty $ do
987 res <- readCacheFile monitor
988 case res of
989 Left _ -> return Map.empty
990 Right (msfs, _, _) -> return (mkFileHashCache msfs)
991 where
992 mkFileHashCache :: MonitorStateFileSet -> FileHashCache
993 mkFileHashCache (MonitorStateFileSet singlePaths globPaths) =
994 collectAllFileHashes singlePaths
995 `Map.union` collectAllGlobHashes globPaths
996
997 collectAllFileHashes singlePaths =
998 Map.fromList [ (fpath, (mtime, hash))
999 | MonitorStateFile _ _ fpath
1000 (MonitorStateFileHashed mtime hash) <- singlePaths ]
1001
1002 collectAllGlobHashes globPaths =
1003 Map.fromList [ (fpath, (mtime, hash))
1004 | MonitorStateGlob _ _ _ gstate <- globPaths
1005 , (fpath, (mtime, hash)) <- collectGlobHashes "" gstate ]
1006
1007 collectGlobHashes dir (MonitorStateGlobDirs _ _ _ entries) =
1008 [ res
1009 | (subdir, fstate) <- entries
1010 , res <- collectGlobHashes (dir </> subdir) fstate ]
1011
1012 collectGlobHashes dir (MonitorStateGlobFiles _ _ entries) =
1013 [ (dir </> fname, (mtime, hash))
1014 | (fname, MonitorStateFileHashed mtime hash) <- entries ]
1015
1016 collectGlobHashes _dir MonitorStateGlobDirTrailing =
1017 []
1018
1019
1020 ------------------------------------------------------------------------------
1021 -- Utils
1022 --
1023
1024 -- | Within the @root@ directory, check if @file@ has its 'ModTime' is
1025 -- the same as @mtime@, short-circuiting if it is different.
1026 probeFileModificationTime :: FilePath -> FilePath -> ModTime -> ChangedM ()
1027 probeFileModificationTime root file mtime = do
1028 unchanged <- liftIO $ checkModificationTimeUnchanged root file mtime
1029 unless unchanged (somethingChanged file)
1030
1031 -- | Within the @root@ directory, check if @file@ has its 'ModTime' and
1032 -- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is
1033 -- different.
1034 probeFileModificationTimeAndHash :: FilePath -> FilePath -> ModTime -> Hash
1035 -> ChangedM ()
1036 probeFileModificationTimeAndHash root file mtime hash = do
1037 unchanged <- liftIO $
1038 checkFileModificationTimeAndHashUnchanged root file mtime hash
1039 unless unchanged (somethingChanged file)
1040
1041 -- | Within the @root@ directory, check if @file@ still exists as a file.
1042 -- If it *does not* exist, short-circuit.
1043 probeFileExistence :: FilePath -> FilePath -> ChangedM ()
1044 probeFileExistence root file = do
1045 existsFile <- liftIO $ doesFileExist (root </> file)
1046 unless existsFile (somethingChanged file)
1047
1048 -- | Within the @root@ directory, check if @dir@ still exists.
1049 -- If it *does not* exist, short-circuit.
1050 probeDirExistence :: FilePath -> FilePath -> ChangedM ()
1051 probeDirExistence root dir = do
1052 existsDir <- liftIO $ doesDirectoryExist (root </> dir)
1053 unless existsDir (somethingChanged dir)
1054
1055 -- | Within the @root@ directory, check if @file@ still does not exist.
1056 -- If it *does* exist, short-circuit.
1057 probeFileNonExistence :: FilePath -> FilePath -> ChangedM ()
1058 probeFileNonExistence root file = do
1059 existsFile <- liftIO $ doesFileExist (root </> file)
1060 existsDir <- liftIO $ doesDirectoryExist (root </> file)
1061 when (existsFile || existsDir) (somethingChanged file)
1062
1063 -- | Returns @True@ if, inside the @root@ directory, @file@ has the same
1064 -- 'ModTime' as @mtime@.
1065 checkModificationTimeUnchanged :: FilePath -> FilePath
1066 -> ModTime -> IO Bool
1067 checkModificationTimeUnchanged root file mtime =
1068 handleIOException False $ do
1069 mtime' <- getModTime (root </> file)
1070 return (mtime == mtime')
1071
1072 -- | Returns @True@ if, inside the @root@ directory, @file@ has the
1073 -- same 'ModTime' and 'Hash' as @mtime and @chash@.
1074 checkFileModificationTimeAndHashUnchanged :: FilePath -> FilePath
1075 -> ModTime -> Hash -> IO Bool
1076 checkFileModificationTimeAndHashUnchanged root file mtime chash =
1077 handleIOException False $ do
1078 mtime' <- getModTime (root </> file)
1079 if mtime == mtime'
1080 then return True
1081 else do
1082 chash' <- readFileHash (root </> file)
1083 return (chash == chash')
1084
1085 -- | Read a non-cryptographic hash of a @file@.
1086 readFileHash :: FilePath -> IO Hash
1087 readFileHash file =
1088 withBinaryFile file ReadMode $ \hnd ->
1089 evaluate . Hashable.hash =<< BS.hGetContents hnd
1090
1091 -- | Given a directory @dir@, return @Nothing@ if its 'ModTime'
1092 -- is the same as @mtime@, and the new 'ModTime' if it is not.
1093 checkDirectoryModificationTime :: FilePath -> ModTime -> IO (Maybe ModTime)
1094 checkDirectoryModificationTime dir mtime =
1095 handleIOException Nothing $ do
1096 mtime' <- getModTime dir
1097 if mtime == mtime'
1098 then return Nothing
1099 else return (Just mtime')
1100
1101 -- | Run an IO computation, returning the first argument @e@ if there is an 'error'
1102 -- call. ('ErrorCall')
1103 handleErrorCall :: a -> IO a -> IO a
1104 handleErrorCall e = handle handler where
1105 #if MIN_VERSION_base(4,9,0)
1106 handler (ErrorCallWithLocation _ _) = return e
1107 #else
1108 handler (ErrorCall _) = return e
1109 #endif
1110
1111
1112 -- | Run an IO computation, returning @e@ if there is any 'IOException'.
1113 --
1114 -- This policy is OK in the file monitor code because it just causes the
1115 -- monitor to report that something changed, and then code reacting to that
1116 -- will normally encounter the same IO exception when it re-runs the action
1117 -- that uses the file.
1118 --
1119 handleIOException :: a -> IO a -> IO a
1120 handleIOException e =
1121 handle (anyIOException e)
1122 where
1123 anyIOException :: a -> IOException -> IO a
1124 anyIOException x _ = return x
1125
1126
1127 ------------------------------------------------------------------------------
1128 -- Instances
1129 --
1130