diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index f7690f3d652..c88726402f5 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -164,10 +164,28 @@ fn overflow_hyphen() { ) } +/// Manager for handling the on-disk index. +/// +/// Note that local and remote registries store the index differently. Local +/// is a simple on-disk tree of files of the raw index. Remote registries are +/// stored as a raw git repository. The different means of access are handled +/// via the [`RegistryData`] trait abstraction. +/// +/// This transparently handles caching of the index in a more efficient format. pub struct RegistryIndex<'cfg> { source_id: SourceId, + /// Root directory of the index for the registry. path: Filesystem, + /// Cache of summary data. + /// + /// This is keyed off the package name. The [`Summaries`] value handles + /// loading the summary data. It keeps an optimized on-disk representation + /// of the JSON files, which is created in an as-needed fashion. If it + /// hasn't been cached already, it uses [`RegistryData::load`] to access + /// the JSON files from the index, and then creates the optimized on-disk + /// summary cache. summaries_cache: HashMap, + /// [`Config`] reference for convenience. config: &'cfg Config, } diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs index d35345eb86c..7276c688b58 100644 --- a/src/cargo/sources/registry/local.rs +++ b/src/cargo/sources/registry/local.rs @@ -9,6 +9,9 @@ use std::io::prelude::*; use std::io::SeekFrom; use std::path::Path; +/// A local registry is a registry that lives on the filesystem as a set of +/// `.crate` files with an `index` directory in the same format as a remote +/// registry. 
pub struct LocalRegistry<'cfg> { index_path: Filesystem, root: Filesystem, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 159b0952900..495df40bfce 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -85,7 +85,7 @@ //! ``` //! //! The root of the index contains a `config.json` file with a few entries -//! corresponding to the registry (see `RegistryConfig` below). +//! corresponding to the registry (see [`RegistryConfig`] below). //! //! Otherwise, there are three numbered directories (1, 2, 3) for crates with //! names 1, 2, and 3 characters in length. The 1/2 directories simply have the @@ -189,16 +189,42 @@ const VERSION_TEMPLATE: &str = "{version}"; const PREFIX_TEMPLATE: &str = "{prefix}"; const LOWER_PREFIX_TEMPLATE: &str = "{lowerprefix}"; +/// A "source" for a [local](local::LocalRegistry) or +/// [remote](remote::RemoteRegistry) registry. +/// +/// This contains common functionality that is shared between the two registry +/// kinds, with the registry-specific logic implemented as part of the +/// [`RegistryData`] trait referenced via the `ops` field. pub struct RegistrySource<'cfg> { source_id: SourceId, + /// The path where crate files are extracted (`$CARGO_HOME/registry/src/$REG-HASH`). src_path: Filesystem, + /// Local reference to [`Config`] for convenience. config: &'cfg Config, + /// Whether or not the index has been updated. + /// + /// This is used as an optimization to avoid updating if not needed, such + /// as when `Cargo.lock` already exists and the index already contains the + /// locked entries, or to avoid updating multiple times. + /// + /// Only remote registries really need to update. Local registries only + /// check that the index exists. updated: bool, + /// Abstraction for interfacing to the different registry kinds. ops: Box, + /// Interface for managing the on-disk index. 
index: index::RegistryIndex<'cfg>, + /// A set of packages that should be allowed to be used, even if they are + /// yanked. + /// + /// This is populated from the entries in `Cargo.lock` to ensure that + /// `cargo update -p somepkg` won't unlock yanked entries in `Cargo.lock`. + /// Otherwise, the resolver would think that those entries no longer + /// exist, and it would trigger updates to unrelated packages. yanked_whitelist: HashSet, } +/// The `config.json` file stored in the index. #[derive(Deserialize)] pub struct RegistryConfig { /// Download endpoint for all crates. @@ -278,18 +304,7 @@ fn escaped_char_in_json() { .unwrap(); } -#[derive(Deserialize)] -#[serde(field_identifier, rename_all = "lowercase")] -enum Field { - Name, - Vers, - Deps, - Features, - Cksum, - Yanked, - Links, -} - +/// A dependency as encoded in the index JSON. #[derive(Deserialize)] struct RegistryDependency<'a> { name: InternedString, @@ -369,30 +384,108 @@ impl<'a> RegistryDependency<'a> { } } +/// An abstract interface to handle both a [local](local::LocalRegistry) and +/// [remote](remote::RemoteRegistry) registry. +/// +/// This allows [`RegistrySource`] to abstractly handle both registry kinds. pub trait RegistryData { + /// Performs initialization for the registry. + /// + /// This should be safe to call multiple times; the implementation is + /// expected to not do any work if it is already prepared. fn prepare(&self) -> CargoResult<()>; + + /// Returns the path to the index. + /// + /// Note that different registries store the index in different formats + /// (remote=git, local=files). fn index_path(&self) -> &Filesystem; + + /// Loads the JSON for a specific named package from the index. + /// + /// * `root` is the root path to the index. + /// * `path` is the relative path to the package to load (like `ca/rg/cargo`). + /// * `data` is a callback that will receive the raw bytes of the index JSON file. 
fn load( &self, root: &Path, path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()>; + + /// Loads the `config.json` file and returns it. + /// + /// Local registries don't have a config, and return `None`. fn config(&mut self) -> CargoResult>; + + /// Updates the index. + /// + /// For a remote registry, this updates the index over the network. Local + /// registries only check that the index exists. fn update_index(&mut self) -> CargoResult<()>; + + /// Prepare to start downloading a `.crate` file. + /// + /// Despite the name, this doesn't actually download anything. If the + /// `.crate` is already downloaded, then it returns [`MaybeLock::Ready`]. + /// If it hasn't been downloaded, then it returns [`MaybeLock::Download`] + /// which contains the URL to download. The [`crate::core::package::Download`] + /// system handles the actual download process. After downloading, it + /// calls [`RegistryData::finish_download`] to save the downloaded file. + /// + /// `checksum` is currently only used by local registries to verify the + /// file contents (because local registries never actually download + /// anything). Remote registries will validate the checksum in + /// `finish_download`. For already downloaded `.crate` files, it does not + /// validate the checksum, assuming the filesystem does not suffer from + /// corruption or manipulation. fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult; + + /// Finish a download by saving a `.crate` file to disk. + /// + /// After [`crate::core::package::Download`] has finished a download, + /// it will call this to save the `.crate` file. This is only relevant + /// for remote registries. This should validate the checksum and save + /// the given data to the on-disk cache. + /// + /// Returns a [`File`] handle to the `.crate` file, positioned at the start. 
fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8]) -> CargoResult; + /// Returns whether or not the `.crate` file is already downloaded. fn is_crate_downloaded(&self, _pkg: PackageId) -> bool { true } + + /// Validates that the global package cache lock is held. + /// + /// Given the [`Filesystem`], this will make sure that the package cache + /// lock is held. If not, it will panic. See + /// [`Config::acquire_package_cache_lock`] for acquiring the global lock. + /// + /// Returns the [`Path`] to the [`Filesystem`]. fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path; + + /// Returns the current "version" of the index. + /// + /// For local registries, this returns `None` because there is no + /// versioning. For remote registries, this returns the SHA hash of the + /// git index on disk (or None if the index hasn't been downloaded yet). + /// + /// This is used by index caching to check if the cache is out of date. fn current_version(&self) -> Option; } +/// The status of [`RegistryData::download`] which indicates if a `.crate` +/// file has already been downloaded, or if not then the URL to download. pub enum MaybeLock { + /// The `.crate` file is already downloaded. [`File`] is a handle to the + /// opened `.crate` file on the filesystem. Ready(File), + /// The `.crate` file is not downloaded, here's the URL to download it from. + /// + /// `descriptor` is just a text string to display to the user of what is + /// being downloaded. Download { url: String, descriptor: String }, } diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 2e44d9ae3ea..d3f9eb9c03c 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -29,8 +29,12 @@ fn make_dep_prefix(name: &str) -> String { } } +/// A remote registry is a registry that lives at a remote URL (such as +/// crates.io). 
The git index is cloned locally, and `.crate` files are +/// downloaded as needed and cached locally. pub struct RemoteRegistry<'cfg> { index_path: Filesystem, + /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`). cache_path: Filesystem, source_id: SourceId, index_git_ref: GitReference, diff --git a/src/cargo/util/toml/mod.rs b/src/cargo/util/toml/mod.rs index 6afbee3db65..be68c980518 100644 --- a/src/cargo/util/toml/mod.rs +++ b/src/cargo/util/toml/mod.rs @@ -876,7 +876,7 @@ struct Context<'a, 'b> { } impl TomlManifest { - /// Prepares the manfiest for publishing. + /// Prepares the manifest for publishing. // - Path and git components of dependency specifications are removed. // - License path is updated to point within the package. pub fn prepare_for_publish(