From 50989db56990371ede3dcf3b497c7c5ad705b18f Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Thu, 13 Oct 2022 22:03:20 -0700 Subject: [PATCH 1/3] Use a different path for static files --- src/docbuilder/rustwide_builder.rs | 6 ++- src/lib.rs | 3 ++ src/web/mod.rs | 16 ++++--- src/web/routes.rs | 44 +++++++++++++++++++ src/web/rustdoc.rs | 68 ++++++++++++++++++++++++++++-- 5 files changed, 124 insertions(+), 13 deletions(-) diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index c758fd6e6..47c7ab16d 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -11,6 +11,7 @@ use crate::storage::{rustdoc_archive_path, source_archive_path}; use crate::utils::{ copy_dir_all, parse_rustc_version, queue_builder, set_config, CargoMetadata, ConfigName, }; +use crate::RUSTDOC_STATIC_STORAGE_PREFIX; use crate::{db::blacklist::is_blacklisted, utils::MetadataPackage}; use crate::{Config, Context, Index, Metrics, Storage}; use anyhow::{anyhow, bail, Error}; @@ -225,7 +226,8 @@ impl RustwideBuilder { .prefix("essential-files") .tempdir()?; copy_dir_all(source, &dest)?; - add_path_into_database(&self.storage, "", &dest)?; + + add_path_into_database(&self.storage, RUSTDOC_STATIC_STORAGE_PREFIX, &dest)?; set_config( &mut conn, @@ -710,7 +712,7 @@ impl RustwideBuilder { #[rustfmt::skip] const UNCONDITIONAL_ARGS: &[&str] = &[ - "--static-root-path", "/", + "--static-root-path", "/-/rustdoc.static/", "--cap-lints", "warn", "--disable-per-crate-search", "--extern-html-root-takes-precedence", diff --git a/src/lib.rs b/src/lib.rs index d8c81791d..eda3a5c0a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,3 +55,6 @@ pub const BUILD_VERSION: &str = concat!( " ", include_str!(concat!(env!("OUT_DIR"), "/git_version")) ); + +/// Where rustdoc's static files are stored in S3. +pub const RUSTDOC_STATIC_STORAGE_PREFIX: &str = "/rustdoc-static/"; diff --git a/src/web/mod.rs b/src/web/mod.rs index 98a82682f..cf0164a0a 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -138,8 +138,10 @@ impl MainHandler { let inject_extensions = InjectExtensions::new(context, template_data)?; let routes = routes::build_routes(); - let shared_resources = - Self::chain(inject_extensions.clone(), rustdoc::SharedResourceHandler); + let shared_resources = Self::chain( + inject_extensions.clone(), + rustdoc::LegacySharedResourceHandler, + ); let router_chain = Self::chain(inject_extensions.clone(), routes.iron_router()); Ok(MainHandler { @@ -174,13 +176,13 @@ impl Handler for MainHandler { // removes the shared static files from individual builds to save space, the "../mainXXX.js" // path doesn't really exist in storage for any particular build. The appropriate main file // *does* exist in the storage root, which is where the shared static files are put for each - // new rustdoc version. So here we give SharedResourceHandler a chance to handle all URLs - // before they go to the router. SharedResourceHandler looks at the last component of the + // new rustdoc version. So here we give LegacySharedResourceHandler a chance to handle all URLs + // before they go to the router. LegacySharedResourceHandler looks at the last component of the // request path ("main-20181217-1.33.0-nightly-adbfec229.js") and tries to fetch it from // the storage root (if it's JS, CSS, etc). If the file doesn't exist, we fall through to // the normal router, which may wind up serving an invocation-specific file from the crate // itself. For instance, a request for "/crate/foo/search-index-XYZ.js" will get a 404 from - // the SharedResourceHandler because "search-index-XYZ.js" doesn't exist in the storage root + // the LegacySharedResourceHandler because "search-index-XYZ.js" doesn't exist in the storage root // (it's not a shared static file), but it will get a 200 from rustdoc_html_server_handler // because it exists in a specific crate. // @@ -192,8 +194,8 @@ impl Handler for MainHandler { // https://docs.rs/this.path.does.not.exist/main-20181217-1.33.0-nightly-adbfec229.js // // If those 2018 crates get rebuilt, we won't have this problem anymore, and - // SharedResourceHandler can receive dispatch from the router, as other handlers do. That - // will also allow SharedResourceHandler to look up full paths in storage rather than just + // LegacySharedResourceHandler can receive dispatch from the router, as other handlers do. That + // will also allow LegacySharedResourceHandler to look up full paths in storage rather than just // the last component of the requested path. // // Also note: this approach means that a request for a given JS/CSS may serve from two diff --git a/src/web/routes.rs b/src/web/routes.rs index ac5df67fb..43d8b140a 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -31,6 +31,11 @@ pub(super) fn build_routes() -> Routes { routes.static_resource("/-/static/:single", super::statics::static_handler); routes.static_resource("/-/static/*", super::statics::static_handler); + routes.internal_page( + "/-/rustdoc.static/:single", + super::rustdoc::static_asset_handler, + ); + routes.internal_page("/-/rustdoc.static/*", super::rustdoc::static_asset_handler); routes.internal_page("/-/storage-change-detection.html", { #[derive(Debug, serde::Serialize)] struct StorageChangeDetection {} @@ -356,6 +361,8 @@ fn calculate_id(pattern: &str) -> String { #[cfg(test)] mod tests { use crate::test::*; + use crate::web::cache::CachePolicy; + use reqwest::StatusCode; #[test] fn test_root_redirects() { @@ -377,4 +384,41 @@ mod tests { Ok(()) }); } + + #[test] + fn serve_rustdoc_content_not_found() { + wrapper(|env| { + let response = env.frontend().get("/-/rustdoc-static/style.css").send()?; + assert_eq!(response.status(), StatusCode::NOT_FOUND); + assert_cache_control(&response, CachePolicy::NoCaching, &env.config()); + Ok(()) + }) + } + + #[test] + fn serve_rustdoc_content() { + wrapper(|env| { + let web = env.frontend(); + env.storage().store_one("style.css", *b"content")?; + env.storage() + .store_one("will_not/be_found.css", *b"something")?; + + let response = web.get("/-/rustdoc-static/style.css").send()?; + assert_cache_control( + &response, + CachePolicy::ForeverInCdnAndBrowser, + &env.config(), + ); + assert!(response.status().is_success()); + assert_eq!(response.text()?, "content"); + + assert_eq!( + web.get("/-/rustdoc-static/will_not/be_found.css") + .send()? + .status(), + StatusCode::NOT_FOUND + ); + Ok(()) + }) + } } diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 6f00dedb6..a21273c28 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -3,14 +3,14 @@ use crate::{ db::Pool, repositories::RepositoryStatsUpdater, - storage::rustdoc_archive_path, + storage::{rustdoc_archive_path, PathNotFoundError}, utils, web::{ cache::CachePolicy, crate_details::CrateDetails, csp::Csp, error::Nope, file::File, match_version, metrics::RenderingTimesRecorder, parse_url_with_params, redirect_base, report_error, MatchSemver, MetaData, }, - Config, Metrics, Storage, + Config, Metrics, Storage, RUSTDOC_STATIC_STORAGE_PREFIX, }; use anyhow::{anyhow, Context}; use iron::{ @@ -766,14 +766,51 @@ pub fn download_handler(req: &mut Request) -> IronResult { ))) } +/// Serves shared resources used by rustdoc-generated documentation. +/// +/// This serves files from S3, and is pointed to by the `--static-root-path` flag to rustdoc. +pub fn static_asset_handler(req: &mut Request) -> IronResult { + let storage = extension!(req, Storage); + let config = extension!(req, Config); + + let filename = req.url.path()[2..].join("/"); + let storage_path = format!("{}{}", RUSTDOC_STATIC_STORAGE_PREFIX, filename); + + // Prevent accessing static files outside the root. This could happen if the path + // contains `/` or `..`. The check doesn't outright prevent those strings to be present + // to allow accessing files in subdirectories. + let canonical_path = + std::fs::canonicalize(&storage_path).map_err(|_| Nope::ResourceNotFound)?; + let canonical_root = + std::fs::canonicalize(&storage_path).map_err(|_| Nope::ResourceNotFound)?; + + if !canonical_path.starts_with(canonical_root) { + return Err(Nope::ResourceNotFound.into()); + } + + match File::from_path(storage, &storage_path, config) { + Ok(file) => Ok(file.serve()), + Err(err) if err.downcast_ref::().is_some() => { + Err(Nope::ResourceNotFound.into()) + } + Err(err) => { + utils::report_error(&err); + Err(Nope::InternalServerError.into()) + } + } +} + /// Serves shared web resources used by rustdoc-generated documentation. /// /// This includes common `css` and `js` files that only change when the compiler is updated, but are /// otherwise the same for all crates documented with that compiler. Those have a custom handler to /// deduplicate them and save space. -pub struct SharedResourceHandler; +/// +/// This handler considers only the last component of the request path, and looks for a matching file +/// in the Storage root. +pub struct LegacySharedResourceHandler; -impl Handler for SharedResourceHandler { +impl Handler for LegacySharedResourceHandler { fn handle(&self, req: &mut Request) -> IronResult { let path = req.url.path(); let filename = path.last().unwrap(); // unwrap is fine: vector is non-empty @@ -796,6 +833,29 @@ impl Handler for SharedResourceHandler { } } +/// Serves shared web resources used by rustdoc-generated documentation. +/// +/// Rustdoc has certain JS, CSS, font and image files that are required for all +/// documentation it generates, and these don't change often. We make one copy +/// of these per rustdoc release and serve them from a common location. +/// +/// This handler considers the whole path, and looks for a file at that path in +/// the Storage. +pub struct SharedResourceHandler; + +impl Handler for SharedResourceHandler { + fn handle(&self, req: &mut Request) -> IronResult { + let storage = extension!(req, Storage); + let config = extension!(req, Config); + + let storage_path = format!("/{}", req.url.path().join("/")); + match File::from_path(storage, &storage_path, config) { + Ok(file) => Ok(file.serve()), + Err(_) => Err(Nope::ResourceNotFound.into()), + } + } +} + #[cfg(test)] mod test { use crate::{test::*, web::cache::CachePolicy, Config}; From 4d03c0275447ddba4ec2260ac589264f06da27cc Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Thu, 20 Oct 2022 01:24:27 -0700 Subject: [PATCH 2/3] Fix paths for test --- src/web/routes.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/web/routes.rs b/src/web/routes.rs index 43d8b140a..f0ed7c42f 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -388,7 +388,7 @@ mod tests { #[test] fn serve_rustdoc_content_not_found() { wrapper(|env| { - let response = env.frontend().get("/-/rustdoc-static/style.css").send()?; + let response = env.frontend().get("/-/rustdoc.static/style.css").send()?; assert_eq!(response.status(), StatusCode::NOT_FOUND); assert_cache_control(&response, CachePolicy::NoCaching, &env.config()); Ok(()) @@ -399,21 +399,22 @@ mod tests { fn serve_rustdoc_content() { wrapper(|env| { let web = env.frontend(); - env.storage().store_one("style.css", *b"content")?; env.storage() - .store_one("will_not/be_found.css", *b"something")?; + .store_one("/rustdoc-static/style.css", "content".as_bytes())?; + env.storage() + .store_one("/will_not/be_found.css", "something".as_bytes())?; - let response = web.get("/-/rustdoc-static/style.css").send()?; + let response = web.get("/-/rustdoc.static/style.css").send()?; + assert!(response.status().is_success()); assert_cache_control( &response, CachePolicy::ForeverInCdnAndBrowser, &env.config(), ); - assert!(response.status().is_success()); assert_eq!(response.text()?, "content"); assert_eq!( - web.get("/-/rustdoc-static/will_not/be_found.css") + web.get("/-/rustdoc.static/will_not/be_found.css") .send()? .status(), StatusCode::NOT_FOUND From d38e14ff379f7fb1368b049b6dcb4551f3532705 Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Thu, 20 Oct 2022 01:41:20 -0700 Subject: [PATCH 3/3] More fixups --- src/web/rustdoc.rs | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index a21273c28..e8a4a059e 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -776,18 +776,6 @@ pub fn static_asset_handler(req: &mut Request) -> IronResult { let filename = req.url.path()[2..].join("/"); let storage_path = format!("{}{}", RUSTDOC_STATIC_STORAGE_PREFIX, filename); - // Prevent accessing static files outside the root. This could happen if the path - // contains `/` or `..`. The check doesn't outright prevent those strings to be present - // to allow accessing files in subdirectories. - let canonical_path = - std::fs::canonicalize(&storage_path).map_err(|_| Nope::ResourceNotFound)?; - let canonical_root = - std::fs::canonicalize(&storage_path).map_err(|_| Nope::ResourceNotFound)?; - - if !canonical_path.starts_with(canonical_root) { - return Err(Nope::ResourceNotFound.into()); - } - match File::from_path(storage, &storage_path, config) { Ok(file) => Ok(file.serve()), Err(err) if err.downcast_ref::().is_some() => { @@ -833,29 +821,6 @@ impl Handler for LegacySharedResourceHandler { } } -/// Serves shared web resources used by rustdoc-generated documentation. -/// -/// Rustdoc has certain JS, CSS, font and image files that are required for all -/// documentation it generates, and these don't change often. We make one copy -/// of these per rustdoc release and serve them from a common location. -/// -/// This handler considers the whole path, and looks for a file at that path in -/// the Storage. -pub struct SharedResourceHandler; - -impl Handler for SharedResourceHandler { - fn handle(&self, req: &mut Request) -> IronResult { - let storage = extension!(req, Storage); - let config = extension!(req, Config); - - let storage_path = format!("/{}", req.url.path().join("/")); - match File::from_path(storage, &storage_path, config) { - Ok(file) => Ok(file.serve()), - Err(_) => Err(Nope::ResourceNotFound.into()), - } - } -} - #[cfg(test)] mod test { use crate::{test::*, web::cache::CachePolicy, Config};