diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 94c0b0667..87c4215ba 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
       - name: Install toolchain
         uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.73.0
+          toolchain: 1.75.0
           override: true
           components: rustfmt, clippy
diff --git a/Cargo.lock b/Cargo.lock
index e460a6ef7..151ed16c2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2285,7 +2285,6 @@ dependencies = [
  "analyzeme",
  "anyhow",
  "arc-swap",
- "async-trait",
  "brotli",
  "bytes",
  "chrono",
diff --git a/site/src/interpolate.rs b/database/src/interpolate.rs
similarity index 99%
rename from site/src/interpolate.rs
rename to database/src/interpolate.rs
index ac3840896..16c8244b5 100644
--- a/site/src/interpolate.rs
+++ b/database/src/interpolate.rs
@@ -10,7 +10,7 @@
 //! Given a series with some missing data `[1, 2, ?, 4]`,
 //! this iterator yields `[1, 2, 2, 4]`.
 
-use crate::db::Point;
+use crate::selector::Point;
 
 /// Whether a point has been interpolated or not
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
diff --git a/database/src/lib.rs b/database/src/lib.rs
index 27d84ca57..19b749243 100644
--- a/database/src/lib.rs
+++ b/database/src/lib.rs
@@ -6,9 +6,13 @@ use serde::{Deserialize, Serialize};
 use std::fmt;
 use std::hash;
 use std::ops::{Add, Sub};
+use std::sync::Arc;
 use std::time::Duration;
 
+pub mod interpolate;
+pub mod metric;
 pub mod pool;
+pub mod selector;
 
 pub use pool::{Connection, Pool};
 
@@ -245,20 +249,15 @@ impl fmt::Display for Profile {
 ///
 /// These are usually reported to users in a "flipped" way. For example,
 /// `Cache::Empty` means we're doing a "full" build. We present this to users as "full".
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
-#[serde(tag = "variant", content = "name")]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
 pub enum Scenario {
     /// Empty cache (i.e., full build)
-    #[serde(rename = "full")]
     Empty,
     /// Empty cache but still incremental (i.e., a full incremental build)
-    #[serde(rename = "incr-full")]
     IncrementalEmpty,
     /// Cache is fully up-to-date (i.e., no code has changed)
-    #[serde(rename = "incr-unchanged")]
     IncrementalFresh,
     /// Cache is mostly up-to-date but some code has been changed
-    #[serde(rename = "incr-patched")]
     IncrementalPatch(PatchName),
 }
 
@@ -431,6 +430,34 @@ intern!(pub struct QueryLabel);
 #[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 pub struct ArtifactIdNumber(pub u32);
 
+#[derive(Debug)]
+pub struct ArtifactIdIter {
+    ids: Arc<Vec<ArtifactId>>,
+    idx: usize,
+}
+
+impl ArtifactIdIter {
+    pub fn new(artifact_ids: Arc<Vec<ArtifactId>>) -> ArtifactIdIter {
+        ArtifactIdIter {
+            ids: artifact_ids,
+            idx: 0,
+        }
+    }
+}
+
+impl Iterator for ArtifactIdIter {
+    type Item = ArtifactId;
+    fn next(&mut self) -> Option<Self::Item> {
+        let r = self.ids.get(self.idx)?;
+        self.idx += 1;
+        Some(r.clone())
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.ids.len(), Some(self.ids.len()))
+    }
+}
+
 /// Cached Id lookups for many database tables.
 ///
 /// This is a quick way to find what the database id for something.
diff --git a/database/src/metric.rs b/database/src/metric.rs
new file mode 100644
index 000000000..0cfe13c16
--- /dev/null
+++ b/database/src/metric.rs
@@ -0,0 +1,113 @@
+use std::str::FromStr;
+
+use serde::{de::IntoDeserializer, Deserialize, Serialize};
+
+/// This enum contains all "known" metrics coming from rustc or profiling tools that we know
+/// (and care) about.
+#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub enum Metric {
+    #[serde(rename = "context-switches")]
+    ContextSwitches,
+    #[serde(rename = "cpu-clock")]
+    CpuClock,
+    #[serde(rename = "cpu-clock:u")]
+    CpuClockUser,
+    #[serde(rename = "cycles")]
+    Cycles,
+    #[serde(rename = "cycles:u")]
+    CyclesUser,
+    #[serde(rename = "faults")]
+    Faults,
+    #[serde(rename = "faults:u")]
+    FaultsUser,
+    #[serde(rename = "instructions:u")]
+    InstructionsUser,
+    #[serde(rename = "max-rss")]
+    MaxRSS,
+    #[serde(rename = "task-clock")]
+    TaskClock,
+    #[serde(rename = "task-clock:u")]
+    TaskClockUser,
+    #[serde(rename = "wall-time")]
+    WallTime,
+    #[serde(rename = "branch-misses")]
+    BranchMisses,
+    #[serde(rename = "cache-misses")]
+    CacheMisses,
+    /// Rustc guesses the codegen unit size by MIR count.
+    #[serde(rename = "size:codegen_unit_size_estimate")]
+    CodegenUnitSize,
+    /// The codegen unit size by llvm ir count, the real size of a cgu.
+    #[serde(rename = "size:cgu_instructions")]
+    CodegenUnitLlvmIrCount,
+    #[serde(rename = "size:dep_graph")]
+    DepGraphSize,
+    #[serde(rename = "size:linked_artifact")]
+    LinkedArtifactSize,
+    #[serde(rename = "size:object_file")]
+    ObjectFileSize,
+    #[serde(rename = "size:query_cache")]
+    QueryCacheSize,
+    #[serde(rename = "size:work_product_index")]
+    WorkProductIndexSize,
+    #[serde(rename = "size:crate_metadata")]
+    CrateMetadataSize,
+    #[serde(rename = "size:dwo_file")]
+    DwoFileSize,
+    #[serde(rename = "size:assembly_file")]
+    AssemblyFileSize,
+    #[serde(rename = "size:llvm_bitcode")]
+    LlvmBitcodeSize,
+    #[serde(rename = "size:llvm_ir")]
+    LlvmIrSize,
+    /// Total bytes of a generated documentation directory
+    #[serde(rename = "size:doc_bytes")]
+    DocByteSize,
+    /// Number of files inside a generated documentation directory.
+    #[serde(rename = "size:doc_files_count")]
+    DocFilesCount,
+}
+
+impl FromStr for Metric {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Metric::deserialize(s.into_deserializer())
+            .map_err(|e: serde::de::value::Error| format!("Unknown metric `{s}`: {e:?}"))
+    }
+}
+
+impl Metric {
+    pub fn as_str(&self) -> &str {
+        match self {
+            Metric::ContextSwitches => "context-switches",
+            Metric::CpuClock => "cpu-clock",
+            Metric::CpuClockUser => "cpu-clock:u",
+            Metric::Cycles => "cycles",
+            Metric::CyclesUser => "cycles:u",
+            Metric::Faults => "faults",
+            Metric::FaultsUser => "faults:u",
+            Metric::InstructionsUser => "instructions:u",
+            Metric::MaxRSS => "max-rss",
+            Metric::TaskClock => "task-clock",
+            Metric::TaskClockUser => "task-clock:u",
+            Metric::WallTime => "wall-time",
+            Metric::BranchMisses => "branch-misses",
+            Metric::CacheMisses => "cache-misses",
+            Metric::CodegenUnitSize => "size:codegen_unit_size_estimate",
+            Metric::CodegenUnitLlvmIrCount => "size:cgu_instructions",
+            Metric::DepGraphSize => "size:dep_graph",
+            Metric::LinkedArtifactSize => "size:linked_artifact",
+            Metric::ObjectFileSize => "size:object_file",
+            Metric::QueryCacheSize => "size:query_cache",
+            Metric::WorkProductIndexSize => "size:work_product_index",
+            Metric::CrateMetadataSize => "size:crate_metadata",
+            Metric::DwoFileSize => "size:dwo_file",
+            Metric::AssemblyFileSize => "size:assembly_file",
+            Metric::LlvmBitcodeSize => "size:llvm_bitcode",
+            Metric::LlvmIrSize => "size:llvm_ir",
+            Metric::DocByteSize => "size:doc_bytes",
+            Metric::DocFilesCount => "size:doc_files_count",
+        }
+    }
+}
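The `FromStr` impl above goes through serde's `IntoDeserializer`, so the `#[serde(rename = ...)]` strings are the single source of truth for both parsing and `as_str`. A small sanity-check sketch of the round trip (hypothetical test, not part of this patch):

```rust
use std::str::FromStr;

use database::metric::Metric;

#[test]
fn metric_name_round_trip() {
    // Parsing and printing both derive from the serde rename strings.
    let metric = Metric::from_str("instructions:u").unwrap();
    assert_eq!(metric, Metric::InstructionsUser);
    assert_eq!(metric.as_str(), "instructions:u");

    // Unknown names produce a descriptive error instead of a panic.
    assert!(Metric::from_str("not-a-metric").is_err());
}
```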
diff --git a/database/src/selector.rs b/database/src/selector.rs
new file mode 100644
index 000000000..6069989ef
--- /dev/null
+++ b/database/src/selector.rs
@@ -0,0 +1,406 @@
+//! Selector API for returning subset of series which will be rendered in some
+//! format.
+//!
+//! We have the following expected paths:
+//!
+//! * :benchmark/:profile/:scenario/:metric => [cid => u64]
+//! * :crate/:profile/:scenario/:self_profile_query/:stat (SelfProfileTime, SelfProfileCacheHits, ...)
+//!     :stat = time => Duration,
+//!     :stat = cache hits => u32,
+//!     :stat = invocation count => u32,
+//!     :stat = blocked time => Duration,
+//!     :stat = incremental load time => Duration,
+//!
+//! Note that the returned series always have a "simple" type of a small set --
+//! things like arrays, integers. We aggregate into higher level types above the
+//! primitive series readers.
+//!
+//! We specify a single struct per path style above.
+//!
+//! `Option<T>` in the path either specifies a specific T to filter by, or
+//! requests that all are provided. Note that this is a cartesian product if
+//! there are multiple `None`s.
+
+use std::{
+    fmt::{self, Debug},
+    hash::Hash,
+    sync::Arc,
+};
+
+use crate::{
+    interpolate::Interpolate, metric::Metric, ArtifactId, ArtifactIdIter, Benchmark,
+    CodegenBackend, Connection, Index, Lookup, Profile, Scenario,
+};
+
+#[derive(Debug)]
+pub struct StatisticSeries {
+    pub artifact_ids: ArtifactIdIter,
+    pub points: std::vec::IntoIter<Option<f64>>,
+}
+
+impl Iterator for StatisticSeries {
+    type Item = (ArtifactId, Option<f64>);
+    fn next(&mut self) -> Option<Self::Item> {
+        Some((self.artifact_ids.next()?, self.points.next().unwrap()))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.artifact_ids.size_hint()
+    }
+}
+
+pub trait Point {
+    type Key: fmt::Debug + PartialEq + Clone;
+
+    fn key(&self) -> &Self::Key;
+    fn set_key(&mut self, key: Self::Key);
+    fn value(&self) -> Option<f64>;
+    fn set_value(&mut self, value: f64);
+    fn interpolated(&self) -> bool;
+    fn set_interpolated(&mut self);
+}
+
+impl<T: fmt::Debug + PartialEq + Clone> Point for (T, Option<f64>) {
+    type Key = T;
+
+    fn key(&self) -> &T {
+        &self.0
+    }
+    fn set_key(&mut self, key: T) {
+        self.0 = key;
+    }
+    fn value(&self) -> Option<f64> {
+        self.1
+    }
+    fn set_value(&mut self, value: f64) {
+        self.1 = Some(value);
+    }
+    fn interpolated(&self) -> bool {
+        false
+    }
+    fn set_interpolated(&mut self) {
+        // no-op
+    }
+}
+
+impl<T: fmt::Debug + PartialEq + Clone> Point for (T, f64) {
+    type Key = T;
+
+    fn key(&self) -> &T {
+        &self.0
+    }
+    fn set_key(&mut self, key: T) {
+        self.0 = key;
+    }
+    fn value(&self) -> Option<f64> {
+        Some(self.1)
+    }
+    fn set_value(&mut self, value: f64) {
+        self.1 = value;
+    }
+    fn interpolated(&self) -> bool {
+        false
+    }
+    fn set_interpolated(&mut self) {
+        // no-op
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Selector<T> {
+    All,
+    Subset(Vec<T>),
+    One(T),
+}
+
+impl<T> Selector<T> {
+    fn map<U>(self, mut f: impl FnMut(T) -> U) -> Selector<U> {
+        match self {
+            Selector::All => Selector::All,
+            Selector::Subset(subset) => Selector::Subset(subset.into_iter().map(f).collect()),
+            Selector::One(o) => Selector::One(f(o)),
+        }
+    }
+    pub fn try_map<U, E>(self, mut f: impl FnMut(T) -> Result<U, E>) -> Result<Selector<U>, E> {
+        Ok(match self {
+            Selector::All => Selector::All,
+            Selector::Subset(subset) => {
+                Selector::Subset(subset.into_iter().map(f).collect::<Result<_, _>>()?)
+            }
+            Selector::One(o) => Selector::One(f(o)?),
+        })
+    }
+
+    fn matches<U>(&self, other: U) -> bool
+    where
+        U: PartialEq<T>,
+    {
+        match self {
+            Selector::One(c) => other == *c,
+            Selector::Subset(subset) => subset.iter().any(|c| other == *c),
+            Selector::All => true,
+        }
+    }
+}
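`Selector::All`/`Subset`/`One` encode the three filter shapes the module doc describes. `map` and `matches` stay crate-private helpers for the queries below, while `try_map` is the public, fallible conversion. A usage sketch (the `parse_ids` helper is invented for illustration):

```rust
use database::selector::Selector;

// Convert user-supplied filter strings into typed selectors, failing fast
// on the first string that does not parse.
fn parse_ids(raw: Selector<String>) -> Result<Selector<u32>, String> {
    raw.try_map(|s| s.parse::<u32>().map_err(|e| format!("bad id `{s}`: {e}")))
}
```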
+
+/// Represents the parameters of a single benchmark execution that collects a set of statistics.
+pub trait TestCase: Debug + Clone + Hash + PartialEq + Eq + PartialOrd + Ord {}
+
+#[derive(Debug)]
+pub struct SeriesResponse<Case, T> {
+    pub test_case: Case,
+    pub series: T,
+}
+
+impl<Case, T> SeriesResponse<Case, T> {
+    pub fn map<U>(self, m: impl FnOnce(T) -> U) -> SeriesResponse<Case, U> {
+        let SeriesResponse {
+            test_case: key,
+            series,
+        } = self;
+        SeriesResponse {
+            test_case: key,
+            series: m(series),
+        }
+    }
+
+    pub fn interpolate(self) -> SeriesResponse<Case, Interpolate<T>>
+    where
+        T: Iterator,
+        T::Item: Point,
+    {
+        self.map(|s| Interpolate::new(s))
+    }
+}
+
+pub trait BenchmarkQuery: Debug + Clone {
+    type TestCase: TestCase;
+
+    #[allow(async_fn_in_trait)]
+    async fn execute(
+        &self,
+        connection: &mut dyn Connection,
+        index: &Index,
+        artifact_ids: Arc<Vec<ArtifactId>>,
+    ) -> Result<Vec<SeriesResponse<Self::TestCase, StatisticSeries>>, String>;
+}
+
+// Compile benchmarks querying
+#[derive(Clone, Hash, Eq, PartialEq, Debug)]
+pub struct CompileBenchmarkQuery {
+    benchmark: Selector<Benchmark>,
+    scenario: Selector<Scenario>,
+    profile: Selector<Profile>,
+    backend: Selector<CodegenBackend>,
+    metric: Selector<crate::Metric>,
+}
+
+impl CompileBenchmarkQuery {
+    pub fn benchmark(mut self, selector: Selector<Benchmark>) -> Self {
+        self.benchmark = selector;
+        self
+    }
+
+    pub fn profile(mut self, selector: Selector<Profile>) -> Self {
+        self.profile = selector;
+        self
+    }
+
+    pub fn scenario(mut self, selector: Selector<Scenario>) -> Self {
+        self.scenario = selector;
+        self
+    }
+
+    pub fn metric(mut self, selector: Selector<Metric>) -> Self {
+        self.metric = selector.map(|v| v.as_str().into());
+        self
+    }
+
+    pub fn all_for_metric(metric: Metric) -> Self {
+        Self {
+            benchmark: Selector::All,
+            profile: Selector::All,
+            scenario: Selector::All,
+            backend: Selector::All,
+            metric: Selector::One(metric.as_str().into()),
+        }
+    }
+}
+
+impl Default for CompileBenchmarkQuery {
+    fn default() -> Self {
+        Self {
+            benchmark: Selector::All,
+            scenario: Selector::All,
+            profile: Selector::All,
+            backend: Selector::All,
+            metric: Selector::All,
+        }
+    }
+}
+
+impl BenchmarkQuery for CompileBenchmarkQuery {
+    type TestCase = CompileTestCase;
+
+    async fn execute(
+        &self,
+        conn: &mut dyn Connection,
+        index: &Index,
+        artifact_ids: Arc<Vec<ArtifactId>>,
+    ) -> Result<Vec<SeriesResponse<Self::TestCase, StatisticSeries>>, String> {
+        let mut statistic_descriptions: Vec<_> = index
+            .compile_statistic_descriptions()
+            .filter(|(&(b, p, s, backend, m), _)| {
+                self.benchmark.matches(b)
+                    && self.profile.matches(p)
+                    && self.scenario.matches(s)
+                    && self.backend.matches(backend)
+                    && self.metric.matches(m)
+            })
+            .map(|(&(benchmark, profile, scenario, backend, metric), sid)| {
+                (
+                    CompileTestCase {
+                        benchmark,
+                        profile,
+                        scenario,
+                        backend,
+                    },
+                    metric,
+                    sid,
+                )
+            })
+            .collect();
+
+        statistic_descriptions.sort_unstable();
+
+        let sids: Vec<_> = statistic_descriptions
+            .iter()
+            .map(|(_, _, sid)| *sid)
+            .collect();
+
+        let aids = artifact_ids
+            .iter()
+            .map(|aid| aid.lookup(index))
+            .collect::<Vec<_>>();
+
+        Ok(conn
+            .get_pstats(&sids, &aids)
+            .await
+            .into_iter()
+            .zip(statistic_descriptions)
+            .filter(|(points, _)| points.iter().any(|value| value.is_some()))
+            .map(|(points, (test_case, metric, _))| {
+                SeriesResponse {
+                    series: StatisticSeries {
+                        artifact_ids: ArtifactIdIter::new(artifact_ids.clone()),
+                        points: if *metric == *"cpu-clock" || *metric == *"task-clock" {
+                            // Convert to seconds -- perf reports these measurements in
+                            // milliseconds
+                            points
+                                .into_iter()
+                                .map(|p| p.map(|v| v / 1000.0))
+                                .collect::<Vec<_>>()
+                                .into_iter()
+                        } else {
+                            points.into_iter()
+                        },
+                    },
+                    test_case,
+                }
+            })
+            .collect::<Vec<_>>())
+    }
+}
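Note that `execute` is a native `async fn` in a trait, which stabilized in Rust 1.75 — presumably why this PR bumps the CI toolchain and can drop the `async-trait` dependency. `#[allow(async_fn_in_trait)]` silences the lint that warns about the returned future carrying no `Send` bound; that is acceptable for a trait consumed only inside this workspace. A minimal sketch of the before/after shape (trait and type names hypothetical):

```rust
// Before: the async-trait proc macro rewrote the method to return a boxed future.
// #[async_trait]
// pub trait BenchmarkQuery { async fn execute(&self) -> Result<(), String>; }

// After: plain async fn in trait, no macro and no boxing (requires Rust >= 1.75).
pub trait Fetch {
    #[allow(async_fn_in_trait)]
    async fn fetch(&self) -> Result<String, String>;
}

struct StaticFetcher;

impl Fetch for StaticFetcher {
    async fn fetch(&self) -> Result<String, String> {
        Ok("data".to_string())
    }
}
```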
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct CompileTestCase {
+    pub benchmark: Benchmark,
+    pub profile: Profile,
+    pub scenario: Scenario,
+    pub backend: CodegenBackend,
+}
+
+impl TestCase for CompileTestCase {}
+
+// Runtime benchmarks querying
+#[derive(Clone, Hash, Eq, PartialEq, Debug)]
+pub struct RuntimeBenchmarkQuery {
+    benchmark: Selector<Benchmark>,
+    metric: Selector<crate::Metric>,
+}
+
+impl RuntimeBenchmarkQuery {
+    pub fn benchmark(mut self, selector: Selector<Benchmark>) -> Self {
+        self.benchmark = selector;
+        self
+    }
+
+    pub fn metric(mut self, selector: Selector<Metric>) -> Self {
+        self.metric = selector.map(|v| v.as_str().into());
+        self
+    }
+
+    pub fn all_for_metric(metric: Metric) -> Self {
+        Self {
+            benchmark: Selector::All,
+            metric: Selector::One(metric.as_str().into()),
+        }
+    }
+}
+
+impl Default for RuntimeBenchmarkQuery {
+    fn default() -> Self {
+        Self {
+            benchmark: Selector::All,
+            metric: Selector::All,
+        }
+    }
+}
+
+impl BenchmarkQuery for RuntimeBenchmarkQuery {
+    type TestCase = RuntimeTestCase;
+
+    async fn execute(
+        &self,
+        conn: &mut dyn Connection,
+        index: &Index,
+        artifact_ids: Arc<Vec<ArtifactId>>,
+    ) -> Result<Vec<SeriesResponse<Self::TestCase, StatisticSeries>>, String> {
+        let mut statistic_descriptions: Vec<_> = index
+            .runtime_statistic_descriptions()
+            .filter(|(&(b, m), _)| self.benchmark.matches(b) && self.metric.matches(m))
+            .map(|(&(benchmark, _), sid)| (RuntimeTestCase { benchmark }, sid))
+            .collect();
+
+        statistic_descriptions.sort_unstable();
+
+        let sids: Vec<_> = statistic_descriptions.iter().map(|(_, sid)| *sid).collect();
+
+        let aids = artifact_ids
+            .iter()
+            .map(|aid| aid.lookup(index))
+            .collect::<Vec<_>>();
+
+        Ok(conn
+            .get_runtime_pstats(&sids, &aids)
+            .await
+            .into_iter()
+            .zip(statistic_descriptions)
+            .filter(|(points, _)| points.iter().any(|value| value.is_some()))
+            .map(|(points, (test_case, _))| SeriesResponse {
+                series: StatisticSeries {
+                    artifact_ids: ArtifactIdIter::new(artifact_ids.clone()),
+                    points: points.into_iter(),
+                },
+                test_case,
+            })
+            .collect::<Vec<_>>())
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct RuntimeTestCase {
+    pub benchmark: Benchmark,
+}
+
+impl TestCase for RuntimeTestCase {}
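End-to-end, the pieces above compose as follows — a hypothetical caller (function name and error handling invented for illustration; `Pool::open`, `connection` and `Index::load` appear elsewhere in this patch):

```rust
use std::sync::Arc;

use database::metric::Metric;
use database::selector::{BenchmarkQuery, CompileBenchmarkQuery};
use database::{ArtifactId, Pool};

async fn instruction_counts(
    db_url: &str,
    artifact_ids: Arc<Vec<ArtifactId>>,
) -> Result<(), String> {
    let pool = Pool::open(db_url);
    let mut conn = pool.connection().await;
    let index = database::Index::load(&mut *conn).await;

    // All benchmarks/profiles/scenarios/backends, one metric.
    let query = CompileBenchmarkQuery::all_for_metric(Metric::InstructionsUser);
    for response in query.execute(&mut *conn, &index, artifact_ids).await? {
        let test_case = response.test_case;
        // `StatisticSeries` yields one `(ArtifactId, Option<f64>)` per artifact.
        for (artifact, value) in response.series {
            let _ = (&test_case, artifact, value);
        }
    }
    Ok(())
}
```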
diff --git a/site/Cargo.toml b/site/Cargo.toml
index a15cd9af4..968f30f32 100644
--- a/site/Cargo.toml
+++ b/site/Cargo.toml
@@ -32,7 +32,6 @@ snap = "1"
 itertools = "0.10"
 hashbrown = { version = "0.14", features = ["serde"] }
 arc-swap = "1.3"
-async-trait = "0.1"
 database = { path = "../database" }
 bytes = "1.0"
 url = "2"
diff --git a/site/src/api.rs b/site/src/api.rs
index 2b8ec4257..79bdcbbae 100644
--- a/site/src/api.rs
+++ b/site/src/api.rs
@@ -252,9 +252,8 @@ pub mod bootstrap {
 
 pub mod comparison {
     use crate::benchmark_metadata::ProfileMetadata;
-    use crate::comparison::Metric;
     use collector::Bound;
-    use database::Date;
+    use database::{metric::Metric, Date};
     use serde::{Deserialize, Serialize};
     use std::collections::HashMap;
 
@@ -595,8 +594,8 @@ pub mod github {
 }
 
 pub mod triage {
-    use crate::comparison::Metric;
     use collector::Bound;
+    use database::metric::Metric;
     use serde::{Deserialize, Serialize};
 
     #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/site/src/average.rs b/site/src/average.rs
index 6fcc0e7fd..729aff96d 100644
--- a/site/src/average.rs
+++ b/site/src/average.rs
@@ -1,4 +1,4 @@
-use crate::db::Point;
+use database::selector::Point;
 
 /// This aggregates interpolated iterators.
 ///
@@ -80,12 +80,13 @@ where
 
 #[cfg(test)]
 mod tests {
+    use database::selector::Point;
+
     use super::average;
 
     #[test]
     fn test_no_interpolation_average() {
         // Test that averaging works without interpolation.
-        use crate::db::Point;
 
         let v = vec![
             vec![("a", 0.0), ("b", 200.0)],
@@ -109,8 +110,7 @@ mod tests {
     #[test]
     fn test_interpolation_average() {
         // Test that averaging works with interpolation.
-        use crate::db::Point;
-        use crate::interpolate::{Interpolate, IsInterpolated};
+        use database::interpolate::{Interpolate, IsInterpolated};
 
         let v = vec![
             vec![("a", Some(0.0)), ("b", Some(200.0))],
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
index 073bb828c..d8cf69a1e 100644
--- a/site/src/comparison.rs
+++ b/site/src/comparison.rs
@@ -3,23 +3,23 @@
 //! comparison endpoints
 
 use crate::api;
-use crate::db::{ArtifactId, Benchmark, Lookup, Profile, Scenario};
 use crate::github;
 use crate::load::SiteCtxt;
-use crate::selector::{
-    self, BenchmarkQuery, CompileBenchmarkQuery, RuntimeBenchmarkQuery, TestCase,
-};
 use collector::compile::benchmark::category::Category;
 use collector::Bound;
-use serde::{Deserialize, Serialize};
+use database::{
+    metric::Metric,
+    selector::{self, BenchmarkQuery, CompileBenchmarkQuery, RuntimeBenchmarkQuery, TestCase},
+};
+use database::{ArtifactId, Benchmark, Lookup, Profile, Scenario};
+use serde::Serialize;
 
 use crate::api::comparison::CompileBenchmarkMetadata;
 use crate::benchmark_metadata::get_compile_benchmarks_metadata;
 use crate::server::comparison::StatComparison;
 use collector::compile::benchmark::ArtifactType;
 use database::{CodegenBackend, CommitType, CompileBenchmark};
-use serde::de::IntoDeserializer;
 use std::cmp;
 use std::collections::{HashMap, HashSet};
 use std::error::Error;
@@ -27,7 +27,6 @@ use std::fmt::Write;
 use std::hash::Hash;
 use std::iter;
 use std::ops::Deref;
-use std::str::FromStr;
 use std::sync::Arc;
 
 type BoxedError = Box<dyn Error + Send + Sync>;
@@ -234,115 +233,12 @@ async fn populate_report(
     }
 }
 
-/// This enum contains all "known" metrics coming from rustc or profiling tools that we know
-/// (and care) about.
-#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub enum Metric {
-    #[serde(rename = "context-switches")]
-    ContextSwitches,
-    #[serde(rename = "cpu-clock")]
-    CpuClock,
-    #[serde(rename = "cpu-clock:u")]
-    CpuClockUser,
-    #[serde(rename = "cycles")]
-    Cycles,
-    #[serde(rename = "cycles:u")]
-    CyclesUser,
-    #[serde(rename = "faults")]
-    Faults,
-    #[serde(rename = "faults:u")]
-    FaultsUser,
-    #[serde(rename = "instructions:u")]
-    InstructionsUser,
-    #[serde(rename = "max-rss")]
-    MaxRSS,
-    #[serde(rename = "task-clock")]
-    TaskClock,
-    #[serde(rename = "task-clock:u")]
-    TaskClockUser,
-    #[serde(rename = "wall-time")]
-    WallTime,
-    #[serde(rename = "branch-misses")]
-    BranchMisses,
-    #[serde(rename = "cache-misses")]
-    CacheMisses,
-    /// Rustc guesses the codegen unit size by MIR count.
-    #[serde(rename = "size:codegen_unit_size_estimate")]
-    CodegenUnitSize,
-    /// The codegen unit size by llvm ir count, the real size of a cgu.
-    #[serde(rename = "size:cgu_instructions")]
-    CodegenUnitLlvmIrCount,
-    #[serde(rename = "size:dep_graph")]
-    DepGraphSize,
-    #[serde(rename = "size:linked_artifact")]
-    LinkedArtifactSize,
-    #[serde(rename = "size:object_file")]
-    ObjectFileSize,
-    #[serde(rename = "size:query_cache")]
-    QueryCacheSize,
-    #[serde(rename = "size:work_product_index")]
-    WorkProductIndexSize,
-    #[serde(rename = "size:crate_metadata")]
-    CrateMetadataSize,
-    #[serde(rename = "size:dwo_file")]
-    DwoFileSize,
-    #[serde(rename = "size:assembly_file")]
-    AssemblyFileSize,
-    #[serde(rename = "size:llvm_bitcode")]
-    LlvmBitcodeSize,
-    #[serde(rename = "size:llvm_ir")]
-    LlvmIrSize,
-    /// Total bytes of a generated documentation directory
-    #[serde(rename = "size:doc_bytes")]
-    DocByteSize,
-    /// Number of files inside a generated documentation directory.
-    #[serde(rename = "size:doc_files_count")]
-    DocFilesCount,
+trait MetricExt {
+    fn relative_change_magnitude(&self, change: f64) -> Magnitude;
+    fn is_typically_noisy(&self) -> bool;
 }
 
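Because `Metric` now lives in the `database` crate, these helpers can no longer be inherent methods on it inside `site`; the patch turns them into an extension trait implemented for the foreign type — the usual pattern for adding local behaviour to a type you don't own. Shape of the pattern (placeholder body; the real implementations follow below unchanged):

```rust
use database::metric::Metric;

// Extension trait: site-local behaviour for a type owned by `database`.
trait MetricExt {
    fn is_typically_noisy(&self) -> bool;
}

impl MetricExt for Metric {
    fn is_typically_noisy(&self) -> bool {
        // Illustrative placeholder only -- the actual noise heuristics live
        // in site/src/comparison.rs.
        matches!(*self, Metric::CacheMisses | Metric::BranchMisses)
    }
}
```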
-impl FromStr for Metric {
-    type Err = String;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        Metric::deserialize(s.into_deserializer())
-            .map_err(|e: serde::de::value::Error| format!("Unknown metric `{s}`: {e:?}"))
-    }
-}
-
-impl Metric {
-    pub fn as_str(&self) -> &str {
-        match self {
-            Metric::ContextSwitches => "context-switches",
-            Metric::CpuClock => "cpu-clock",
-            Metric::CpuClockUser => "cpu-clock:u",
-            Metric::Cycles => "cycles",
-            Metric::CyclesUser => "cycles:u",
-            Metric::Faults => "faults",
-            Metric::FaultsUser => "faults:u",
-            Metric::InstructionsUser => "instructions:u",
-            Metric::MaxRSS => "max-rss",
-            Metric::TaskClock => "task-clock",
-            Metric::TaskClockUser => "task-clock:u",
-            Metric::WallTime => "wall-time",
-            Metric::BranchMisses => "branch-misses",
-            Metric::CacheMisses => "cache-misses",
-            Metric::CodegenUnitSize => "size:codegen_unit_size_estimate",
-            Metric::CodegenUnitLlvmIrCount => "size:cgu_instructions",
-            Metric::DepGraphSize => "size:dep_graph",
-            Metric::LinkedArtifactSize => "size:linked_artifact",
-            Metric::ObjectFileSize => "size:object_file",
-            Metric::QueryCacheSize => "size:query_cache",
-            Metric::WorkProductIndexSize => "size:work_product_index",
-            Metric::CrateMetadataSize => "size:crate_metadata",
-            Metric::DwoFileSize => "size:dwo_file",
-            Metric::AssemblyFileSize => "size:assembly_file",
-            Metric::LlvmBitcodeSize => "size:llvm_bitcode",
-            Metric::LlvmIrSize => "size:llvm_ir",
-            Metric::DocByteSize => "size:doc_bytes",
-            Metric::DocFilesCount => "size:doc_files_count",
-        }
-    }
-
+impl MetricExt for Metric {
     /// Determines the magnitude of a percent relative change for a given metric.
     ///
     /// Takes into account how noisy the stat is. For example, instruction
diff --git a/site/src/db.rs b/site/src/db.rs
deleted file mode 100644
index 7721bed85..000000000
--- a/site/src/db.rs
+++ /dev/null
@@ -1,61 +0,0 @@
-use std::fmt;
-
-pub use crate::average::average;
-pub use database::*;
-
-pub trait Point {
-    type Key: fmt::Debug + PartialEq + Clone;
-
-    fn key(&self) -> &Self::Key;
-    fn set_key(&mut self, key: Self::Key);
-    fn value(&self) -> Option<f64>;
-    fn set_value(&mut self, value: f64);
-    fn interpolated(&self) -> bool;
-    fn set_interpolated(&mut self);
-}
-
-impl<T: fmt::Debug + PartialEq + Clone> Point for (T, Option<f64>) {
-    type Key = T;
-
-    fn key(&self) -> &T {
-        &self.0
-    }
-    fn set_key(&mut self, key: T) {
-        self.0 = key;
-    }
-    fn value(&self) -> Option<f64> {
-        self.1
-    }
-    fn set_value(&mut self, value: f64) {
-        self.1 = Some(value);
-    }
-    fn interpolated(&self) -> bool {
-        false
-    }
-    fn set_interpolated(&mut self) {
-        // no-op
-    }
-}
-
-impl<T: fmt::Debug + PartialEq + Clone> Point for (T, f64) {
-    type Key = T;
-
-    fn key(&self) -> &T {
-        &self.0
-    }
-    fn set_key(&mut self, key: T) {
-        self.0 = key;
-    }
-    fn value(&self) -> Option<f64> {
-        Some(self.1)
-    }
-    fn set_value(&mut self, value: f64) {
-        self.1 = value;
-    }
-    fn interpolated(&self) -> bool {
-        false
-    }
-    fn set_interpolated(&mut self) {
-        // no-op
-    }
-}
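With the `site::db` shim gone, callers import `Point` from `database::selector` directly (see the `average.rs` hunk above). A quick check of the tuple impls that moved (hypothetical test):

```rust
use database::selector::Point;

#[test]
fn tuple_points() {
    // `(T, Option<f64>)` is a point with an optional value...
    let mut p = ("2024-01-01", Some(1.25));
    assert_eq!(*p.key(), "2024-01-01");
    p.set_value(2.5);
    assert_eq!(p.value(), Some(2.5));

    // ...while `(T, f64)` always has a value and is never interpolated.
    let q = ("2024-01-02", 3.0);
    assert_eq!(q.value(), Some(3.0));
    assert!(!q.interpolated());
}
```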
diff --git a/site/src/github/comparison_summary.rs b/site/src/github/comparison_summary.rs
index b37798ed6..0fb96653c 100644
--- a/site/src/github/comparison_summary.rs
+++ b/site/src/github/comparison_summary.rs
@@ -1,10 +1,10 @@
 use crate::comparison::{
     deserves_attention_icount, write_summary_table, ArtifactComparison, ArtifactComparisonSummary,
-    Direction, Metric,
+    Direction,
 };
 use crate::load::SiteCtxt;
-use database::{ArtifactId, QueuedCommit};
+use database::{metric::Metric, ArtifactId, QueuedCommit};
 
 use crate::github::{COMMENT_MARK_ROLLUP, COMMENT_MARK_TEMPORARY, RUST_REPO_GITHUB_API_URL};
 use humansize::BINARY;
diff --git a/site/src/lib.rs b/site/src/lib.rs
index f8d205f22..05bb1e443 100644
--- a/site/src/lib.rs
+++ b/site/src/lib.rs
@@ -2,7 +2,6 @@
 extern crate itertools;
 
 pub mod api;
-pub mod db;
 pub mod github;
 pub mod load;
 pub mod server;
@@ -10,7 +9,6 @@ pub mod server;
 mod average;
 mod benchmark_metadata;
 mod comparison;
-mod interpolate;
 mod request_handlers;
 mod resources;
 mod selector;
diff --git a/site/src/load.rs b/site/src/load.rs
index 17d28f468..fe892b69e 100644
--- a/site/src/load.rs
+++ b/site/src/load.rs
@@ -12,7 +12,6 @@
 use parking_lot::Mutex;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
 
-use crate::db;
 use crate::self_profile::SelfProfileCache;
 use collector::compile::benchmark::category::Category;
 use collector::{Bound, MasterCommit};
@@ -128,7 +127,7 @@ pub struct SiteCtxt {
     /// Cached site landing page
     pub landing_page: ArcSwap<Option<Arc<Vec<u8>>>>,
     /// Index of various common queries
-    pub index: ArcSwap<crate::db::Index>,
+    pub index: ArcSwap<database::Index>,
     /// Cached master-branch Rust commits
     pub master_commits: Arc<ArcSwap<Vec<MasterCommit>>>, // outer Arc enables mutation in background task
     /// Cache for self profile data
@@ -138,12 +137,12 @@ pub struct SiteCtxt {
 }
 
 impl SiteCtxt {
-    pub fn summary_scenarios(&self) -> Vec<crate::db::Scenario> {
+    pub fn summary_scenarios(&self) -> Vec<database::Scenario> {
         vec![
-            crate::db::Scenario::Empty,
-            crate::db::Scenario::IncrementalEmpty,
-            crate::db::Scenario::IncrementalFresh,
-            crate::db::Scenario::IncrementalPatch("println".into()),
+            database::Scenario::Empty,
+            database::Scenario::IncrementalEmpty,
+            database::Scenario::IncrementalFresh,
+            database::Scenario::IncrementalPatch("println".into()),
         ]
     }
 
@@ -160,7 +159,7 @@ impl SiteCtxt {
         let pool = Pool::open(db_url);
         let mut conn = pool.connection().await;
 
-        let index = db::Index::load(&mut *conn).await;
+        let index = database::Index::load(&mut *conn).await;
 
         let config = if let Ok(s) = fs::read_to_string("site-config.toml") {
             toml::from_str(&s)?
diff --git a/site/src/request_handlers/bootstrap.rs b/site/src/request_handlers/bootstrap.rs
index 4e1ecee39..a1b3faaa9 100644
--- a/site/src/request_handlers/bootstrap.rs
+++ b/site/src/request_handlers/bootstrap.rs
@@ -1,8 +1,8 @@
 use futures::stream::{FuturesOrdered, StreamExt};
 
 use crate::api::{bootstrap, ServerResult};
-use crate::db::ArtifactId;
 use crate::load::SiteCtxt;
+use database::ArtifactId;
 
 use std::time::Duration;
 
diff --git a/site/src/request_handlers/dashboard.rs b/site/src/request_handlers/dashboard.rs
index 8db6eecdd..e70f311f7 100644
--- a/site/src/request_handlers/dashboard.rs
+++ b/site/src/request_handlers/dashboard.rs
@@ -4,10 +4,9 @@
 use lazy_static::lazy_static;
 
 use crate::api::{dashboard, ServerResult};
 use crate::benchmark_metadata::get_stable_benchmark_names;
-use crate::comparison::Metric;
-use crate::db::{self, ArtifactId, Profile, Scenario};
 use crate::load::SiteCtxt;
-use crate::selector;
+use database::selector;
+use database::{self, metric::Metric, ArtifactId, Profile, Scenario};
 
 pub async fn handle_dashboard(ctxt: Arc<SiteCtxt>) -> ServerResult<dashboard::Response> {
     let index = ctxt.index.load();
@@ -105,7 +104,7 @@ pub async fn handle_dashboard(ctxt: Arc<SiteCtxt>) -> ServerResult<dashboard::Response>
diff --git a/site/src/selector.rs b/site/src/selector.rs
index 130ac3a4a..56b3a42e0 100644
--- a/site/src/selector.rs
+++ b/site/src/selector.rs
@@ -1,36 +1,10 @@
-//! Selector API for returning subset of series which will be rendered in some
-//! format.
-//!
-//! We have the following expected paths:
-//!
-//! * :benchmark/:profile/:scenario/:metric => [cid => u64]
-//! * :crate/:profile/:scenario/:self_profile_query/:stat (SelfProfileTime, SelfProfileCacheHits, ...)
-//!     :stat = time => Duration,
-//!     :stat = cache hits => u32,
-//!     :stat = invocation count => u32,
-//!     :stat = blocked time => Duration,
-//!     :stat = incremental load time => Duration,
-//!
-//! Note that the returned series always have a "simple" type of a small set --
-//! things like arrays, integers. We aggregate into higher level types above the
-//! primitive series readers.
-//!
-//! We specify a single struct per path style above.
-//!
-//! `Option<T>` in the path either specifies a specific T to filter by, or
-//! requests that all are provided. Note that this is a cartesian product if
-//! there are multiple `None`s.
-
-use crate::db::{ArtifactId, Profile, Scenario};
-use crate::interpolate::Interpolate;
 use crate::load::SiteCtxt;
 
 use collector::Bound;
-use database::{Benchmark, CodegenBackend, Commit, Connection, Index, Lookup};
+use database::selector::StatisticSeries;
+use database::selector::{BenchmarkQuery, SeriesResponse};
+use database::ArtifactId;
+use database::{Commit, Index};
 
-use crate::comparison::Metric;
-use async_trait::async_trait;
-use std::fmt::Debug;
-use std::hash::Hash;
 use std::ops::RangeInclusive;
 use std::sync::Arc;
 use std::time::Instant;
@@ -86,335 +60,6 @@ pub fn range_subset(data: Vec<D>, range: RangeInclusive<usize>) -> Vec<D> {
 
-struct ArtifactIdIter {
-    ids: Arc<Vec<ArtifactId>>,
-    idx: usize,
-}
-
-impl ArtifactIdIter {
-    fn new(artifact_ids: Arc<Vec<ArtifactId>>) -> ArtifactIdIter {
-        ArtifactIdIter {
-            ids: artifact_ids,
-            idx: 0,
-        }
-    }
-}
-
-impl Iterator for ArtifactIdIter {
-    type Item = ArtifactId;
-    fn next(&mut self) -> Option<Self::Item> {
-        let r = self.ids.get(self.idx)?;
-        self.idx += 1;
-        Some(r.clone())
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.ids.len(), Some(self.ids.len()))
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum Selector<T> {
-    All,
-    Subset(Vec<T>),
-    One(T),
-}
-
-impl<T> Selector<T> {
-    fn map<U>(self, mut f: impl FnMut(T) -> U) -> Selector<U> {
-        match self {
-            Selector::All => Selector::All,
-            Selector::Subset(subset) => Selector::Subset(subset.into_iter().map(f).collect()),
-            Selector::One(o) => Selector::One(f(o)),
-        }
-    }
-    pub fn try_map<U, E>(self, mut f: impl FnMut(T) -> Result<U, E>) -> Result<Selector<U>, E> {
-        Ok(match self {
-            Selector::All => Selector::All,
-            Selector::Subset(subset) => {
-                Selector::Subset(subset.into_iter().map(f).collect::<Result<_, _>>()?)
-            }
-            Selector::One(o) => Selector::One(f(o)?),
-        })
-    }
-
-    fn matches<U>(&self, other: U) -> bool
-    where
-        U: PartialEq<T>,
-    {
-        match self {
-            Selector::One(c) => other == *c,
-            Selector::Subset(subset) => subset.iter().any(|c| other == *c),
-            Selector::All => true,
-        }
-    }
-}
-
-/// Represents the parameters of a single benchmark execution that collects a set of statistics.
-pub trait TestCase: Debug + Clone + Hash + PartialEq + Eq + PartialOrd + Ord {}
-
-#[derive(Debug)]
-pub struct SeriesResponse<Case, T> {
-    pub test_case: Case,
-    pub series: T,
-}
-
-impl<Case, T> SeriesResponse<Case, T> {
-    pub fn map<U>(self, m: impl FnOnce(T) -> U) -> SeriesResponse<Case, U> {
-        let SeriesResponse {
-            test_case: key,
-            series,
-        } = self;
-        SeriesResponse {
-            test_case: key,
-            series: m(series),
-        }
-    }
-
-    pub fn interpolate(self) -> SeriesResponse<Case, Interpolate<T>>
-    where
-        T: Iterator,
-        T::Item: crate::db::Point,
-    {
-        self.map(|s| Interpolate::new(s))
-    }
-}
-
-#[async_trait]
-pub trait BenchmarkQuery: Debug + Clone {
-    type TestCase: TestCase;
-
-    async fn execute(
-        &self,
-        connection: &mut dyn Connection,
-        index: &Index,
-        artifact_ids: Arc<Vec<ArtifactId>>,
-    ) -> Result<Vec<SeriesResponse<Self::TestCase, StatisticSeries>>, String>;
-}
-
-// Compile benchmarks querying
-#[derive(Clone, Hash, Eq, PartialEq, Debug)]
-pub struct CompileBenchmarkQuery {
-    benchmark: Selector<Benchmark>,
-    scenario: Selector<Scenario>,
-    profile: Selector<Profile>,
-    backend: Selector<CodegenBackend>,
-    metric: Selector<database::Metric>,
-}
-
-impl CompileBenchmarkQuery {
-    pub fn benchmark(mut self, selector: Selector<Benchmark>) -> Self {
-        self.benchmark = selector;
-        self
-    }
-
-    pub fn profile(mut self, selector: Selector<Profile>) -> Self {
-        self.profile = selector;
-        self
-    }
-
-    pub fn scenario(mut self, selector: Selector<Scenario>) -> Self {
-        self.scenario = selector;
-        self
-    }
-
-    pub fn metric(mut self, selector: Selector<Metric>) -> Self {
-        self.metric = selector.map(|v| v.as_str().into());
-        self
-    }
-
-    pub fn all_for_metric(metric: Metric) -> Self {
-        Self {
-            benchmark: Selector::All,
-            profile: Selector::All,
-            scenario: Selector::All,
-            backend: Selector::All,
-            metric: Selector::One(metric.as_str().into()),
-        }
-    }
-}
-
-impl Default for CompileBenchmarkQuery {
-    fn default() -> Self {
-        Self {
-            benchmark: Selector::All,
-            scenario: Selector::All,
-            profile: Selector::All,
-            backend: Selector::All,
-            metric: Selector::All,
-        }
-    }
-}
-
-#[async_trait]
-impl BenchmarkQuery for CompileBenchmarkQuery {
-    type TestCase = CompileTestCase;
-
-    async fn execute(
-        &self,
-        conn: &mut dyn Connection,
-        index: &Index,
-        artifact_ids: Arc<Vec<ArtifactId>>,
-    ) -> Result<Vec<SeriesResponse<Self::TestCase, StatisticSeries>>, String> {
-        let mut statistic_descriptions: Vec<_> = index
-            .compile_statistic_descriptions()
-            .filter(|(&(b, p, s, backend, m), _)| {
-                self.benchmark.matches(b)
-                    && self.profile.matches(p)
-                    && self.scenario.matches(s)
-                    && self.backend.matches(backend)
-                    && self.metric.matches(m)
-            })
-            .map(|(&(benchmark, profile, scenario, backend, metric), sid)| {
-                (
-                    CompileTestCase {
-                        benchmark,
-                        profile,
-                        scenario,
-                        backend,
-                    },
-                    metric,
-                    sid,
-                )
-            })
-            .collect();
-
-        statistic_descriptions.sort_unstable();
-
-        let sids: Vec<_> = statistic_descriptions
-            .iter()
-            .map(|(_, _, sid)| *sid)
-            .collect();
-
-        let aids = artifact_ids
-            .iter()
-            .map(|aid| aid.lookup(index))
-            .collect::<Vec<_>>();
-
-        Ok(conn
-            .get_pstats(&sids, &aids)
-            .await
-            .into_iter()
-            .zip(statistic_descriptions)
-            .filter(|(points, _)| points.iter().any(|value| value.is_some()))
-            .map(|(points, (test_case, metric, _))| {
-                SeriesResponse {
-                    series: StatisticSeries {
-                        artifact_ids: ArtifactIdIter::new(artifact_ids.clone()),
-                        points: if *metric == *"cpu-clock" || *metric == *"task-clock" {
-                            // Convert to seconds -- perf reports these measurements in
-                            // milliseconds
-                            points
-                                .into_iter()
-                                .map(|p| p.map(|v| v / 1000.0))
-                                .collect::<Vec<_>>()
-                                .into_iter()
-                        } else {
-                            points.into_iter()
-                        },
-                    },
-                    test_case,
-                }
-            })
-            .collect::<Vec<_>>())
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct CompileTestCase {
-    pub benchmark: Benchmark,
-    pub profile: Profile,
-    pub scenario: Scenario,
-    pub backend: CodegenBackend,
-}
-
-impl TestCase for CompileTestCase {}
-
-// Runtime benchmarks querying
-#[derive(Clone, Hash, Eq, PartialEq, Debug)]
-pub struct RuntimeBenchmarkQuery {
-    benchmark: Selector<Benchmark>,
-    metric: Selector<database::Metric>,
-}
-
-impl RuntimeBenchmarkQuery {
-    pub fn benchmark(mut self, selector: Selector<Benchmark>) -> Self {
-        self.benchmark = selector;
-        self
-    }
-
-    pub fn metric(mut self, selector: Selector<Metric>) -> Self {
-        self.metric = selector.map(|v| v.as_str().into());
-        self
-    }
-
-    pub fn all_for_metric(metric: Metric) -> Self {
-        Self {
-            benchmark: Selector::All,
-            metric: Selector::One(metric.as_str().into()),
-        }
-    }
-}
-
-impl Default for RuntimeBenchmarkQuery {
-    fn default() -> Self {
-        Self {
-            benchmark: Selector::All,
-            metric: Selector::All,
-        }
-    }
-}
-
-#[async_trait]
-impl BenchmarkQuery for RuntimeBenchmarkQuery {
-    type TestCase = RuntimeTestCase;
-
-    async fn execute(
-        &self,
-        conn: &mut dyn Connection,
-        index: &Index,
-        artifact_ids: Arc<Vec<ArtifactId>>,
-    ) -> Result<Vec<SeriesResponse<Self::TestCase, StatisticSeries>>, String> {
-        let mut statistic_descriptions: Vec<_> = index
-            .runtime_statistic_descriptions()
-            .filter(|(&(b, m), _)| self.benchmark.matches(b) && self.metric.matches(m))
-            .map(|(&(benchmark, _), sid)| (RuntimeTestCase { benchmark }, sid))
-            .collect();
-
-        statistic_descriptions.sort_unstable();
-
-        let sids: Vec<_> = statistic_descriptions.iter().map(|(_, sid)| *sid).collect();
-
-        let aids = artifact_ids
-            .iter()
-            .map(|aid| aid.lookup(index))
-            .collect::<Vec<_>>();
-
-        Ok(conn
-            .get_runtime_pstats(&sids, &aids)
-            .await
-            .into_iter()
-            .zip(statistic_descriptions)
-            .filter(|(points, _)| points.iter().any(|value| value.is_some()))
-            .map(|(points, (test_case, _))| SeriesResponse {
-                series: StatisticSeries {
-                    artifact_ids: ArtifactIdIter::new(artifact_ids.clone()),
-                    points: points.into_iter(),
-                },
-                test_case,
-            })
-            .collect::<Vec<_>>())
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct RuntimeTestCase {
-    pub benchmark: Benchmark,
-}
-
-impl TestCase for RuntimeTestCase {}
-
 impl SiteCtxt {
     pub async fn statistic_series(
         &self,
@@ -425,13 +70,15 @@ impl SiteCtxt {
     }
 }
 
-#[derive(Debug)]
-pub struct StatisticSeries {
-    artifact_ids: ArtifactIdIter,
-    points: std::vec::IntoIter<Option<f64>>,
+trait StatisticSeriesExt {
+    async fn execute_query<Q: BenchmarkQuery>(
+        artifact_ids: Arc<Vec<ArtifactId>>,
+        ctxt: &SiteCtxt,
+        query: Q,
+    ) -> Result<Vec<SeriesResponse<Q::TestCase, StatisticSeries>>, String>;
 }
 
-impl StatisticSeries {
+impl StatisticSeriesExt for StatisticSeries {
     async fn execute_query<Q: BenchmarkQuery>(
         artifact_ids: Arc<Vec<ArtifactId>>,
         ctxt: &SiteCtxt,
         query: Q,
     ) -> Result<Vec<SeriesResponse<Q::TestCase, StatisticSeries>>, String> {
@@ -453,14 +100,3 @@ impl StatisticSeries {
         Ok(result)
     }
 }
-
-impl Iterator for StatisticSeries {
-    type Item = (ArtifactId, Option<f64>);
-    fn next(&mut self) -> Option<Self::Item> {
-        Some((self.artifact_ids.next()?, self.points.next().unwrap()))
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.artifact_ids.size_hint()
-    }
-}
diff --git a/site/src/server.rs b/site/src/server.rs
index 6bb155bbd..602fe6640 100644
--- a/site/src/server.rs
+++ b/site/src/server.rs
@@ -24,11 +24,12 @@ pub use crate::api::{
     self, bootstrap, comparison, dashboard, github, graphs, info, self_profile, self_profile_raw,
     status, triage, ServerResult,
 };
-use crate::db::{self, ArtifactId};
 use crate::load::{Config, SiteCtxt};
 use crate::request_handlers;
 use crate::resources::{Payload, ResourceResolver};
+use database::{self, ArtifactId};
+
 pub type Request = http::Request<hyper::Body>;
 pub type Response = http::Response<hyper::Body>;
 
@@ -295,7 +296,7 @@ impl Server {
         let ctxt: Arc<SiteCtxt> = self.ctxt.read().as_ref().unwrap().clone();
         let _updating = self.updating.release_on_drop(channel);
         let mut conn = ctxt.conn().await;
-        let index = db::Index::load(&mut *conn).await;
+        let index = database::Index::load(&mut *conn).await;
         eprintln!("index has {} commits", index.commits().len());
         ctxt.index.store(Arc::new(index));