diff --git a/Cargo.lock b/Cargo.lock
index e557c9b60..9ec47173c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "addr2line"
@@ -391,6 +391,7 @@ dependencies = [
  "env_logger",
  "flate2",
  "futures",
+ "hashbrown",
  "humansize",
  "inquire",
  "jobserver",
diff --git a/collector/Cargo.toml b/collector/Cargo.toml
index 75134a8a2..6fe0e5635 100644
--- a/collector/Cargo.toml
+++ b/collector/Cargo.toml
@@ -11,6 +11,7 @@ anyhow = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["derive"] }
 env_logger = { workspace = true }
+hashbrown = { workspace = true }
 log = { workspace = true }
 reqwest = { workspace = true, features = ["blocking", "json"] }
 serde = { workspace = true, features = ["derive"] }
diff --git a/collector/src/bin/collector.rs b/collector/src/bin/collector.rs
index 3d53ce885..fffa3ed47 100644
--- a/collector/src/bin/collector.rs
+++ b/collector/src/bin/collector.rs
@@ -225,6 +225,8 @@ fn profile_compile(
             toolchain,
             Some(1),
             targets,
+            // We always want to profile everything
+            &hashbrown::HashSet::new(),
         ));
         eprintln!("Finished benchmark {benchmark_id}");
@@ -1804,11 +1806,8 @@ async fn bench_compile(
     print_intro: &dyn Fn(),
     measure: F,
 ) {
-    let is_fresh = collector.start_compile_step(conn, benchmark_name).await;
-    if !is_fresh {
-        eprintln!("skipping {} -- already benchmarked", benchmark_name);
-        return;
-    }
+    collector.start_compile_step(conn, benchmark_name).await;
+
     let mut tx = conn.transaction().await;
     let (supports_stable, category) = category.db_representation();
     tx.conn()
@@ -1819,7 +1818,7 @@ async fn bench_compile(
         tx.conn(),
         benchmark_name,
         &shared.artifact_id,
-        collector.artifact_row_id,
+        collector,
         config.is_self_profile,
     );
     let result = measure(&mut processor).await;
@@ -1866,6 +1865,7 @@ async fn bench_compile(
                 &shared.toolchain,
                 config.iterations,
                 &config.targets,
+                &collector.measured_compile_test_cases,
             ))
             .await
             .with_context(|| anyhow::anyhow!("Cannot compile {}", benchmark.name))
diff --git a/collector/src/compile/benchmark/mod.rs b/collector/src/compile/benchmark/mod.rs
index f8115ce10..122af31eb 100644
--- a/collector/src/compile/benchmark/mod.rs
+++ b/collector/src/compile/benchmark/mod.rs
@@ -8,6 +8,7 @@ use crate::compile::execute::{CargoProcess, Processor};
 use crate::toolchain::Toolchain;
 use crate::utils::wait_for_future;
 use anyhow::{bail, Context};
+use database::selector::CompileTestCase;
 use log::debug;
 use std::collections::{HashMap, HashSet};
 use std::fmt::{Display, Formatter};
@@ -243,6 +244,7 @@ impl Benchmark {
         toolchain: &Toolchain,
         iterations: Option<usize>,
         targets: &[Target],
+        already_computed: &hashbrown::HashSet<CompileTestCase>,
     ) -> anyhow::Result<()> {
         if self.config.disabled {
             eprintln!("Skipping {}: disabled", self.name);
@@ -273,19 +275,65 @@ impl Benchmark {
             return Ok(());
         }
 
-        eprintln!("Preparing {}", self.name);
-        let mut target_dirs: Vec<((CodegenBackend, Profile, Target), TempDir)> = vec![];
+        struct BenchmarkDir {
+            dir: TempDir,
+            scenarios: Vec<Scenario>,
+            profile: Profile,
+            backend: CodegenBackend,
+            target: Target,
+        }
+
+        // Materialize the test cases that we want to benchmark
+        // We need to handle scenarios a bit specially, because they share the target directory
+        let mut benchmark_dirs: Vec<BenchmarkDir> = vec![];
+
         for backend in backends {
             for profile in &profiles {
                 for target in targets {
-                    target_dirs.push((
-                        (*backend, *profile, *target),
-                        self.make_temp_dir(&self.path)?,
-                    ));
+                    // Do we have any scenarios left to compute?
+                    let remaining_scenarios = scenarios
+                        .iter()
+                        .filter(|scenario| {
+                            self.should_run_scenario(
+                                scenario,
+                                profile,
+                                backend,
+                                target,
+                                already_computed,
+                            )
+                        })
+                        .copied()
+                        .collect::<Vec<Scenario>>();
+                    if remaining_scenarios.is_empty() {
+                        continue;
+                    }
+
+                    let temp_dir = self.make_temp_dir(&self.path)?;
+                    benchmark_dirs.push(BenchmarkDir {
+                        dir: temp_dir,
+                        scenarios: remaining_scenarios,
+                        profile: *profile,
+                        backend: *backend,
+                        target: *target,
+                    });
                 }
             }
         }
 
+        if benchmark_dirs.is_empty() {
+            eprintln!(
+                "Skipping {}: all test cases were previously computed",
+                self.name
+            );
+            return Ok(());
+        }
+
+        eprintln!(
+            "Preparing {} (test cases: {})",
+            self.name,
+            benchmark_dirs.len()
+        );
+
         // In parallel (but with a limit to the number of CPUs), prepare all
         // profiles. This is done in parallel vs. sequentially because:
         // * We don't record any measurements during this phase, so the
@@ -319,18 +367,18 @@ impl Benchmark {
             .get(),
         )
         .context("jobserver::new")?;
-        let mut threads = Vec::with_capacity(target_dirs.len());
-        for ((backend, profile, target), prep_dir) in &target_dirs {
+        let mut threads = Vec::with_capacity(benchmark_dirs.len());
+        for benchmark_dir in &benchmark_dirs {
             let server = server.clone();
             let thread = s.spawn::<_, anyhow::Result<()>>(move || {
                 wait_for_future(async move {
                     let server = server.clone();
                     self.mk_cargo_process(
                         toolchain,
-                        prep_dir.path(),
-                        *profile,
-                        *backend,
-                        *target,
+                        benchmark_dir.dir.path(),
+                        benchmark_dir.profile,
+                        benchmark_dir.backend,
+                        benchmark_dir.target,
                     )
                     .jobserver(server)
                     .run_rustc(false)
@@ -365,10 +413,11 @@ impl Benchmark {
         let mut timing_dirs: Vec<ManuallyDrop<TempDir>> = vec![];
 
         let benchmark_start = std::time::Instant::now();
-        for ((backend, profile, target), prep_dir) in &target_dirs {
-            let backend = *backend;
-            let profile = *profile;
-            let target = *target;
+        for benchmark_dir in &benchmark_dirs {
+            let backend = benchmark_dir.backend;
+            let profile = benchmark_dir.profile;
+            let target = benchmark_dir.target;
+            let scenarios = &benchmark_dir.scenarios;
             eprintln!(
                 "Running {}: {:?} + {:?} + {:?} + {:?}",
                 self.name, profile, scenarios, backend, target,
@@ -388,7 +437,7 @@ impl Benchmark {
                 }
                 log::debug!("Benchmark iteration {}/{}", i + 1, iterations);
                 // Don't delete the directory on error.
-                let timing_dir = ManuallyDrop::new(self.make_temp_dir(prep_dir.path())?);
+                let timing_dir = ManuallyDrop::new(self.make_temp_dir(benchmark_dir.dir.path())?);
                 let cwd = timing_dir.path();
 
                 // A full non-incremental build.
@@ -458,6 +507,67 @@ impl Benchmark {
 
         Ok(())
     }
+
+    /// Return true if the given `scenario` should be computed.
+    fn should_run_scenario(
+        &self,
+        scenario: &Scenario,
+        profile: &Profile,
+        backend: &CodegenBackend,
+        target: &Target,
+        already_computed: &hashbrown::HashSet<CompileTestCase>,
+    ) -> bool {
+        let benchmark = database::Benchmark::from(self.name.0.as_str());
+        let profile = match profile {
+            Profile::Check => database::Profile::Check,
+            Profile::Debug => database::Profile::Debug,
+            Profile::Doc => database::Profile::Doc,
+            Profile::DocJson => database::Profile::DocJson,
+            Profile::Opt => database::Profile::Opt,
+            Profile::Clippy => database::Profile::Clippy,
+        };
+        let backend = match backend {
+            CodegenBackend::Llvm => database::CodegenBackend::Llvm,
+            CodegenBackend::Cranelift => database::CodegenBackend::Cranelift,
+        };
+        let target = match target {
+            Target::X86_64UnknownLinuxGnu => database::Target::X86_64UnknownLinuxGnu,
+        };
+
+        match scenario {
+            // For these scenarios, we can simply check if they were benchmarked or not
+            Scenario::Full | Scenario::IncrFull | Scenario::IncrUnchanged => {
+                let test_case = CompileTestCase {
+                    benchmark,
+                    profile,
+                    backend,
+                    target,
+                    scenario: match scenario {
+                        Scenario::Full => database::Scenario::Empty,
+                        Scenario::IncrFull => database::Scenario::IncrementalEmpty,
+                        Scenario::IncrUnchanged => database::Scenario::IncrementalFresh,
+                        Scenario::IncrPatched => unreachable!(),
+                    },
+                };
+                !already_computed.contains(&test_case)
+            }
+            // For incr-patched, it is a bit more complicated.
+            // If there is at least a single uncomputed `IncrPatched`, we need to rerun
+            // all of them, because they stack on top of one another.
+            // Note that we don't need to explicitly include `IncrFull` if `IncrPatched`
+            // is selected, as the benchmark code will always run `IncrFull` before `IncrPatched`.
+            Scenario::IncrPatched => self.patches.iter().any(|patch| {
+                let test_case = CompileTestCase {
+                    benchmark,
+                    profile,
+                    scenario: database::Scenario::IncrementalPatch(patch.name),
+                    backend,
+                    target,
+                };
+                !already_computed.contains(&test_case)
+            }),
+        }
+    }
 }
 
 /// Directory containing compile-time benchmarks.
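An illustrative aside, not part of the patch above: the incremental-patched rule in `should_run_scenario` is easy to miss at a glance, so the sketch below models it on plain strings. `incr_patched_needs_rerun`, `patch_names`, and `measured` are hypothetical stand-ins for the real method, `self.patches`, and `already_computed`. Because patched builds stack on top of one another, a single missing patch result forces all `IncrPatched` runs to be repeated.

// Minimal sketch of the "rerun all patches if any is missing" rule.
use std::collections::HashSet;

fn incr_patched_needs_rerun(patch_names: &[&str], measured: &HashSet<String>) -> bool {
    // One uncomputed patch means every IncrPatched scenario must run again.
    patch_names.iter().any(|patch| !measured.contains(*patch))
}

fn main() {
    let measured: HashSet<String> = HashSet::from(["println".to_string()]);
    // "dummy fn" has no stored result, so all patches are rerun.
    assert!(incr_patched_needs_rerun(&["println", "dummy fn"], &measured));
    // Every patch already has a result, so the whole scenario can be skipped.
    assert!(!incr_patched_needs_rerun(&["println"], &measured));
}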
diff --git a/collector/src/compile/execute/bencher.rs b/collector/src/compile/execute/bencher.rs
index 694d78571..ac19a9ebc 100644
--- a/collector/src/compile/execute/bencher.rs
+++ b/collector/src/compile/execute/bencher.rs
@@ -10,6 +10,7 @@ use crate::compile::execute::{
 };
 use crate::toolchain::Toolchain;
 use crate::utils::git::get_rustc_perf_commit;
+use crate::CollectorCtx;
 use anyhow::Context;
 use database::CollectionId;
 use futures::stream::FuturesUnordered;
@@ -42,7 +43,7 @@ pub struct BenchProcessor<'a> {
     benchmark: &'a BenchmarkName,
     conn: &'a mut dyn database::Connection,
     artifact: &'a database::ArtifactId,
-    artifact_row_id: database::ArtifactIdNumber,
+    collector_ctx: &'a CollectorCtx,
     is_first_collection: bool,
     is_self_profile: bool,
     tries: u8,
@@ -54,7 +55,7 @@ impl<'a> BenchProcessor<'a> {
         conn: &'a mut dyn database::Connection,
         benchmark: &'a BenchmarkName,
         artifact: &'a database::ArtifactId,
-        artifact_row_id: database::ArtifactIdNumber,
+        collector_ctx: &'a CollectorCtx,
         is_self_profile: bool,
     ) -> Self {
         // Check we have `perf` or (`xperf.exe` and `tracelog.exe`) available.
@@ -78,7 +79,7 @@ impl<'a> BenchProcessor<'a> {
             conn,
             benchmark,
             artifact,
-            artifact_row_id,
+            collector_ctx,
             is_first_collection: true,
             is_self_profile,
             tries: 0,
@@ -108,7 +109,7 @@ impl<'a> BenchProcessor<'a> {
         for (stat, value) in stats.iter() {
             buf.push(self.conn.record_statistic(
                 collection,
-                self.artifact_row_id,
+                self.collector_ctx.artifact_row_id,
                 self.benchmark.0.as_str(),
                 profile,
                 scenario,
@@ -123,7 +124,13 @@ impl<'a> BenchProcessor<'a> {
     }
 
     pub async fn measure_rustc(&mut self, toolchain: &Toolchain) -> anyhow::Result<()> {
-        rustc::measure(self.conn, toolchain, self.artifact, self.artifact_row_id).await
+        rustc::measure(
+            self.conn,
+            toolchain,
+            self.artifact,
+            self.collector_ctx.artifact_row_id,
+        )
+        .await
     }
 }
 
@@ -252,7 +259,7 @@ impl Processor for BenchProcessor<'_> {
                     .map(|profile| {
                         self.conn.record_raw_self_profile(
                             profile.collection,
-                            self.artifact_row_id,
+                            self.collector_ctx.artifact_row_id,
                             self.benchmark.0.as_str(),
                             profile.profile,
                             profile.scenario,
@@ -270,7 +277,7 @@ impl Processor for BenchProcessor<'_> {
 
                 // FIXME: Record codegen backend in the self profile name
                 let prefix = PathBuf::from("self-profile")
-                    .join(self.artifact_row_id.0.to_string())
+                    .join(self.collector_ctx.artifact_row_id.0.to_string())
                     .join(self.benchmark.0.as_str())
                     .join(profile.profile.to_string())
                     .join(profile.scenario.to_id());
diff --git a/collector/src/lib.rs b/collector/src/lib.rs
index 94f1568da..91210dce6 100644
--- a/collector/src/lib.rs
+++ b/collector/src/lib.rs
@@ -17,7 +17,9 @@ pub mod utils;
 
 use crate::compile::benchmark::{Benchmark, BenchmarkName};
 use crate::runtime::{BenchmarkGroup, BenchmarkSuite};
+use database::selector::CompileTestCase;
 use database::{ArtifactId, ArtifactIdNumber, Connection};
+use hashbrown::HashSet;
 use process::Stdio;
 use std::time::{Duration, Instant};
 
@@ -330,23 +332,30 @@ impl CollectorStepBuilder {
             tx.commit().await.unwrap();
             artifact_row_id
         };
-        CollectorCtx { artifact_row_id }
+        // Find out which test cases were already computed
+        let measured_compile_test_cases = conn
+            .get_compile_test_cases_with_measurements(&artifact_row_id)
+            .await
+            .expect("cannot fetch measured compile test cases from DB");
+
+        CollectorCtx {
+            artifact_row_id,
+            measured_compile_test_cases,
+        }
     }
 }
 
 /// Represents an in-progress run for a given artifact.
 pub struct CollectorCtx {
     pub artifact_row_id: ArtifactIdNumber,
+    /// Which test cases were already computed **before** this collection began?
+    pub measured_compile_test_cases: HashSet<CompileTestCase>,
 }
 
 impl CollectorCtx {
-    pub async fn start_compile_step(
-        &self,
-        conn: &dyn Connection,
-        benchmark_name: &BenchmarkName,
-    ) -> bool {
+    pub async fn start_compile_step(&self, conn: &dyn Connection, benchmark_name: &BenchmarkName) {
         conn.collector_start_step(self.artifact_row_id, &benchmark_name.0)
-            .await
+            .await;
     }
 
     pub async fn end_compile_step(&self, conn: &dyn Connection, benchmark_name: &BenchmarkName) {
diff --git a/database/src/pool.rs b/database/src/pool.rs
index 96d1dfc2a..2c3f1e966 100644
--- a/database/src/pool.rs
+++ b/database/src/pool.rs
@@ -1,10 +1,11 @@
+use crate::selector::CompileTestCase;
 use crate::{
     ArtifactCollection, ArtifactId, ArtifactIdNumber, BenchmarkRequest, CodegenBackend,
     CompileBenchmark, Target,
 };
 use crate::{CollectionId, Index, Profile, QueuedCommit, Scenario, Step};
 use chrono::{DateTime, Utc};
-use hashbrown::HashMap;
+use hashbrown::{HashMap, HashSet};
 use std::sync::{Arc, Mutex};
 use std::time::Duration;
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
@@ -183,6 +184,17 @@ pub trait Connection: Send + Sync {
     /// Add an item to the `benchmark_requests`, if the `benchmark_request`
     /// exists it will be ignored
     async fn insert_benchmark_request(&self, benchmark_request: &BenchmarkRequest);
+
+    /// Returns a set of compile-time benchmark test cases that were already computed for the
+    /// given artifact.
+    /// Note that for efficiency reasons, the function only checks if we have at least a single
+    /// result for a given test case. It does not check whether *all* test results from all test
+    /// iterations were finished.
+    /// Therefore, the result is an over-approximation.
+    async fn get_compile_test_cases_with_measurements(
+        &self,
+        artifact_row_id: &ArtifactIdNumber,
+    ) -> anyhow::Result<HashSet<CompileTestCase>>;
 }
 
 #[async_trait::async_trait]
@@ -302,15 +314,13 @@ impl Pool {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
+    use crate::metric::Metric;
+    use crate::tests::run_postgres_test;
+    use crate::{tests::run_db_test, BenchmarkRequestStatus, Commit, CommitType, Date};
     use chrono::Utc;
     use std::str::FromStr;
 
-    use super::*;
-    use crate::{
-        tests::{run_db_test, run_postgres_test},
-        BenchmarkRequestStatus, Commit, CommitType, Date,
-    };
-
     /// Create a Commit
     fn create_commit(commit_sha: &str, time: chrono::DateTime<Utc>, r#type: CommitType) -> Commit {
         Commit {
@@ -424,4 +434,64 @@ mod tests {
         })
         .await;
     }
+
+    #[tokio::test]
+    async fn get_compile_test_cases_with_data() {
+        run_db_test(|ctx| async {
+            let db = ctx.db_client().connection().await;
+
+            let collection = db.collection_id("test").await;
+            let artifact = db
+                .artifact_id(&ArtifactId::Commit(create_commit(
+                    "abcdef",
+                    Utc::now(),
+                    CommitType::Try,
+                )))
+                .await;
+            db.record_compile_benchmark("benchmark", None, "primary".to_string())
+                .await;
+
+            db.record_statistic(
+                collection,
+                artifact,
+                "benchmark",
+                Profile::Check,
+                Scenario::IncrementalFresh,
+                CodegenBackend::Llvm,
+                Target::X86_64UnknownLinuxGnu,
+                Metric::CacheMisses.as_str(),
+                1.0,
+            )
+            .await;
+
+            assert_eq!(
+                db.get_compile_test_cases_with_measurements(&artifact)
+                    .await
+                    .unwrap(),
+                HashSet::from([CompileTestCase {
+                    benchmark: "benchmark".into(),
+                    profile: Profile::Check,
+                    scenario: Scenario::IncrementalFresh,
+                    backend: CodegenBackend::Llvm,
+                    target: Target::X86_64UnknownLinuxGnu,
+                }])
+            );
+
+            let artifact2 = db
+                .artifact_id(&ArtifactId::Commit(create_commit(
+                    "abcdef2",
+                    Utc::now(),
+                    CommitType::Try,
+                )))
+                .await;
+            assert!(db
+                .get_compile_test_cases_with_measurements(&artifact2)
+                .await
+                .unwrap()
+                .is_empty());
+
+            Ok(ctx)
+        })
+        .await;
+    }
 }
diff --git a/database/src/pool/postgres.rs b/database/src/pool/postgres.rs
index 1da5016c0..5c366a60d 100644
--- a/database/src/pool/postgres.rs
+++ b/database/src/pool/postgres.rs
@@ -1,4 +1,5 @@
 use crate::pool::{Connection, ConnectionManager, ManagedConnection, Transaction};
+use crate::selector::CompileTestCase;
 use crate::{
     ArtifactCollection, ArtifactId, ArtifactIdNumber, Benchmark, BenchmarkRequest, CodegenBackend,
     CollectionId, Commit, CommitType, CompileBenchmark, Date, Index, Profile, QueuedCommit,
@@ -6,7 +7,7 @@ use crate::{
 };
 use anyhow::Context as _;
 use chrono::{DateTime, TimeZone, Utc};
-use hashbrown::HashMap;
+use hashbrown::{HashMap, HashSet};
 use native_tls::{Certificate, TlsConnector};
 use postgres_native_tls::MakeTlsConnector;
 use std::str::FromStr;
@@ -382,6 +383,7 @@ pub struct CachedStatements {
     get_runtime_pstat: Statement,
     record_artifact_size: Statement,
     get_artifact_size: Statement,
+    get_compile_test_cases_with_measurements: Statement,
 }
 
 pub struct PostgresTransaction<'a> {
@@ -563,7 +565,16 @@ impl PostgresConnection {
                 get_artifact_size: conn.prepare("
                     select component, size from artifact_size where aid = $1
-                ").await.unwrap()
+                ").await.unwrap(),
+                get_compile_test_cases_with_measurements: conn.prepare("
+                    SELECT DISTINCT crate, profile, scenario, backend, target
+                    FROM pstat_series
+                    WHERE id IN (
+                        SELECT DISTINCT series
+                        FROM pstat
+                        WHERE aid = $1
+                    )
+                ").await.unwrap(),
             }),
             conn,
         }
@@ -1100,19 +1111,14 @@ where
             == 1
     }
     async fn collector_end_step(&self, aid: ArtifactIdNumber, step: &str) {
-        let did_modify = self
-            .conn()
+        self.conn()
             .execute(
                 "update collector_progress set end_time = statement_timestamp() \
-                where aid = $1 and step = $2 and start_time is not null and end_time is null;",
+                where aid = $1 and step = $2 and start_time is not null;",
                 &[&(aid.0 as i32), &step],
             )
             .await
-            .unwrap()
-            == 1;
-        if !did_modify {
-            log::error!("did not end {} for {:?}", step, aid);
-        }
+            .unwrap();
     }
     async fn collector_remove_step(&self, aid: ArtifactIdNumber, step: &str) {
         self.conn()
@@ -1413,6 +1419,30 @@ where
             .await
             .unwrap();
     }
+
+    async fn get_compile_test_cases_with_measurements(
+        &self,
+        artifact_row_id: &ArtifactIdNumber,
+    ) -> anyhow::Result<HashSet<CompileTestCase>> {
+        let rows = self
+            .conn()
+            .query(
+                &self.statements().get_compile_test_cases_with_measurements,
+                &[&(artifact_row_id.0 as i32)],
+            )
+            .await
+            .context("cannot query compile-time test cases with measurements")?;
+        Ok(rows
+            .into_iter()
+            .map(|row| CompileTestCase {
+                benchmark: Benchmark::from(row.get::<_, &str>(0)),
+                profile: Profile::from_str(row.get::<_, &str>(1)).unwrap(),
+                scenario: row.get::<_, &str>(2).parse().unwrap(),
+                backend: CodegenBackend::from_str(row.get::<_, &str>(3)).unwrap(),
+                target: Target::from_str(row.get::<_, &str>(4)).unwrap(),
+            })
+            .collect())
+    }
 }
 
 fn parse_artifact_id(ty: &str, sha: &str, date: Option<DateTime<Utc>>) -> ArtifactId {
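Another illustrative aside, not part of the patch: as the trait documentation above notes, the new query deliberately over-approximates "already computed". The sketch below runs the same DISTINCT-plus-subquery shape against a heavily simplified, in-memory stand-in for the `pstat`/`pstat_series` tables, using the `rusqlite` crate that the SQLite backend already depends on. One stored measurement is enough for a test case to be reported, regardless of how many iterations were recorded.

// Toy schema and data; column names mirror the query in the patch, nothing else.
use rusqlite::{params, Connection};

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute_batch(
        "CREATE TABLE pstat_series(id INTEGER PRIMARY KEY, crate TEXT, profile TEXT,
                                   scenario TEXT, backend TEXT, target TEXT);
         CREATE TABLE pstat(series INTEGER, aid INTEGER, value REAL);
         INSERT INTO pstat_series VALUES (1, 'helloworld', 'check', 'full', 'llvm',
                                          'x86_64-unknown-linux-gnu');
         -- Two iterations for artifact 42 still collapse into one distinct test case.
         INSERT INTO pstat VALUES (1, 42, 1.0), (1, 42, 2.0);",
    )?;

    let mut stmt = conn.prepare(
        "SELECT DISTINCT crate, profile, scenario, backend, target
         FROM pstat_series
         WHERE id IN (SELECT DISTINCT series FROM pstat WHERE aid = ?)",
    )?;
    let crates = stmt
        .query_map(params![42], |row| row.get::<_, String>(0))?
        .collect::<Result<Vec<String>, _>>()?;
    assert_eq!(crates, vec!["helloworld".to_string()]);
    Ok(())
}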
diff --git a/database/src/pool/sqlite.rs b/database/src/pool/sqlite.rs
index 9a0614b07..3e1e42099 100644
--- a/database/src/pool/sqlite.rs
+++ b/database/src/pool/sqlite.rs
@@ -1,11 +1,12 @@
 use crate::pool::{Connection, ConnectionManager, ManagedConnection, Transaction};
+use crate::selector::CompileTestCase;
 use crate::{
     ArtifactCollection, ArtifactId, Benchmark, BenchmarkRequest, CodegenBackend, CollectionId,
     Commit, CommitType, CompileBenchmark, Date, Profile, Target,
 };
 use crate::{ArtifactIdNumber, Index, QueuedCommit};
 use chrono::{DateTime, TimeZone, Utc};
-use hashbrown::HashMap;
+use hashbrown::{HashMap, HashSet};
 use rusqlite::params;
 use rusqlite::OptionalExtension;
 use std::path::PathBuf;
@@ -1049,18 +1050,13 @@ impl Connection for SqliteConnection {
     }
 
     async fn collector_end_step(&self, aid: ArtifactIdNumber, step: &str) {
-        let did_modify = self
-            .raw_ref()
+        self.raw_ref()
            .execute(
                 "update collector_progress set end = strftime('%s','now') \
-                where aid = ? and step = ? and start is not null and end is null;",
+                where aid = ? and step = ? and start is not null;",
                 params![&aid.0, &step],
             )
-            .unwrap()
-            == 1;
-        if !did_modify {
-            log::error!("did not end {} for {:?}", step, aid);
-        }
+            .unwrap();
     }
 
     async fn collector_remove_step(&self, aid: ArtifactIdNumber, step: &str) {
@@ -1256,6 +1252,33 @@ impl Connection for SqliteConnection {
     async fn insert_benchmark_request(&self, _benchmark_request: &BenchmarkRequest) {
         panic!("Queueing for SQLite has not been implemented, if you are wanting to test the queueing functionality please use postgres. Presuming you have docker installed, at the root of the repo you can run `make start-postgres` to spin up a postgres database.");
     }
+
+    async fn get_compile_test_cases_with_measurements(
+        &self,
+        artifact_row_id: &ArtifactIdNumber,
+    ) -> anyhow::Result<HashSet<CompileTestCase>> {
+        Ok(self
+            .raw_ref()
+            .prepare_cached(
+                "SELECT DISTINCT crate, profile, scenario, backend, target
+                 FROM pstat_series
+                 WHERE id IN (
+                     SELECT DISTINCT series
+                     FROM pstat
+                     WHERE aid = ?
+                 );",
+            )?
+            .query_map(params![artifact_row_id.0], |row| {
+                Ok(CompileTestCase {
+                    benchmark: Benchmark::from(row.get::<_, String>(0)?.as_str()),
+                    profile: row.get::<_, String>(1)?.parse().unwrap(),
+                    scenario: row.get::<_, String>(2)?.parse().unwrap(),
+                    backend: row.get::<_, String>(3)?.parse().unwrap(),
+                    target: row.get::<_, String>(4)?.parse().unwrap(),
+                })
+            })?
+            .collect::<Result<HashSet<_>, _>>()?)
+    }
 }
 
 fn parse_artifact_id(ty: &str, sha: &str, date: Option<i64>) -> ArtifactId {
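A final illustrative aside, not part of the patch: putting the pieces together, the collector now fetches the set of measured test cases once per artifact and filters every candidate combination against it, so a restarted collection only redoes what is missing. The sketch below models that filter with a hypothetical `TestCase` type standing in for `database::selector::CompileTestCase`.

// Rough model of "skip what was already measured before this collection began".
use std::collections::HashSet;

#[derive(Clone, PartialEq, Eq, Hash, Debug)]
struct TestCase {
    benchmark: &'static str,
    profile: &'static str,
    scenario: &'static str,
}

fn remaining<'a>(all: &'a [TestCase], measured: &HashSet<TestCase>) -> Vec<&'a TestCase> {
    all.iter().filter(|tc| !measured.contains(*tc)).collect()
}

fn main() {
    let all = vec![
        TestCase { benchmark: "syn", profile: "check", scenario: "full" },
        TestCase { benchmark: "syn", profile: "check", scenario: "incr-full" },
    ];
    // Pretend a previous, interrupted run already measured the full build.
    let measured: HashSet<TestCase> = HashSet::from([all[0].clone()]);
    // Only the incremental build is left to benchmark.
    assert_eq!(remaining(&all, &measured), vec![&all[1]]);
}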