Commit ac5046c

Auto merge of #38779 - Craig-Macomber:bench, r=alexcrichton
Do not run the outer setup part of benchmarks multiple times, to fix issue 20142.

Fix #20142

This is my first real Rust code, so I expect the quality is quite bad. Please let me know in which ways it is horrible and I'll fix it.

Previously the whole benchmark function was rerun many times; with this change, only the callback passed to `iter` is rerun. This improves performance by saving benchmark startup time: the setup used to be called a minimum of 101 times, and now runs only once.

I wasn't sure exactly what should be done for the case where `iter` is never called, so I left a FIXME for it: currently it does not error, and I added tests to cover that behavior.

I have left the algorithm and statistics unchanged. I don't like that the minimum number of runs is 301 (that's bad for very slow benchmarks), but I consider such changes out of scope for this fix.
2 parents 139d741 + 7cb2040 commit ac5046c
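To illustrate the user-visible effect, here is a minimal sketch of a benchmark with setup outside the `iter` closure (the `expensive_setup` helper and the workload are made up for illustration; `#[bench]` requires a nightly compiler):

```rust
#![feature(test)]
extern crate test;

use test::Bencher;

// Hypothetical stand-in for any costly per-benchmark preparation.
fn expensive_setup() -> Vec<u64> {
    (0..10_000).collect()
}

#[bench]
fn bench_sum(b: &mut Bencher) {
    // Setup outside `iter`: previously rerun on every measurement pass
    // (at least 101 times); after this change it runs once.
    let data = expensive_setup();

    // Only this closure is timed and repeated.
    b.iter(|| data.iter().sum::<u64>());
}
```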

2 files changed: +148 -97 lines

src/libtest/lib.rs (+144 -96)
@@ -185,13 +185,19 @@ impl fmt::Debug for TestFn {
 /// This is fed into functions marked with `#[bench]` to allow for
 /// set-up & tear-down before running a piece of code repeatedly via a
 /// call to `iter`.
-#[derive(Copy, Clone)]
+#[derive(Clone)]
 pub struct Bencher {
-    iterations: u64,
-    dur: Duration,
+    mode: BenchMode,
+    summary: Option<stats::Summary>,
     pub bytes: u64,
 }
 
+#[derive(Clone, PartialEq, Eq)]
+pub enum BenchMode {
+    Auto,
+    Single,
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum ShouldPanic {
     No,
@@ -1444,138 +1450,148 @@ impl Bencher {
     pub fn iter<T, F>(&mut self, mut inner: F)
         where F: FnMut() -> T
     {
-        let start = Instant::now();
-        let k = self.iterations;
-        for _ in 0..k {
-            black_box(inner());
+        if self.mode == BenchMode::Single {
+            ns_iter_inner(&mut inner, 1);
+            return;
         }
-        self.dur = start.elapsed();
-    }
 
-    pub fn ns_elapsed(&mut self) -> u64 {
-        self.dur.as_secs() * 1_000_000_000 + (self.dur.subsec_nanos() as u64)
+        self.summary = Some(iter(&mut inner));
     }
 
-    pub fn ns_per_iter(&mut self) -> u64 {
-        if self.iterations == 0 {
-            0
-        } else {
-            self.ns_elapsed() / cmp::max(self.iterations, 1)
-        }
-    }
-
-    pub fn bench_n<F>(&mut self, n: u64, f: F)
-        where F: FnOnce(&mut Bencher)
+    pub fn bench<F>(&mut self, mut f: F) -> Option<stats::Summary>
+        where F: FnMut(&mut Bencher)
     {
-        self.iterations = n;
         f(self);
+        return self.summary;
     }
+}
 
-    // This is a more statistics-driven benchmark algorithm
-    pub fn auto_bench<F>(&mut self, mut f: F) -> stats::Summary
-        where F: FnMut(&mut Bencher)
-    {
-        // Initial bench run to get ballpark figure.
-        let mut n = 1;
-        self.bench_n(n, |x| f(x));
-
-        // Try to estimate iter count for 1ms falling back to 1m
-        // iterations if first run took < 1ns.
-        if self.ns_per_iter() == 0 {
-            n = 1_000_000;
-        } else {
-            n = 1_000_000 / cmp::max(self.ns_per_iter(), 1);
-        }
-        // if the first run took more than 1ms we don't want to just
-        // be left doing 0 iterations on every loop. The unfortunate
-        // side effect of not being able to do as many runs is
-        // automatically handled by the statistical analysis below
-        // (i.e. larger error bars).
-        if n == 0 {
-            n = 1;
+fn ns_from_dur(dur: Duration) -> u64 {
+    dur.as_secs() * 1_000_000_000 + (dur.subsec_nanos() as u64)
+}
+
+fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
+    where F: FnMut() -> T
+{
+    let start = Instant::now();
+    for _ in 0..k {
+        black_box(inner());
+    }
+    return ns_from_dur(start.elapsed());
+}
+
+
+pub fn iter<T, F>(inner: &mut F) -> stats::Summary
+    where F: FnMut() -> T
+{
+    // Initial bench run to get ballpark figure.
+    let ns_single = ns_iter_inner(inner, 1);
+
+    // Try to estimate iter count for 1ms falling back to 1m
+    // iterations if first run took < 1ns.
+    let ns_target_total = 1_000_000; // 1ms
+    let mut n = ns_target_total / cmp::max(1, ns_single);
+
+    // if the first run took more than 1ms we don't want to just
+    // be left doing 0 iterations on every loop. The unfortunate
+    // side effect of not being able to do as many runs is
+    // automatically handled by the statistical analysis below
+    // (i.e. larger error bars).
+    n = cmp::max(1, n);
+
+    let mut total_run = Duration::new(0, 0);
+    let samples: &mut [f64] = &mut [0.0_f64; 50];
+    loop {
+        let loop_start = Instant::now();
+
+        for p in &mut *samples {
+            *p = ns_iter_inner(inner, n) as f64 / n as f64;
         }
 
-        let mut total_run = Duration::new(0, 0);
-        let samples: &mut [f64] = &mut [0.0_f64; 50];
-        loop {
-            let loop_start = Instant::now();
+        stats::winsorize(samples, 5.0);
+        let summ = stats::Summary::new(samples);
 
-            for p in &mut *samples {
-                self.bench_n(n, |x| f(x));
-                *p = self.ns_per_iter() as f64;
-            }
+        for p in &mut *samples {
+            let ns = ns_iter_inner(inner, 5 * n);
+            *p = ns as f64 / (5 * n) as f64;
+        }
 
-            stats::winsorize(samples, 5.0);
-            let summ = stats::Summary::new(samples);
+        stats::winsorize(samples, 5.0);
+        let summ5 = stats::Summary::new(samples);
 
-            for p in &mut *samples {
-                self.bench_n(5 * n, |x| f(x));
-                *p = self.ns_per_iter() as f64;
-            }
+        let loop_run = loop_start.elapsed();
 
-            stats::winsorize(samples, 5.0);
-            let summ5 = stats::Summary::new(samples);
-            let loop_run = loop_start.elapsed();
+        // If we've run for 100ms and seem to have converged to a
+        // stable median.
+        if loop_run > Duration::from_millis(100) && summ.median_abs_dev_pct < 1.0 &&
+           summ.median - summ5.median < summ5.median_abs_dev {
+            return summ5;
+        }
 
-            // If we've run for 100ms and seem to have converged to a
-            // stable median.
-            if loop_run > Duration::from_millis(100) && summ.median_abs_dev_pct < 1.0 &&
-               summ.median - summ5.median < summ5.median_abs_dev {
-                return summ5;
-            }
+        total_run = total_run + loop_run;
+        // Longest we ever run for is 3s.
+        if total_run > Duration::from_secs(3) {
+            return summ5;
+        }
 
-            total_run = total_run + loop_run;
-            // Longest we ever run for is 3s.
-            if total_run > Duration::from_secs(3) {
+        // If we overflow here just return the results so far. We check a
+        // multiplier of 10 because we're about to multiply by 2 and the
+        // next iteration of the loop will also multiply by 5 (to calculate
+        // the summ5 result)
+        n = match n.checked_mul(10) {
+            Some(_) => n * 2,
+            None => {
                 return summ5;
             }
-
-            // If we overflow here just return the results so far. We check a
-            // multiplier of 10 because we're about to multiply by 2 and the
-            // next iteration of the loop will also multiply by 5 (to calculate
-            // the summ5 result)
-            n = match n.checked_mul(10) {
-                Some(_) => n * 2,
-                None => return summ5,
-            };
-        }
+        };
     }
 }
 
 pub mod bench {
     use std::cmp;
-    use std::time::Duration;
-    use super::{Bencher, BenchSamples};
+    use stats;
+    use super::{Bencher, BenchSamples, BenchMode};
 
     pub fn benchmark<F>(f: F) -> BenchSamples
         where F: FnMut(&mut Bencher)
     {
         let mut bs = Bencher {
-            iterations: 0,
-            dur: Duration::new(0, 0),
+            mode: BenchMode::Auto,
+            summary: None,
             bytes: 0,
         };
 
-        let ns_iter_summ = bs.auto_bench(f);
+        return match bs.bench(f) {
+            Some(ns_iter_summ) => {
+                let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
+                let mb_s = bs.bytes * 1000 / ns_iter;
 
-        let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
-        let mb_s = bs.bytes * 1000 / ns_iter;
-
-        BenchSamples {
-            ns_iter_summ: ns_iter_summ,
-            mb_s: mb_s as usize,
-        }
+                BenchSamples {
+                    ns_iter_summ: ns_iter_summ,
+                    mb_s: mb_s as usize,
+                }
+            }
+            None => {
+                // iter not called, so no data.
+                // FIXME: error in this case?
+                let samples: &mut [f64] = &mut [0.0_f64; 1];
+                BenchSamples {
+                    ns_iter_summ: stats::Summary::new(samples),
+                    mb_s: 0,
+                }
+            }
+        };
     }
 
     pub fn run_once<F>(f: F)
-        where F: FnOnce(&mut Bencher)
+        where F: FnMut(&mut Bencher)
     {
         let mut bs = Bencher {
-            iterations: 0,
-            dur: Duration::new(0, 0),
+            mode: BenchMode::Single,
+            summary: None,
             bytes: 0,
         };
-        bs.bench_n(1, f);
+        bs.bench(f);
     }
 }

@@ -1585,6 +1601,8 @@ mod tests {
                TestDescAndFn, TestOpts, run_test, MetricMap, StaticTestName, DynTestName,
                DynTestFn, ShouldPanic};
     use std::sync::mpsc::channel;
+    use bench;
+    use Bencher;
 
     #[test]
     pub fn do_not_run_ignored_tests() {
@@ -1880,4 +1898,34 @@ mod tests {
         m1.insert_metric("in-both-want-upwards-and-improved", 1000.0, -10.0);
         m2.insert_metric("in-both-want-upwards-and-improved", 2000.0, -10.0);
     }
+
+    #[test]
+    pub fn test_bench_once_no_iter() {
+        fn f(_: &mut Bencher) {}
+        bench::run_once(f);
+    }
+
+    #[test]
+    pub fn test_bench_once_iter() {
+        fn f(b: &mut Bencher) {
+            b.iter(|| {
+            })
+        }
+        bench::run_once(f);
+    }
+
+    #[test]
+    pub fn test_bench_no_iter() {
+        fn f(_: &mut Bencher) {}
+        bench::benchmark(f);
+    }
+
+    #[test]
+    pub fn test_bench_iter() {
+        fn f(b: &mut Bencher) {
+            b.iter(|| {
+            })
+        }
+        bench::benchmark(f);
+    }
 }

src/libtest/stats.rs (+4 -1)
@@ -120,7 +120,7 @@ pub trait Stats {
 }
 
 /// Extracted collection of all the summary statistics of a sample set.
-#[derive(Clone, PartialEq)]
+#[derive(Clone, PartialEq, Copy)]
 #[allow(missing_docs)]
 pub struct Summary {
     pub sum: f64,
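A note on why `Summary` gains `Copy` here (my reading; the commit message does not spell it out): `Bencher::bench` takes `&mut self` and returns the stored summary by value, and without `Copy` that return would be a move out of a borrowed struct.

```rust
// From the lib.rs hunk above: returning `self.summary` by value from a
// `&mut self` method compiles because `Option<stats::Summary>: Copy`.
pub fn bench<F>(&mut self, mut f: F) -> Option<stats::Summary>
    where F: FnMut(&mut Bencher)
{
    f(self);
    return self.summary; // a copy, not a move out of `*self`
}
```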
@@ -896,4 +896,7 @@ mod bench {
             v.sum();
         })
     }
+
+    #[bench]
+    pub fn no_iter(_: &mut Bencher) {}
 }
