Skip to content

Replace the 64-bit hash function and add some tests #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ license = "Apache-2.0/MIT"
readme = "README.md"
keywords = ["hash", "fxhash", "rustc"]
repository = "https://github.com/rust-lang-nursery/rustc-hash"
edition = "2018"

[features]
std = []
8 changes: 0 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -8,14 +8,6 @@ default uses SipHash which isn't quite as speedy as we want. In the
compiler we're not really worried about DoS attempts, so we use a fast
non-cryptographic hash.

This is the same as the algorithm used by Firefox -- which is a
homespun one not based on any widely-known algorithm -- though
modified to produce 64-bit hash values instead of 32-bit hash
values. It consistently out-performs an FNV-based hash within rustc
itself -- the collision rate is similar or slightly worse than FNV,
but the speed of the hash function itself is much higher because it
works on up to 8 bytes at a time.

## Usage

```rust
26 changes: 13 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ use core::default::Default;
use core::hash::BuildHasherDefault;
use core::hash::Hasher;
use core::mem::size_of;
#[cfg(target_pointer_width = "32")]
use core::ops::BitXor;
#[cfg(feature = "std")]
use std::collections::{HashMap, HashSet};
@@ -50,22 +51,10 @@ pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DoS attempts, so we use a fast
/// non-cryptographic hash.
///
/// This is the same as the algorithm used by Firefox -- which is a homespun
/// one not based on any widely-known algorithm -- though modified to produce
/// 64-bit hash values instead of 32-bit hash values. It consistently
/// out-performs an FNV-based hash within rustc itself -- the collision rate is
/// similar or slightly worse than FNV, but the speed of the hash function
/// itself is much higher because it works on up to 8 bytes at a time.
pub struct FxHasher {
// Running hash state. `usize` is pointer-sized, so the state width follows the target.
hash: usize,
}

// Multiplier selected on 32-bit targets (the same value the 32-bit `add_to_hash` multiplies by).
#[cfg(target_pointer_width = "32")]
const K: usize = 0x9e3779b9;
// Multiplier selected on 64-bit targets.
#[cfg(target_pointer_width = "64")]
const K: usize = 0x517cc1b727220a95;

impl Default for FxHasher {
#[inline]
fn default() -> FxHasher {
@@ -75,8 +64,19 @@ impl Default for FxHasher {

impl FxHasher {
/// Mixes one word into the state on 32-bit targets: rotate the state left by 5 bits,
/// XOR in `i`, then multiply (wrapping) by `0x9e3779b9`.
#[inline]
#[cfg(target_pointer_width = "32")]
fn add_to_hash(&mut self, i: usize) {
self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(0x9e3779b9);
}

/// Mixes one word into the state on 64-bit targets using the assignments below.
#[inline]
#[cfg(target_pointer_width = "64")]
fn add_to_hash(&mut self, i: usize) {
// NOTE(review): this first assignment relies on `K` and `bitxor`; in this diff view it looks
// like the pre-change statement shown next to its replacement -- confirm against the real file.
self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K);
// Rotate the state right by 31 bits, add `i` (wrapping), then multiply (wrapping) by the
// 64-bit constant.
self.hash = self
.hash
.rotate_right(31)
.wrapping_add(i)
.wrapping_mul(0xcfee444d8b59a89b);
}
}

178 changes: 178 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};

use rustc_hash::FxHasher;

/// Builds every ordered pair of the words in the embedded list, joined by one space.
///
/// Pairs are emitted with the first word varying slowest, so all pairs that share a first
/// word are contiguous. The list holds 1000 entries, hence the 1,000,000-pair sanity check.
///
/// # Panics
///
/// Panics if the number of generated pairs is not exactly 1,000,000.
fn gen_word_pairs() -> Vec<String> {
    // Comma-separated vocabulary; line breaks and padding are removed per entry by `trim`.
    const WORD_LIST: &str = r#"
a, ability, able, about, above, accept, according, account, across, act, action,
activity, actually, add, address, administration, admit, adult, affect, after,
again, against, age, agency, agent, ago, agree, agreement, ahead, air, all,
allow, almost, alone, along, already, also, although, always, American, among,
amount, analysis, and, animal, another, answer, any, anyone, anything, appear,
apply, approach, area, argue, arm, around, arrive, art, article, artist, as,
ask, assume, at, attack, attention, attorney, audience, author, authority,
available, avoid, away, baby, back, bad, bag, ball, bank, bar, base, be, beat,
beautiful, because, become, bed, before, begin, behavior, behind, believe,
benefit, best, better, between, beyond, big, bill, billion, bit, black, blood,
blue, board, body, book, born, both, box, boy, break, bring, brother, budget,
build, building, business, but, buy, by, call, camera, campaign, can, cancer,
candidate, capital, car, card, care, career, carry, case, catch, cause, cell,
center, central, century, certain, certainly, chair, challenge, chance, change,
character, charge, check, child, choice, choose, church, citizen, city, civil,
claim, class, clear, clearly, close, coach, cold, collection, college, color,
come, commercial, common, community, company, compare, computer, concern,
condition, conference, Congress, consider, consumer, contain, continue, control,
cost, could, country, couple, course, court, cover, create, crime, cultural,
culture, cup, current, customer, cut, dark, data, daughter, day, dead, deal,
death, debate, decade, decide, decision, deep, defense, degree, Democrat,
democratic, describe, design, despite, detail, determine, develop, development,
die, difference, different, difficult, dinner, direction, director, discover,
discuss, discussion, disease, do, doctor, dog, door, down, draw, dream, drive,
drop, drug, during, each, early, east, easy, eat, economic, economy, edge,
education, effect, effort, eight, either, election, else, employee, end, energy,
enjoy, enough, enter, entire, environment, environmental, especially, establish,
even, evening, event, ever, every, everybody, everyone, everything, evidence,
exactly, example, executive, exist, expect, experience, expert, explain, eye,
face, fact, factor, fail, fall, family, far, fast, father, fear, federal, feel,
feeling, few, field, fight, figure, fill, film, final, finally, financial, find,
fine, finger, finish, fire, firm, first, fish, five, floor, fly, focus, follow,
food, foot, for, force, foreign, forget, form, former, forward, four, free,
friend, from, front, full, fund, future, game, garden, gas, general, generation,
get, girl, give, glass, go, goal, good, government, great, green, ground, group,
grow, growth, guess, gun, guy, hair, half, hand, hang, happen, happy, hard,
have, he, head, health, hear, heart, heat, heavy, help, her, here, herself,
high, him, himself, his, history, hit, hold, home, hope, hospital, hot, hotel,
hour, house, how, however, huge, human, hundred, husband, I, idea, identify, if,
image, imagine, impact, important, improve, in, include, including, increase,
indeed, indicate, individual, industry, information, inside, instead,
institution, interest, interesting, international, interview, into, investment,
involve, issue, it, item, its, itself, job, join, just, keep, key, kid, kill,
kind, kitchen, know, knowledge, land, language, large, last, late, later, laugh,
law, lawyer, lay, lead, leader, learn, least, leave, left, leg, legal, less,
let, letter, level, lie, life, light, like, likely, line, list, listen, little,
live, local, long, look, lose, loss, lot, love, low, machine, magazine, main,
maintain, major, majority, make, man, manage, management, manager, many, market,
marriage, material, matter, may, maybe, me, mean, measure, media, medical, meet,
meeting, member, memory, mention, message, method, middle, might, military,
million, mind, minute, miss, mission, model, modern, moment, money, month, more,
morning, most, mother, mouth, move, movement, movie, Mr, Mrs, much, music, must,
my, myself, name, nation, national, natural, nature, near, nearly, necessary,
need, network, never, new, news, newspaper, next, nice, night, no, none, nor,
north, not, note, nothing, notice, now, n't, number, occur, of, off, offer,
office, officer, official, often, oh, oil, ok, old, on, once, one, only, onto,
open, operation, opportunity, option, or, order, organization, other, others,
our, out, outside, over, own, owner, page, pain, painting, paper, parent, part,
participant, particular, particularly, partner, party, pass, past, patient,
pattern, pay, peace, people, per, perform, performance, perhaps, period, person,
personal, phone, physical, pick, picture, piece, place, plan, plant, play,
player, PM, point, police, policy, political, politics, poor, popular,
population, position, positive, possible, power, practice, prepare, present,
president, pressure, pretty, prevent, price, private, probably, problem,
process, produce, product, production, professional, professor, program,
project, property, protect, prove, provide, public, pull, purpose, push, put,
quality, question, quickly, quite, race, radio, raise, range, rate, rather,
reach, read, ready, real, reality, realize, really, reason, receive, recent,
recently, recognize, record, red, reduce, reflect, region, relate, relationship,
religious, remain, remember, remove, report, represent, Republican, require,
research, resource, respond, response, responsibility, rest, result, return,
reveal, rich, right, rise, risk, road, rock, role, room, rule, run, safe, same,
save, say, scene, school, science, scientist, score, sea, season, seat, second,
section, security, see, seek, seem, sell, send, senior, sense, series, serious,
serve, service, set, seven, several, sex, sexual, shake, share, she, shoot,
short, shot, should, shoulder, show, side, sign, significant, similar, simple,
simply, since, sing, single, sister, sit, site, situation, six, size, skill,
skin, small, smile, so, social, society, soldier, some, somebody, someone,
something, sometimes, son, song, soon, sort, sound, source, south, southern,
space, speak, special, specific, speech, spend, sport, spring, staff, stage,
stand, standard, star, start, state, statement, station, stay, step, still,
stock, stop, store, story, strategy, street, strong, structure, student, study,
stuff, style, subject, success, successful, such, suddenly, suffer, suggest,
summer, support, sure, surface, system, table, take, talk, task, tax, teach,
teacher, team, technology, television, tell, ten, tend, term, test, than, thank,
that, the, their, them, themselves, then, theory, there, these, they, thing,
think, third, this, those, though, thought, thousand, threat, three, through,
throughout, throw, thus, time, to, today, together, tonight, too, top, total,
tough, toward, town, trade, traditional, training, travel, treat, treatment,
tree, trial, trip, trouble, true, truth, try, turn, TV, two, type, under,
understand, unit, until, up, upon, us, use, usually, value, various, very,
victim, view, violence, visit, voice, vote, wait, walk, wall, want, war, watch,
water, way, we, weapon, wear, week, weight, well, west, western, what, whatever,
when, where, whether, which, while, white, who, whole, whom, whose, why, wide,
wife, will, win, wind, window, wish, with, within, without, woman, wonder, word,
work, worker, world, worry, would, write, writer, wrong, yard, yeah, year, yes,
yet, you, young, your, yourself"#;

    let vocabulary: Vec<&str> = WORD_LIST.split(',').map(str::trim).collect();

    // Cartesian product of the vocabulary with itself, first word outermost.
    let pairs: Vec<String> = vocabulary
        .iter()
        .flat_map(|first| {
            vocabulary
                .iter()
                .map(move |second| [*first, " ", *second].concat())
        })
        .collect();

    assert_eq!(1_000_000, pairs.len());
    pairs
}

/// Hashes every item, sorts the hashes into `bucket_count` buckets by remainder, and asserts
/// that the emptiest and fullest buckets hold exactly `expected_min` and `expected_max` items.
///
/// # Panics
///
/// Panics when the observed `(min, max)` bucket sizes differ from the expected pair; the
/// message includes the integer mean bucket size and the full bucket histogram. Also panics
/// if `bucket_count` is zero.
fn check_for_collisions<H: Hash, B: BuildHasher>(
    build_hasher: &B,
    items: &[H],
    bucket_count: usize,
    expected_min: usize,
    expected_max: usize,
) {
    let mut histogram = vec![0usize; bucket_count];
    items
        .iter()
        // The 64-bit hash is narrowed to `usize` before picking a bucket.
        .map(|item| (hash(item, build_hasher) as usize) % bucket_count)
        .for_each(|slot| histogram[slot] += 1);

    // Only used to make the failure message easier to interpret.
    let mean = items.len() / bucket_count;
    let fullest = histogram.iter().copied().max().unwrap();
    let emptiest = histogram.iter().copied().min().unwrap();

    assert_eq!(
        (expected_min, expected_max),
        (emptiest, fullest),
        "unexpected min/max value, mean: {}, buckets: {:?}",
        mean,
        histogram
    );
}

/// Feeds `b` into a fresh hasher obtained from `build_hasher` and returns the finished
/// 64-bit hash value.
fn hash<H: Hash, B: BuildHasher>(b: &H, build_hasher: &B) -> u64 {
    let mut state: B::Hasher = build_hasher.build_hasher();
    Hash::hash(b, &mut state);
    Hasher::finish(&state)
}

// Pins the min/max bucket occupancy produced by `FxHasher` on 32-bit targets for several
// input families. Each call passes (bucket count, expected min, expected max).
#[test]
#[cfg(target_pointer_width = "32")]
fn test_bucket_distribution() {
    let fx = BuildHasherDefault::<FxHasher>::default();

    // Space-joined word pairs.
    let phrases: Vec<String> = gen_word_pairs();
    check_for_collisions(&fx, &phrases, 32, 30_979, 31_607);

    // Consecutive `i32` values: every bucket is expected to hold exactly 10,000 items.
    let dense_small: Vec<i32> = (0..320_000_i32).collect();
    check_for_collisions(&fx, &dense_small, 32, 10_000, 10_000);

    let dense_large: Vec<i32> = (0..2_560_000_i32).collect();
    check_for_collisions(&fx, &dense_large, 256, 10_000, 10_000);

    // Multiples of 1024 as `i32`: all 320,000 items are expected in a single bucket.
    let strided_small: Vec<i32> = (0..320_000_i32).map(|n| n * 1024).collect();
    check_for_collisions(&fx, &strided_small, 32, 0, 320_000);

    // Multiples of 1024 as `u64`.
    let strided_large: Vec<u64> = (0..2_560_000_u64).map(|n| n * 1024).collect();
    check_for_collisions(&fx, &strided_large, 256, 0, 80_009);
}

// Pins the min/max bucket occupancy produced by `FxHasher` on 64-bit targets for several
// input families. Each call passes (bucket count, expected min, expected max).
#[test]
#[cfg(target_pointer_width = "64")]
fn test_bucket_distribution() {
    let fx = BuildHasherDefault::<FxHasher>::default();

    // Space-joined word pairs.
    let phrases: Vec<String> = gen_word_pairs();
    check_for_collisions(&fx, &phrases, 32, 30_451, 31_990);

    // Consecutive `i32` values: every bucket is expected to hold exactly 10,000 items.
    let dense_small: Vec<i32> = (0..320_000_i32).collect();
    check_for_collisions(&fx, &dense_small, 32, 10_000, 10_000);

    let dense_large: Vec<i32> = (0..2_560_000_i32).collect();
    check_for_collisions(&fx, &dense_large, 256, 10_000, 10_000);

    // Multiples of 1024 as `i32`: all 320,000 items are expected in a single bucket.
    let strided_small: Vec<i32> = (0..320_000_i32).map(|n| n * 1024).collect();
    check_for_collisions(&fx, &strided_small, 32, 0, 320_000);

    // Multiples of 1024 as `u64`: all 2,560,000 items are expected in a single bucket.
    let strided_large: Vec<u64> = (0..2_560_000_u64).map(|n| n * 1024).collect();
    check_for_collisions(&fx, &strided_large, 256, 0, 2_560_000);
}