diff --git a/Cargo.toml b/Cargo.toml index c1fe4a5..abddf0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ license = "Apache-2.0/MIT" readme = "README.md" keywords = ["hash", "fxhash", "rustc"] repository = "https://github.com/rust-lang-nursery/rustc-hash" +edition = "2018" [features] std = [] diff --git a/README.md b/README.md index e33057a..49526c8 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,6 @@ default uses SipHash which isn't quite as speedy as we want. In the compiler we're not really worried about DOS attempts, so we use a fast non-cryptographic hash. -This is the same as the algorithm used by Firefox -- which is a -homespun one not based on any widely-known algorithm -- though -modified to produce 64-bit hash values instead of 32-bit hash -values. It consistently out-performs an FNV-based hash within rustc -itself -- the collision rate is similar or slightly worse than FNV, -but the speed of the hash function itself is much higher because it -works on up to 8 bytes at a time. - ## Usage ```rust diff --git a/src/lib.rs b/src/lib.rs index ee9ad31..64ac546 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,6 +34,7 @@ use core::default::Default; use core::hash::BuildHasherDefault; use core::hash::Hasher; use core::mem::size_of; +#[cfg(target_pointer_width = "32")] use core::ops::BitXor; #[cfg(feature = "std")] use std::collections::{HashMap, HashSet}; @@ -50,22 +51,10 @@ pub type FxHashSet = HashSet>; /// by default uses SipHash which isn't quite as speedy as we want. In the /// compiler we're not really worried about DOS attempts, so we use a fast /// non-cryptographic hash. -/// -/// This is the same as the algorithm used by Firefox -- which is a homespun -/// one not based on any widely-known algorithm -- though modified to produce -/// 64-bit hash values instead of 32-bit hash values. It consistently -/// out-performs an FNV-based hash within rustc itself -- the collision rate is -/// similar or slightly worse than FNV, but the speed of the hash function -/// itself is much higher because it works on up to 8 bytes at a time. pub struct FxHasher { hash: usize, } -#[cfg(target_pointer_width = "32")] -const K: usize = 0x9e3779b9; -#[cfg(target_pointer_width = "64")] -const K: usize = 0x517cc1b727220a95; - impl Default for FxHasher { #[inline] fn default() -> FxHasher { @@ -75,8 +64,19 @@ impl Default for FxHasher { impl FxHasher { #[inline] + #[cfg(target_pointer_width = "32")] + fn add_to_hash(&mut self, i: usize) { + self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(0x9e3779b9); + } + + #[inline] + #[cfg(target_pointer_width = "64")] fn add_to_hash(&mut self, i: usize) { - self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); + self.hash = self + .hash + .rotate_right(31) + .wrapping_add(i) + .wrapping_mul(0xcfee444d8b59a89b); } } diff --git a/tests/test.rs b/tests/test.rs new file mode 100644 index 0000000..3b8a64b --- /dev/null +++ b/tests/test.rs @@ -0,0 +1,178 @@ +use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; + +use rustc_hash::FxHasher; + +fn gen_word_pairs() -> Vec { + let words: Vec<_> = r#" +a, ability, able, about, above, accept, according, account, across, act, action, +activity, actually, add, address, administration, admit, adult, affect, after, +again, against, age, agency, agent, ago, agree, agreement, ahead, air, all, +allow, almost, alone, along, already, also, although, always, American, among, +amount, analysis, and, animal, another, answer, any, anyone, anything, appear, +apply, approach, area, argue, arm, around, arrive, art, article, artist, as, +ask, assume, at, attack, attention, attorney, audience, author, authority, +available, avoid, away, baby, back, bad, bag, ball, bank, bar, base, be, beat, +beautiful, because, become, bed, before, begin, behavior, behind, believe, +benefit, best, better, between, beyond, big, bill, billion, bit, black, blood, +blue, board, body, book, born, both, box, boy, break, bring, brother, budget, +build, building, business, but, buy, by, call, camera, campaign, can, cancer, +candidate, capital, car, card, care, career, carry, case, catch, cause, cell, +center, central, century, certain, certainly, chair, challenge, chance, change, +character, charge, check, child, choice, choose, church, citizen, city, civil, +claim, class, clear, clearly, close, coach, cold, collection, college, color, +come, commercial, common, community, company, compare, computer, concern, +condition, conference, Congress, consider, consumer, contain, continue, control, +cost, could, country, couple, course, court, cover, create, crime, cultural, +culture, cup, current, customer, cut, dark, data, daughter, day, dead, deal, +death, debate, decade, decide, decision, deep, defense, degree, Democrat, +democratic, describe, design, despite, detail, determine, develop, development, +die, difference, different, difficult, dinner, direction, director, discover, +discuss, discussion, disease, do, doctor, dog, door, down, draw, dream, drive, +drop, drug, during, each, early, east, easy, eat, economic, economy, edge, +education, effect, effort, eight, either, election, else, employee, end, energy, +enjoy, enough, enter, entire, environment, environmental, especially, establish, +even, evening, event, ever, every, everybody, everyone, everything, evidence, +exactly, example, executive, exist, expect, experience, expert, explain, eye, +face, fact, factor, fail, fall, family, far, fast, father, fear, federal, feel, +feeling, few, field, fight, figure, fill, film, final, finally, financial, find, +fine, finger, finish, fire, firm, first, fish, five, floor, fly, focus, follow, +food, foot, for, force, foreign, forget, form, former, forward, four, free, +friend, from, front, full, fund, future, game, garden, gas, general, generation, +get, girl, give, glass, go, goal, good, government, great, green, ground, group, +grow, growth, guess, gun, guy, hair, half, hand, hang, happen, happy, hard, +have, he, head, health, hear, heart, heat, heavy, help, her, here, herself, +high, him, himself, his, history, hit, hold, home, hope, hospital, hot, hotel, +hour, house, how, however, huge, human, hundred, husband, I, idea, identify, if, +image, imagine, impact, important, improve, in, include, including, increase, +indeed, indicate, individual, industry, information, inside, instead, +institution, interest, interesting, international, interview, into, investment, +involve, issue, it, item, its, itself, job, join, just, keep, key, kid, kill, +kind, kitchen, know, knowledge, land, language, large, last, late, later, laugh, +law, lawyer, lay, lead, leader, learn, least, leave, left, leg, legal, less, +let, letter, level, lie, life, light, like, likely, line, list, listen, little, +live, local, long, look, lose, loss, lot, love, low, machine, magazine, main, +maintain, major, majority, make, man, manage, management, manager, many, market, +marriage, material, matter, may, maybe, me, mean, measure, media, medical, meet, +meeting, member, memory, mention, message, method, middle, might, military, +million, mind, minute, miss, mission, model, modern, moment, money, month, more, +morning, most, mother, mouth, move, movement, movie, Mr, Mrs, much, music, must, +my, myself, name, nation, national, natural, nature, near, nearly, necessary, +need, network, never, new, news, newspaper, next, nice, night, no, none, nor, +north, not, note, nothing, notice, now, n't, number, occur, of, off, offer, +office, officer, official, often, oh, oil, ok, old, on, once, one, only, onto, +open, operation, opportunity, option, or, order, organization, other, others, +our, out, outside, over, own, owner, page, pain, painting, paper, parent, part, +participant, particular, particularly, partner, party, pass, past, patient, +pattern, pay, peace, people, per, perform, performance, perhaps, period, person, +personal, phone, physical, pick, picture, piece, place, plan, plant, play, +player, PM, point, police, policy, political, politics, poor, popular, +population, position, positive, possible, power, practice, prepare, present, +president, pressure, pretty, prevent, price, private, probably, problem, +process, produce, product, production, professional, professor, program, +project, property, protect, prove, provide, public, pull, purpose, push, put, +quality, question, quickly, quite, race, radio, raise, range, rate, rather, +reach, read, ready, real, reality, realize, really, reason, receive, recent, +recently, recognize, record, red, reduce, reflect, region, relate, relationship, +religious, remain, remember, remove, report, represent, Republican, require, +research, resource, respond, response, responsibility, rest, result, return, +reveal, rich, right, rise, risk, road, rock, role, room, rule, run, safe, same, +save, say, scene, school, science, scientist, score, sea, season, seat, second, +section, security, see, seek, seem, sell, send, senior, sense, series, serious, +serve, service, set, seven, several, sex, sexual, shake, share, she, shoot, +short, shot, should, shoulder, show, side, sign, significant, similar, simple, +simply, since, sing, single, sister, sit, site, situation, six, size, skill, +skin, small, smile, so, social, society, soldier, some, somebody, someone, +something, sometimes, son, song, soon, sort, sound, source, south, southern, +space, speak, special, specific, speech, spend, sport, spring, staff, stage, +stand, standard, star, start, state, statement, station, stay, step, still, +stock, stop, store, story, strategy, street, strong, structure, student, study, +stuff, style, subject, success, successful, such, suddenly, suffer, suggest, +summer, support, sure, surface, system, table, take, talk, task, tax, teach, +teacher, team, technology, television, tell, ten, tend, term, test, than, thank, +that, the, their, them, themselves, then, theory, there, these, they, thing, +think, third, this, those, though, thought, thousand, threat, three, through, +throughout, throw, thus, time, to, today, together, tonight, too, top, total, +tough, toward, town, trade, traditional, training, travel, treat, treatment, +tree, trial, trip, trouble, true, truth, try, turn, TV, two, type, under, +understand, unit, until, up, upon, us, use, usually, value, various, very, +victim, view, violence, visit, voice, vote, wait, walk, wall, want, war, watch, +water, way, we, weapon, wear, week, weight, well, west, western, what, whatever, +when, where, whether, which, while, white, who, whole, whom, whose, why, wide, +wife, will, win, wind, window, wish, with, within, without, woman, wonder, word, +work, worker, world, worry, would, write, writer, wrong, yard, yeah, year, yes, +yet, you, young, your, yourself"# + .split(',') + .map(|word| word.trim()) + .collect(); + + let mut word_pairs: Vec<_> = Vec::new(); + for word in &words { + for other_word in &words { + word_pairs.push(word.to_string() + " " + other_word); + } + } + assert_eq!(1_000_000, word_pairs.len()); + word_pairs +} + +fn check_for_collisions( + build_hasher: &B, + items: &[H], + bucket_count: usize, + expected_min: usize, + expected_max: usize, +) { + let mut buckets = vec![0; bucket_count]; + for item in items { + let value = hash(item, build_hasher) as usize; + buckets[value % bucket_count] += 1; + } + let mean = items.len() / bucket_count; + let max = *buckets.iter().max().unwrap(); + let min = *buckets.iter().min().unwrap(); + assert_eq!( + (expected_min, expected_max), + (min, max), + "unexpected min/max value, mean: {}, buckets: {:?}", + mean, + buckets + ); +} + +fn hash(b: &H, build_hasher: &B) -> u64 { + let mut hasher = build_hasher.build_hasher(); + b.hash(&mut hasher); + hasher.finish() +} + +#[test] +#[cfg(target_pointer_width = "32")] +fn test_bucket_distribution() { + let build_hasher = BuildHasherDefault::::default(); + let word_pairs: Vec<_> = gen_word_pairs(); + check_for_collisions(&build_hasher, &word_pairs, 32, 30979, 31607); + let sequence: Vec<_> = (0..320000).collect(); + check_for_collisions(&build_hasher, &sequence, 32, 10000, 10000); + let sequence: Vec<_> = (0..2560000).collect(); + check_for_collisions(&build_hasher, &sequence, 256, 10000, 10000); + let sequence: Vec<_> = (0..320000).map(|i| i * 1024).collect(); + check_for_collisions(&build_hasher, &sequence, 32, 0, 320000); + let sequence: Vec<_> = (0..2560000_u64).map(|i| i * 1024).collect(); + check_for_collisions(&build_hasher, &sequence, 256, 0, 80009); +} + +#[test] +#[cfg(target_pointer_width = "64")] +fn test_bucket_distribution() { + let build_hasher = BuildHasherDefault::::default(); + let word_pairs: Vec<_> = gen_word_pairs(); + check_for_collisions(&build_hasher, &word_pairs, 32, 30451, 31990); + let sequence: Vec<_> = (0..320000).collect(); + check_for_collisions(&build_hasher, &sequence, 32, 10000, 10000); + let sequence: Vec<_> = (0..2560000).collect(); + check_for_collisions(&build_hasher, &sequence, 256, 10000, 10000); + let sequence: Vec<_> = (0..320000).map(|i| i * 1024).collect(); + check_for_collisions(&build_hasher, &sequence, 32, 0, 320000); + let sequence: Vec<_> = (0..2560000_u64).map(|i| i * 1024).collect(); + check_for_collisions(&build_hasher, &sequence, 256, 0, 2560000); +}