From dbeb1bca351adb7e7719d408eb6ed68ee6b84253 Mon Sep 17 00:00:00 2001 From: Damian Gryski Date: Sat, 28 Jul 2012 23:35:37 +0200 Subject: [PATCH 1/2] core::hash -- add a hash::streaming interface and associated siphash implementation. --- src/libcore/hash.rs | 205 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 194 insertions(+), 11 deletions(-) diff --git a/src/libcore/hash.rs b/src/libcore/hash.rs index 072c3fe70fdd6..9c85473e76111 100644 --- a/src/libcore/hash.rs +++ b/src/libcore/hash.rs @@ -32,7 +32,7 @@ pure fn hash_bytes_keyed(buf: &[const u8], k0: u64, k1: u64) -> u64 { #macro([#rotl(x,b), (x << b) | (x >> (64 - b))]); - #macro([#compress(), { + #macro([#compress(v0,v1,v2,v3), { v0 += v1; v1 = #rotl(v1, 13); v1 ^= v0; v0 = #rotl(v0, 32); v2 += v3; v3 = #rotl(v3, 16); v3 ^= v2; v0 += v3; v3 = #rotl(v3, 21); v3 ^= v0; @@ -47,8 +47,8 @@ pure fn hash_bytes_keyed(buf: &[const u8], k0: u64, k1: u64) -> u64 { while i < end { let m = #u8to64_le(buf, i); v3 ^= m; - #compress(); - #compress(); + #compress(v0,v1,v2,v3); + #compress(v0,v1,v2,v3); v0 ^= m; i += 8; } @@ -64,19 +64,182 @@ pure fn hash_bytes_keyed(buf: &[const u8], k0: u64, k1: u64) -> u64 { if left > 6 { b |= buf[i + 6] as u64 << 48; } v3 ^= b; - #compress(); - #compress(); + #compress(v0,v1,v2,v3); + #compress(v0,v1,v2,v3); v0 ^= b; v2 ^= 0xff; - #compress(); - #compress(); - #compress(); - #compress(); + + #compress(v0,v1,v2,v3); + #compress(v0,v1,v2,v3); + #compress(v0,v1,v2,v3); + #compress(v0,v1,v2,v3); ret v0 ^ v1 ^ v2 ^ v3; } + +iface streaming { + fn input(~[u8]); + fn input_str(~str); + fn result() -> ~[u8]; + fn result_str() -> ~str; + fn reset(); +} + +fn siphash(key0 : u64, key1 : u64) -> streaming { + type sipstate = { + k0 : u64, + k1 : u64, + mut length : uint, // how many bytes we've processed + mut v0 : u64, // hash state + mut v1 : u64, + mut v2 : u64, + mut v3 : u64, + tail : ~[mut u8]/8, // unprocessed bytes + mut ntail : uint, // how many bytes in tail are valid + }; + + fn add_input(st : sipstate, msg : ~[u8]) { + let length = vec::len(msg); + st.length += length; + + let mut needed = 0u; + + if st.ntail != 0 { + needed = 8 - st.ntail; + + if length < needed { + + let mut t = 0; + while t < length { + st.tail[st.ntail+t] = msg[t]; + t += 1; + } + st.ntail += length; + + ret; + } + + let mut t = 0; + while t < needed { + st.tail[st.ntail+t] = msg[t]; + t += 1; + } + + let m = #u8to64_le(st.tail, 0); + + st.v3 ^= m; + #compress(st.v0, st.v1, st.v2, st.v3); + #compress(st.v0, st.v1, st.v2, st.v3); + st.v0 ^= m; + + st.ntail = 0; + } + + let len = length - needed; + let end = len & (!0x7); + let left = len & 0x7; + + let mut i = needed; + while i < end { + let mi = #u8to64_le(msg, i); + + st.v3 ^= mi; + #compress(st.v0, st.v1, st.v2, st.v3); + #compress(st.v0, st.v1, st.v2, st.v3); + st.v0 ^= mi; + + i += 8; + } + + let mut t = 0u; + while t < left { + st.tail[t] = msg[i+t]; + t += 1 + } + st.ntail = left; + } + + fn mk_result(st : sipstate) -> ~[u8] { + + let mut v0 = st.v0; + let mut v1 = st.v1; + let mut v2 = st.v2; + let mut v3 = st.v3; + + let mut b : u64 = (st.length as u64 & 0xff) << 56; + + if st.ntail > 0 { b |= st.tail[0] as u64 << 0; } + if st.ntail > 1 { b |= st.tail[1] as u64 << 8; } + if st.ntail > 2 { b |= st.tail[2] as u64 << 16; } + if st.ntail > 3 { b |= st.tail[3] as u64 << 24; } + if st.ntail > 4 { b |= st.tail[4] as u64 << 32; } + if st.ntail > 5 { b |= st.tail[5] as u64 << 40; } + if st.ntail > 6 { b |= st.tail[6] as u64 << 48; } + + v3 ^= b; + #compress(v0, v1, v2, v3); + #compress(v0, v1, v2, v3); + v0 ^= b; + + v2 ^= 0xff; + #compress(v0, v1, v2, v3); + #compress(v0, v1, v2, v3); + #compress(v0, v1, v2, v3); + #compress(v0, v1, v2, v3); + + let h = v0 ^ v1 ^ v2 ^ v3; + + ret ~[ + (h >> 0) as u8, + (h >> 8) as u8, + (h >> 16) as u8, + (h >> 24) as u8, + (h >> 32) as u8, + (h >> 40) as u8, + (h >> 48) as u8, + (h >> 56) as u8, + ]; + } + + impl of streaming for sipstate { + fn reset() { + self.length = 0; + self.v0 = self.k0 ^ 0x736f6d6570736575; + self.v1 = self.k1 ^ 0x646f72616e646f6d; + self.v2 = self.k0 ^ 0x6c7967656e657261; + self.v3 = self.k1 ^ 0x7465646279746573; + self.ntail = 0; + } + fn input(msg: ~[u8]) { add_input(self, msg); } + fn input_str(msg: ~str) { add_input(self, str::bytes(msg)); } + fn result() -> ~[u8] { ret mk_result(self); } + fn result_str() -> ~str { + let r = mk_result(self); + let mut s = ~""; + for vec::each(r) |b| { s += uint::to_str(b as uint, 16u); } + ret s; + } + } + + let st = { + k0 : key0, + k1 : key1, + mut length : 0u, + mut v0 : 0u64, + mut v1 : 0u64, + mut v2 : 0u64, + mut v3 : 0u64, + tail : ~[mut 0u8,0,0,0,0,0,0,0]/8, + mut ntail : 0u, + }; + + let sh = st as streaming; + sh.reset(); + ret sh; +} + #[test] fn test_siphash() { let vecs : [[u8]/8]/64 = [ @@ -150,14 +313,34 @@ fn test_siphash() { let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08_u64; let mut buf : ~[u8] = ~[]; let mut t = 0; + let stream_inc = siphash(k0,k1); + let stream_full = siphash(k0,k1); + + fn to_hex_str(r:[u8]/8) -> ~str { + let mut s = ~""; + for vec::each(r) |b| { s += uint::to_str(b as uint, 16u); } + ret s; + } + while t < 64 { #debug("siphash test %?", t); let vec = #u8to64_le(vecs[t], 0); let out = hash_bytes_keyed(buf, k0, k1); #debug("got %?, expected %?", out, vec); assert vec == out; + + stream_full.reset(); + stream_full.input(buf); + let f = stream_full.result_str(); + let i = stream_inc.result_str(); + let v = to_hex_str(vecs[t]); + #debug["%d: (%s) => inc=%s full=%s", t, v, i, f]; + + assert f == i && f == v; + buf += ~[t as u8]; + stream_inc.input(~[t as u8]); + t += 1; } - -} \ No newline at end of file +} From 92ac5baa10d6b97a79d6ce87d80256c10f7c0831 Mon Sep 17 00:00:00 2001 From: Damian Gryski Date: Sat, 28 Jul 2012 23:37:11 +0200 Subject: [PATCH 2/2] rustc: use new siphash impl instead of sha1 Updating types std::sha1::sha1 -> hash::streaming was a relatively small change. Renaming the variables to reflect that things aren't sha1s any more touched far more lines. --- src/rustc/back/link.rs | 44 ++++++++++++++++---------------- src/rustc/middle/trans/base.rs | 8 +++--- src/rustc/middle/trans/common.rs | 4 +-- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/rustc/back/link.rs b/src/rustc/back/link.rs index 4b1b370dbd790..1e078b2f9c196 100644 --- a/src/rustc/back/link.rs +++ b/src/rustc/back/link.rs @@ -307,7 +307,7 @@ mod write { */ fn build_link_meta(sess: session, c: ast::crate, output: ~str, - sha: sha1) -> link_meta { + symbol_hasher: hash::streaming) -> link_meta { type provided_metas = {name: option<@~str>, @@ -338,7 +338,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str, } // This calculates CMH as defined above - fn crate_meta_extras_hash(sha: sha1, _crate: ast::crate, + fn crate_meta_extras_hash(symbol_hasher: hash::streaming, _crate: ast::crate, metas: provided_metas, dep_hashes: ~[@~str]) -> ~str { fn len_and_str(s: ~str) -> ~str { @@ -351,15 +351,15 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str, let cmh_items = attr::sort_meta_items(metas.cmh_items); - sha.reset(); + symbol_hasher.reset(); for cmh_items.each |m_| { let m = m_; alt m.node { ast::meta_name_value(key, value) { - sha.input_str(len_and_str(*key)); - sha.input_str(len_and_str_lit(value)); + symbol_hasher.input_str(len_and_str(*key)); + symbol_hasher.input_str(len_and_str_lit(value)); } - ast::meta_word(name) { sha.input_str(len_and_str(*name)); } + ast::meta_word(name) { symbol_hasher.input_str(len_and_str(*name)); } ast::meta_list(_, _) { // FIXME (#607): Implement this fail ~"unimplemented meta_item variant"; @@ -368,10 +368,10 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str, } for dep_hashes.each |dh| { - sha.input_str(len_and_str(*dh)); + symbol_hasher.input_str(len_and_str(*dh)); } - ret truncated_sha1_result(sha); + ret truncated_hash_result(symbol_hasher); } fn warn_missing(sess: session, name: ~str, default: ~str) { @@ -419,40 +419,40 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str, let vers = crate_meta_vers(sess, c, provided_metas); let dep_hashes = cstore::get_dep_hashes(sess.cstore); let extras_hash = - crate_meta_extras_hash(sha, c, provided_metas, dep_hashes); + crate_meta_extras_hash(symbol_hasher, c, provided_metas, dep_hashes); ret {name: name, vers: vers, extras_hash: extras_hash}; } -fn truncated_sha1_result(sha: sha1) -> ~str unsafe { - ret str::slice(sha.result_str(), 0u, 16u); +fn truncated_hash_result(symbol_hasher: hash::streaming) -> ~str unsafe { + symbol_hasher.result_str() } // This calculates STH for a symbol, as defined above -fn symbol_hash(tcx: ty::ctxt, sha: sha1, t: ty::t, +fn symbol_hash(tcx: ty::ctxt, symbol_hasher: hash::streaming, t: ty::t, link_meta: link_meta) -> ~str { // NB: do *not* use abbrevs here as we want the symbol names // to be independent of one another in the crate. - sha.reset(); - sha.input_str(*link_meta.name); - sha.input_str(~"-"); - sha.input_str(link_meta.extras_hash); - sha.input_str(~"-"); - sha.input_str(encoder::encoded_ty(tcx, t)); - let hash = truncated_sha1_result(sha); + symbol_hasher.reset(); + symbol_hasher.input_str(*link_meta.name); + symbol_hasher.input_str(~"-"); + symbol_hasher.input_str(link_meta.extras_hash); + symbol_hasher.input_str(~"-"); + symbol_hasher.input_str(encoder::encoded_ty(tcx, t)); + let hash = truncated_hash_result(symbol_hasher); // Prefix with _ so that it never blends into adjacent digits ret ~"_" + hash; } fn get_symbol_hash(ccx: @crate_ctxt, t: ty::t) -> ~str { - alt ccx.type_sha1s.find(t) { + alt ccx.type_hashcodes.find(t) { some(h) { ret h; } none { - let hash = symbol_hash(ccx.tcx, ccx.sha, t, ccx.link_meta); - ccx.type_sha1s.insert(t, hash); + let hash = symbol_hash(ccx.tcx, ccx.symbol_hasher, t, ccx.link_meta); + ccx.type_hashcodes.insert(t, hash); ret hash; } } diff --git a/src/rustc/middle/trans/base.rs b/src/rustc/middle/trans/base.rs index 94a4a30021db5..c33a1330f9ee7 100644 --- a/src/rustc/middle/trans/base.rs +++ b/src/rustc/middle/trans/base.rs @@ -5650,8 +5650,8 @@ fn trans_crate(sess: session::session, crate: @ast::crate, tcx: ty::ctxt, output: ~str, emap: resolve3::ExportMap, maps: astencode::maps) -> (ModuleRef, link_meta) { - let sha = std::sha1::sha1(); - let link_meta = link::build_link_meta(sess, *crate, output, sha); + let symbol_hasher = hash::siphash(0,0); + let link_meta = link::build_link_meta(sess, *crate, output, symbol_hasher); let reachable = reachable::find_reachable(crate.node.module, emap, tcx, maps.method_map); @@ -5724,8 +5724,8 @@ fn trans_crate(sess: session::session, crate: @ast::crate, tcx: ty::ctxt, module_data: str_hash::(), lltypes: ty::new_ty_hash(), names: new_namegen(), - sha: sha, - type_sha1s: ty::new_ty_hash(), + symbol_hasher: symbol_hasher, + type_hashcodes: ty::new_ty_hash(), type_short_names: ty::new_ty_hash(), all_llvm_symbols: str_hash::<()>(), tcx: tcx, diff --git a/src/rustc/middle/trans/common.rs b/src/rustc/middle/trans/common.rs index 6f8eb2a5148a9..b66fb82c9f895 100644 --- a/src/rustc/middle/trans/common.rs +++ b/src/rustc/middle/trans/common.rs @@ -110,8 +110,8 @@ type crate_ctxt = { module_data: hashmap<~str, ValueRef>, lltypes: hashmap, names: namegen, - sha: std::sha1::sha1, - type_sha1s: hashmap, + symbol_hasher: hash::streaming, + type_hashcodes: hashmap, type_short_names: hashmap, all_llvm_symbols: set<~str>, tcx: ty::ctxt,