-
Notifications
You must be signed in to change notification settings - Fork 13.8k
Encode hashes as bytes, not varint #110083
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,4 +1,4 @@ | ||||||||||||
use crate::stable_hasher; | ||||||||||||
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult}; | ||||||||||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; | ||||||||||||
use std::hash::{Hash, Hasher}; | ||||||||||||
|
||||||||||||
|
@@ -9,32 +9,49 @@ mod tests; | |||||||||||
#[repr(C)] | ||||||||||||
pub struct Fingerprint(u64, u64); | ||||||||||||
|
||||||||||||
impl Fingerprint { | ||||||||||||
pub const ZERO: Fingerprint = Fingerprint(0, 0); | ||||||||||||
pub trait FingerprintComponent { | ||||||||||||
fn as_u64(&self) -> u64; | ||||||||||||
} | ||||||||||||
|
||||||||||||
impl FingerprintComponent for Hash64 { | ||||||||||||
#[inline] | ||||||||||||
pub fn new(_0: u64, _1: u64) -> Fingerprint { | ||||||||||||
Fingerprint(_0, _1) | ||||||||||||
fn as_u64(&self) -> u64 { | ||||||||||||
Hash64::as_u64(*self) | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
impl FingerprintComponent for u64 { | ||||||||||||
#[inline] | ||||||||||||
fn as_u64(&self) -> u64 { | ||||||||||||
saethlin marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
*self | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
impl Fingerprint { | ||||||||||||
pub const ZERO: Fingerprint = Fingerprint(0, 0); | ||||||||||||
|
||||||||||||
#[inline] | ||||||||||||
pub fn from_smaller_hash(hash: u64) -> Fingerprint { | ||||||||||||
Fingerprint(hash, hash) | ||||||||||||
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint | ||||||||||||
where | ||||||||||||
A: FingerprintComponent, | ||||||||||||
B: FingerprintComponent, | ||||||||||||
{ | ||||||||||||
Fingerprint(_0.as_u64(), _1.as_u64()) | ||||||||||||
|
Fingerprint::new( | |
// `owner` is local, so is completely defined by the local hash | |
def_path_hash.local_hash(), | |
local_id.as_u32().into(), | |
) |
After looking through this a lot, I think the `Fingerprint` type should probably be overhauled or discarded. Different users of `Fingerprint` seem to be getting very different things out of it, which seem mostly conceptually disjoint.
So `Fingerprint` seems to be used as:
- a 128-bit hash
- two 64-bit hashes, which can be separated later
- a 64-bit hash and a 32-bit rustc_index
That seems like 3 different types to me.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
//! rustc encodes a lot of hashes. If hashes are stored as `u64` or `u128`, a `derive(Encodable)` | ||
//! will apply varint encoding to the hashes, which is less efficient than directly encoding the 8 | ||
//! or 16 bytes of the hash. | ||
//! | ||
//! The types in this module represent 64-bit or 128-bit hashes produced by a `StableHasher`. | ||
//! `Hash64` and `Hash128` expose some utility functions to encourage users to not extract the inner | ||
//! hash value as an integer type and accidentally apply varint encoding to it. | ||
//! | ||
//! In contrast with `Fingerprint`, users of these types cannot and should not attempt to construct | ||
//! and decompose these types into constituent pieces. The point of these types is only to | ||
//! connect the fact that they can only be produced by a `StableHasher` to their | ||
//! `Encodable`/`Decodable` impls. | ||
|
||
use crate::stable_hasher::{StableHasher, StableHasherResult}; | ||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; | ||
use std::fmt; | ||
use std::ops::BitXorAssign; | ||
|
||
/// A 64-bit hash value.
///
/// The integer lives in a private field so that this type's own encoding is used rather than
/// the varint encoding that a bare `u64` field would receive from `derive(Encodable)`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash64 {
    inner: u64,
}

impl Hash64 {
    /// The hash whose 64 bits are all zero (equal to `Hash64::default()`).
    pub const ZERO: Hash64 = Hash64::new(0);

    /// Wraps a raw 64-bit value.
    ///
    /// Crate-private, so code outside this crate cannot construct a `Hash64` from an
    /// arbitrary integer. Usable in `const` contexts.
    #[inline]
    pub(crate) const fn new(n: u64) -> Self {
        Self { inner: n }
    }

    /// Returns the hash as a plain `u64`. Usable in `const` contexts.
    #[inline]
    pub const fn as_u64(self) -> u64 {
        self.inner
    }
}
|
||
impl BitXorAssign<u64> for Hash64 { | ||
#[inline] | ||
fn bitxor_assign(&mut self, rhs: u64) { | ||
saethlin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.inner ^= rhs; | ||
} | ||
} | ||
|
||
impl<S: Encoder> Encodable<S> for Hash64 { | ||
#[inline] | ||
fn encode(&self, s: &mut S) { | ||
s.emit_raw_bytes(&self.inner.to_le_bytes()); | ||
} | ||
} | ||
|
||
impl<D: Decoder> Decodable<D> for Hash64 { | ||
#[inline] | ||
fn decode(d: &mut D) -> Self { | ||
Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) } | ||
} | ||
} | ||
|
||
impl StableHasherResult for Hash64 { | ||
#[inline] | ||
fn finish(hasher: StableHasher) -> Self { | ||
Self { inner: hasher.finalize().0 } | ||
} | ||
} | ||
|
||
impl fmt::Debug for Hash64 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
self.inner.fmt(f) | ||
} | ||
} | ||
|
||
impl fmt::LowerHex for Hash64 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
fmt::LowerHex::fmt(&self.inner, f) | ||
} | ||
} | ||
|
||
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] | ||
pub struct Hash128 { | ||
inner: u128, | ||
} | ||
|
||
impl Hash128 { | ||
#[inline] | ||
pub fn truncate(self) -> Hash64 { | ||
Hash64 { inner: self.inner as u64 } | ||
} | ||
|
||
#[inline] | ||
pub fn wrapping_add(self, other: Self) -> Self { | ||
Self { inner: self.inner.wrapping_add(other.inner) } | ||
} | ||
|
||
#[inline] | ||
pub fn as_u128(self) -> u128 { | ||
self.inner | ||
} | ||
} | ||
|
||
impl<S: Encoder> Encodable<S> for Hash128 { | ||
#[inline] | ||
fn encode(&self, s: &mut S) { | ||
s.emit_raw_bytes(&self.inner.to_le_bytes()); | ||
} | ||
} | ||
|
||
impl<D: Decoder> Decodable<D> for Hash128 { | ||
#[inline] | ||
fn decode(d: &mut D) -> Self { | ||
Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) } | ||
} | ||
} | ||
|
||
impl StableHasherResult for Hash128 { | ||
#[inline] | ||
fn finish(hasher: StableHasher) -> Self { | ||
let (_0, _1) = hasher.finalize(); | ||
Self { inner: u128::from(_0) | (u128::from(_1) << 64) } | ||
} | ||
} | ||
|
||
impl fmt::Debug for Hash128 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
self.inner.fmt(f) | ||
} | ||
} | ||
|
||
impl fmt::LowerHex for Hash128 { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
fmt::LowerHex::fmt(&self.inner, f) | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.