9
9
namespace Microsoft . ML . Runtime . Data
10
10
{
11
11
/// <summary>
12
- /// A sixteen-byte unsigned integer.
12
+ /// A structure serving as a sixteen-byte unsigned integer. It is used as the row id of <see cref="IDataView"/>.
13
+ /// For datasets with millions of records, these IDs need to be unique, hence the need for such a large structure to hold the values.
14
+ /// Those IDs are derived from the IDs of the previous components of the pipeline. Dividing the structure into two halves, the high-order and the low-order bits,
15
+ /// reduces the chances of collisions even further.
13
16
/// </summary>
14
- public readonly struct UInt128 : IComparable < UInt128 > , IEquatable < UInt128 >
17
+ /// <seealso cref="Row.GetIdGetter"/>
18
+ public readonly struct RowId : IComparable < RowId > , IEquatable < RowId >
15
19
{
16
- // The low order bits. Corresponds to H1 in the Murmur algorithms.
17
- public readonly ulong Lo ;
18
- // The high order bits. Corresponds to H2 in the Murmur algorithms.
19
- public readonly ulong Hi ;
20
+ ///<summary>The low order bits. Corresponds to H1 in the Murmur algorithms.</summary>
21
+ public readonly ulong Low ;
20
22
21
- public UInt128 ( ulong lo , ulong hi )
23
+ ///<summary> The high order bits. Corresponds to H2 in the Murmur algorithms.</summary>
24
+ public readonly ulong High ;
25
+
26
+ /// <summary>
27
+ /// Initializes a new instance of the <see cref="RowId"/> struct.
28
+ /// </summary>
29
+ /// <param name="low">The low order <langword>ulong</langword>.</param>
30
+ /// <param name="high">The high order <langword>ulong</langword>.</param>
31
+ public RowId ( ulong low , ulong high )
22
32
{
23
- Lo = lo ;
24
- Hi = hi ;
33
+ Low = low ;
34
+ High = high ;
25
35
}
26
36
27
37
public override string ToString ( )
28
38
{
29
39
// Since H1 are the low order bits, they are printed second.
30
- return string . Format ( "{0:x16}{1:x16}" , Hi , Lo ) ;
40
+ return string . Format ( "{0:x16}{1:x16}" , High , Low ) ;
31
41
}
32
42
33
- public int CompareTo ( UInt128 other )
43
+ public int CompareTo ( RowId other )
34
44
{
35
- int result = Hi . CompareTo ( other . Hi ) ;
36
- return result == 0 ? Lo . CompareTo ( other . Lo ) : result ;
45
+ int result = High . CompareTo ( other . High ) ;
46
+ return result == 0 ? Low . CompareTo ( other . Low ) : result ;
37
47
}
38
48
39
- public bool Equals ( UInt128 other )
49
+ public bool Equals ( RowId other )
40
50
{
41
- return Lo == other . Lo && Hi == other . Hi ;
51
+ return Low == other . Low && High == other . High ;
42
52
}
43
53
44
54
public override bool Equals ( object obj )
45
55
{
46
- if ( obj != null && obj is UInt128 )
56
+ if ( obj != null && obj is RowId )
47
57
{
48
- var item = ( UInt128 ) obj ;
58
+ var item = ( RowId ) obj ;
49
59
return Equals ( item ) ;
50
60
}
51
61
return false ;
52
62
}
53
63
54
- public static UInt128 operator + ( UInt128 first , ulong second )
64
+ public static RowId operator + ( RowId first , ulong second )
55
65
{
56
- ulong resHi = first . Hi ;
57
- ulong resLo = first . Lo + second ;
66
+ ulong resHi = first . High ;
67
+ ulong resLo = first . Low + second ;
58
68
if ( resLo < second )
59
69
resHi ++ ;
60
- return new UInt128 ( resLo , resHi ) ;
70
+ return new RowId ( resLo , resHi ) ;
61
71
}
62
72
63
- public static UInt128 operator - ( UInt128 first , ulong second )
73
+ public static RowId operator - ( RowId first , ulong second )
64
74
{
65
- ulong resHi = first . Hi ;
66
- ulong resLo = first . Lo - second ;
67
- if ( resLo > first . Lo )
75
+ ulong resHi = first . High ;
76
+ ulong resLo = first . Low - second ;
77
+ if ( resLo > first . Low )
68
78
resHi -- ;
69
- return new UInt128 ( resLo , resHi ) ;
79
+ return new RowId ( resLo , resHi ) ;
70
80
}
71
81
72
- public static bool operator == ( UInt128 first , ulong second )
82
+ public static bool operator == ( RowId first , ulong second )
73
83
{
74
- return first . Hi == 0 && first . Lo == second ;
84
+ return first . High == 0 && first . Low == second ;
75
85
}
76
86
77
- public static bool operator != ( UInt128 first , ulong second )
87
+ public static bool operator != ( RowId first , ulong second )
78
88
{
79
89
return ! ( first == second ) ;
80
90
}
81
91
82
- public static bool operator < ( UInt128 first , ulong second )
92
+ public static bool operator < ( RowId first , ulong second )
83
93
{
84
- return first . Hi == 0 && first . Lo < second ;
94
+ return first . High == 0 && first . Low < second ;
85
95
}
86
96
87
- public static bool operator > ( UInt128 first , ulong second )
97
+ public static bool operator > ( RowId first , ulong second )
88
98
{
89
- return first . Hi > 0 || first . Lo > second ;
99
+ return first . High > 0 || first . Low > second ;
90
100
}
91
101
92
- public static bool operator <= ( UInt128 first , ulong second )
102
+ public static bool operator <= ( RowId first , ulong second )
93
103
{
94
- return first . Hi == 0 && first . Lo <= second ;
104
+ return first . High == 0 && first . Low <= second ;
95
105
}
96
106
97
- public static bool operator >= ( UInt128 first , ulong second )
107
+ public static bool operator >= ( RowId first , ulong second )
98
108
{
99
- return first . Hi > 0 || first . Lo >= second ;
109
+ return first . High > 0 || first . Low >= second ;
100
110
}
101
111
102
- public static explicit operator double ( UInt128 x )
112
+ public static explicit operator double ( RowId x )
103
113
{
104
114
// REVIEW: The 64-bit JIT has a bug where rounding might be not quite
105
115
// correct when converting a ulong to double with the high bit set. Should we
106
116
// care and compensate? See the DoubleParser code for a work-around.
107
- return x . Hi * ( ( double ) ( 1UL << 32 ) * ( 1UL << 32 ) ) + x . Lo ;
117
+ return x . High * ( ( double ) ( 1UL << 32 ) * ( 1UL << 32 ) ) + x . Low ;
108
118
}
109
119
110
120
public override int GetHashCode ( )
111
121
{
112
122
return ( int ) (
113
- ( uint ) Lo ^ ( uint ) ( Lo >> 32 ) ^
114
- ( uint ) ( Hi << 7 ) ^ ( uint ) ( Hi >> 57 ) ^ ( uint ) ( Hi >> ( 57 - 32 ) ) ) ;
123
+ ( uint ) Low ^ ( uint ) ( Low >> 32 ) ^
124
+ ( uint ) ( High << 7 ) ^ ( uint ) ( High >> 57 ) ^ ( uint ) ( High >> ( 57 - 32 ) ) ) ;
115
125
}
116
126
117
127
#region Hashing style
@@ -167,10 +177,10 @@ private static void FinalMix(ref ulong h1, ref ulong h2, int len)
167
177
/// that were all zeros, except for the last bit which is one.
168
178
/// </summary>
169
179
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
170
- public UInt128 Fork ( )
180
+ public RowId Fork ( )
171
181
{
172
- ulong h1 = Lo ;
173
- ulong h2 = Hi ;
182
+ ulong h1 = Low ;
183
+ ulong h2 = High ;
174
184
// Here it's as if k1=1, k2=0.
175
185
h1 = RotL ( h1 , 27 ) ;
176
186
h1 += h2 ;
@@ -179,7 +189,7 @@ public UInt128 Fork()
179
189
h2 += h1 ;
180
190
h2 = h2 * 5 + 0x38495ab5 ;
181
191
h1 ^= RotL ( _c1 , 31 ) * _c2 ;
182
- return new UInt128 ( h1 , h2 ) ;
192
+ return new RowId ( h1 , h2 ) ;
183
193
}
184
194
185
195
/// <summary>
@@ -188,18 +198,18 @@ public UInt128 Fork()
188
198
/// that were all zeros.
189
199
/// </summary>
190
200
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
191
- public UInt128 Next ( )
201
+ public RowId Next ( )
192
202
{
193
- ulong h1 = Lo ;
194
- ulong h2 = Hi ;
203
+ ulong h1 = Low ;
204
+ ulong h2 = High ;
195
205
// Here it's as if k1=0, k2=0.
196
206
h1 = RotL ( h1 , 27 ) ;
197
207
h1 += h2 ;
198
208
h1 = h1 * 5 + 0x52dce729 ;
199
209
h2 = RotL ( h2 , 31 ) ;
200
210
h2 += h1 ;
201
211
h2 = h2 * 5 + 0x38495ab5 ;
202
- return new UInt128 ( h1 , h2 ) ;
212
+ return new RowId ( h1 , h2 ) ;
203
213
}
204
214
205
215
/// <summary>
@@ -210,14 +220,14 @@ public UInt128 Next()
210
220
/// <param name="other"></param>
211
221
/// <returns></returns>
212
222
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
213
- public UInt128 Combine ( UInt128 other )
223
+ public RowId Combine ( RowId other )
214
224
{
215
- var h1 = Lo ;
216
- var h2 = Hi ;
225
+ var h1 = Low ;
226
+ var h2 = High ;
217
227
218
228
other = other . Fork ( ) ;
219
- ulong k1 = other . Lo ; // First 8 bytes.
220
- ulong k2 = other . Hi ; // Second 8 bytes.
229
+ ulong k1 = other . Low ; // First 8 bytes.
230
+ ulong k2 = other . High ; // Second 8 bytes.
221
231
222
232
k1 *= _c1 ;
223
233
k1 = RotL ( k1 , 31 ) ;
@@ -235,7 +245,7 @@ public UInt128 Combine(UInt128 other)
235
245
h2 += h1 ;
236
246
h2 = h2 * 5 + 0x38495ab5 ;
237
247
238
- return new UInt128 ( h1 , h2 ) ;
248
+ return new RowId ( h1 , h2 ) ;
239
249
}
240
250
#endregion
241
251
}
0 commit comments