Skip to content

Commit a326dda

Browse files
authored
renaming uint128 to RowId (#1858)
* renaming uint128 to RowId
1 parent b49e2b0 commit a326dda

File tree

57 files changed

+322
-312
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+322
-312
lines changed

src/Microsoft.ML.Core/Data/DataKind.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ public static Type ToType(this DataKind kind)
171171
case DataKind.DZ:
172172
return typeof(DateTimeOffset);
173173
case DataKind.UG:
174-
return typeof(UInt128);
174+
return typeof(RowId);
175175
}
176176

177177
return null;
@@ -215,7 +215,7 @@ public static bool TryGetDataKind(this Type type, out DataKind kind)
215215
kind = DataKind.DT;
216216
else if (type == typeof(DateTimeOffset))
217217
kind = DataKind.DZ;
218-
else if (type == typeof(UInt128))
218+
else if (type == typeof(RowId))
219219
kind = DataKind.UG;
220220
else
221221
{

src/Microsoft.ML.Core/Data/IDataView.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ public abstract class Row : IDisposable
182182
/// all that if the "same" data were presented in a different data view (as by, say, being transformed,
183183
/// cached, saved, or whatever), that the IDs between the two different data views would have any
184184
/// discernable relationship.</summary>
185-
public abstract ValueGetter<UInt128> GetIdGetter();
185+
public abstract ValueGetter<RowId> GetIdGetter();
186186

187187
/// <summary>
188188
/// Returns whether the given column is active in this row.

src/Microsoft.ML.Core/Data/LinkedRowFilterCursorBase.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ protected LinkedRowFilterCursorBase(IChannelProvider provider, RowCursor input,
1919
{
2020
}
2121

22-
public override ValueGetter<UInt128> GetIdGetter()
22+
public override ValueGetter<RowId> GetIdGetter()
2323
{
2424
return Input.GetIdGetter();
2525
}

src/Microsoft.ML.Core/Data/MetadataUtils.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ public MetadataRow(Schema.Metadata metadata)
509509
public override long Position => 0;
510510
public override long Batch => 0;
511511
public override ValueGetter<TValue> GetGetter<TValue>(int col) => _metadata.GetGetter<TValue>(col);
512-
public override ValueGetter<UInt128> GetIdGetter() => (ref UInt128 dst) => dst = default;
512+
public override ValueGetter<RowId> GetIdGetter() => (ref RowId dst) => dst = default;
513513
public override bool IsColumnActive(int col) => true;
514514
}
515515

src/Microsoft.ML.Core/Data/UInt128.cs renamed to src/Microsoft.ML.Core/Data/RowId.cs

+65-55
Original file line numberDiff line numberDiff line change
@@ -9,109 +9,119 @@
99
namespace Microsoft.ML.Runtime.Data
1010
{
1111
/// <summary>
12-
/// A sixteen-byte unsigned integer.
12+
/// A structure serving as a sixteen-byte unsigned integer. It is used as the row id of <see cref="IDataView"/>.
13+
/// For datasets with millions of records, those IDs need to be unique, therefore the need for such a large structure to hold the values.
14+
/// Those IDs are derived from the IDs of the previous components of the pipeline, and dividing the structure in two, into high order and low order bits,
15+
/// reduces the chances of collisions even further.
1316
/// </summary>
14-
public readonly struct UInt128 : IComparable<UInt128>, IEquatable<UInt128>
17+
/// <seealso cref="Row.GetIdGetter"/>
18+
public readonly struct RowId : IComparable<RowId>, IEquatable<RowId>
1519
{
16-
// The low order bits. Corresponds to H1 in the Murmur algorithms.
17-
public readonly ulong Lo;
18-
// The high order bits. Corresponds to H2 in the Murmur algorithms.
19-
public readonly ulong Hi;
20+
///<summary>The low order bits. Corresponds to H1 in the Murmur algorithms.</summary>
21+
public readonly ulong Low;
2022

21-
public UInt128(ulong lo, ulong hi)
23+
///<summary> The high order bits. Corresponds to H2 in the Murmur algorithms.</summary>
24+
public readonly ulong High;
25+
26+
/// <summary>
27+
/// Initializes a new instance of <see cref="RowId"/>.
28+
/// </summary>
29+
/// <param name="low">The low order <langword>ulong</langword>.</param>
30+
/// <param name="high">The high order <langword>ulong</langword>.</param>
31+
public RowId(ulong low, ulong high)
2232
{
23-
Lo = lo;
24-
Hi = hi;
33+
Low = low;
34+
High = high;
2535
}
2636

2737
public override string ToString()
2838
{
2939
// Since H1 are the low order bits, they are printed second.
30-
return string.Format("{0:x16}{1:x16}", Hi, Lo);
40+
return string.Format("{0:x16}{1:x16}", High, Low);
3141
}
3242

33-
public int CompareTo(UInt128 other)
43+
public int CompareTo(RowId other)
3444
{
35-
int result = Hi.CompareTo(other.Hi);
36-
return result == 0 ? Lo.CompareTo(other.Lo) : result;
45+
int result = High.CompareTo(other.High);
46+
return result == 0 ? Low.CompareTo(other.Low) : result;
3747
}
3848

39-
public bool Equals(UInt128 other)
49+
public bool Equals(RowId other)
4050
{
41-
return Lo == other.Lo && Hi == other.Hi;
51+
return Low == other.Low && High == other.High;
4252
}
4353

4454
public override bool Equals(object obj)
4555
{
46-
if (obj != null && obj is UInt128)
56+
if (obj != null && obj is RowId)
4757
{
48-
var item = (UInt128)obj;
58+
var item = (RowId)obj;
4959
return Equals(item);
5060
}
5161
return false;
5262
}
5363

54-
public static UInt128 operator +(UInt128 first, ulong second)
64+
public static RowId operator +(RowId first, ulong second)
5565
{
56-
ulong resHi = first.Hi;
57-
ulong resLo = first.Lo + second;
66+
ulong resHi = first.High;
67+
ulong resLo = first.Low + second;
5868
if (resLo < second)
5969
resHi++;
60-
return new UInt128(resLo, resHi);
70+
return new RowId(resLo, resHi);
6171
}
6272

63-
public static UInt128 operator -(UInt128 first, ulong second)
73+
public static RowId operator -(RowId first, ulong second)
6474
{
65-
ulong resHi = first.Hi;
66-
ulong resLo = first.Lo - second;
67-
if (resLo > first.Lo)
75+
ulong resHi = first.High;
76+
ulong resLo = first.Low - second;
77+
if (resLo > first.Low)
6878
resHi--;
69-
return new UInt128(resLo, resHi);
79+
return new RowId(resLo, resHi);
7080
}
7181

72-
public static bool operator ==(UInt128 first, ulong second)
82+
public static bool operator ==(RowId first, ulong second)
7383
{
74-
return first.Hi == 0 && first.Lo == second;
84+
return first.High == 0 && first.Low == second;
7585
}
7686

77-
public static bool operator !=(UInt128 first, ulong second)
87+
public static bool operator !=(RowId first, ulong second)
7888
{
7989
return !(first == second);
8090
}
8191

82-
public static bool operator <(UInt128 first, ulong second)
92+
public static bool operator <(RowId first, ulong second)
8393
{
84-
return first.Hi == 0 && first.Lo < second;
94+
return first.High == 0 && first.Low < second;
8595
}
8696

87-
public static bool operator >(UInt128 first, ulong second)
97+
public static bool operator >(RowId first, ulong second)
8898
{
89-
return first.Hi > 0 || first.Lo > second;
99+
return first.High > 0 || first.Low > second;
90100
}
91101

92-
public static bool operator <=(UInt128 first, ulong second)
102+
public static bool operator <=(RowId first, ulong second)
93103
{
94-
return first.Hi == 0 && first.Lo <= second;
104+
return first.High == 0 && first.Low <= second;
95105
}
96106

97-
public static bool operator >=(UInt128 first, ulong second)
107+
public static bool operator >=(RowId first, ulong second)
98108
{
99-
return first.Hi > 0 || first.Lo >= second;
109+
return first.High > 0 || first.Low >= second;
100110
}
101111

102-
public static explicit operator double(UInt128 x)
112+
public static explicit operator double(RowId x)
103113
{
104114
// REVIEW: The 64-bit JIT has a bug where rounding might not be quite
105115
// correct when converting a ulong to double with the high bit set. Should we
106116
// care and compensate? See the DoubleParser code for a work-around.
107-
return x.Hi * ((double)(1UL << 32) * (1UL << 32)) + x.Lo;
117+
return x.High * ((double)(1UL << 32) * (1UL << 32)) + x.Low;
108118
}
109119

110120
public override int GetHashCode()
111121
{
112122
return (int)(
113-
(uint)Lo ^ (uint)(Lo >> 32) ^
114-
(uint)(Hi << 7) ^ (uint)(Hi >> 57) ^ (uint)(Hi >> (57 - 32)));
123+
(uint)Low ^ (uint)(Low >> 32) ^
124+
(uint)(High << 7) ^ (uint)(High >> 57) ^ (uint)(High >> (57 - 32)));
115125
}
116126

117127
#region Hashing style
@@ -167,10 +177,10 @@ private static void FinalMix(ref ulong h1, ref ulong h2, int len)
167177
/// that were all zeros, except for the last bit which is one.
168178
/// </summary>
169179
[MethodImpl(MethodImplOptions.AggressiveInlining)]
170-
public UInt128 Fork()
180+
public RowId Fork()
171181
{
172-
ulong h1 = Lo;
173-
ulong h2 = Hi;
182+
ulong h1 = Low;
183+
ulong h2 = High;
174184
// Here it's as if k1=1, k2=0.
175185
h1 = RotL(h1, 27);
176186
h1 += h2;
@@ -179,7 +189,7 @@ public UInt128 Fork()
179189
h2 += h1;
180190
h2 = h2 * 5 + 0x38495ab5;
181191
h1 ^= RotL(_c1, 31) * _c2;
182-
return new UInt128(h1, h2);
192+
return new RowId(h1, h2);
183193
}
184194

185195
/// <summary>
@@ -188,18 +198,18 @@ public UInt128 Fork()
188198
/// that were all zeros.
189199
/// </summary>
190200
[MethodImpl(MethodImplOptions.AggressiveInlining)]
191-
public UInt128 Next()
201+
public RowId Next()
192202
{
193-
ulong h1 = Lo;
194-
ulong h2 = Hi;
203+
ulong h1 = Low;
204+
ulong h2 = High;
195205
// Here it's as if k1=0, k2=0.
196206
h1 = RotL(h1, 27);
197207
h1 += h2;
198208
h1 = h1 * 5 + 0x52dce729;
199209
h2 = RotL(h2, 31);
200210
h2 += h1;
201211
h2 = h2 * 5 + 0x38495ab5;
202-
return new UInt128(h1, h2);
212+
return new RowId(h1, h2);
203213
}
204214

205215
/// <summary>
@@ -210,14 +220,14 @@ public UInt128 Next()
210220
/// <param name="other"></param>
211221
/// <returns></returns>
212222
[MethodImpl(MethodImplOptions.AggressiveInlining)]
213-
public UInt128 Combine(UInt128 other)
223+
public RowId Combine(RowId other)
214224
{
215-
var h1 = Lo;
216-
var h2 = Hi;
225+
var h1 = Low;
226+
var h2 = High;
217227

218228
other = other.Fork();
219-
ulong k1 = other.Lo; // First 8 bytes.
220-
ulong k2 = other.Hi; // Second 8 bytes.
229+
ulong k1 = other.Low; // First 8 bytes.
230+
ulong k2 = other.High; // Second 8 bytes.
221231

222232
k1 *= _c1;
223233
k1 = RotL(k1, 31);
@@ -235,7 +245,7 @@ public UInt128 Combine(UInt128 other)
235245
h2 += h1;
236246
h2 = h2 * 5 + 0x38495ab5;
237247

238-
return new UInt128(h1, h2);
248+
return new RowId(h1, h2);
239249
}
240250
#endregion
241251
}

src/Microsoft.ML.Core/Data/SynchronizedCursorBase.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,6 @@ protected override void Dispose(bool disposing)
6262

6363
public sealed override RowCursor GetRootCursor() => _root;
6464

65-
public sealed override ValueGetter<UInt128> GetIdGetter() => Input.GetIdGetter();
65+
public sealed override ValueGetter<RowId> GetIdGetter() => Input.GetIdGetter();
6666
}
6767
}

src/Microsoft.ML.Core/Data/WrappingRow.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ internal abstract class WrappingRow : Row
2323

2424
public sealed override long Batch => Input.Batch;
2525
public sealed override long Position => Input.Position;
26-
public override ValueGetter<UInt128> GetIdGetter() => Input.GetIdGetter();
26+
public override ValueGetter<RowId> GetIdGetter() => Input.GetIdGetter();
2727

2828
[BestFriend]
2929
private protected WrappingRow(Row input)

src/Microsoft.ML.Data/Data/Conversion.cs

+7-7
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ namespace Microsoft.ML.Runtime.Data.Conversion
3030
using U2 = UInt16;
3131
using U4 = UInt32;
3232
using U8 = UInt64;
33-
using UG = UInt128;
33+
using UG = RowId;
3434

3535
public delegate bool TryParseMapper<T>(in TX src, out T dst);
3636

@@ -889,31 +889,31 @@ public ValueGetter<T> GetNAOrDefaultGetter<T>(ColumnType type)
889889
public void Convert(in U2 src, ref U1 dst) => dst = src <= U1.MaxValue ? (U1)src : (U1)0;
890890
public void Convert(in U4 src, ref U1 dst) => dst = src <= U1.MaxValue ? (U1)src : (U1)0;
891891
public void Convert(in U8 src, ref U1 dst) => dst = src <= U1.MaxValue ? (U1)src : (U1)0;
892-
public void Convert(in UG src, ref U1 dst) => dst = src.Hi == 0 && src.Lo <= U1.MaxValue ? (U1)src.Lo : (U1)0;
892+
public void Convert(in UG src, ref U1 dst) => dst = src.High == 0 && src.Low <= U1.MaxValue ? (U1)src.Low : (U1)0;
893893
#endregion ToU1
894894

895895
#region ToU2
896896
public void Convert(in U1 src, ref U2 dst) => dst = src;
897897
public void Convert(in U2 src, ref U2 dst) => dst = src;
898898
public void Convert(in U4 src, ref U2 dst) => dst = src <= U2.MaxValue ? (U2)src : (U2)0;
899899
public void Convert(in U8 src, ref U2 dst) => dst = src <= U2.MaxValue ? (U2)src : (U2)0;
900-
public void Convert(in UG src, ref U2 dst) => dst = src.Hi == 0 && src.Lo <= U2.MaxValue ? (U2)src.Lo : (U2)0;
900+
public void Convert(in UG src, ref U2 dst) => dst = src.High == 0 && src.Low <= U2.MaxValue ? (U2)src.Low : (U2)0;
901901
#endregion ToU2
902902

903903
#region ToU4
904904
public void Convert(in U1 src, ref U4 dst) => dst = src;
905905
public void Convert(in U2 src, ref U4 dst) => dst = src;
906906
public void Convert(in U4 src, ref U4 dst) => dst = src;
907907
public void Convert(in U8 src, ref U4 dst) => dst = src <= U4.MaxValue ? (U4)src : (U4)0;
908-
public void Convert(in UG src, ref U4 dst) => dst = src.Hi == 0 && src.Lo <= U4.MaxValue ? (U4)src.Lo : (U4)0;
908+
public void Convert(in UG src, ref U4 dst) => dst = src.High == 0 && src.Low <= U4.MaxValue ? (U4)src.Low : (U4)0;
909909
#endregion ToU4
910910

911911
#region ToU8
912912
public void Convert(in U1 src, ref U8 dst) => dst = src;
913913
public void Convert(in U2 src, ref U8 dst) => dst = src;
914914
public void Convert(in U4 src, ref U8 dst) => dst = src;
915915
public void Convert(in U8 src, ref U8 dst) => dst = src;
916-
public void Convert(in UG src, ref U8 dst) => dst = src.Hi == 0 ? src.Lo : (U8)0;
916+
public void Convert(in UG src, ref U8 dst) => dst = src.High == 0 ? src.Low : (U8)0;
917917
#endregion ToU8
918918

919919
#region ToUG
@@ -969,7 +969,7 @@ public ValueGetter<T> GetNAOrDefaultGetter<T>(ColumnType type)
969969
public void Convert(in U2 src, ref SB dst) => ClearDst(ref dst).Append(src);
970970
public void Convert(in U4 src, ref SB dst) => ClearDst(ref dst).Append(src);
971971
public void Convert(in U8 src, ref SB dst) => ClearDst(ref dst).Append(src);
972-
public void Convert(in UG src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("0x{0:x16}{1:x16}", src.Hi, src.Lo); }
972+
public void Convert(in UG src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("0x{0:x16}{1:x16}", src.High, src.Low); }
973973
public void Convert(in R4 src, ref SB dst) { ClearDst(ref dst); if (R4.IsNaN(src)) dst.AppendFormat(CultureInfo.InvariantCulture, "{0}", "?"); else dst.AppendFormat(CultureInfo.InvariantCulture, "{0:R}", src); }
974974
public void Convert(in R8 src, ref SB dst) { ClearDst(ref dst); if (R8.IsNaN(src)) dst.AppendFormat(CultureInfo.InvariantCulture, "{0}", "?"); else dst.AppendFormat(CultureInfo.InvariantCulture, "{0:G17}", src); }
975975
public void Convert(in BL src, ref SB dst)
@@ -1057,7 +1057,7 @@ public bool TryParse(in TX src, out U8 dst)
10571057
}
10581058

10591059
/// <summary>
1060-
/// A parse method that transforms a 34-length string into a <see cref="UInt128"/>.
1060+
/// A parse method that transforms a 34-length string into a <see cref="RowId"/>.
10611061
/// </summary>
10621062
/// <param name="src">What should be a 34-length hexadecimal representation, including a 0x prefix,
10631063
/// of the 128-bit number</param>

0 commit comments

Comments
 (0)