Skip to content

Commit c9370c7

Browse files
committed
Introduce ReadOnlyVBuffer and use it all over.
1 parent bee7f17 commit c9370c7

33 files changed

+570
-451
lines changed

src/Microsoft.ML.Core/Data/VBuffer.cs

+168-73
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,159 @@
88

99
namespace Microsoft.ML.Runtime.Data
1010
{
11+
/// <summary>
/// A readonly buffer that supports both dense and sparse representations. This is the
/// readonly representation type for all VectorType instances used as a guarantee that
/// the buffer cannot be modified.
/// </summary>
public readonly struct ReadOnlyVBuffer<T>
{
    // Backing arrays. They are stored by reference (not copied) by the constructor and are
    // only ever exposed to callers as read-only spans.
    private readonly T[] _values;
    private readonly int[] _indices;

    /// <summary>
    /// The logical length of the buffer.
    /// </summary>
    public readonly int Length;

    /// <summary>
    /// The number of items explicitly represented. This is == Length when the representation
    /// is dense and &lt; Length when sparse.
    /// </summary>
    public readonly int Count;

    /// <summary>
    /// The values. Only the first Count of these are valid.
    /// </summary>
    public ReadOnlySpan<T> Values => _values;

    /// <summary>
    /// The indices. For a dense representation, Indices is not used. For a sparse representation
    /// it is parallel to values and specifies the logical indices for the corresponding values.
    /// </summary>
    public ReadOnlySpan<int> Indices => _indices;

    /// <summary>
    /// Equivalent to Count == Length.
    /// </summary>
    public bool IsDense => Count == Length;

    /// <summary>
    /// Wraps the given arrays without copying them.
    /// </summary>
    /// <param name="logicalLength">The logical length of the buffer; asserted to be non-negative.</param>
    /// <param name="valuesCount">The number of explicitly represented items; asserted to be in [0, logicalLength].</param>
    /// <param name="values">The explicit values; asserted to hold at least <paramref name="valuesCount"/> items.</param>
    /// <param name="indices">The logical indices of the explicit values; asserted to hold at least
    /// <paramref name="valuesCount"/> items unless the buffer is dense.</param>
    internal ReadOnlyVBuffer(int logicalLength, int valuesCount, T[] values, int[] indices)
    {
        Contracts.Assert(logicalLength >= 0);
        Contracts.Assert(0 <= valuesCount && valuesCount <= logicalLength);
        Contracts.Assert(Utils.Size(values) >= valuesCount);
        Contracts.Assert(valuesCount == logicalLength || Utils.Size(indices) >= valuesCount);

        Length = logicalLength;
        Count = valuesCount;
        _values = values;
        _indices = indices;
    }

    /// <summary>
    /// Stores in <paramref name="dst"/> the item at logical position <paramref name="slot"/>:
    /// the explicitly represented value when there is one, and default(T) otherwise.
    /// </summary>
    /// <param name="slot">The logical index; checked to be in [0, Length).</param>
    /// <param name="dst">Receives the value.</param>
    public void GetItemOrDefault(int slot, ref T dst)
    {
        Contracts.CheckParam(0 <= slot && slot < Length, nameof(slot));

        if (IsDense)
            dst = Values[slot];
        else if (Count > 0 && Utils.TryFindIndexSorted(Indices, 0, Count, slot, out int index))
            dst = Values[index];
        else
            dst = default;
    }

    /// <summary>
    /// Copy from this buffer to the given destination.
    /// </summary>
    /// <remarks>
    /// The representation (dense or sparse) is preserved. The arrays currently held by
    /// <paramref name="dst"/> are reused when they are large enough; otherwise new arrays
    /// are allocated.
    /// </remarks>
    public void CopyTo(ref VBuffer<T> dst)
    {
        var values = dst.Values;
        var indices = dst.Indices;
        if (IsDense)
        {
            if (Length > 0)
            {
                if (Utils.Size(values) < Length)
                    values = new T[Length];
                Values.Slice(0, Length).CopyTo(values);
            }
            dst = new VBuffer<T>(Length, values, indices);
            Contracts.Assert(dst.IsDense);
        }
        else
        {
            if (Count > 0)
            {
                if (Utils.Size(values) < Count)
                    values = new T[Count];
                if (Utils.Size(indices) < Count)
                    indices = new int[Count];
                Values.Slice(0, Count).CopyTo(values);
                Indices.Slice(0, Count).CopyTo(indices);
            }
            dst = new VBuffer<T>(Length, Count, values, indices);
        }
    }

    /// <summary>
    /// Copy from this buffer to the given destination, forcing a dense representation.
    /// </summary>
    /// <remarks>
    /// The values array currently held by <paramref name="dst"/> is reused when it is large
    /// enough; its indices array is passed through unchanged.
    /// </remarks>
    public void CopyToDense(ref VBuffer<T> dst)
    {
        var values = dst.Values;
        if (Utils.Size(values) < Length)
            values = new T[Length];

        if (!IsDense)
            CopyTo(values);
        else if (Length > 0)
            Values.Slice(0, Length).CopyTo(values);
        dst = new VBuffer<T>(Length, values, dst.Indices);
    }

    /// <summary>
    /// Copy from this buffer to the given destination array. This "densifies".
    /// </summary>
    public void CopyTo(T[] dst)
    {
        CopyTo(dst, 0);
    }

    /// <summary>
    /// Copy from this buffer to the given destination array, starting at position
    /// <paramref name="ivDst"/>. This "densifies": exactly Length items are written.
    /// </summary>
    /// <param name="dst">The destination array; must have room for Length items starting at <paramref name="ivDst"/>.</param>
    /// <param name="ivDst">The position in <paramref name="dst"/> at which to start writing.</param>
    /// <param name="defaultValue">The value written for every slot that is not explicitly
    /// represented in a sparse buffer.</param>
    public void CopyTo(T[] dst, int ivDst, T defaultValue = default(T))
    {
        Contracts.CheckParam(0 <= ivDst && ivDst <= Utils.Size(dst) - Length, nameof(dst), "dst is not large enough");

        if (Length == 0)
            return;
        if (IsDense)
        {
            Values.Slice(0, Length).CopyTo(dst.AsSpan(ivDst));
            return;
        }

        if (Count == 0)
        {
            // Every slot is implicit, so every slot gets defaultValue. Clearing the range
            // instead would write default(T) and silently ignore a caller-supplied defaultValue.
            dst.AsSpan(ivDst, Length).Fill(defaultValue);
            return;
        }

        // Walk the explicit entries in index order, padding the gaps between them
        // (and the tail after the last one) with defaultValue.
        int iv = 0;
        for (int islot = 0; islot < Count; islot++)
        {
            int slot = Indices[islot];
            Contracts.Assert(slot >= iv);
            while (iv < slot)
                dst[ivDst + iv++] = defaultValue;
            Contracts.Assert(iv == slot);
            dst[ivDst + iv++] = Values[islot];
        }
        while (iv < Length)
            dst[ivDst + iv++] = defaultValue;
    }
}
163+
11164
/// <summary>
12165
/// A buffer that supports both dense and sparse representations. This is the
13166
/// representation type for all VectorType instances. When an instance of this
@@ -102,48 +255,15 @@ public VBuffer(int length, int count, T[] values, int[] indices)
102255
/// </summary>
103256
public void CopyToDense(ref VBuffer<T> dst)
104257
{
105-
var values = dst.Values;
106-
if (Utils.Size(values) < Length)
107-
values = new T[Length];
108-
109-
if (!IsDense)
110-
CopyTo(values);
111-
else if (Length > 0)
112-
Array.Copy(Values, values, Length);
113-
dst = new VBuffer<T>(Length, values, dst.Indices);
258+
CreateReadOnly().CopyToDense(ref dst);
114259
}
115260

116261
/// <summary>
117262
/// Copy from this buffer to the given destination.
118263
/// </summary>
119264
public void CopyTo(ref VBuffer<T> dst)
120265
{
121-
var values = dst.Values;
122-
var indices = dst.Indices;
123-
if (IsDense)
124-
{
125-
if (Length > 0)
126-
{
127-
if (Utils.Size(values) < Length)
128-
values = new T[Length];
129-
Array.Copy(Values, values, Length);
130-
}
131-
dst = new VBuffer<T>(Length, values, indices);
132-
Contracts.Assert(dst.IsDense);
133-
}
134-
else
135-
{
136-
if (Count > 0)
137-
{
138-
if (Utils.Size(values) < Count)
139-
values = new T[Count];
140-
if (Utils.Size(indices) < Count)
141-
indices = new int[Count];
142-
Array.Copy(Values, values, Count);
143-
Array.Copy(Indices, indices, Count);
144-
}
145-
dst = new VBuffer<T>(Length, Count, values, indices);
146-
}
266+
CreateReadOnly().CopyTo(ref dst);
147267
}
148268

149269
/// <summary>
@@ -377,34 +497,7 @@ public void CopyTo(T[] dst)
377497

378498
public void CopyTo(T[] dst, int ivDst, T defaultValue = default(T))
379499
{
380-
Contracts.CheckParam(0 <= ivDst && ivDst <= Utils.Size(dst) - Length, nameof(dst), "dst is not large enough");
381-
382-
if (Length == 0)
383-
return;
384-
if (IsDense)
385-
{
386-
Array.Copy(Values, 0, dst, ivDst, Length);
387-
return;
388-
}
389-
390-
if (Count == 0)
391-
{
392-
Array.Clear(dst, ivDst, Length);
393-
return;
394-
}
395-
396-
int iv = 0;
397-
for (int islot = 0; islot < Count; islot++)
398-
{
399-
int slot = Indices[islot];
400-
Contracts.Assert(slot >= iv);
401-
while (iv < slot)
402-
dst[ivDst + iv++] = defaultValue;
403-
Contracts.Assert(iv == slot);
404-
dst[ivDst + iv++] = Values[islot];
405-
}
406-
while (iv < Length)
407-
dst[ivDst + iv++] = defaultValue;
500+
CreateReadOnly().CopyTo(dst, ivDst, defaultValue);
408501
}
409502

410503
/// <summary>
@@ -441,15 +534,7 @@ public IEnumerable<T> DenseValues()
441534

442535
public void GetItemOrDefault(int slot, ref T dst)
443536
{
444-
Contracts.CheckParam(0 <= slot && slot < Length, nameof(slot));
445-
446-
int index;
447-
if (IsDense)
448-
dst = Values[slot];
449-
else if (Count > 0 && Indices.TryFindIndexSorted(0, Count, slot, out index))
450-
dst = Values[index];
451-
else
452-
dst = default(T);
537+
CreateReadOnly().GetItemOrDefault(slot, ref dst);
453538
}
454539

455540
public T GetItemOrDefault(int slot)
@@ -459,9 +544,19 @@ public T GetItemOrDefault(int slot)
459544
int index;
460545
if (IsDense)
461546
return Values[slot];
462-
if (Count > 0 && Indices.TryFindIndexSorted(0, Count, slot, out index))
547+
if (Count > 0 && Utils.TryFindIndexSorted(Indices, 0, Count, slot, out index))
463548
return Values[index];
464549
return default(T);
465550
}
551+
552+
/// <summary>
/// Implicitly converts a <see cref="VBuffer{T}"/> to its read-only counterpart, so a
/// <see cref="VBuffer{T}"/> can be passed wherever a <see cref="ReadOnlyVBuffer{T}"/> is expected.
/// </summary>
public static implicit operator ReadOnlyVBuffer<T>(VBuffer<T> buffer)
    => buffer.CreateReadOnly();
556+
557+
/// <summary>
/// Builds a <see cref="ReadOnlyVBuffer{T}"/> from this buffer's Length, Count, Values and
/// Indices. The arrays are handed over as-is rather than copied.
/// </summary>
private ReadOnlyVBuffer<T> CreateReadOnly()
    => new ReadOnlyVBuffer<T>(Length, Count, Values, Indices);
466561
}
467562
}

src/Microsoft.ML.Core/Utilities/FloatUtils.cs

+5-3
Original file line numberDiff line numberDiff line change
@@ -456,12 +456,14 @@ public static bool IsFinite(Double[] values, int count)
456456
// REVIEW: Consider implementing using SSE.
457457
public static bool IsFinite(Single[] values, int count)
458458
{
459-
Contracts.Assert(count >= 0);
460-
Contracts.Assert(Utils.Size(values) >= count);
459+
return IsFinite(values.AsSpan(0, count));
460+
}
461461

462+
public static bool IsFinite(ReadOnlySpan<Single> values)
463+
{
462464
// Assuming that non-finites are rare, this is faster than testing on each item.
463465
Single sum = 0;
464-
for (int i = 0; i < count; i++)
466+
for (int i = 0; i < values.Length; i++)
465467
{
466468
var v = values[i];
467469
sum += v - v;

src/Microsoft.ML.Core/Utilities/Utils.cs

+25-7
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,8 @@ public static void Push<T>(ref Stack<T> stack, T item)
186186
/// In case of duplicates it returns the index of the first one.
187187
/// It guarantees that items before the returned index are &lt; value, while those at and after the returned index are &gt;= value.
188188
/// </summary>
189-
public static int FindIndexSorted(this int[] input, int value)
189+
public static int FindIndexSorted(this ReadOnlySpan<int> input, int value)
190190
{
191-
Contracts.AssertValue(input);
192191
return FindIndexSorted(input, 0, input.Length, value);
193192
}
194193

@@ -233,9 +232,9 @@ public static int FindIndexSorted(this Double[] input, Double value)
233232
/// <c>index</c> parameter, and returns whether that index is a valid index
234233
/// pointing to a value equal to the input parameter <c>value</c>.
235234
/// </summary>
236-
public static bool TryFindIndexSorted(this int[] input, int min, int lim, int value, out int index)
235+
public static bool TryFindIndexSorted(ReadOnlySpan<int> input, int min, int lim, int value, out int index)
237236
{
238-
index = input.FindIndexSorted(min, lim, value);
237+
index = FindIndexSorted(input, min, lim, value);
239238
return index < lim && input[index] == value;
240239
}
241240

@@ -245,10 +244,14 @@ public static bool TryFindIndexSorted(this int[] input, int min, int lim, int va
245244
/// In case of duplicates it returns the index of the first one.
246245
/// It guarantees that items before the returned index are &lt; value, while those at and after the returned index are &gt;= value.
247246
/// </summary>
248-
public static int FindIndexSorted(this int[] input, int min, int lim, int value)
247+
public static int FindIndexSorted(int[] input, int min, int lim, int value)
249248
{
250-
Contracts.AssertValueOrNull(input);
251-
Contracts.Assert(0 <= min & min <= lim & lim <= Utils.Size(input));
249+
return FindIndexSorted(input.AsSpan(), min, lim, value);
250+
}
251+
252+
public static int FindIndexSorted(ReadOnlySpan<int> input, int min, int lim, int value)
253+
{
254+
Contracts.Assert(0 <= min & min <= lim & lim <= input.Length);
252255

253256
int minCur = min;
254257
int limCur = lim;
@@ -1087,5 +1090,20 @@ public static string GetDescription(this Enum value)
10871090
}
10881091
return null;
10891092
}
1093+
1094+
/// <summary>
/// Returns the number of elements of <paramref name="source"/> for which
/// <paramref name="predicate"/> returns true.
/// </summary>
/// <param name="source">The span whose elements are tested.</param>
/// <param name="predicate">The test applied to each element; checked to be non-null.</param>
public static int Count<TSource>(this ReadOnlySpan<TSource> source, Func<TSource, bool> predicate)
{
    Contracts.CheckValue(predicate, nameof(predicate));

    int matches = 0;
    foreach (TSource item in source)
    {
        if (predicate(item))
            matches++;
    }
    return matches;
}
10901108
}
10911109
}

0 commit comments

Comments
 (0)