Skip to content

Commit 40ce785

Browse files
committed
changing the signature for IsColumnActive to take a DataViewSchema.Column instead of the column index.
1 parent f5b4664 commit 40ce785

File tree

53 files changed

+274
-244
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+274
-244
lines changed

src/Microsoft.Data.DataView/IDataView.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ public abstract class DataViewRow : IDisposable
132132
public abstract ValueGetter<DataViewRowId> GetIdGetter();
133133

134134
/// <summary>
135-
/// Returns whether the column with the given index, is active in this row.
135+
/// Returns whether the given column is active in this row.
136136
/// </summary>
137-
public abstract bool IsColumnActive(int columnIndex);
137+
public abstract bool IsColumnActive(DataViewSchema.Column column);
138138

139139
/// <summary>
140140
/// Returns a value getter delegate to fetch the value of the given <paramref name="column"/>, from the row.

src/Microsoft.ML.Core/Data/AnnotationUtils.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ public AnnotationRow(DataViewSchema.Annotations annotations)
475475
/// <summary>
476476
/// Returns whether the given column is active in this row.
477477
/// </summary>
478-
public override bool IsColumnActive(int columnIndex) => true;
478+
public override bool IsColumnActive(DataViewSchema.Column column) => true;
479479
}
480480

481481
/// <summary>

src/Microsoft.ML.Core/Data/LinkedRowRootCursorBase.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ protected LinkedRowRootCursorBase(IChannelProvider provider, DataViewRowCursor i
3232
/// <summary>
3333
/// Returns whether the given column is active in this row.
3434
/// </summary>
35-
public sealed override bool IsColumnActive(int columnIndex)
35+
public sealed override bool IsColumnActive(DataViewSchema.Column column)
3636
{
37-
Ch.Check(0 <= columnIndex && columnIndex < Schema.Count);
38-
return _active == null || _active[columnIndex];
37+
Ch.Check(column.Index < Schema.Count);
38+
return _active == null || _active[column.Index];
3939
}
4040

4141
/// <summary>

src/Microsoft.ML.Core/Utilities/Utils.cs

+36
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,42 @@ public static T[] BuildArray<T>(int length, Func<int, T> func)
776776
return result;
777777
}
778778

779+
/// <summary>
780+
/// Given a predicate, over a range of values defined by a limit calculate
781+
/// first the values for which that predicate was true, and second an inverse
782+
/// map.
783+
/// </summary>
784+
/// <param name="lim">Indicates the exclusive upper bound on the tested values</param>
785+
/// <param name="schema">The input schema where the predicate can check if columns are active.</param>
786+
/// <param name="pred">The predicate to test for various values</param>
787+
/// <param name="map">An ascending array of values from 0 inclusive
788+
/// to <paramref name="lim"/> exclusive, holding all values for which
789+
/// <paramref name="pred"/> is true</param>
790+
/// <param name="invMap">Forms an inverse mapping of <paramref name="map"/>,
791+
/// so that <c><paramref name="invMap"/>[<paramref name="map"/>[i]] == i</c>,
792+
/// and for other entries not appearing in <paramref name="map"/>,
793+
/// <c><paramref name="invMap"/>[i] == -1</c></param>
794+
public static void BuildSubsetMaps(int lim, DataViewSchema schema, Func<DataViewSchema.Column, bool> pred, out int[] map, out int[] invMap)
795+
{
796+
Contracts.CheckParam(lim >= 0, nameof(lim));
797+
Contracts.CheckValue(pred, nameof(pred));
798+
// REVIEW: Better names?
799+
List<int> mapList = new List<int>();
800+
invMap = new int[lim];
801+
for (int c = 0; c < lim; ++c)
802+
{
803+
if (!pred(schema[c]))
804+
{
805+
invMap[c] = -1;
806+
continue;
807+
}
808+
invMap[c] = mapList.Count;
809+
mapList.Add(c);
810+
}
811+
map = mapList.ToArray();
812+
}
813+
814+
//SENJA: REMOVE BEFORE COMMIT
779815
/// <summary>
780816
/// Given a predicate, over a range of values defined by a limit calculate
781817
/// first the values for which that predicate was true, and second an inverse

src/Microsoft.ML.Data/Data/DataViewUtils.cs

+21-21
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,14 @@ public static DataViewRowCursor[] CreateSplitCursors(IChannelProvider provider,
173173
/// Return whether all the active columns, as determined by the predicate, are
174174
/// cachable - either primitive types or vector types.
175175
/// </summary>
176-
public static bool AllCacheable(DataViewSchema schema, Func<int, bool> predicate)
176+
public static bool AllCacheable(DataViewSchema schema, Func<DataViewSchema.Column, bool> predicate)
177177
{
178178
Contracts.CheckValue(schema, nameof(schema));
179179
Contracts.CheckValue(predicate, nameof(predicate));
180180

181181
for (int col = 0; col < schema.Count; col++)
182182
{
183-
if (!predicate(col))
183+
if (!predicate(schema[col]))
184184
continue;
185185
var type = schema[col].Type;
186186
if (!IsCacheable(type))
@@ -239,10 +239,10 @@ public static bool SameSchemaAndActivity(DataViewRowCursor[] cursors)
239239
// All cursors must have the same columns active.
240240
for (int c = 0; c < schema.Count; ++c)
241241
{
242-
bool active = firstCursor.IsColumnActive(c);
242+
bool active = firstCursor.IsColumnActive(schema[c]);
243243
for (int i = 1; i < cursors.Length; ++i)
244244
{
245-
if (cursors[i].IsColumnActive(c) != active)
245+
if (cursors[i].IsColumnActive(schema[c]) != active)
246246
return false;
247247
}
248248
}
@@ -334,7 +334,7 @@ private static DataViewRowCursor ConsolidateCore(IChannelProvider provider, Data
334334

335335
int[] activeToCol;
336336
int[] colToActive;
337-
Utils.BuildSubsetMaps(schema.Count, cursor.IsColumnActive, out activeToCol, out colToActive);
337+
Utils.BuildSubsetMaps(schema.Count, schema, cursor.IsColumnActive, out activeToCol, out colToActive);
338338

339339
// Because the schema of the consolidator is not necessary fixed, we are merely
340340
// opportunistic about buffer sharing, from cursoring to cursoring. If we can do
@@ -517,7 +517,7 @@ private DataViewRowCursor[] SplitCore(IChannelProvider ch, DataViewRowCursor inp
517517
// Create the mappings between active column index, and column index.
518518
int[] activeToCol;
519519
int[] colToActive;
520-
Utils.BuildSubsetMaps(_schema.Count, input.IsColumnActive, out activeToCol, out colToActive);
520+
Utils.BuildSubsetMaps(_schema.Count, _schema, input.IsColumnActive, out activeToCol, out colToActive);
521521

522522
Func<DataViewRowCursor, int, InPipe> createFunc = CreateInPipe<int>;
523523
var inGenMethod = createFunc.GetMethodInfo().GetGenericMethodDefinition();
@@ -534,14 +534,14 @@ private DataViewRowCursor[] SplitCore(IChannelProvider ch, DataViewRowCursor inp
534534
{
535535
ch.Assert(0 <= activeToCol[c] && activeToCol[c] < _schema.Count);
536536
ch.Assert(c == 0 || activeToCol[c - 1] < activeToCol[c]);
537-
ch.Assert(input.IsColumnActive(activeToCol[c]));
538-
var type = input.Schema[activeToCol[c]].Type;
539-
ch.Assert(type.IsCacheable());
537+
var column = input.Schema[activeToCol[c]];
538+
ch.Assert(input.IsColumnActive(column));
539+
ch.Assert(column.Type.IsCacheable());
540540
arguments[1] = activeToCol[c];
541541
var inPipe = inPipes[c] =
542-
(InPipe)inGenMethod.MakeGenericMethod(type.RawType).Invoke(this, arguments);
542+
(InPipe)inGenMethod.MakeGenericMethod(column.Type.RawType).Invoke(this, arguments);
543543
for (int i = 0; i < cthd; ++i)
544-
outPipes[i][c] = inPipe.CreateOutPipe(type);
544+
outPipes[i][c] = inPipe.CreateOutPipe(column.Type);
545545
}
546546
// Beyond the InPipes corresponding to column values, we have extra side info pipes.
547547
int idIdx = activeToCol.Length + (int)ExtraIndex.Id;
@@ -1105,10 +1105,10 @@ protected override bool MoveNextCore()
11051105
/// <summary>
11061106
/// Returns whether the given column is active in this row.
11071107
/// </summary>
1108-
public override bool IsColumnActive(int columnIndex)
1108+
public override bool IsColumnActive(DataViewSchema.Column column)
11091109
{
1110-
Ch.CheckParam(0 <= columnIndex && columnIndex < _colToActive.Length, nameof(columnIndex));
1111-
return _colToActive[columnIndex] >= 0;
1110+
Ch.CheckParam(column.Index < _colToActive.Length, nameof(column));
1111+
return _colToActive[column.Index] >= 0;
11121112
}
11131113

11141114
/// <summary>
@@ -1120,7 +1120,7 @@ public override bool IsColumnActive(int columnIndex)
11201120
/// <param name="column"> is the output column whose getter should be returned.</param>
11211121
public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
11221122
{
1123-
Ch.CheckParam(IsColumnActive(column.Index), nameof(column), "requested column not active.");
1123+
Ch.CheckParam(IsColumnActive(column), nameof(column), "requested column not active.");
11241124
Ch.CheckParam(column.Index < _colToActive.Length, nameof(column), "requested column is not active or valid for the Schema.");
11251125

11261126
var getter = _getters[_colToActive[column.Index]] as ValueGetter<TValue>;
@@ -1180,7 +1180,7 @@ public SynchronousConsolidatingCursor(IChannelProvider provider, DataViewRowCurs
11801180
_cursors = cursors;
11811181
_schema = _cursors[0].Schema;
11821182

1183-
Utils.BuildSubsetMaps(_schema.Count, _cursors[0].IsColumnActive, out _activeToCol, out _colToActive);
1183+
Utils.BuildSubsetMaps(_schema.Count, _schema, _cursors[0].IsColumnActive, out _activeToCol, out _colToActive);
11841184

11851185
Func<int, Delegate> func = CreateGetter<int>;
11861186
_methInfo = func.GetMethodInfo().GetGenericMethodDefinition();
@@ -1251,7 +1251,7 @@ private Delegate CreateGetter<T>(int col)
12511251
var cursor = _cursors[i];
12521252
Ch.AssertValue(cursor);
12531253
Ch.Assert(col < cursor.Schema.Count);
1254-
Ch.Assert(cursor.IsColumnActive(col));
1254+
Ch.Assert(cursor.IsColumnActive(Schema[col]));
12551255
Ch.Assert(type.Equals(cursor.Schema[col].Type));
12561256
getters[i] = _cursors[i].GetGetter<T>(cursor.Schema[col]);
12571257
}
@@ -1296,10 +1296,10 @@ protected override bool MoveNextCore()
12961296
/// <summary>
12971297
/// Returns whether the given column is active in this row.
12981298
/// </summary>
1299-
public override bool IsColumnActive(int columnIndex)
1299+
public override bool IsColumnActive(DataViewSchema.Column column)
13001300
{
1301-
Ch.CheckParam(0 <= columnIndex && columnIndex < _colToActive.Length, nameof(columnIndex));
1302-
return _colToActive[columnIndex] >= 0;
1301+
Ch.CheckParam(column.Index < _colToActive.Length, nameof(column));
1302+
return _colToActive[column.Index] >= 0;
13031303
}
13041304

13051305
/// <summary>
@@ -1311,7 +1311,7 @@ public override bool IsColumnActive(int columnIndex)
13111311
/// <param name="column"> is the output column whose getter should be returned.</param>
13121312
public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
13131313
{
1314-
Ch.CheckParam(IsColumnActive(column.Index), nameof(column), "requested column not active");
1314+
Ch.CheckParam(IsColumnActive(column), nameof(column), "requested column not active");
13151315
Ch.CheckParam(column.Index < _colToActive.Length, nameof(column), "requested column not active or is invalid for the schema. ");
13161316

13171317
var getter = _getters[_colToActive[column.Index]] as ValueGetter<TValue>;

src/Microsoft.ML.Data/Data/RowCursorUtils.cs

+13-13
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public static Delegate GetGetterAsDelegate(DataViewRow row, int col)
2828
{
2929
Contracts.CheckValue(row, nameof(row));
3030
Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col));
31-
Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active");
31+
Contracts.CheckParam(row.IsColumnActive(row.Schema[col]), nameof(col), "column was not active");
3232

3333
Func<DataViewRow, int, Delegate> getGetter = GetGetterAsDelegateCore<int>;
3434
return Utils.MarshalInvoke(getGetter, row.Schema[col].Type.RawType, row, col);
@@ -51,7 +51,7 @@ public static Delegate GetGetterAs(DataViewType typeDst, DataViewRow row, int co
5151
Contracts.CheckParam(typeDst is PrimitiveDataViewType, nameof(typeDst));
5252
Contracts.CheckValue(row, nameof(row));
5353
Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col));
54-
Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active");
54+
Contracts.CheckParam(row.IsColumnActive(row.Schema[col]), nameof(col), "column was not active");
5555

5656
var typeSrc = row.Schema[col].Type;
5757
Contracts.Check(typeSrc is PrimitiveDataViewType, "Source column type must be primitive");
@@ -72,7 +72,7 @@ public static ValueGetter<TDst> GetGetterAs<TDst>(DataViewType typeDst, DataView
7272
Contracts.CheckParam(typeDst.RawType == typeof(TDst), nameof(typeDst));
7373
Contracts.CheckValue(row, nameof(row));
7474
Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col));
75-
Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active");
75+
Contracts.CheckParam(row.IsColumnActive(row.Schema[col]), nameof(col), "column was not active");
7676

7777
var typeSrc = row.Schema[col].Type;
7878
Contracts.Check(typeSrc is PrimitiveDataViewType, "Source column type must be primitive");
@@ -117,7 +117,7 @@ public static ValueGetter<StringBuilder> GetGetterAsStringBuilder(DataViewRow ro
117117
{
118118
Contracts.CheckValue(row, nameof(row));
119119
Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col));
120-
Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active");
120+
Contracts.CheckParam(row.IsColumnActive(row.Schema[col]), nameof(col), "column was not active");
121121

122122
var typeSrc = row.Schema[col].Type;
123123
Contracts.Check(typeSrc is PrimitiveDataViewType, "Source column type must be primitive");
@@ -150,7 +150,7 @@ public static Delegate GetVecGetterAs(PrimitiveDataViewType typeDst, DataViewRow
150150
Contracts.CheckValue(typeDst, nameof(typeDst));
151151
Contracts.CheckValue(row, nameof(row));
152152
Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col));
153-
Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active");
153+
Contracts.CheckParam(row.IsColumnActive(row.Schema[col]), nameof(col), "column was not active");
154154

155155
var typeSrc = row.Schema[col].Type as VectorType;
156156
Contracts.Check(typeSrc != null, "Source column type must be vector");
@@ -170,7 +170,7 @@ public static ValueGetter<VBuffer<TDst>> GetVecGetterAs<TDst>(PrimitiveDataViewT
170170
Contracts.CheckParam(typeDst.RawType == typeof(TDst), nameof(typeDst));
171171
Contracts.CheckValue(row, nameof(row));
172172
Contracts.CheckParam(0 <= col && col < row.Schema.Count, nameof(col));
173-
Contracts.CheckParam(row.IsColumnActive(col), nameof(col), "column was not active");
173+
Contracts.CheckParam(row.IsColumnActive(row.Schema[col]), nameof(col), "column was not active");
174174

175175
var typeSrc = row.Schema[col].Type as VectorType;
176176
Contracts.Check(typeSrc != null, "Source column type must be vector");
@@ -469,7 +469,7 @@ public static IDataView RowAsDataView(IHostEnvironment env, DataViewRow row)
469469
{
470470
Contracts.CheckValue(env, nameof(env));
471471
env.CheckValue(row, nameof(row));
472-
env.CheckParam(Enumerable.Range(0, row.Schema.Count).All(c => row.IsColumnActive(c)), nameof(row), "Some columns were inactive");
472+
env.CheckParam(Enumerable.Range(0, row.Schema.Count).All(c => row.IsColumnActive(row.Schema[c])), nameof(row), "Some columns were inactive");
473473
return new OneRowDataView(env, row);
474474
}
475475

@@ -512,7 +512,7 @@ public OneRowDataView(IHostEnvironment env, DataViewRow row)
512512
Contracts.AssertValue(env);
513513
_host = env.Register("OneRowDataView");
514514
_host.AssertValue(row);
515-
_host.Assert(Enumerable.Range(0, row.Schema.Count).All(c => row.IsColumnActive(c)));
515+
_host.Assert(Enumerable.Range(0, row.Schema.Count).All(c => row.IsColumnActive(row.Schema[c])));
516516

517517
_row = row;
518518
}
@@ -565,7 +565,7 @@ public Cursor(IHost host, OneRowDataView parent, bool[] active)
565565
public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
566566
{
567567
Ch.CheckParam(column.Index < Schema.Count, nameof(column));
568-
Ch.CheckParam(IsColumnActive(column.Index), nameof(column.Index), "Requested column is not active.");
568+
Ch.CheckParam(IsColumnActive(column), nameof(column.Index), "Requested column is not active.");
569569

570570
var getter = _parent._row.GetGetter<TValue>(column);
571571
return
@@ -579,13 +579,13 @@ public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column colu
579579
/// <summary>
580580
/// Returns whether the given column is active in this row.
581581
/// </summary>
582-
public override bool IsColumnActive(int columnIndex)
582+
public override bool IsColumnActive(DataViewSchema.Column column)
583583
{
584-
Ch.CheckParam(0 <= columnIndex && columnIndex < Schema.Count, nameof(columnIndex));
584+
Ch.CheckParam(column.Index < Schema.Count, nameof(column));
585585
// We present the "illusion" that this column is not active, even though it must be
586586
// in the input row.
587-
Ch.Assert(_parent._row.IsColumnActive(columnIndex));
588-
return _active[columnIndex];
587+
Ch.Assert(_parent._row.IsColumnActive(column));
588+
return _active[column.Index];
589589
}
590590

591591
public override ValueGetter<DataViewRowId> GetIdGetter()

src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1989,10 +1989,10 @@ public override Delegate GetGetter()
19891989
/// <summary>
19901990
/// Returns whether the given column is active in this row.
19911991
/// </summary>
1992-
public override bool IsColumnActive(int columnIndex)
1992+
public override bool IsColumnActive(DataViewSchema.Column column)
19931993
{
1994-
Ch.CheckParam(0 <= columnIndex && columnIndex < _colToActivesIndex.Length, nameof(columnIndex));
1995-
return _colToActivesIndex[columnIndex] >= 0;
1994+
Ch.CheckParam(column.Index < _colToActivesIndex.Length, nameof(column));
1995+
return _colToActivesIndex[column.Index] >= 0;
19961996
}
19971997

19981998
protected override bool MoveNextCore()

0 commit comments

Comments
 (0)