Skip to content

Creation of components through MLContext and cleanup (Onnx, Tensorflow, SelectColumn, KeytoBinVec, ValueMap) #2367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 7, 2019
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public static void OnnxTransformSample()
var mlContext = new MLContext();
var data = GetTensorData();
var idv = mlContext.Data.ReadFromEnumerable(data);
var pipeline = new OnnxScoringEstimator(mlContext, new[] { outputInfo.Key }, new[] { inputInfo.Key }, modelPath);
var pipeline = mlContext.Transforms.ApplyOnnxModel(modelPath, new[] { outputInfo.Key }, new[] { inputInfo.Key });

// Run the pipeline and get the transformed values
var transformedValues = pipeline.Fit(idv).Transform(idv);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public static void Run()
};

// Constructs the ValueMappingEstimator that makes up the ML.NET pipeline.
var pipeline = new ValueMappingEstimator<string, int>(mlContext, educationKeys, educationValues, ("EducationFeature", "Education"));
var pipeline = mlContext.Transforms.Conversion.ValueMap<string, int>(educationKeys, educationValues, ("EducationFeature", "Education"));

// Fits the ValueMappingEstimator and transforms the data, adding the EducationFeature column.
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public static void Run()
// Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings.
// The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back
// to the original value.
var pipeline = new ValueMappingEstimator<string, string>(mlContext, educationKeys, educationValues, true, ("EducationKeyType", "Education"))
var pipeline = mlContext.Transforms.Conversion.ValueMap<string, string>(educationKeys, educationValues, true, ("EducationKeyType", "Education"))
.Append(mlContext.Transforms.Conversion.MapKeyToValue(("EducationCategory", "EducationKeyType")));

// Fits the ValueMappingEstimator and transforms the data, adding the EducationKeyType column.
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/Commands/CrossValidationCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ private void RunCore(IChannel ch, string cmd)
"", ComponentFactoryUtils.CreateFromFunction<IDataView, IDataTransform>(
(env, input) =>
{
var args = new GenerateNumberTransform.Arguments();
var args = new GenerateNumberTransform.Options();
args.Columns = new[] { new GenerateNumberTransform.Column() { Name = DefaultColumnNames.Name }, };
args.UseCounter = true;
return new GenerateNumberTransform(env, args, input);
Expand Down Expand Up @@ -313,7 +313,7 @@ private string GetSplitColumn(IChannel ch, IDataView input, ref IDataView output
int inc = 0;
while (input.Schema.TryGetColumnIndex(stratificationColumn, out tmp))
stratificationColumn = string.Format("StratificationColumn_{0:000}", ++inc);
var keyGenArgs = new GenerateNumberTransform.Arguments();
var keyGenArgs = new GenerateNumberTransform.Options();
var col = new GenerateNumberTransform.Column();
col.Name = stratificationColumn;
keyGenArgs.Columns = new[] { col };
Expand Down Expand Up @@ -514,7 +514,7 @@ private FoldResult RunFold(int fold)
ITrainer trainer = _trainer.CreateComponent(host);

// Train pipe.
var trainFilter = new RangeFilter.Arguments();
var trainFilter = new RangeFilter.Options();
trainFilter.Column = _splitColumn;
trainFilter.Min = (Double)fold / _numFolds;
trainFilter.Max = (Double)(fold + 1) / _numFolds;
Expand All @@ -524,7 +524,7 @@ private FoldResult RunFold(int fold)
var trainData = _createExamples(host, ch, trainPipe, trainer);

// Test pipe.
var testFilter = new RangeFilter.Arguments();
var testFilter = new RangeFilter.Options();
testFilter.Column = trainFilter.Column;
testFilter.Min = trainFilter.Min;
testFilter.Max = trainFilter.Max;
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Commands/SaveDataCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ private void RunCore(IChannel ch)
// Send the first N lines to console.
if (Args.Rows > 0)
{
var args = new SkipTakeFilter.TakeArguments() { Count = Args.Rows };
var args = new SkipTakeFilter.TakeOptions() { Count = Args.Rows };
data = SkipTakeFilter.Create(Host, args, data);
}
var textSaver = saver as TextSaver;
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/DataLoadSave/DataOperationsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ public IDataView SkipRows(IDataView input, long count)
Environment.CheckValue(input, nameof(input));
Environment.CheckUserArg(count > 0, nameof(count), "Must be greater than zero.");

var options = new SkipTakeFilter.SkipArguments()
var options = new SkipTakeFilter.SkipOptions()
{
Count = count
};
Expand Down Expand Up @@ -270,7 +270,7 @@ public IDataView TakeRows(IDataView input, long count)
Environment.CheckValue(input, nameof(input));
Environment.CheckUserArg(count > 0, nameof(count), "Must be greater than zero.");

var options = new SkipTakeFilter.TakeArguments()
var options = new SkipTakeFilter.TakeOptions()
{
Count = count
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,10 @@ private void SaveTransposedData(IChannel ch, Stream stream, ITransposeDataView d

// First write out the no-row data, limited to these columns.
IDataView subdata = new ChooseColumnsByIndexTransform(_host,
new ChooseColumnsByIndexTransform.Arguments() { Indices = cols }, data);
new ChooseColumnsByIndexTransform.Options() { Indices = cols }, data);
// If we want the "dual mode" row-wise and slot-wise file, don't filter out anything.
if (!_writeRowData)
subdata = SkipTakeFilter.Create(_host, new SkipTakeFilter.TakeArguments() { Count = 0 }, subdata);
subdata = SkipTakeFilter.Create(_host, new SkipTakeFilter.TakeOptions() { Count = 0 }, subdata);

string msg = _writeRowData ? "row-wise data, schema, and metadata" : "schema and metadata";
viewAction(msg, subdata);
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataView/CacheDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ private static IDataView SelectCachableColumns(IDataView data, IHostEnvironment
return data;

// REVIEW: This can potentially cause hidden columns to become unhidden. See task 3739.
var args = new ChooseColumnsByIndexTransform.Arguments();
var args = new ChooseColumnsByIndexTransform.Options();
args.Drop = true;
args.Indices = columnsToDrop.ToArray();
return new ChooseColumnsByIndexTransform(env, args, data);
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DebuggerExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public static DataDebuggerPreview Preview(this IEstimator<ITransformer> estimato

using (var env = new LocalEnvironment(conc: 1))
{
var trainData = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeArguments { Count = maxTrainingRows }, data);
var trainData = SkipTakeFilter.Create(env, new SkipTakeFilter.TakeOptions { Count = maxTrainingRows }, data);
return new DataDebuggerPreview(estimator.Fit(trainData).Transform(data), maxRows);
}
}
Expand Down
21 changes: 11 additions & 10 deletions src/Microsoft.ML.Data/Dirty/ChooseColumnsByIndexTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,18 @@
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Model;

[assembly: LoadableClass(typeof(ChooseColumnsByIndexTransform), typeof(ChooseColumnsByIndexTransform.Arguments), typeof(SignatureDataTransform),
[assembly: LoadableClass(typeof(ChooseColumnsByIndexTransform), typeof(ChooseColumnsByIndexTransform.Options), typeof(SignatureDataTransform),
"", "ChooseColumnsByIndexTransform", "ChooseColumnsByIndex")]

[assembly: LoadableClass(typeof(ChooseColumnsByIndexTransform), null, typeof(SignatureLoadDataTransform),
"", ChooseColumnsByIndexTransform.LoaderSignature, ChooseColumnsByIndexTransform.LoaderSignatureOld)]

namespace Microsoft.ML.Data
{
public sealed class ChooseColumnsByIndexTransform : RowToRowTransformBase
[BestFriend]
internal sealed class ChooseColumnsByIndexTransform : RowToRowTransformBase
{
public sealed class Arguments
public sealed class Options
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Column indices to select", Name = "Index", ShortName = "ind")]
public int[] Indices;
Expand Down Expand Up @@ -59,17 +60,17 @@ private sealed class Bindings
// This transform's output schema.
internal Schema OutputSchema { get; }

internal Bindings(Arguments args, Schema sourceSchema)
internal Bindings(Options options, Schema sourceSchema)
{
Contracts.AssertValue(args);
Contracts.AssertValue(options);
Contracts.AssertValue(sourceSchema);

_sourceSchema = sourceSchema;

// Store user-specified arguments as the major state of this transform. Only the major states will
// be saved and all other attributes can be reconstructed from them.
_drop = args.Drop;
_selectedColumnIndexes = args.Indices;
_drop = options.Drop;
_selectedColumnIndexes = options.Indices;

// Compute the attributes actually used at runtime from those major states.
ComputeSources(_drop, _selectedColumnIndexes, _sourceSchema, out _sources);
Expand Down Expand Up @@ -194,12 +195,12 @@ private static VersionInfo GetVersionInfo()
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
public ChooseColumnsByIndexTransform(IHostEnvironment env, Arguments args, IDataView input)
public ChooseColumnsByIndexTransform(IHostEnvironment env, Options options, IDataView input)
: base(env, RegistrationName, input)
{
Host.CheckValue(args, nameof(args));
Host.CheckValue(options, nameof(options));

_bindings = new Bindings(args, Source.Schema);
_bindings = new Bindings(options, Source.Schema);
}

private ChooseColumnsByIndexTransform(IHost host, ModelLoadContext ctx, IDataView input)
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/EntryPoints/SchemaManipulation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public static CommonOutputs.TransformOutput ConcatColumns(IHostEnvironment env,
}

[TlcModule.EntryPoint(Name = "Transforms.ColumnSelector", Desc = "Selects a set of columns, dropping all others", UserName = "Select Columns")]
public static CommonOutputs.TransformOutput SelectColumns(IHostEnvironment env, ColumnSelectingTransformer.Arguments input)
public static CommonOutputs.TransformOutput SelectColumns(IHostEnvironment env, ColumnSelectingTransformer.Options input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register("SelectColumns");
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/EntryPoints/SelectRows.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace Microsoft.ML.EntryPoints
internal static class SelectRows
{
[TlcModule.EntryPoint(Name = "Transforms.RowRangeFilter", Desc = RangeFilter.Summary, UserName = RangeFilter.UserName, ShortName = RangeFilter.LoaderSignature)]
public static CommonOutputs.TransformOutput FilterByRange(IHostEnvironment env, RangeFilter.Arguments input)
public static CommonOutputs.TransformOutput FilterByRange(IHostEnvironment env, RangeFilter.Options input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register(RangeFilter.LoaderSignature);
Expand All @@ -28,7 +28,7 @@ public static CommonOutputs.TransformOutput FilterByRange(IHostEnvironment env,

[TlcModule.EntryPoint(Name = "Transforms.RowSkipFilter", Desc = SkipTakeFilter.SkipFilterSummary, UserName = SkipTakeFilter.SkipFilterUserName,
ShortName = SkipTakeFilter.SkipFilterShortName)]
public static CommonOutputs.TransformOutput SkipFilter(IHostEnvironment env, SkipTakeFilter.SkipArguments input)
public static CommonOutputs.TransformOutput SkipFilter(IHostEnvironment env, SkipTakeFilter.SkipOptions input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register("SkipFilter");
Expand All @@ -40,7 +40,7 @@ public static CommonOutputs.TransformOutput SkipFilter(IHostEnvironment env, Ski

[TlcModule.EntryPoint(Name = "Transforms.RowTakeFilter", Desc = SkipTakeFilter.TakeFilterSummary, UserName = SkipTakeFilter.TakeFilterUserName,
ShortName = SkipTakeFilter.TakeFilterShortName)]
public static CommonOutputs.TransformOutput TakeFilter(IHostEnvironment env, SkipTakeFilter.TakeArguments input)
public static CommonOutputs.TransformOutput TakeFilter(IHostEnvironment env, SkipTakeFilter.TakeOptions input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register("TakeFilter");
Expand All @@ -52,7 +52,7 @@ public static CommonOutputs.TransformOutput TakeFilter(IHostEnvironment env, Ski

[TlcModule.EntryPoint(Name = "Transforms.RowSkipAndTakeFilter", Desc = SkipTakeFilter.SkipTakeFilterSummary,
UserName = SkipTakeFilter.SkipTakeFilterUserName, ShortName = SkipTakeFilter.SkipTakeFilterShortName)]
public static CommonOutputs.TransformOutput SkipAndTakeFilter(IHostEnvironment env, SkipTakeFilter.Arguments input)
public static CommonOutputs.TransformOutput SkipAndTakeFilter(IHostEnvironment env, SkipTakeFilter.Options input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register("SkipTakeFilter");
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string
var idv = dv;
if (hidden.Count > 0)
{
var args = new ChooseColumnsByIndexTransform.Arguments();
var args = new ChooseColumnsByIndexTransform.Options();
args.Drop = true;
args.Indices = hidden.ToArray();
idv = new ChooseColumnsByIndexTransform(env, args, idv);
Expand Down Expand Up @@ -910,12 +910,12 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string

idv = new KeyToValueMappingTransformer(env, keyCol).Transform(idv);
var hidden = FindHiddenColumns(idv.Schema, keyCol);
idv = new ChooseColumnsByIndexTransform(env, new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Indices = hidden.ToArray() }, idv);
idv = new ChooseColumnsByIndexTransform(env, new ChooseColumnsByIndexTransform.Options() { Drop = true, Indices = hidden.ToArray() }, idv);
}
foreach (var keyCol in firstDvKeyNoNamesColumns)
{
var hidden = FindHiddenColumns(idv.Schema, keyCol.Key);
idv = new ChooseColumnsByIndexTransform(env, new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Indices = hidden.ToArray() }, idv);
idv = new ChooseColumnsByIndexTransform(env, new ChooseColumnsByIndexTransform.Options() { Drop = true, Indices = hidden.ToArray() }, idv);
}
return idv;
};
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ private IDataView WrapPerInstance(RoleMappedData perInst)
}
else
{
var args = new GenerateNumberTransform.Arguments();
var args = new GenerateNumberTransform.Options();
args.Columns = new[] { new GenerateNumberTransform.Column() { Name = "Instance" } };
args.UseCounter = true;
idv = new GenerateNumberTransform(Host, args, idv);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ private protected override IDataView CombineOverallMetricsCore(IDataView[] metri
idv.Schema[col].Name.Equals(MultiClassClassifierEvaluator.PerClassLogLoss))
{
idv = new ChooseColumnsByIndexTransform(Host,
new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Indices = new[] { col } }, idv);
new ChooseColumnsByIndexTransform.Options() { Drop = true, Indices = new[] { col } }, idv);
break;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ private IDataView ExtractRelevantIndex(IDataView data)
output = LambdaColumnMapper.Create(Host, "Quantile Regression", output, name, name, type, NumberType.R8,
(in VBuffer<Double> src, ref Double dst) => dst = src.GetItemOrDefault(index));
output = new ChooseColumnsByIndexTransform(Host,
new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Indices = new[] { i } }, output);
new ChooseColumnsByIndexTransform.Options() { Drop = true, Indices = new[] { i } }, output);
}
}
return output;
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/TrainCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ public abstract class TrainCatalogBase

EnsureStratificationColumn(ref data, ref stratificationColumn, seed);

var trainFilter = new RangeFilter(Host, new RangeFilter.Arguments()
var trainFilter = new RangeFilter(Host, new RangeFilter.Options()
{
Column = stratificationColumn,
Min = 0,
Max = testFraction,
Complement = true
}, data);
var testFilter = new RangeFilter(Host, new RangeFilter.Arguments()
var testFilter = new RangeFilter(Host, new RangeFilter.Options()
{
Column = stratificationColumn,
Min = 0,
Expand Down Expand Up @@ -81,14 +81,14 @@ protected internal (IDataView scoredTestSet, ITransformer model)[] CrossValidate
Func<int, (IDataView scores, ITransformer model)> foldFunction =
fold =>
{
var trainFilter = new RangeFilter(Host, new RangeFilter.Arguments
var trainFilter = new RangeFilter(Host, new RangeFilter.Options
{
Column = stratificationColumn,
Min = (double)fold / numFolds,
Max = (double)(fold + 1) / numFolds,
Complement = true
}, data);
var testFilter = new RangeFilter(Host, new RangeFilter.Arguments
var testFilter = new RangeFilter(Host, new RangeFilter.Options
{
Column = stratificationColumn,
Min = (double)fold / numFolds,
Expand Down
Loading