Skip to content

Remove generic normalizer estimator catalog methods. #3116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Experimental;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it just experimental? We are removing the generic estimator, so these new methods must be supported officially and therefore deserve a namespace like ML.Transform. Is my understanding correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


namespace Microsoft.ML.Samples.Dynamic
{
@@ -28,7 +27,7 @@ public static void Example()
// 35 1 6-11yrs 1 3 32 5 ...

// A pipeline for normalizing the Induced column.
var pipeline = ml.Transforms.Normalize("Induced");
var pipeline = ml.Transforms.NormalizeMinMax("Induced");
// Fit the pipeline to obtain a transformer that min-max normalizes the Induced column.
var transformer = pipeline.Fit(trainData);

@@ -58,8 +57,8 @@ public static void Example()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using log scale as the normalization mode.
var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance)
.Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance));
// Log-mean-variance normalization keeps this sample on a log scale, which is what the
// "LogInduced"/"LogSpontaneous" output column names advertise; min-max would not be log-scaled.
var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance("LogInduced", "Induced")
    .Append(ml.Transforms.NormalizeLogMeanVariance("LogSpontaneous", "Spontaneous"));
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Linq;
using Microsoft.ML.Experimental;

namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance
{
@@ -19,7 +20,7 @@ public static void Example()
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
// Then append a linear regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Regression.Trainers.Ols(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Linq;
using Microsoft.ML.Experimental;
using Microsoft.ML.Trainers;

namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance
@@ -21,7 +22,7 @@ public static void Example()
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
// Then append a logistic regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm.StaticPipe\Microsoft.ML.LightGbm.StaticPipe.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.Mkl.Components\Microsoft.ML.Mkl.Components.csproj" />
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
</ItemGroup>

3 changes: 2 additions & 1 deletion src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
using System.IO;
using System.Net;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;

namespace Microsoft.ML.SamplesUtils
{
@@ -171,7 +172,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.Normalize("Features"));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = loader.Load(dataFile);
var featurizedData = pipeline.Fit(data).Transform(data);
1 change: 1 addition & 0 deletions src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@
<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
</ItemGroup>

<ItemGroup>
42 changes: 0 additions & 42 deletions src/Microsoft.ML.Transforms/NormalizerCatalog.cs
Original file line number Diff line number Diff line change
@@ -9,48 +9,6 @@ namespace Microsoft.ML
/// </summary>
public static class NormalizationCatalog
{
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales one column using the given <paramref name="mode"/>.
/// </summary>
/// <param name="catalog">The transform catalog this extension method is invoked on.</param>
/// <param name="outputColumnName">Name of the column that receives the normalized values of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to normalize. When <see langword="null"/>, <paramref name="outputColumnName"/> is used as the source column as well.</param>
/// <param name="mode">The <see cref="NormalizingEstimator.NormalizationMode"/> to apply; defaults to <see cref="NormalizingEstimator.NormalizationMode.MinMax"/>.</param>
/// <returns>A new <see cref="NormalizingEstimator"/> configured for the requested column and mode.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Normalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
/// ]]>
/// </format>
/// </example>
public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
    string outputColumnName, string inputColumnName = null,
    NormalizingEstimator.NormalizationMode mode = NormalizingEstimator.NormalizationMode.MinMax)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // A missing input name means the column is normalized in place.
    var sourceColumnName = inputColumnName ?? outputColumnName;

    return new NormalizingEstimator(env, outputColumnName, sourceColumnName, mode);
}

/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales several columns, all using the same <paramref name="mode"/>.
/// </summary>
/// <param name="catalog">The transform catalog this extension method is invoked on.</param>
/// <param name="mode">The <see cref="NormalizingEstimator.NormalizationMode"/> applied to every column pair.</param>
/// <param name="columns">The input/output column pairs to normalize; checked with <c>CheckValue</c> before use.</param>
/// <returns>A new <see cref="NormalizingEstimator"/> covering all of the supplied column pairs.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Normalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
/// ]]>
/// </format>
/// </example>
[BestFriend]
internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
    NormalizingEstimator.NormalizationMode mode,
    params InputOutputColumnPair[] columns)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // The estimator constructor takes value tuples rather than InputOutputColumnPair instances.
    var columnTuples = InputOutputColumnPair.ConvertToValueTuples(columns);
    return new NormalizingEstimator(host, mode, columnTuples);
}

/// <summary>
/// Normalize (rescale) columns according to specified custom parameters.
/// </summary>
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
using Microsoft.ML.Benchmarks.Harness;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.TestFramework;
using Microsoft.ML.Trainers;

@@ -35,7 +36,7 @@ public CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrato
}, hasHeader: true);

var estimatorPipeline = ml.Transforms.Categorical.OneHotEncoding("CatFeatures")
.Append(ml.Transforms.Normalize("NumFeatures"))
.Append(ml.Transforms.NormalizeMinMax("NumFeatures"))
.Append(ml.Transforms.Concatenate("Features", "NumFeatures", "CatFeatures"))
.Append(ml.Clustering.Trainers.KMeans("Features"))
.Append(ml.Transforms.Concatenate("Features", "Features", "Score"))
3 changes: 2 additions & 1 deletion test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.StandardTrainers\Microsoft.ML.StandardTrainers.csproj" />
@@ -24,6 +25,6 @@
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="MklImports" />
<NativeAssemblyReference Condition="'$(OS)' == 'Windows_NT'" Include="libiomp5md"/>
<NativeAssemblyReference Condition="'$(OS)' == 'Windows_NT'" Include="libiomp5md" />
</ItemGroup>
</Project>
4 changes: 2 additions & 2 deletions test/Microsoft.ML.Functional.Tests/DataTransformation.cs
Original file line number Diff line number Diff line change
@@ -3,11 +3,11 @@
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.Text;
using Xunit;
using Xunit.Abstractions;
@@ -174,7 +174,7 @@ void ExtensibilityNormalizeColumns()

// Compose the transformation.
var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
.Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

// Transform the data.
var transformedData = pipeline.Fit(data).Transform(data);
3 changes: 2 additions & 1 deletion test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
@@ -254,7 +255,7 @@ void IntrospectNormalization()

// Compose the transformation.
var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
.Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

// Fit the pipeline.
var model = pipeline.Fit(data);
9 changes: 5 additions & 4 deletions test/Microsoft.ML.Functional.Tests/ModelFiles.cs
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
using System.Linq;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.Trainers.FastTree;
@@ -275,7 +276,7 @@ public void LoadSchemaAndCreateNewData()
var data = loader.Load(file);

// Pipeline.
var pipeline = ML.Transforms.Normalize("Features");
var pipeline = ML.Transforms.NormalizeMinMax("Features");

// Train.
var model = pipeline.Fit(data);
@@ -330,7 +331,7 @@ public void SaveCompositeLoaderAndLoad()
{
var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
var composite = loader.Append(ML.Transforms.Normalize("Features"));
var composite = loader.Append(ML.Transforms.NormalizeMinMax("Features"));
var loaderWithEmbeddedModel = composite.Fit(file);

string modelPath = GetOutputPath(FullTestName + "-model.zip");
@@ -368,7 +369,7 @@ public void SaveLoaderAndTransformerAndLoad()
{
var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
var estimator = ML.Transforms.Normalize("Features");
var estimator = ML.Transforms.NormalizeMinMax("Features");
var data = loader.Load(file);
var model = estimator.Fit(data);

@@ -401,7 +402,7 @@ public void SaveTransformerAndSchemaAndLoad()
{
var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
var estimator = ML.Transforms.Normalize("Features");
var estimator = ML.Transforms.NormalizeMinMax("Features");
var model = estimator.Fit(loader.Load(file));

string modelPath = GetOutputPath(FullTestName + "-model.zip");
7 changes: 4 additions & 3 deletions test/Microsoft.ML.Functional.Tests/ONNX.cs
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System.IO;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
@@ -33,7 +34,7 @@ public void SaveOnnxModelLoadAndScoreFastTree()

// Create a pipeline to train on the housing data.
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.FastTree(
new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
@@ -85,7 +86,7 @@ public void SaveOnnxModelLoadAndScoreKMeans()

// Create a pipeline to train on the housing data.
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Clustering.Trainers.KMeans(
new KMeansTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
@@ -137,7 +138,7 @@ public void SaveOnnxModelLoadAndScoreSDCA()

// Create a pipeline to train on the housing data.
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.Sdca(
new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
5 changes: 3 additions & 2 deletions test/Microsoft.ML.Functional.Tests/Training.cs
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
using System;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
@@ -316,7 +317,7 @@ public void ContinueTrainingOnlineGradientDescent()

// Create a transformation pipeline.
var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext);

var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(
@@ -360,7 +361,7 @@ public void ContinueTrainingPoissonRegression()

// Create a transformation pipeline.
var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext);

var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression(
9 changes: 5 additions & 4 deletions test/Microsoft.ML.Tests/CachingTests.cs
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
using System.Linq;
using System.Threading;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.RunTests;
using Microsoft.ML.StaticPipe;
using Xunit;
@@ -43,8 +44,8 @@ public void CacheCheckpointTest()
var trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray();

var pipe = ML.Transforms.CopyColumns("F1", "Features")
.Append(ML.Transforms.Normalize("Norm1", "F1"))
.Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance));
.Append(ML.Transforms.NormalizeMinMax("Norm1", "F1"))
.Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1"));

pipe.Fit(ML.Data.LoadFromEnumerable(trainData));

@@ -53,8 +54,8 @@ public void CacheCheckpointTest()
trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray();
pipe = ML.Transforms.CopyColumns("F1", "Features")
.AppendCacheCheckpoint(ML)
.Append(ML.Transforms.Normalize("Norm1", "F1"))
.Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance));
.Append(ML.Transforms.NormalizeMinMax("Norm1", "F1"))
.Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1"));

pipe.Fit(ML.Data.LoadFromEnumerable(trainData));

5 changes: 2 additions & 3 deletions test/Microsoft.ML.Tests/FeatureContributionTests.cs
Original file line number Diff line number Diff line change
@@ -6,13 +6,12 @@
using System.IO;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Data.IO;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Model;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework.Attributes;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using Xunit;
using Xunit.Abstractions;

@@ -306,7 +305,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression, int numb
var srcDV = bldr.GetDataView();

var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important")
.Append(ML.Transforms.Normalize("Features"));
.Append(ML.Transforms.NormalizeMinMax("Features"));

if (task == TaskType.BinaryClassification)
return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean))
13 changes: 7 additions & 6 deletions test/Microsoft.ML.Tests/OnnxConversionTest.cs
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
using System.Text.RegularExpressions;
using Google.Protobuf;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Model.OnnxConverter;
using Microsoft.ML.RunTests;
using Microsoft.ML.Runtime;
@@ -57,7 +58,7 @@ public void SimpleEndToEndOnnxConversionTest()
hasHeader: true);
var cachedTrainData = mlContext.Data.Cache(data);
var dynamicPipeline =
mlContext.Transforms.Normalize("FeatureVector")
mlContext.Transforms.NormalizeMinMax("FeatureVector")
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.Sdca(new SdcaRegressionTrainer.Options() {
LabelColumnName = "Target",
@@ -137,7 +138,7 @@ public void KmeansOnnxConversionTest()
separatorChar: '\t',
hasHeader: true);

var pipeline = mlContext.Transforms.Normalize("Features").
var pipeline = mlContext.Transforms.NormalizeMinMax("Features").
Append(mlContext.Clustering.Trainers.KMeans(new Trainers.KMeansTrainer.Options
{
FeatureColumnName = DefaultColumnNames.Features,
@@ -315,7 +316,7 @@ public void LogisticRegressionOnnxConversionTest()
hasHeader: true);
var cachedTrainData = mlContext.Data.Cache(data);
var dynamicPipeline =
mlContext.Transforms.Normalize("FeatureVector")
mlContext.Transforms.NormalizeMinMax("FeatureVector")
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.Sdca(new SdcaRegressionTrainer.Options() {
LabelColumnName = "Target",
@@ -352,7 +353,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest()
hasHeader: true);
var cachedTrainData = mlContext.Data.Cache(data);
var dynamicPipeline =
mlContext.Transforms.Normalize("FeatureVector")
mlContext.Transforms.NormalizeMinMax("FeatureVector")
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numberOfIterations: 3, numberOfLeaves: 16, minimumExampleCountPerLeaf: 100));
var model = dynamicPipeline.Fit(data);
@@ -383,7 +384,7 @@ public void MulticlassLogisticRegressionOnnxConversionTest()
separatorChar: '\t',
hasHeader: true);

var pipeline = mlContext.Transforms.Normalize("Features").
var pipeline = mlContext.Transforms.NormalizeMinMax("Features").
Append(mlContext.Transforms.Conversion.MapValueToKey("Label")).
Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(new LbfgsMaximumEntropyMulticlassTrainer.Options() { NumberOfThreads = 1 }));

@@ -416,7 +417,7 @@ public void RemoveVariablesInPipelineTest()
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.OneHotEncodingEstimator.OutputKind.Bag)
.Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("F2")))
.Append(mlContext.Transforms.Concatenate("Features", "F1", "F2"))
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfLeaves: 2, numberOfTrees: 1, minimumExampleCountPerLeaf: 2));

var model = pipeline.Fit(data);
5 changes: 3 additions & 2 deletions test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.RunTests;
using Microsoft.ML.Trainers;
@@ -421,7 +422,7 @@ private IDataView GetDenseDataset(TaskType task = TaskType.Regression)
var srcDV = bldr.GetDataView();

var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2Important", "X3", "X4Rand")
.Append(ML.Transforms.Normalize("Features"));
.Append(ML.Transforms.NormalizeMinMax("Features"));
if (task == TaskType.BinaryClassification)
return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean))
.Fit(srcDV).Transform(srcDV);
@@ -501,7 +502,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression)
var srcDV = bldr.GetDataView();

var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important")
.Append(ML.Transforms.Normalize("Features"));
.Append(ML.Transforms.NormalizeMinMax("Features"));
if (task == TaskType.BinaryClassification)
{
return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean))
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
using System.IO;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
using Microsoft.ML.Trainers;
@@ -93,7 +94,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m
var pipeline =
// First 'normalize' the data (rescale to be
// between -1 and 1 for all examples), and then train the model.
mlContext.Transforms.Normalize("FeatureVector")
mlContext.Transforms.NormalizeMinMax("FeatureVector")
// We add a step for caching data in memory so that the downstream iterative training
// algorithm can efficiently scan through the data multiple times. Otherwise, the following
// trainer will read data from disk multiple times. The caching mechanism uses an on-demand strategy.
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
using Microsoft.ML.Trainers;
@@ -29,7 +30,7 @@ public void TrainAndPredictIrisModelTest()
);

var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Trainers;
using Xunit;

@@ -34,7 +35,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest()

// Create Estimator
var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Transforms.Conversion.MapValueToKey("Label", "IrisPlantType"), TransformerScope.TrainTest)
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.RunTests;
using Microsoft.ML.Trainers;
using Xunit;
@@ -27,7 +28,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
);

var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
24 changes: 12 additions & 12 deletions test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
Original file line number Diff line number Diff line change
@@ -225,8 +225,8 @@ public void SimpleConstructorsAndExtensions()
var est1 = new NormalizingEstimator(Env, "float4");
var est2 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4"));
var est3 = new NormalizingEstimator(Env, new NormalizingEstimator.MinMaxColumnOptions("float4"));
var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax);
var est5 = ML.Transforms.Normalize("float4");
var est4 = ML.Transforms.NormalizeMinMax("float4", "float4");
var est5 = ML.Transforms.NormalizeMinMax("float4");

var data1 = est1.Fit(data).Transform(data);
var data2 = est2.Fit(data).Transform(data);
@@ -246,7 +246,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for MeanVariance
var est6 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4"));
var est7 = new NormalizingEstimator(Env, new NormalizingEstimator.MeanVarianceColumnOptions("float4"));
var est8 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance);
var est8 = ML.Transforms.NormalizeMeanVariance("float4", "float4");

var data6 = est6.Fit(data).Transform(data);
var data7 = est7.Fit(data).Transform(data);
@@ -259,7 +259,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for LogMeanVariance
var est9 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4"));
var est10 = new NormalizingEstimator(Env, new NormalizingEstimator.LogMeanVarianceColumnOptions("float4"));
var est11 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance);
var est11 = ML.Transforms.NormalizeLogMeanVariance("float4", "float4");

var data9 = est9.Fit(data).Transform(data);
var data10 = est10.Fit(data).Transform(data);
@@ -272,7 +272,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for Binning
var est12 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4"));
var est13 = new NormalizingEstimator(Env, new NormalizingEstimator.BinningColumnOptions("float4"));
var est14 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning);
var est14 = ML.Transforms.NormalizeBinning("float4", "float4");

var data12 = est12.Fit(data).Transform(data);
var data13 = est13.Fit(data).Transform(data);
@@ -285,7 +285,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for SupervisedBinning
var est15 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", "float4"));
var est16 = new NormalizingEstimator(Env, new NormalizingEstimator.SupervisedBinningColumOptions("float4"));
var est17 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning);
var est17 = ML.Transforms.NormalizeSupervisedBinning("float4", "float4");

var data15 = est15.Fit(data).Transform(data);
var data16 = est16.Fit(data).Transform(data);
@@ -314,11 +314,11 @@ public void NormalizerExperimentalExtensions()
var data = loader.Load(dataPath);

// Normalizer Extensions
var est1 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax);
var est2 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance);
var est3 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance);
var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning);
var est5 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning);
var est1 = ML.Transforms.NormalizeMinMax("float4", "float4");
var est2 = ML.Transforms.NormalizeMeanVariance("float4", "float4");
var est3 = ML.Transforms.NormalizeLogMeanVariance("float4", "float4");
var est4 = ML.Transforms.NormalizeBinning("float4", "float4");
var est5 = ML.Transforms.NormalizeSupervisedBinning("float4", "float4");

// Normalizer Extensions (Experimental)
var est6 = ML.Transforms.NormalizeMinMax("float4", "float4");
@@ -370,7 +370,7 @@ public void NormalizerExperimentalExtensionGetColumnPairs()
});

var data = loader.Load(dataPath);
var est = ML.Transforms.Normalize("output", "input", NormalizingEstimator.NormalizationMode.MinMax);
var est = ML.Transforms.NormalizeMinMax("output", "input");
var t = est.Fit(data);

Assert.Single(t.GetColumnPairs());