Move Normalizer extension methods from experimental to the stable NuGet package and remove the generic Normalizer APIs #3118

Merged: 8 commits, Apr 2, 2019
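Before the file-by-file diff, a minimal, self-contained sketch of the calling pattern this PR standardizes on (the type and data below are illustrative, not taken from the diff): the generic `Transforms.Normalize(...)` overloads go away, and callers use the specific `NormalizeMinMax` extension method on the stable catalog.

```csharp
using System;
using Microsoft.ML;
using Microsoft.ML.Data;

// Illustrative input type; "FeatureVector" mirrors the column name used in the cookbook diff below.
public class InputRow
{
    [VectorType(3)]
    public float[] FeatureVector { get; set; }
}

public static class NormalizeSketch
{
    public static void Main()
    {
        var mlContext = new MLContext(seed: 0);

        var data = mlContext.Data.LoadFromEnumerable(new[]
        {
            new InputRow { FeatureVector = new[] { 1f, 10f, 100f } },
            new InputRow { FeatureVector = new[] { 2f, 20f, 200f } },
            new InputRow { FeatureVector = new[] { 4f, 40f, 400f } },
        });

        // Stable API after this PR: min-max normalize the column in place.
        var pipeline = mlContext.Transforms.NormalizeMinMax("FeatureVector");
        var normalized = pipeline.Fit(data).Transform(data);

        foreach (var row in normalized.GetColumn<float[]>("FeatureVector"))
            Console.WriteLine(string.Join(", ", row));
    }
}
```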
11 changes: 4 additions & 7 deletions docs/code/MlNetCookBook.md
@@ -344,7 +344,7 @@ var cachedTrainData = mlContext.Data.Cache(trainData);
 var pipeline =
     // First 'normalize' the data (rescale to be
     // between -1 and 1 for all examples)
-    mlContext.Transforms.Normalize("FeatureVector")
+    mlContext.Transforms.NormalizeMinMax("FeatureVector")
     // We add a step for caching data in memory so that the downstream iterative training
     // algorithm can efficiently scan through the data multiple times. Otherwise, the following
     // trainer will load data from disk multiple times. The caching mechanism uses an on-demand strategy.
@@ -625,18 +625,15 @@ var trainData = mlContext.Data.LoadFromTextFile<IrisInputAllFeatures>(dataPath,
     separatorChar: ','
 );

-// Apply all kinds of standard ML.NET normalization to the raw features.
+// Apply MinMax normalization to the raw features.
 var pipeline =
-    mlContext.Transforms.Normalize(
-        new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
-        new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true),
-        new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256));
+    mlContext.Transforms.NormalizeMinMax("MinMaxNormalized", "Features");

 // Let's train our pipeline of normalizers, and then apply it to the same data.
 var normalizedData = pipeline.Fit(trainData).Transform(trainData);

 // Inspect one column of the resulting dataset.
-var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MeanVarNormalized"]).ToArray();
+var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MinMaxNormalized"]).ToArray();
 ```

 ## How do I train my model on categorical data?
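The old cookbook snippet above configured three normalizers at once (min-max, mean-variance, binning) through `NormalizingEstimator` column options. With that overload removed, each mode is reached through its own catalog method. A sketch of the equivalent chain, reusing `mlContext` and `trainData` from the snippet above and assuming `NormalizeMeanVariance` and `NormalizeBinning` are available on the stable catalog alongside `NormalizeMinMax`:

```csharp
// Sketch only, not part of this PR's diff: one estimator per normalization mode
// in place of the removed NormalizingEstimator.*ColumnOptions overload.
var normalizePipeline =
    mlContext.Transforms.NormalizeMinMax("MinMaxNormalized", "Features")
        .Append(mlContext.Transforms.NormalizeMeanVariance("MeanVarNormalized", "Features"))
        .Append(mlContext.Transforms.NormalizeBinning("BinNormalized", "Features", maximumBinCount: 256));

var normalized = normalizePipeline.Fit(trainData).Transform(trainData);

// Inspect one of the resulting columns, as the cookbook does for "MinMaxNormalized".
var binValues = normalized.GetColumn<float[]>(normalized.Schema["BinNormalized"]).ToArray();
```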
8 changes: 3 additions & 5 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
@@ -1,8 +1,6 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
-using Microsoft.ML.Data;
-using Microsoft.ML.Transforms;

 namespace Microsoft.ML.Samples.Dynamic
 {
@@ -28,7 +26,7 @@ public static void Example()
 // 35 1 6-11yrs 1 3 32 5 ...

 // A pipeline for normalizing the Induced column.
-var pipeline = ml.Transforms.Normalize("Induced");
+var pipeline = ml.Transforms.NormalizeMinMax("Induced");
 // The transformed (normalized according to Normalizer.NormalizerMode.MinMax) data.
 var transformer = pipeline.Fit(trainData);

@@ -58,8 +56,8 @@ public static void Example()

 // Composing a different pipeline if we wanted to normalize more than one column at a time.
 // Using log scale as the normalization mode.
-var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance)
-    .Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance));
+var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced")
+    .Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous"));
 // The transformed data.
 var multiColtransformer = multiColPipeline.Fit(trainData);
 var multiColtransformedData = multiColtransformer.Transform(trainData);
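Note that the sample's comment still mentions log scale while the updated code uses `NormalizeMinMax`. If log-scale behavior is actually wanted, a dedicated method covers what the removed `NormalizationMode.LogMeanVariance` argument used to select. A sketch reusing `ml` and `trainData` from the sample above, and assuming `NormalizeLogMeanVariance` is exposed on the stable catalog:

```csharp
// Sketch only: log-mean-variance normalization through its dedicated extension method,
// matching the intent of the old Normalize(..., NormalizationMode.LogMeanVariance) call.
var logPipeline = ml.Transforms.NormalizeLogMeanVariance("LogInduced", "Induced")
    .Append(ml.Transforms.NormalizeLogMeanVariance("LogSpontaneous", "Spontaneous"));

var logTransformedData = logPipeline.Fit(trainData).Transform(trainData);
```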
@@ -19,7 +19,7 @@ public static void Example()
 // Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
 // Then append a linear regression trainer.
 var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
-    .Append(mlContext.Transforms.Normalize("Features"))
+    .Append(mlContext.Transforms.NormalizeMinMax("Features"))
     .Append(mlContext.Regression.Trainers.Ols(
         labelColumnName: labelName, featureColumnName: "Features"));
 var model = pipeline.Fit(data);
@@ -21,7 +21,7 @@ public static void Example()
 // Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
 // Then append a logistic regression trainer.
 var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
-    .Append(mlContext.Transforms.Normalize("Features"))
+    .Append(mlContext.Transforms.NormalizeMinMax("Features"))
     .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
         labelColumnName: labelName, featureColumnName: "Features"));
 var model = pipeline.Fit(data);
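For context, here is a minimal end-to-end version of the pattern the two sample diffs above share (Concatenate, then NormalizeMinMax, then a trainer). The row type, column names, and in-memory data are hypothetical stand-ins for the samples' real datasets:

```csharp
using System;
using System.Collections.Generic;
using Microsoft.ML;

// Illustrative row type; not taken from the samples in this PR.
public class BinaryRow
{
    public bool Label { get; set; }
    public float Age { get; set; }
    public float Income { get; set; }
}

public static class TrainSketch
{
    public static void Main()
    {
        var mlContext = new MLContext(seed: 0);

        var rows = new List<BinaryRow>
        {
            new BinaryRow { Label = true,  Age = 42f, Income = 85000f },
            new BinaryRow { Label = false, Age = 23f, Income = 21000f },
            new BinaryRow { Label = true,  Age = 35f, Income = 64000f },
            new BinaryRow { Label = false, Age = 51f, Income = 30000f },
        };
        var data = mlContext.Data.LoadFromEnumerable(rows);

        // Concatenate raw columns into "Features", min-max normalize them,
        // then append the trainer, mirroring the updated samples.
        var pipeline = mlContext.Transforms.Concatenate("Features", "Age", "Income")
            .Append(mlContext.Transforms.NormalizeMinMax("Features"))
            .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
                labelColumnName: "Label", featureColumnName: "Features"));

        var model = pipeline.Fit(data);
        Console.WriteLine("Trained: " + model.GetType().Name);
    }
}
```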
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/Transforms/Normalizer.cs
@@ -39,7 +39,8 @@ internal static class Defaults
 public const long MaximumExampleCount = 1000000000;
 }

-public enum NormalizationMode
+[BestFriend]
+internal enum NormalizationMode
 {
     /// <summary>
     /// Linear rescale such that minimum and maximum values are mapped between -1 and 1.
112 changes: 0 additions & 112 deletions src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -171,7 +171,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.Normalize("Features"));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = loader.Load(dataFile);
var featurizedData = pipeline.Fit(data).Transform(data);