From 2b7c1f053ba708121e562b874e5ad8c47346692e Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 27 Mar 2019 13:12:20 -0700 Subject: [PATCH 1/8] Move Normalizer extension method from experimental to stable nuget. --- .../TransformsCatalogExtensions.cs | 112 ------------------ .../NormalizerCatalog.cs | 100 ++++++++++++++++ 2 files changed, 100 insertions(+), 112 deletions(-) delete mode 100644 src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs diff --git a/src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs b/src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs deleted file mode 100644 index 1c811b8243..0000000000 --- a/src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.Transforms; - -namespace Microsoft.ML.Experimental -{ - public static class TransformsCatalogExtensions - { - /// - /// Normalize (rescale) the column according to the mode. - /// It normalizes the data based on the observed minimum and maximum values of the data. - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. If set to , the value of the will be used as source. - /// Maximum number of examples used to train the normalizer. - /// Whether to map zero to zero, preserving sparsity. - public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, - bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) - { - var columnOptions = new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero); - return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); - } - - /// - /// Normalize (rescale) the column according to the mode. - /// It normalizes the data based on the computed mean and variance of the data. - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. If set to , the value of the will be used as source. - /// Maximum number of examples used to train the normalizer. - /// Whether to map zero to zero, preserving sparsity. - /// Whether to use CDF as the output. - public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, - bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, - bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf) - { - var columnOptions = new NormalizingEstimator.MeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, useCdf); - return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); - } - - /// - /// Normalize (rescale) the column according to the mode. - /// It normalizes the data based on the computed mean and variance of the logarithm of the data. - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. 
If set to , the value of the will be used as source. - /// Maximum number of examples used to train the normalizer. - /// Whether to use CDF as the output. - public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, - bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) - { - var columnOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, useCdf); - return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); - } - - /// - /// Normalize (rescale) the column according to the mode. - /// The values are assigned into bins with equal density. - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. If set to , the value of the will be used as source. - /// Maximum number of examples used to train the normalizer. - /// Whether to map zero to zero, preserving sparsity. - /// Maximum number of bins (power of 2 recommended). - public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, - bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, - int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount) - { - var columnOptions = new NormalizingEstimator.BinningColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount); - return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); - } - - /// - /// Normalize (rescale) the column according to the mode. - /// The values are assigned into bins based on correlation with the column. - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. If set to , the value of the will be used as source. - /// Name of the label column for supervised binning. - /// Maximum number of examples used to train the normalizer. - /// Whether to map zero to zero, preserving sparsity. - /// Maximum number of bins (power of 2 recommended). - /// Minimum number of examples per bin. 
- public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - string labelColumnName = DefaultColumnNames.Label, - long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, - bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, - int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, - int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize) - { - var columnOptions = new NormalizingEstimator.SupervisedBinningColumOptions(outputColumnName, inputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin); - return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); - } - } -} diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index dd12c452c4..d6ea92b558 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -51,6 +51,106 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, return new NormalizingEstimator(env, mode, InputOutputColumnPair.ConvertToValueTuples(columns)); } + /// + /// Normalize (rescale) the column according to the mode. + /// It normalizes the data based on the observed minimum and maximum values of the data. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) + { + var columnOptions = new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// It normalizes the data based on the computed mean and variance of the data. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf) + { + var columnOptions = new NormalizingEstimator.MeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, useCdf); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// It normalizes the data based on the computed mean and variance of the logarithm of the data. 
+ /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) + { + var columnOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, useCdf); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// The values are assigned into bins with equal density. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount) + { + var columnOptions = new NormalizingEstimator.BinningColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// The values are assigned into bins based on correlation with the column. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Name of the label column for supervised binning. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + /// Minimum number of examples per bin. + public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + string labelColumnName = DefaultColumnNames.Label, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, + int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize) + { + var columnOptions = new NormalizingEstimator.SupervisedBinningColumOptions(outputColumnName, inputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + /// /// Normalize (rescale) columns according to specified custom parameters. 
/// From fe7228a2ee4132f34a399488b1578b2a1735bc55 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 27 Mar 2019 13:07:19 -0700 Subject: [PATCH 2/8] Cleanup unused method in Normalizer Estimator. --- .../NormalizerCatalog.cs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index d6ea92b558..30ba69c4b5 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -9,24 +9,6 @@ namespace Microsoft.ML /// public static class NormalizationCatalog { - /// - /// Normalize (rescale) the column according to the specified . - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. If set to , the value of the will be used as source. - /// The used to map the old values in the new scale. - /// - /// - /// - /// - /// - public static NormalizingEstimator Normalize(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - NormalizingEstimator.NormalizationMode mode = NormalizingEstimator.NormalizationMode.MinMax) - => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, mode); /// /// Normalize (rescale) several columns according to the specified . From 34c63b01f2956e6c5dcddbe159aac60afcb1d9b8 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 1 Apr 2019 16:19:00 -0700 Subject: [PATCH 3/8] remove normalizer estimator catalog methods that take enum as parameter. --- .../Dynamic/Normalizer.cs | 9 ++++--- .../PFIRegressionExample.cs | 3 ++- .../PfiBinaryClassificationExample.cs | 3 ++- .../Microsoft.ML.Samples.csproj | 1 + .../Microsoft.ML.SamplesUtils.csproj | 1 + .../SamplesDatasetUtils.cs | 3 ++- .../Microsoft.ML.Transforms.csproj | 1 + .../KMeansAndLogisticRegressionBench.cs | 3 ++- .../Microsoft.ML.Benchmarks.csproj | 3 ++- .../DataTransformation.cs | 4 ++-- .../IntrospectiveTraining.cs | 3 ++- .../ModelFiles.cs | 9 +++---- test/Microsoft.ML.Functional.Tests/ONNX.cs | 7 +++--- .../Microsoft.ML.Functional.Tests/Training.cs | 5 ++-- test/Microsoft.ML.Tests/CachingTests.cs | 9 +++---- .../FeatureContributionTests.cs | 5 ++-- test/Microsoft.ML.Tests/OnnxConversionTest.cs | 13 +++++----- .../PermutationFeatureImportanceTests.cs | 5 ++-- .../CookbookSamplesDynamicApi.cs | 3 ++- .../Scenarios/IrisPlantClassificationTests.cs | 3 ++- ...PlantClassificationWithStringLabelTests.cs | 3 ++- .../IrisPlantClassificationTests.cs | 3 ++- .../Transformers/NormalizerTests.cs | 24 +++++++++---------- 23 files changed, 70 insertions(+), 53 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 55f3c89845..26a0514892 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -1,8 +1,7 @@ using System; using System.Collections.Generic; -using System.Linq; using Microsoft.ML.Data; -using Microsoft.ML.Transforms; +using Microsoft.ML.Experimental; namespace Microsoft.ML.Samples.Dynamic { @@ -28,7 +27,7 @@ public static void Example() // 35 1 6-11yrs 1 3 32 5 ... // A pipeline for normalizing the Induced column. 
- var pipeline = ml.Transforms.Normalize("Induced"); + var pipeline = ml.Transforms.NormalizeMinMax("Induced"); // The transformed (normalized according to Normalizer.NormalizerMode.MinMax) data. var transformer = pipeline.Fit(trainData); @@ -58,8 +57,8 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance) - .Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance)); + var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced") + .Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous")); // The transformed data. var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs index 4afa964850..bb571dd20f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Microsoft.ML.Experimental; namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance { @@ -19,7 +20,7 @@ public static void Example() // Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0. // Then append a linear regression trainer. var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Regression.Trainers.Ols( labelColumnName: labelName, featureColumnName: "Features")); var model = pipeline.Fit(data); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs index 09fb640f30..04e0bd9178 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Microsoft.ML.Experimental; using Microsoft.ML.Trainers; namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance @@ -21,7 +22,7 @@ public static void Example() // Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0. // Then append a logistic regression trainer. 
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression( labelColumnName: labelName, featureColumnName: "Features")); var model = pipeline.Fit(data); diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 7cb766c6de..d346f97db3 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -6,6 +6,7 @@ + diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index b7c0a83577..7553151b5c 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -8,6 +8,7 @@ + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 3d2aa09791..08c3f5084a 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -7,6 +7,7 @@ using System.IO; using System.Net; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; namespace Microsoft.ML.SamplesUtils { @@ -171,7 +172,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", "capital-gain", "capital-loss", "hours-per-week")) // Min-max normalize all the features - .Append(mlContext.Transforms.Normalize("Features")); + .Append(mlContext.Transforms.NormalizeMinMax("Features")); var data = loader.Load(dataFile); var featurizedData = pipeline.Fit(data).Transform(data); diff --git a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj index 4aa4a4eb79..8260cd5ab7 100644 --- a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj +++ b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj @@ -48,6 +48,7 @@ + diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index 663c1383d3..fe34d261ca 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Benchmarks.Harness; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; @@ -35,7 +36,7 @@ public CalibratedModelParametersBase + @@ -24,6 +25,6 @@ - + diff --git a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs index 3790d84cdd..9a34e4402e 100644 --- a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs +++ b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs @@ -3,11 +3,11 @@ // See the LICENSE file in the project root for more information. 
using System; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; using Xunit; using Xunit.Abstractions; @@ -174,7 +174,7 @@ void ExtensibilityNormalizeColumns() // Compose the transformation. var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) - .Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax)); + .Append(mlContext.Transforms.NormalizeMinMax("Features")); // Transform the data. var transformedData = pipeline.Fit(data).Transform(data); diff --git a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs index 89caf8c4f2..62716a8a98 100644 --- a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs +++ b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs @@ -7,6 +7,7 @@ using System.Collections.Immutable; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -254,7 +255,7 @@ void IntrospectNormalization() // Compose the transformation. var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) - .Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax)); + .Append(mlContext.Transforms.NormalizeMinMax("Features")); // Fit the pipeline. var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index e1fbe98749..e248ecb6bb 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -8,6 +8,7 @@ using System.Linq; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers.FastTree; @@ -275,7 +276,7 @@ public void LoadSchemaAndCreateNewData() var data = loader.Load(file); // Pipeline. - var pipeline = ML.Transforms.Normalize("Features"); + var pipeline = ML.Transforms.NormalizeMinMax("Features"); // Train. 
var model = pipeline.Fit(data); @@ -330,7 +331,7 @@ public void SaveCompositeLoaderAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var composite = loader.Append(ML.Transforms.Normalize("Features")); + var composite = loader.Append(ML.Transforms.NormalizeMinMax("Features")); var loaderWithEmbeddedModel = composite.Fit(file); string modelPath = GetOutputPath(FullTestName + "-model.zip"); @@ -368,7 +369,7 @@ public void SaveLoaderAndTransformerAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = ML.Transforms.Normalize("Features"); + var estimator = ML.Transforms.NormalizeMinMax("Features"); var data = loader.Load(file); var model = estimator.Fit(data); @@ -401,7 +402,7 @@ public void SaveTransformerAndSchemaAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = ML.Transforms.Normalize("Features"); + var estimator = ML.Transforms.NormalizeMinMax("Features"); var model = estimator.Fit(loader.Load(file)); string modelPath = GetOutputPath(FullTestName + "-model.zip"); diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index 3ece5658b8..49ac2e16fc 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.IO; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -33,7 +34,7 @@ public void SaveOnnxModelLoadAndScoreFastTree() // Create a pipeline to train on the housing data. var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.FastTree( new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 })); @@ -85,7 +86,7 @@ public void SaveOnnxModelLoadAndScoreKMeans() // Create a pipeline to train on the housing data. var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Clustering.Trainers.KMeans( new KMeansTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 })); @@ -137,7 +138,7 @@ public void SaveOnnxModelLoadAndScoreSDCA() // Create a pipeline to train on the housing data. 
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 })); diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 165c57dc20..babb02a0c8 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -5,6 +5,7 @@ using System; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -316,7 +317,7 @@ public void ContinueTrainingOnlineGradientDescent() // Create a transformation pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext); var trainer = mlContext.Regression.Trainers.OnlineGradientDescent( @@ -360,7 +361,7 @@ public void ContinueTrainingPoissonRegression() // Create a transformation pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext); var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression( diff --git a/test/Microsoft.ML.Tests/CachingTests.cs b/test/Microsoft.ML.Tests/CachingTests.cs index 1b58848391..a78e54ab5a 100644 --- a/test/Microsoft.ML.Tests/CachingTests.cs +++ b/test/Microsoft.ML.Tests/CachingTests.cs @@ -5,6 +5,7 @@ using System.Linq; using System.Threading; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.StaticPipe; using Xunit; @@ -43,8 +44,8 @@ public void CacheCheckpointTest() var trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray(); var pipe = ML.Transforms.CopyColumns("F1", "Features") - .Append(ML.Transforms.Normalize("Norm1", "F1")) - .Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance)); + .Append(ML.Transforms.NormalizeMinMax("Norm1", "F1")) + .Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1")); pipe.Fit(ML.Data.LoadFromEnumerable(trainData)); @@ -53,8 +54,8 @@ public void CacheCheckpointTest() trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray(); pipe = ML.Transforms.CopyColumns("F1", "Features") .AppendCacheCheckpoint(ML) - .Append(ML.Transforms.Normalize("Norm1", "F1")) - .Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance)); + .Append(ML.Transforms.NormalizeMinMax("Norm1", "F1")) + .Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1")); pipe.Fit(ML.Data.LoadFromEnumerable(trainData)); diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs index d1f691d8c5..17f4ba83fb 100644 --- a/test/Microsoft.ML.Tests/FeatureContributionTests.cs +++ b/test/Microsoft.ML.Tests/FeatureContributionTests.cs @@ -6,13 +6,12 @@ using System.IO; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using 
Microsoft.ML.Experimental; using Microsoft.ML.Data.IO; using Microsoft.ML.Internal.Utilities; -using Microsoft.ML.Model; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework.Attributes; using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; using Xunit; using Xunit.Abstractions; @@ -306,7 +305,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression, int numb var srcDV = bldr.GetDataView(); var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important") - .Append(ML.Transforms.Normalize("Features")); + .Append(ML.Transforms.NormalizeMinMax("Features")); if (task == TaskType.BinaryClassification) return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 723ab59055..5315bc854e 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -10,6 +10,7 @@ using System.Text.RegularExpressions; using Google.Protobuf; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Model.OnnxConverter; using Microsoft.ML.RunTests; using Microsoft.ML.Runtime; @@ -57,7 +58,7 @@ public void SimpleEndToEndOnnxConversionTest() hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca(new SdcaRegressionTrainer.Options() { LabelColumnName = "Target", @@ -137,7 +138,7 @@ public void KmeansOnnxConversionTest() separatorChar: '\t', hasHeader: true); - var pipeline = mlContext.Transforms.Normalize("Features"). + var pipeline = mlContext.Transforms.NormalizeMinMax("Features"). Append(mlContext.Clustering.Trainers.KMeans(new Trainers.KMeansTrainer.Options { FeatureColumnName = DefaultColumnNames.Features, @@ -315,7 +316,7 @@ public void LogisticRegressionOnnxConversionTest() hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca(new SdcaRegressionTrainer.Options() { LabelColumnName = "Target", @@ -352,7 +353,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest() hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numberOfIterations: 3, numberOfLeaves: 16, minimumExampleCountPerLeaf: 100)); var model = dynamicPipeline.Fit(data); @@ -383,7 +384,7 @@ public void MulticlassLogisticRegressionOnnxConversionTest() separatorChar: '\t', hasHeader: true); - var pipeline = mlContext.Transforms.Normalize("Features"). + var pipeline = mlContext.Transforms.NormalizeMinMax("Features"). Append(mlContext.Transforms.Conversion.MapValueToKey("Label")). 
Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(new LbfgsMaximumEntropyMulticlassTrainer.Options() { NumberOfThreads = 1 })); @@ -416,7 +417,7 @@ public void RemoveVariablesInPipelineTest() var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.OneHotEncodingEstimator.OutputKind.Bag) .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("F2"))) .Append(mlContext.Transforms.Concatenate("Features", "F1", "F2")) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfLeaves: 2, numberOfTrees: 1, minimumExampleCountPerLeaf: 2)); var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs index db49bbf161..589ae5ddba 100644 --- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs +++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs @@ -6,6 +6,7 @@ using System.Collections.Immutable; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers; @@ -421,7 +422,7 @@ private IDataView GetDenseDataset(TaskType task = TaskType.Regression) var srcDV = bldr.GetDataView(); var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2Important", "X3", "X4Rand") - .Append(ML.Transforms.Normalize("Features")); + .Append(ML.Transforms.NormalizeMinMax("Features")); if (task == TaskType.BinaryClassification) return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) .Fit(srcDV).Transform(srcDV); @@ -501,7 +502,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression) var srcDV = bldr.GetDataView(); var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important") - .Append(ML.Transforms.Normalize("Features")); + .Append(ML.Transforms.NormalizeMinMax("Features")); if (task == TaskType.BinaryClassification) { return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 867925788f..2b4201860b 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -7,6 +7,7 @@ using System.IO; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; @@ -93,7 +94,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m var pipeline = // First 'normalize' the data (rescale to be // between -1 and 1 for all examples), and then train the model. - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") // We add a step for caching data in memory so that the downstream iterative training // algorithm can efficiently scan through the data multiple times. Otherwise, the following // trainer will read data from disk multiple times. The caching mechanism uses an on-demand strategy. 
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index b92b95ad32..86d8e288b0 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; @@ -29,7 +30,7 @@ public void TrainAndPredictIrisModelTest() ); var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 088f164b28..2383601b63 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Trainers; using Xunit; @@ -34,7 +35,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() // Create Estimator var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label", "IrisPlantType"), TransformerScope.TrainTest) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index f2098c68e5..c44f0d5db2 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. 
using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers; using Xunit; @@ -27,7 +28,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest() ); var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 775289ca07..17d41c418e 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -225,8 +225,8 @@ public void SimpleConstructorsAndExtensions() var est1 = new NormalizingEstimator(Env, "float4"); var est2 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4")); var est3 = new NormalizingEstimator(Env, new NormalizingEstimator.MinMaxColumnOptions("float4")); - var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax); - var est5 = ML.Transforms.Normalize("float4"); + var est4 = ML.Transforms.NormalizeMinMax("float4", "float4"); + var est5 = ML.Transforms.NormalizeMinMax("float4"); var data1 = est1.Fit(data).Transform(data); var data2 = est2.Fit(data).Transform(data); @@ -246,7 +246,7 @@ public void SimpleConstructorsAndExtensions() // Tests for MeanVariance var est6 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4")); var est7 = new NormalizingEstimator(Env, new NormalizingEstimator.MeanVarianceColumnOptions("float4")); - var est8 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance); + var est8 = ML.Transforms.NormalizeMeanVariance("float4", "float4"); var data6 = est6.Fit(data).Transform(data); var data7 = est7.Fit(data).Transform(data); @@ -259,7 +259,7 @@ public void SimpleConstructorsAndExtensions() // Tests for LogMeanVariance var est9 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4")); var est10 = new NormalizingEstimator(Env, new NormalizingEstimator.LogMeanVarianceColumnOptions("float4")); - var est11 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance); + var est11 = ML.Transforms.NormalizeLogMeanVariance("float4", "float4"); var data9 = est9.Fit(data).Transform(data); var data10 = est10.Fit(data).Transform(data); @@ -272,7 +272,7 @@ public void SimpleConstructorsAndExtensions() // Tests for Binning var est12 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4")); var est13 = new NormalizingEstimator(Env, new NormalizingEstimator.BinningColumnOptions("float4")); - var est14 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning); + var est14 = ML.Transforms.NormalizeBinning("float4", "float4"); var data12 = est12.Fit(data).Transform(data); var data13 = est13.Fit(data).Transform(data); @@ -285,7 +285,7 @@ public void SimpleConstructorsAndExtensions() // Tests for SupervisedBinning var est15 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", 
"float4")); var est16 = new NormalizingEstimator(Env, new NormalizingEstimator.SupervisedBinningColumOptions("float4")); - var est17 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning); + var est17 = ML.Transforms.NormalizeSupervisedBinning("float4", "float4"); var data15 = est15.Fit(data).Transform(data); var data16 = est16.Fit(data).Transform(data); @@ -314,11 +314,11 @@ public void NormalizerExperimentalExtensions() var data = loader.Load(dataPath); // Normalizer Extensions - var est1 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax); - var est2 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance); - var est3 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance); - var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning); - var est5 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning); + var est1 = ML.Transforms.NormalizeMinMax("float4", "float4"); + var est2 = ML.Transforms.NormalizeMeanVariance("float4", "float4"); + var est3 = ML.Transforms.NormalizeLogMeanVariance("float4", "float4"); + var est4 = ML.Transforms.NormalizeBinning("float4", "float4"); + var est5 = ML.Transforms.NormalizeSupervisedBinning("float4", "float4"); // Normalizer Extensions (Experimental) var est6 = ML.Transforms.NormalizeMinMax("float4", "float4"); @@ -370,7 +370,7 @@ public void NormalizerExperimentalExtensionGetColumnPairs() }); var data = loader.Load(dataPath); - var est = ML.Transforms.Normalize("output", "input", NormalizingEstimator.NormalizationMode.MinMax); + var est = ML.Transforms.NormalizeMinMax("output", "input"); var t = est.Fit(data); Assert.Single(t.GetColumnPairs()); From 2caf24f06a67a50e46edcf13b10cd910bc914dce Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 1 Apr 2019 17:01:34 -0700 Subject: [PATCH 4/8] Remove Microsoft.ML.Experimental references in CS files. 
--- .../Dynamic/Normalizer.cs | 1 - .../PFIRegressionExample.cs | 1 - .../PfiBinaryClassificationExample.cs | 1 - .../Microsoft.ML.SamplesUtils.csproj | 1 - .../SamplesDatasetUtils.cs | 1 - .../NormalizerCatalog.cs | 100 ++++++++++++++++++ .../KMeansAndLogisticRegressionBench.cs | 1 - .../Microsoft.ML.Benchmarks.csproj | 1 - .../DataTransformation.cs | 1 - .../IntrospectiveTraining.cs | 1 - .../ModelFiles.cs | 1 - test/Microsoft.ML.Functional.Tests/ONNX.cs | 1 - .../Microsoft.ML.Functional.Tests/Training.cs | 2 - test/Microsoft.ML.Tests/CachingTests.cs | 1 - .../FeatureContributionTests.cs | 1 - test/Microsoft.ML.Tests/OnnxConversionTest.cs | 1 - .../PermutationFeatureImportanceTests.cs | 1 - .../Api/CookbookSamples/CookbookSamples.cs | 1 - .../CookbookSamplesDynamicApi.cs | 2 - .../Scenarios/IrisPlantClassificationTests.cs | 1 - ...PlantClassificationWithStringLabelTests.cs | 1 - .../IrisPlantClassificationTests.cs | 1 - 22 files changed, 100 insertions(+), 23 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 26a0514892..2c0fcce6bb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using Microsoft.ML.Data; -using Microsoft.ML.Experimental; namespace Microsoft.ML.Samples.Dynamic { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs index bb571dd20f..46b5bc65a6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs @@ -1,6 +1,5 @@ using System; using System.Linq; -using Microsoft.ML.Experimental; namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs index 04e0bd9178..8e109890e1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs @@ -1,6 +1,5 @@ using System; using System.Linq; -using Microsoft.ML.Experimental; using Microsoft.ML.Trainers; namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index 7553151b5c..b7c0a83577 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -8,7 +8,6 @@ - diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 08c3f5084a..6396ecee86 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -7,7 +7,6 @@ using System.IO; using System.Net; using Microsoft.ML.Data; -using Microsoft.ML.Experimental; namespace Microsoft.ML.SamplesUtils { diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs 
b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 30ba69c4b5..c307ab70d1 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -133,6 +133,106 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// Normalize (rescale) the column according to the mode. + /// It normalizes the data based on the observed minimum and maximum values of the data. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) + { + var columnOptions = new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// It normalizes the data based on the computed mean and variance of the data. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf) + { + var columnOptions = new NormalizingEstimator.MeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, useCdf); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// It normalizes the data based on the computed mean and variance of the logarithm of the data. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) + { + var columnOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, useCdf); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. 
+ /// The values are assigned into bins with equal density. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount) + { + var columnOptions = new NormalizingEstimator.BinningColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + + /// + /// Normalize (rescale) the column according to the mode. + /// The values are assigned into bins based on correlation with the column. + /// + /// The transform catalog + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Name of the label column for supervised binning. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + /// Minimum number of examples per bin. + public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, + string outputColumnName, string inputColumnName = null, + string labelColumnName = DefaultColumnNames.Label, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, + int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize) + { + var columnOptions = new NormalizingEstimator.SupervisedBinningColumOptions(outputColumnName, inputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin); + return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); + } + /// /// Normalize (rescale) columns according to specified custom parameters. 
/// diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index fe34d261ca..812e01242a 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -6,7 +6,6 @@ using Microsoft.ML.Benchmarks.Harness; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; -using Microsoft.ML.Experimental; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index e8bd79f8be..bcec84e5e0 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -15,7 +15,6 @@ - diff --git a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs index 9a34e4402e..5689be7320 100644 --- a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs +++ b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System; -using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; diff --git a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs index 62716a8a98..c60165558f 100644 --- a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs +++ b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs @@ -7,7 +7,6 @@ using System.Collections.Immutable; using System.Linq; using Microsoft.ML.Data; -using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index e248ecb6bb..78d4b003a9 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -8,7 +8,6 @@ using System.Linq; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; -using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers.FastTree; diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index 49ac2e16fc..88438305bd 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System.IO; -using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index babb02a0c8..6f8d264805 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -2,10 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using System;
 using System.Linq;
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.Functional.Tests.Datasets;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.TestFramework;
diff --git a/test/Microsoft.ML.Tests/CachingTests.cs b/test/Microsoft.ML.Tests/CachingTests.cs
index a78e54ab5a..46d1c7149e 100644
--- a/test/Microsoft.ML.Tests/CachingTests.cs
+++ b/test/Microsoft.ML.Tests/CachingTests.cs
@@ -5,7 +5,6 @@
 using System.Linq;
 using System.Threading;
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.StaticPipe;
 using Xunit;
diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs
index 17f4ba83fb..02cf7b3d1b 100644
--- a/test/Microsoft.ML.Tests/FeatureContributionTests.cs
+++ b/test/Microsoft.ML.Tests/FeatureContributionTests.cs
@@ -6,7 +6,6 @@
 using System.IO;
 using Microsoft.ML.Calibrators;
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.Data.IO;
 using Microsoft.ML.Internal.Utilities;
 using Microsoft.ML.RunTests;
diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
index 5315bc854e..1cd5a2a0d0 100644
--- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs
+++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -10,7 +10,6 @@
 using System.Text.RegularExpressions;
 using Google.Protobuf;
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.Model.OnnxConverter;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.Runtime;
diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs
index 589ae5ddba..ac86a8703f 100644
--- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs
+++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs
@@ -6,7 +6,6 @@
 using System.Collections.Immutable;
 using System.Linq;
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.Internal.Utilities;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.Trainers;
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
index cce1de27b5..d6c80e9482 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
@@ -5,7 +5,6 @@
 using System;
 using System.Collections.Generic;
 using System.Collections.Immutable;
-using System.IO;
 using System.Linq;
 using Microsoft.ML;
 using Microsoft.ML.Data;
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
index 2b4201860b..cb4ad1f999 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
@@ -4,10 +4,8 @@
 using System;
 using System.Collections.Generic;
-using System.IO;
 using System.Linq;
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.TestFramework;
 using Microsoft.ML.Trainers;
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
index 86d8e288b0..5f89e1ad79 100644
--- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
@@ -3,7 +3,6 @@
 // See the LICENSE file in the project root for more information.
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.TestFramework;
 using Microsoft.ML.Trainers;
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
index 2383601b63..fdef9d0513 100644
--- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
@@ -3,7 +3,6 @@
 // See the LICENSE file in the project root for more information.
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.Trainers;
 using Xunit;
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
index c44f0d5db2..36eb2d1b6b 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
@@ -3,7 +3,6 @@
 // See the LICENSE file in the project root for more information.
 using Microsoft.ML.Data;
-using Microsoft.ML.Experimental;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.Trainers;
 using Xunit;
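A minimal usage sketch of the catalog methods these test changes now rely on, assuming an IDataView named data with a numeric "Features" column and a "Label" column (the variable and column names are illustrative, not taken from the patch):

    var mlContext = new MLContext();
    // Min-max normalization comes straight from the stable transforms catalog;
    // no `using Microsoft.ML.Experimental;` is required anymore.
    var minMax = mlContext.Transforms.NormalizeMinMax("FeaturesNorm", "Features");
    // Supervised binning buckets values according to their correlation with the label column.
    var supervisedBinning = mlContext.Transforms.NormalizeSupervisedBinning(
        "FeaturesBinned", "Features", labelColumnName: "Label");
    var transformed = minMax.Append(supervisedBinning).Fit(data).Transform(data);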
From ba850dcabbd9b22009997a82c4260e458d954b98 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui
Date: Mon, 1 Apr 2019 17:15:20 -0700
Subject: [PATCH 5/8] merge fix.

---
 .../NormalizerCatalog.cs | 101 ------------------
 1 file changed, 101 deletions(-)

diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
index c307ab70d1..dfab1048d0 100644
--- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
+++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
@@ -9,7 +9,6 @@ namespace Microsoft.ML
 ///
 public static class NormalizationCatalog
 {
-
 ///
 /// Normalize (rescale) several columns according to the specified .
 ///
@@ -133,106 +132,6 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat
 return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
 }
- ///
- /// Normalize (rescale) the column according to the mode.
- /// It normalizes the data based on the observed minimum and maximum values of the data.
- ///
- /// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
- /// Maximum number of examples used to train the normalizer.
- /// Whether to map zero to zero, preserving sparsity.
- public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog,
- string outputColumnName, string inputColumnName = null,
- long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
- bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched)
- {
- var columnOptions = new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero);
- return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
- }
-
- ///
- /// Normalize (rescale) the column according to the mode.
- /// It normalizes the data based on the computed mean and variance of the data.
- ///
- /// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
- /// Maximum number of examples used to train the normalizer.
- /// Whether to map zero to zero, preserving sparsity.
- /// Whether to use CDF as the output.
- public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog,
- string outputColumnName, string inputColumnName = null,
- long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
- bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
- bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf)
- {
- var columnOptions = new NormalizingEstimator.MeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, useCdf);
- return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
- }
-
- ///
- /// Normalize (rescale) the column according to the mode.
- /// It normalizes the data based on the computed mean and variance of the logarithm of the data.
- ///
- /// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
- /// Maximum number of examples used to train the normalizer.
- /// Whether to use CDF as the output.
- public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog,
- string outputColumnName, string inputColumnName = null,
- long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
- bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf)
- {
- var columnOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, useCdf);
- return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
- }
-
- ///
- /// Normalize (rescale) the column according to the mode.
- /// The values are assigned into bins with equal density.
- ///
- /// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
- /// Maximum number of examples used to train the normalizer.
- /// Whether to map zero to zero, preserving sparsity.
- /// Maximum number of bins (power of 2 recommended).
- public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
- string outputColumnName, string inputColumnName = null,
- long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
- bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
- int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount)
- {
- var columnOptions = new NormalizingEstimator.BinningColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount);
- return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
- }
-
- ///
- /// Normalize (rescale) the column according to the mode.
- /// The values are assigned into bins based on correlation with the column.
- ///
- /// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
- /// Name of the label column for supervised binning.
- /// Maximum number of examples used to train the normalizer.
- /// Whether to map zero to zero, preserving sparsity.
- /// Maximum number of bins (power of 2 recommended).
- /// Minimum number of examples per bin.
- public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog,
- string outputColumnName, string inputColumnName = null,
- string labelColumnName = DefaultColumnNames.Label,
- long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
- bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
- int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount,
- int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
- {
- var columnOptions = new NormalizingEstimator.SupervisedBinningColumOptions(outputColumnName, inputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin);
- return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
- }
-
 ///
 /// Normalize (rescale) columns according to specified custom parameters.
 ///
From bde12a7ded0678babbf1d9f6cf22047a204219a0 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui
Date: Mon, 1 Apr 2019 17:18:38 -0700
Subject: [PATCH 6/8] cleanup.

---
 docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
index d346f97db3..7cb766c6de 100644
--- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
+++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
@@ -6,7 +6,6 @@
-
From d705dc2019e011b67126fdb12f6b9022b57c6020 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui
Date: Mon, 1 Apr 2019 17:20:15 -0700
Subject: [PATCH 7/8] cleanup.

---
 src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj
index 8260cd5ab7..4aa4a4eb79 100644
--- a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj
+++ b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj
@@ -48,7 +48,6 @@
-
From 4833feba53899931b8df488e9a54ab5475dda3da Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui
Date: Mon, 1 Apr 2019 18:03:14 -0700
Subject: [PATCH 8/8] PR feedback.

---
 docs/code/MlNetCookBook.md | 11 ++++-------
 src/Microsoft.ML.Data/Transforms/Normalizer.cs | 3 ++-
 src/Microsoft.ML.Transforms/NormalizerCatalog.cs | 12 +++++++-----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md
index f509ebfe57..78447515e1 100644
--- a/docs/code/MlNetCookBook.md
+++ b/docs/code/MlNetCookBook.md
@@ -344,7 +344,7 @@ var cachedTrainData = mlContext.Data.Cache(trainData);
 var pipeline =
 // First 'normalize' the data (rescale to be
 // between -1 and 1 for all examples)
- mlContext.Transforms.Normalize("FeatureVector")
+ mlContext.Transforms.NormalizeMinMax("FeatureVector")
 // We add a step for caching data in memory so that the downstream iterative training
 // algorithm can efficiently scan through the data multiple times. Otherwise, the following
 // trainer will load data from disk multiple times. The caching mechanism uses an on-demand strategy.
@@ -625,18 +625,15 @@ var trainData = mlContext.Data.LoadFromTextFile(dataPath, separatorChar: ',' ); -// Apply all kinds of standard ML.NET normalization to the raw features. +// Apply MinMax normalization to the raw features. var pipeline = - mlContext.Transforms.Normalize( - new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true), - new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true), - new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256)); + mlContext.Transforms.NormalizeMinMax("MinMaxNormalized", "Features"); // Let's train our pipeline of normalizers, and then apply it to the same data. var normalizedData = pipeline.Fit(trainData).Transform(trainData); // Inspect one column of the resulting dataset. -var meanVarValues = normalizedData.GetColumn(normalizedData.Schema["MeanVarNormalized"]).ToArray(); +var meanVarValues = normalizedData.GetColumn(normalizedData.Schema["MinMaxNormalized"]).ToArray(); ``` ## How do I train my model on categorical data? diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 114f0a328c..e0f5d5f019 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -39,7 +39,8 @@ internal static class Defaults public const long MaximumExampleCount = 1000000000; } - public enum NormalizationMode + [BestFriend] + internal enum NormalizationMode { /// /// Linear rescale such that minimum and maximum values are mapped between -1 and 1. diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index dfab1048d0..22696851c8 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -33,7 +33,6 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, } /// - /// Normalize (rescale) the column according to the mode. /// It normalizes the data based on the observed minimum and maximum values of the data. /// /// The transform catalog @@ -41,6 +40,13 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// Name of the column to transform. If set to , the value of the will be used as source. /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, @@ -51,7 +57,6 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo } /// - /// Normalize (rescale) the column according to the mode. /// It normalizes the data based on the computed mean and variance of the data. /// /// The transform catalog @@ -71,7 +76,6 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog } /// - /// Normalize (rescale) the column according to the mode. /// It normalizes the data based on the computed mean and variance of the logarithm of the data. /// /// The transform catalog @@ -89,7 +93,6 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal } /// - /// Normalize (rescale) the column according to the mode. /// The values are assigned into bins with equal density. 
/// /// The transform catalog @@ -109,7 +112,6 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal } /// - /// Normalize (rescale) the column according to the mode. /// The values are assigned into bins based on correlation with the column. /// /// The transform catalog
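Taken as a whole, the series leaves one dedicated method per normalization mode on the stable catalog (NormalizeMinMax, NormalizeMeanVariance, NormalizeLogMeanVariance, NormalizeBinning, NormalizeSupervisedBinning), with the NormalizationMode enum no longer public. A short end-to-end sketch of that surface, assuming trainData is an IDataView with a numeric vector column named "Features" (names are illustrative only):

    var mlContext = new MLContext();
    var pipeline = mlContext.Transforms.NormalizeMinMax("MinMaxNormalized", "Features")
        .Append(mlContext.Transforms.NormalizeMeanVariance("MeanVarNormalized", "Features"))
        .Append(mlContext.Transforms.NormalizeBinning("BinNormalized", "Features", maximumBinCount: 256));
    // Fit the normalizers and apply them to the same data.
    var normalizedData = pipeline.Fit(trainData).Transform(trainData);
    // Inspect one of the normalized columns.
    var minMaxValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MinMaxNormalized"]).ToArray();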