Skip to content

Commit f09b25f

Browse files
authored
Scrub projection and normalization transforms (#2865)
1 parent eb3c364 commit f09b25f

39 files changed

+605
-602
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public static void Example()
6666

6767
// Composing a different pipeline if we wanted to normalize more than one column at a time.
6868
// Using log scale as the normalization mode.
69-
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
69+
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizationMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
7070
// The transformed data.
7171
var multiColtransformer = multiColPipeline.Fit(trainData);
7272
var multiColtransformedData = multiColtransformer.Transform(trainData);

docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public static void Example()
3737
};
3838

3939
// A pipeline to project Features column into random Fourier space.
40-
var rffPipeline = ml.Transforms.Projection.CreateRandomFourierFeatures(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), newDim: 4);
40+
var rffPipeline = ml.Transforms.RandomFourierKernelMap(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4);
4141
// The transformed (projected) data.
4242
var transformedData = rffPipeline.Fit(trainData).Transform(trainData);
4343
// Getting the data of the newly created column, so we can preview it.
@@ -55,7 +55,7 @@ public static void Example()
5555
//0.165 0.117 -0.547 0.014
5656

5757
// A pipeline to project Features column into L-p normalized vector.
58-
var lpNormalizePipeline = ml.Transforms.Projection.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.LpNormalizingEstimatorBase.NormalizerKind.L1Norm);
58+
var lpNormalizePipeline = ml.Transforms.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.LpNormalizingEstimatorBase.NormFunction.L1);
5959
// The transformed (projected) data.
6060
transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData);
6161
// Getting the data of the newly created column, so we can preview it.
@@ -73,7 +73,7 @@ public static void Example()
7373
// 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 0.111
7474

7575
// A pipeline to project Features column into global contrast normalized vector.
76-
var gcNormalizePipeline = ml.Transforms.Projection.GlobalContrastNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), substractMean:false);
76+
var gcNormalizePipeline = ml.Transforms.GlobalContrastNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean:false);
7777
// The transformed (projected) data.
7878
transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData);
7979
// Getting the data of the newly created column, so we can preview it.

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public static void Example()
2929
var data = mlContext.Data.LoadFromEnumerable(samples);
3030

3131
// Create an anomaly detector. Its underlying algorithm is randomized PCA.
32-
var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(featureColumnName: nameof(DataPoint.Features), rank: 1, center: false);
32+
var pipeline = mlContext.AnomalyDetection.Trainers.AnalyzeRandomizedPrincipalComponents(featureColumnName: nameof(DataPoint.Features), rank: 1, ensureZeroMean: false);
3333

3434
// Train the anomaly detector.
3535
var model = pipeline.Fit(data);

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ public static void Example()
2828
// Convert the List<DataPoint> to IDataView, a consumable format for ML.NET functions.
2929
var data = mlContext.Data.LoadFromEnumerable(samples);
3030

31-
var options = new ML.Trainers.RandomizedPcaTrainer.Options()
31+
var options = new ML.Trainers.RandomizedPrincipalComponentAnalyzer.Options()
3232
{
3333
FeatureColumnName = nameof(DataPoint.Features),
3434
Rank = 1,
3535
Seed = 10,
3636
};
3737

3838
// Create an anomaly detector. Its underlying algorithm is randomized PCA.
39-
var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(options);
39+
var pipeline = mlContext.AnomalyDetection.Trainers.AnalyzeRandomizedPrincipalComponents(options);
4040

4141
// Train the anomaly detector.
4242
var model = pipeline.Fit(data);

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,9 @@ public static void Example()
3838
Console.WriteLine($"{string.Join(" ", row.DenseValues().Select(x => x.ToString("f3")))} ");
3939
};
4040

41-
4241
// A pipeline to project Features column into white noise vector.
43-
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features),
44-
kind: Transforms.WhiteningKind.Zca);
42+
var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features),
43+
kind: Transforms.WhiteningKind.ZeroPhaseComponentAnalysis);
4544
// The transformed (projected) data.
4645
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
4746
// Getting the data of the newly created column, so we can preview it.

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ public static void Example()
3939

4040

4141
// A pipeline to project Features column into white noise vector.
42-
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.VectorWhiteningEstimator.ColumnOptions(
43-
nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.Pca, pcaNum: 4));
42+
var whiteningPipeline = ml.Transforms.VectorWhiten(new Transforms.VectorWhiteningEstimator.ColumnOptions(
43+
nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4));
4444
// The transformed (projected) data.
4545
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
4646
// Getting the data of the newly created column, so we can preview it.

src/Microsoft.ML.Data/Transforms/Normalizer.cs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ internal static class Defaults
3939
public const long MaxTrainingExamples = 1000000000;
4040
}
4141

42-
public enum NormalizerMode
42+
public enum NormalizationMode
4343
{
4444
/// <summary>
4545
/// Linear rescale such that minimum and maximum values are mapped between -1 and 1.
@@ -82,19 +82,19 @@ private protected ColumnOptionsBase(string name, string inputColumnName, long ma
8282

8383
internal abstract IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor);
8484

85-
internal static ColumnOptionsBase Create(string outputColumnName, string inputColumnName, NormalizerMode mode)
85+
internal static ColumnOptionsBase Create(string outputColumnName, string inputColumnName, NormalizationMode mode)
8686
{
8787
switch (mode)
8888
{
89-
case NormalizerMode.MinMax:
89+
case NormalizationMode.MinMax:
9090
return new MinMaxColumnOptions(outputColumnName, inputColumnName);
91-
case NormalizerMode.MeanVariance:
91+
case NormalizationMode.MeanVariance:
9292
return new MeanVarColumnOptions(outputColumnName, inputColumnName);
93-
case NormalizerMode.LogMeanVariance:
93+
case NormalizationMode.LogMeanVariance:
9494
return new LogMeanVarColumnOptions(outputColumnName, inputColumnName);
95-
case NormalizerMode.Binning:
95+
case NormalizationMode.Binning:
9696
return new BinningColumnOptions(outputColumnName, inputColumnName);
97-
case NormalizerMode.SupervisedBinning:
97+
case NormalizationMode.SupervisedBinning:
9898
return new SupervisedBinningColumOptions(outputColumnName, inputColumnName);
9999
default:
100100
throw Contracts.ExceptParam(nameof(mode), "Unknown normalizer mode");
@@ -202,8 +202,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D
202202
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
203203
/// <param name="inputColumnName">Name of the column to transform.
204204
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
205-
/// <param name="mode">The <see cref="NormalizerMode"/> indicating how the old values are mapped to the new values.</param>
206-
internal NormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, NormalizerMode mode = NormalizerMode.MinMax)
205+
/// <param name="mode">The <see cref="NormalizationMode"/> indicating how the old values are mapped to the new values.</param>
206+
internal NormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, NormalizationMode mode = NormalizationMode.MinMax)
207207
: this(env, mode, (outputColumnName, inputColumnName ?? outputColumnName))
208208
{
209209
}
@@ -212,9 +212,9 @@ internal NormalizingEstimator(IHostEnvironment env, string outputColumnName, str
212212
/// Initializes a new instance of <see cref="NormalizingEstimator"/>.
213213
/// </summary>
214214
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
215-
/// <param name="mode">The <see cref="NormalizerMode"/> indicating how the old values are mapped to the new values.</param>
215+
/// <param name="mode">The <see cref="NormalizationMode"/> indicating how the old values are mapped to the new values.</param>
216216
/// <param name="columns">An array of (outputColumnName, inputColumnName) tuples.</param>
217-
internal NormalizingEstimator(IHostEnvironment env, NormalizerMode mode, params (string outputColumnName, string inputColumnName)[] columns)
217+
internal NormalizingEstimator(IHostEnvironment env, NormalizationMode mode, params (string outputColumnName, string inputColumnName)[] columns)
218218
{
219219
Contracts.CheckValue(env, nameof(env));
220220
_host = env.Register(nameof(NormalizingEstimator));

src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs

Lines changed: 0 additions & 61 deletions
This file was deleted.

src/Microsoft.ML.Data/Transforms/TransformsCatalog.cs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,6 @@ public sealed class TransformsCatalog : IInternalCatalog
3030
/// </summary>
3131
public TextTransforms Text { get; }
3232

33-
/// <summary>
34-
/// The list of operations for data projection.
35-
/// </summary>
36-
public ProjectionTransforms Projection { get; }
37-
3833
/// <summary>
3934
/// The list of operations for selecting features based on some criteria.
4035
/// </summary>
@@ -48,7 +43,6 @@ internal TransformsCatalog(IHostEnvironment env)
4843
Categorical = new CategoricalTransforms(this);
4944
Conversion = new ConversionTransforms(this);
5045
Text = new TextTransforms(this);
51-
Projection = new ProjectionTransforms(this);
5246
FeatureSelection = new FeatureSelectionTransforms(this);
5347
}
5448

@@ -94,20 +88,6 @@ internal TextTransforms(TransformsCatalog owner)
9488
}
9589
}
9690

97-
/// <summary>
98-
/// The catalog of projection operations.
99-
/// </summary>
100-
public sealed class ProjectionTransforms : IInternalCatalog
101-
{
102-
IHostEnvironment IInternalCatalog.Environment => _env;
103-
private readonly IHostEnvironment _env;
104-
105-
internal ProjectionTransforms(TransformsCatalog owner)
106-
{
107-
_env = owner.GetEnvironment();
108-
}
109-
}
110-
11191
/// <summary>
11292
/// The catalog of feature selection operations.
11393
/// </summary>

src/Microsoft.ML.Mkl.Components.StaticPipe/VectorWhiteningStaticExtensions.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,18 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
6161
/// <param name="maxRows">Maximum number of rows used to train the transform.</param>
6262
/// <param name="pcaNum">In case of PCA whitening, indicates the number of components to retain.</param>
6363
public static Vector<float> PcaWhitening(this Vector<float> input,
64-
float eps = VectorWhiteningEstimator.Defaults.Eps,
65-
int maxRows = VectorWhiteningEstimator.Defaults.MaxRows,
66-
int pcaNum = VectorWhiteningEstimator.Defaults.PcaNum)
67-
=> new OutPipelineColumn(input, WhiteningKind.Pca, eps, maxRows, pcaNum);
64+
float eps = VectorWhiteningEstimator.Defaults.Epsilon,
65+
int maxRows = VectorWhiteningEstimator.Defaults.MaximumNumberOfRows,
66+
int pcaNum = VectorWhiteningEstimator.Defaults.Rank)
67+
=> new OutPipelineColumn(input, WhiteningKind.PrincipalComponentAnalysis, eps, maxRows, pcaNum);
6868

6969
/// <include file='../Microsoft.ML.Mkl.Components/doc.xml' path='doc/members/member[@name="Whitening"]/*'/>
7070
/// <param name="input">The column to which the transform will be applied.</param>
7171
/// <param name="eps">Whitening constant, prevents division by zero.</param>
7272
/// <param name="maxRows">Maximum number of rows used to train the transform.</param>
7373
public static Vector<float> ZcaWhitening(this Vector<float> input,
74-
float eps = VectorWhiteningEstimator.Defaults.Eps,
75-
int maxRows = VectorWhiteningEstimator.Defaults.MaxRows)
76-
=> new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningEstimator.Defaults.PcaNum);
74+
float eps = VectorWhiteningEstimator.Defaults.Epsilon,
75+
int maxRows = VectorWhiteningEstimator.Defaults.MaximumNumberOfRows)
76+
=> new OutPipelineColumn(input, WhiteningKind.ZeroPhaseComponentAnalysis, eps, maxRows, VectorWhiteningEstimator.Defaults.Rank);
7777
}
7878
}

0 commit comments

Comments
 (0)