diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index e1e2b6ea3f..6b3d9f957b 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -6,6 +6,7 @@ + diff --git a/src/Microsoft.ML.HalLearners/ProjectionCatalog.cs b/src/Microsoft.ML.HalLearners/ProjectionCatalog.cs new file mode 100644 index 0000000000..8d78f017dd --- /dev/null +++ b/src/Microsoft.ML.HalLearners/ProjectionCatalog.cs @@ -0,0 +1,47 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Transforms.Projections; + +namespace Microsoft.ML +{ + public static class ProjectionCatalog + { + /// + /// Takes column filled with a vector of random variables with a known covariance matrix into a set of new variables whose covariance is the identity matrix, + /// meaning that they are uncorrelated and each have variance 1. + /// + /// The transform's catalog. + /// Name of the input column. + /// Name of the column resulting from the transformation of . Null means is replaced. + /// Whitening kind (PCA/ZCA). + /// Whitening constant, prevents division by zero. + /// Maximum number of rows used to train the transform. + /// In case of PCA whitening, indicates the number of components to retain. + /// + /// + /// + /// + /// + public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, string inputColumn, string outputColumn = null, + WhiteningKind kind = VectorWhiteningTransformer.Defaults.Kind, + float eps = VectorWhiteningTransformer.Defaults.Eps, + int maxRows = VectorWhiteningTransformer.Defaults.MaxRows, + int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum) + => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, kind, eps, maxRows, pcaNum); + + /// + /// Takes columns filled with a vector of random variables with a known covariance matrix into a set of new variables whose covariance is the identity matrix, + /// meaning that they are uncorrelated and each have variance 1. + /// + /// The transform's catalog. + /// Describes the parameters of the whitening process for each column pair. + public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningTransformer.ColumnInfo[] columns) + => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns); + } +} diff --git a/src/Microsoft.ML.HalLearners/TransformsStatic.cs b/src/Microsoft.ML.HalLearners/TransformsStatic.cs new file mode 100644 index 0000000000..87ea5a2c0f --- /dev/null +++ b/src/Microsoft.ML.HalLearners/TransformsStatic.cs @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Core.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.StaticPipe.Runtime; +using Microsoft.ML.Transforms.Projections; +using System.Collections.Generic; + +namespace Microsoft.ML.StaticPipe +{ + /// + /// Extensions for statically typed Whitening estimator. + /// + public static class VectorWhiteningExtensions + { + private sealed class OutPipelineColumn : Vector + { + public readonly Vector Input; + + public OutPipelineColumn(Vector input, WhiteningKind kind, float eps, int maxRows, int pcaNum) + : base(new Reconciler(kind, eps, maxRows, pcaNum), input) + { + Input = input; + } + } + + private sealed class Reconciler : EstimatorReconciler + { + private readonly WhiteningKind _kind; + private readonly float _eps; + private readonly int _maxRows; + private readonly int _pcaNum; + + public Reconciler(WhiteningKind kind, float eps, int maxRows, int pcaNum) + { + _kind = kind; + _eps = eps; + _maxRows = maxRows; + _pcaNum = pcaNum; + } + + public override IEstimator Reconcile(IHostEnvironment env, + PipelineColumn[] toOutput, + IReadOnlyDictionary inputNames, + IReadOnlyDictionary outputNames, + IReadOnlyCollection usedNames) + { + Contracts.Assert(toOutput.Length == 1); + + var infos = new VectorWhiteningTransformer.ColumnInfo[toOutput.Length]; + for (int i = 0; i < toOutput.Length; i++) + infos[i] = new VectorWhiteningTransformer.ColumnInfo(inputNames[((OutPipelineColumn)toOutput[i]).Input], outputNames[toOutput[i]], _kind, _eps, _maxRows, _pcaNum); + + return new VectorWhiteningEstimator(env, infos); + } + } + + /// + /// The column to which the transform will be applied. + /// Whitening constant, prevents division by zero when scaling the data by inverse of eigenvalues. + /// Maximum number of rows used to train the transform. + /// In case of PCA whitening, indicates the number of components to retain. + public static Vector PcaWhitening(this Vector input, + float eps = VectorWhiteningTransformer.Defaults.Eps, + int maxRows = VectorWhiteningTransformer.Defaults.MaxRows, + int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum) + => new OutPipelineColumn(input, WhiteningKind.Pca, eps, maxRows, pcaNum); + + /// + /// The column to which the transform will be applied. + /// Whitening constant, prevents division by zero. + /// Maximum number of rows used to train the transform. + public static Vector ZcaWhitening(this Vector input, + float eps = VectorWhiteningTransformer.Defaults.Eps, + int maxRows = VectorWhiteningTransformer.Defaults.MaxRows) + => new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningTransformer.Defaults.PcaNum); + } +} diff --git a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.HalLearners/VectorWhitening.cs similarity index 99% rename from src/Microsoft.ML.Transforms/VectorWhitening.cs rename to src/Microsoft.ML.HalLearners/VectorWhitening.cs index 1bf5cbc861..40ded6f14e 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.HalLearners/VectorWhitening.cs @@ -380,7 +380,7 @@ private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputDat for (int i = 0; i < columns.Length; i++) { - if(!inputSchema.TryGetColumnIndex(columns[i].Input, out cols[i])) + if (!inputSchema.TryGetColumnIndex(columns[i].Input, out cols[i])) throw env.ExceptSchemaMismatch(nameof(inputSchema), "input", columns[i].Input); srcTypes[i] = inputSchema.GetColumnType(cols[i]); var reason = TestColumn(srcTypes[i]); diff --git a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs index db9089b34b..e998a7eaf6 100644 --- a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs @@ -40,40 +40,6 @@ public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RandomFourierFeaturizingTransformer.ColumnInfo[] columns) => new RandomFourierFeaturizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); - /// - /// Takes column filled with a vector of random variables with a known covariance matrix into a set of new variables whose covariance is the identity matrix, - /// meaning that they are uncorrelated and each have variance 1. - /// - /// The transform's catalog. - /// Name of the input column. - /// Name of the column resulting from the transformation of . Null means is replaced. - /// Whitening kind (PCA/ZCA). - /// Whitening constant, prevents division by zero. - /// Maximum number of rows used to train the transform. - /// In case of PCA whitening, indicates the number of components to retain. - /// - /// - /// - /// - /// - public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, string inputColumn, string outputColumn = null, - WhiteningKind kind = VectorWhiteningTransformer.Defaults.Kind, - float eps = VectorWhiteningTransformer.Defaults.Eps, - int maxRows = VectorWhiteningTransformer.Defaults.MaxRows, - int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum) - => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, kind, eps, maxRows, pcaNum); - - /// - /// Takes columns filled with a vector of random variables with a known covariance matrix into a set of new variables whose covariance is the identity matrix, - /// meaning that they are uncorrelated and each have variance 1. - /// - /// The transform's catalog. - /// Describes the parameters of the whitening process for each column pair. - public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningTransformer.ColumnInfo[] columns) - => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns); - /// /// Takes column filled with a vector of floats and computes L-p norm of it. /// diff --git a/src/Microsoft.ML.Transforms/TransformsStatic.cs b/src/Microsoft.ML.Transforms/TransformsStatic.cs index 238c9b982e..fc23ebfb38 100644 --- a/src/Microsoft.ML.Transforms/TransformsStatic.cs +++ b/src/Microsoft.ML.Transforms/TransformsStatic.cs @@ -10,74 +10,6 @@ namespace Microsoft.ML.StaticPipe { - /// - /// Extensions for statically typed Whitening estimator. - /// - public static class VectorWhiteningExtensions - { - private sealed class OutPipelineColumn : Vector - { - public readonly Vector Input; - - public OutPipelineColumn(Vector input, WhiteningKind kind, float eps, int maxRows, int pcaNum) - : base(new Reconciler(kind, eps, maxRows, pcaNum), input) - { - Input = input; - } - } - - private sealed class Reconciler : EstimatorReconciler - { - private readonly WhiteningKind _kind; - private readonly float _eps; - private readonly int _maxRows; - private readonly int _pcaNum; - - public Reconciler(WhiteningKind kind, float eps, int maxRows, int pcaNum) - { - _kind = kind; - _eps = eps; - _maxRows = maxRows; - _pcaNum = pcaNum; - } - - public override IEstimator Reconcile(IHostEnvironment env, - PipelineColumn[] toOutput, - IReadOnlyDictionary inputNames, - IReadOnlyDictionary outputNames, - IReadOnlyCollection usedNames) - { - Contracts.Assert(toOutput.Length == 1); - - var infos = new VectorWhiteningTransformer.ColumnInfo[toOutput.Length]; - for (int i = 0; i < toOutput.Length; i++) - infos[i] = new VectorWhiteningTransformer.ColumnInfo(inputNames[((OutPipelineColumn)toOutput[i]).Input], outputNames[toOutput[i]], _kind, _eps, _maxRows, _pcaNum); - - return new VectorWhiteningEstimator(env, infos); - } - } - - /// - /// The column to which the transform will be applied. - /// Whitening constant, prevents division by zero when scaling the data by inverse of eigenvalues. - /// Maximum number of rows used to train the transform. - /// In case of PCA whitening, indicates the number of components to retain. - public static Vector PcaWhitening(this Vector input, - float eps = VectorWhiteningTransformer.Defaults.Eps, - int maxRows = VectorWhiteningTransformer.Defaults.MaxRows, - int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum) - => new OutPipelineColumn(input, WhiteningKind.Pca, eps, maxRows, pcaNum); - - /// - /// The column to which the transform will be applied. - /// Whitening constant, prevents division by zero. - /// Maximum number of rows used to train the transform. - public static Vector ZcaWhitening(this Vector input, - float eps = VectorWhiteningTransformer.Defaults.Eps, - int maxRows = VectorWhiteningTransformer.Defaults.MaxRows) - => new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningTransformer.Defaults.PcaNum); - } - /// /// Extensions for statically typed . ///