Skip to content

Moved WhiteningTransform to HalLearners #1583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.HalLearners\Microsoft.ML.HalLearners.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.SamplesUtils\Microsoft.ML.SamplesUtils.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
Expand Down
47 changes: 47 additions & 0 deletions src/Microsoft.ML.HalLearners/ProjectionCatalog.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Transforms.Projections;

namespace Microsoft.ML
{
/// <summary>
/// Extension methods on <see cref="TransformsCatalog.ProjectionTransforms"/> that create vector whitening estimators.
/// </summary>
public static class ProjectionCatalog
{
    /// <summary>
    /// Creates a <see cref="VectorWhiteningEstimator"/> for a single column. Whitening maps a vector of random variables
    /// with a known covariance matrix onto a new set of variables whose covariance is the identity matrix,
    /// i.e. the new variables are uncorrelated and each has variance 1.
    /// </summary>
    /// <param name="catalog">The transform's catalog.</param>
    /// <param name="inputColumn">Name of the input column.</param>
    /// <param name="outputColumn">Name of the column produced by transforming <paramref name="inputColumn"/>. Null means <paramref name="inputColumn"/> is replaced.</param>
    /// <param name="kind">Whitening kind (PCA/ZCA).</param>
    /// <param name="eps">Whitening constant, prevents division by zero.</param>
    /// <param name="maxRows">Maximum number of rows used to train the transform.</param>
    /// <param name="pcaNum">In case of PCA whitening, indicates the number of components to retain.</param>
    /// <returns>A new <see cref="VectorWhiteningEstimator"/> built from the given settings.</returns>
    /// <example>
    /// <format type="text/markdown">
    /// <![CDATA[
    /// [!code-csharp[VectorWhiten](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=5-11,16-115)]
    /// ]]>
    /// </format>
    /// </example>
    public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, string inputColumn, string outputColumn = null,
        WhiteningKind kind = VectorWhiteningTransformer.Defaults.Kind,
        float eps = VectorWhiteningTransformer.Defaults.Eps,
        int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
        int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
    {
        // The estimator is constructed against the environment obtained from the catalog.
        var environment = CatalogUtils.GetEnvironment(catalog);
        return new VectorWhiteningEstimator(environment, inputColumn, outputColumn, kind, eps, maxRows, pcaNum);
    }

    /// <summary>
    /// Creates a <see cref="VectorWhiteningEstimator"/> for several columns at once. Whitening maps vectors of random variables
    /// with a known covariance matrix onto new variables whose covariance is the identity matrix,
    /// i.e. the new variables are uncorrelated and each has variance 1.
    /// </summary>
    /// <param name="catalog">The transform's catalog.</param>
    /// <param name="columns">Describes the parameters of the whitening process for each column pair.</param>
    /// <returns>A new <see cref="VectorWhiteningEstimator"/> covering all of <paramref name="columns"/>.</returns>
    public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningTransformer.ColumnInfo[] columns)
    {
        var environment = CatalogUtils.GetEnvironment(catalog);
        return new VectorWhiteningEstimator(environment, columns);
    }
}
}
80 changes: 80 additions & 0 deletions src/Microsoft.ML.HalLearners/TransformsStatic.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.StaticPipe.Runtime;
using Microsoft.ML.Transforms.Projections;
using System.Collections.Generic;

namespace Microsoft.ML.StaticPipe
{
/// <summary>
/// Statically typed pipeline extensions that expose the vector whitening estimator.
/// </summary>
public static class VectorWhiteningExtensions
{
    /// <include file='doc.xml' path='doc/members/member[@name="Whitening"]/*'/>
    /// <param name="input">The column to which the transform will be applied.</param>
    /// <param name="eps">Whitening constant, prevents division by zero when scaling the data by inverse of eigenvalues.</param>
    /// <param name="maxRows">Maximum number of rows used to train the transform.</param>
    /// <param name="pcaNum">In case of PCA whitening, indicates the number of components to retain.</param>
    public static Vector<float> PcaWhitening(this Vector<float> input,
        float eps = VectorWhiteningTransformer.Defaults.Eps,
        int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
        int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
    {
        return new OutPipelineColumn(input, WhiteningKind.Pca, eps, maxRows, pcaNum);
    }

    /// <include file='doc.xml' path='doc/members/member[@name="Whitening"]/*'/>
    /// <param name="input">The column to which the transform will be applied.</param>
    /// <param name="eps">Whitening constant, prevents division by zero.</param>
    /// <param name="maxRows">Maximum number of rows used to train the transform.</param>
    public static Vector<float> ZcaWhitening(this Vector<float> input,
        float eps = VectorWhiteningTransformer.Defaults.Eps,
        int maxRows = VectorWhiteningTransformer.Defaults.MaxRows)
    {
        // ZCA exposes no component count to the caller, so the default PcaNum is forwarded.
        return new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningTransformer.Defaults.PcaNum);
    }

    // Output column of the whitening step; keeps a reference to its input column so that
    // the reconciler can look up the input's name.
    private sealed class OutPipelineColumn : Vector<float>
    {
        public readonly Vector<float> Input;

        public OutPipelineColumn(Vector<float> input, WhiteningKind kind, float eps, int maxRows, int pcaNum)
            : base(new Reconciler(kind, eps, maxRows, pcaNum), input)
        {
            Input = input;
        }
    }

    // Carries the whitening settings and turns the static-pipeline columns into a VectorWhiteningEstimator.
    private sealed class Reconciler : EstimatorReconciler
    {
        private readonly WhiteningKind _kind;
        private readonly float _eps;
        private readonly int _maxRows;
        private readonly int _pcaNum;

        public Reconciler(WhiteningKind kind, float eps, int maxRows, int pcaNum)
        {
            _kind = kind;
            _eps = eps;
            _maxRows = maxRows;
            _pcaNum = pcaNum;
        }

        public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
            PipelineColumn[] toOutput,
            IReadOnlyDictionary<PipelineColumn, string> inputNames,
            IReadOnlyDictionary<PipelineColumn, string> outputNames,
            IReadOnlyCollection<string> usedNames)
        {
            Contracts.Assert(toOutput.Length == 1);

            int count = toOutput.Length;
            var columnInfos = new VectorWhiteningTransformer.ColumnInfo[count];
            for (int idx = 0; idx < count; idx++)
            {
                // Each output column was created by this class, so it remembers the input it was derived from.
                var whitened = (OutPipelineColumn)toOutput[idx];
                string sourceName = inputNames[whitened.Input];
                string destinationName = outputNames[toOutput[idx]];
                columnInfos[idx] = new VectorWhiteningTransformer.ColumnInfo(sourceName, destinationName, _kind, _eps, _maxRows, _pcaNum);
            }

            return new VectorWhiteningEstimator(env, columnInfos);
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputDat

for (int i = 0; i < columns.Length; i++)
{
if(!inputSchema.TryGetColumnIndex(columns[i].Input, out cols[i]))
if (!inputSchema.TryGetColumnIndex(columns[i].Input, out cols[i]))
throw env.ExceptSchemaMismatch(nameof(inputSchema), "input", columns[i].Input);
srcTypes[i] = inputSchema.GetColumnType(cols[i]);
var reason = TestColumn(srcTypes[i]);
Expand Down
34 changes: 0 additions & 34 deletions src/Microsoft.ML.Transforms/ProjectionCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,40 +40,6 @@ public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this
public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RandomFourierFeaturizingTransformer.ColumnInfo[] columns)
{
    // Obtain the environment from the catalog, then build the estimator over the supplied column descriptions.
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new RandomFourierFeaturizingEstimator(environment, columns);
}

/// <summary>
/// Creates a <see cref="VectorWhiteningEstimator"/> for a single column. Whitening maps a vector of random variables
/// with a known covariance matrix onto a new set of variables whose covariance is the identity matrix,
/// i.e. the new variables are uncorrelated and each has variance 1.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="inputColumn">Name of the input column.</param>
/// <param name="outputColumn">Name of the column produced by transforming <paramref name="inputColumn"/>. Null means <paramref name="inputColumn"/> is replaced.</param>
/// <param name="kind">Whitening kind (PCA/ZCA).</param>
/// <param name="eps">Whitening constant, prevents division by zero.</param>
/// <param name="maxRows">Maximum number of rows used to train the transform.</param>
/// <param name="pcaNum">In case of PCA whitening, indicates the number of components to retain.</param>
/// <returns>A new <see cref="VectorWhiteningEstimator"/> built from the given settings.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[VectorWhiten](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=5-11,16-115)]
/// ]]>
/// </format>
/// </example>
public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, string inputColumn, string outputColumn = null,
    WhiteningKind kind = VectorWhiteningTransformer.Defaults.Kind,
    float eps = VectorWhiteningTransformer.Defaults.Eps,
    int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
    int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
{
    // The estimator is constructed against the environment obtained from the catalog.
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new VectorWhiteningEstimator(environment, inputColumn, outputColumn, kind, eps, maxRows, pcaNum);
}

/// <summary>
/// Creates a <see cref="VectorWhiteningEstimator"/> for several columns at once. Whitening maps vectors of random variables
/// with a known covariance matrix onto new variables whose covariance is the identity matrix,
/// i.e. the new variables are uncorrelated and each has variance 1.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Describes the parameters of the whitening process for each column pair.</param>
/// <returns>A new <see cref="VectorWhiteningEstimator"/> covering all of <paramref name="columns"/>.</returns>
public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningTransformer.ColumnInfo[] columns)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new VectorWhiteningEstimator(environment, columns);
}

/// <summary>
/// Takes column filled with a vector of floats and computes L-p norm of it.
/// </summary>
Expand Down
68 changes: 0 additions & 68 deletions src/Microsoft.ML.Transforms/TransformsStatic.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,74 +10,6 @@

namespace Microsoft.ML.StaticPipe
{
/// <summary>
/// Statically typed pipeline extensions that expose the vector whitening estimator.
/// </summary>
public static class VectorWhiteningExtensions
{
    /// <include file='doc.xml' path='doc/members/member[@name="Whitening"]/*'/>
    /// <param name="input">The column to which the transform will be applied.</param>
    /// <param name="eps">Whitening constant, prevents division by zero when scaling the data by inverse of eigenvalues.</param>
    /// <param name="maxRows">Maximum number of rows used to train the transform.</param>
    /// <param name="pcaNum">In case of PCA whitening, indicates the number of components to retain.</param>
    public static Vector<float> PcaWhitening(this Vector<float> input,
        float eps = VectorWhiteningTransformer.Defaults.Eps,
        int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
        int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
    {
        return new OutPipelineColumn(input, WhiteningKind.Pca, eps, maxRows, pcaNum);
    }

    /// <include file='doc.xml' path='doc/members/member[@name="Whitening"]/*'/>
    /// <param name="input">The column to which the transform will be applied.</param>
    /// <param name="eps">Whitening constant, prevents division by zero.</param>
    /// <param name="maxRows">Maximum number of rows used to train the transform.</param>
    public static Vector<float> ZcaWhitening(this Vector<float> input,
        float eps = VectorWhiteningTransformer.Defaults.Eps,
        int maxRows = VectorWhiteningTransformer.Defaults.MaxRows)
    {
        // ZCA exposes no component count to the caller, so the default PcaNum is forwarded.
        return new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningTransformer.Defaults.PcaNum);
    }

    // Output column of the whitening step; keeps a reference to its input column so that
    // the reconciler can look up the input's name.
    private sealed class OutPipelineColumn : Vector<float>
    {
        public readonly Vector<float> Input;

        public OutPipelineColumn(Vector<float> input, WhiteningKind kind, float eps, int maxRows, int pcaNum)
            : base(new Reconciler(kind, eps, maxRows, pcaNum), input)
        {
            Input = input;
        }
    }

    // Carries the whitening settings and turns the static-pipeline columns into a VectorWhiteningEstimator.
    private sealed class Reconciler : EstimatorReconciler
    {
        private readonly WhiteningKind _kind;
        private readonly float _eps;
        private readonly int _maxRows;
        private readonly int _pcaNum;

        public Reconciler(WhiteningKind kind, float eps, int maxRows, int pcaNum)
        {
            _kind = kind;
            _eps = eps;
            _maxRows = maxRows;
            _pcaNum = pcaNum;
        }

        public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
            PipelineColumn[] toOutput,
            IReadOnlyDictionary<PipelineColumn, string> inputNames,
            IReadOnlyDictionary<PipelineColumn, string> outputNames,
            IReadOnlyCollection<string> usedNames)
        {
            Contracts.Assert(toOutput.Length == 1);

            int count = toOutput.Length;
            var columnInfos = new VectorWhiteningTransformer.ColumnInfo[count];
            for (int idx = 0; idx < count; idx++)
            {
                // Each output column was created by this class, so it remembers the input it was derived from.
                var whitened = (OutPipelineColumn)toOutput[idx];
                string sourceName = inputNames[whitened.Input];
                string destinationName = outputNames[toOutput[idx]];
                columnInfos[idx] = new VectorWhiteningTransformer.ColumnInfo(sourceName, destinationName, _kind, _eps, _maxRows, _pcaNum);
            }

            return new VectorWhiteningEstimator(env, columnInfos);
        }
    }
}

/// <summary>
/// Extensions for statically typed <see cref="LpNormalizingEstimator"/>.
/// </summary>
Expand Down