Skip to content

Lockdown Microsoft.ML.PCA public surface #2374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/Microsoft.ML.PCA/AssemblyInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Runtime.CompilerServices;
using Microsoft.ML;

[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PublicKey [](start = 67, length = 9)

If you change PcaTests.cs to use ml.Transforms.Projection.ProjectToPrincipalComponents instead of calling the estimator directly, would you still need this change?

26 changes: 16 additions & 10 deletions src/Microsoft.ML.PCA/PcaTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
using Microsoft.ML.Numeric;
using Microsoft.ML.Transforms.Projections;

[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), typeof(PcaTransformer.Arguments), typeof(SignatureDataTransform),
[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), typeof(PcaTransformer.Options), typeof(SignatureDataTransform),
PcaTransformer.UserName, PcaTransformer.LoaderSignature, PcaTransformer.ShortName)]

[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), null, typeof(SignatureLoadDataTransform),
Expand All @@ -36,10 +36,10 @@ namespace Microsoft.ML.Transforms.Projections
/// <include file='doc.xml' path='doc/members/member[@name="PCA"]/*' />
public sealed class PcaTransformer : OneToOneTransformerBase
{
public sealed class Arguments : TransformInputBase
internal sealed class Options : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
public Column[] Column;

[Argument(ArgumentType.Multiple, HelpText = "The name of the weight column", ShortName = "weight", Purpose = SpecialPurpose.ColumnName)]
public string WeightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn;
Expand Down Expand Up @@ -287,13 +287,13 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Sch
=> Create(env, ctx).MakeRowMapper(inputSchema);

// Factory method for SignatureDataTransform.
private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
private static IDataTransform Create(IHostEnvironment env, Options args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(args, nameof(args));
env.CheckValue(input, nameof(input));
env.CheckValue(args.Columns, nameof(args.Columns));
var cols = args.Columns.Select(item => new ColumnInfo(
env.CheckValue(args.Column, nameof(args.Column));
var cols = args.Column.Select(item => new ColumnInfo(
item.Name,
item.Source,
item.WeightColumn,
Expand Down Expand Up @@ -645,7 +645,7 @@ private static void TransformFeatures(IExceptionContext ectx, in VBuffer<float>
Desc = Summary,
UserName = UserName,
ShortName = ShortName)]
internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Arguments input)
internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Options input)
{
var h = EntryPointUtils.CheckArgsAndCreateHost(env, "Pca", input);
var view = PcaTransformer.Create(h, input, input.Data);
Expand Down Expand Up @@ -683,7 +683,7 @@ internal static class Defaults
/// <param name="overSampling">Oversampling parameter for randomized PCA training.</param>
/// <param name="center">If enabled, data is centered to be zero mean.</param>
/// <param name="seed">The seed for random number generation.</param>
public PrincipalComponentAnalysisEstimator(IHostEnvironment env,
internal PrincipalComponentAnalysisEstimator(IHostEnvironment env,
string outputColumnName,
string inputColumnName = null,
string weightColumn = Defaults.WeightColumn, int rank = Defaults.Rank,
Expand All @@ -696,15 +696,21 @@ public PrincipalComponentAnalysisEstimator(IHostEnvironment env,
/// <include file='doc.xml' path='doc/members/member[@name="PCA"]/*'/>
/// <param name="env">The environment to use.</param>
/// <param name="columns">The dataset columns to use, and their specific settings.</param>
public PrincipalComponentAnalysisEstimator(IHostEnvironment env, params PcaTransformer.ColumnInfo[] columns)
internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, params PcaTransformer.ColumnInfo[] columns)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register(nameof(PrincipalComponentAnalysisEstimator));
_columns = columns;
}

/// <summary>
/// Train and return a transformer.
/// </summary>
/// <param name="input">The data view handed to the <see cref="PcaTransformer"/> constructor, together with this estimator's host and column settings.</param>
/// <returns>A newly constructed <see cref="PcaTransformer"/>.</returns>
public PcaTransformer Fit(IDataView input) => new PcaTransformer(_host, input, _columns);

/// <summary>
/// Returns the output schema shape of the estimator, if the input schema shape is like the one provided.
/// </summary>
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
_host.CheckValue(inputSchema, nameof(inputSchema));
Expand Down
2 changes: 1 addition & 1 deletion test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Transforms.ModelCombiner Combines a sequence of TransformModels into a single mo
Transforms.NGramTranslator Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. Microsoft.ML.Transforms.Text.TextAnalytics NGramTransform Microsoft.ML.Transforms.Text.NgramExtractingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.NoOperation Does nothing. Microsoft.ML.Data.NopTransform Nop Microsoft.ML.Data.NopTransform+NopInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.OptionalColumnCreator If the source column does not exist after deserialization, create a column with the right type and default values. Microsoft.ML.Transforms.OptionalColumnTransform MakeOptional Microsoft.ML.Transforms.OptionalColumnTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.Projections.PcaTransformer Calculate Microsoft.ML.Transforms.Projections.PcaTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.Projections.PcaTransformer Calculate Microsoft.ML.Transforms.Projections.PcaTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.PredictedLabelColumnOriginalValueConverter Transforms a predicted label column to its original values, unless it is of type bool. Microsoft.ML.EntryPoints.FeatureCombiner ConvertPredictedLabel Microsoft.ML.EntryPoints.FeatureCombiner+PredictedLabelInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.RandomNumberGenerator Adds a column with a generated number sequence. Microsoft.ML.Transforms.RandomNumberGenerator Generate Microsoft.ML.Transforms.GenerateNumberTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.RowRangeFilter Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values. Microsoft.ML.EntryPoints.SelectRows FilterByRange Microsoft.ML.Transforms.RangeFilter+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Expand Down