From 5ac94dabe1da2a1c2e34ca6112c1a0ae5466dfe0 Mon Sep 17 00:00:00 2001 From: Gani Nazirov Date: Fri, 1 Feb 2019 11:44:44 -0800 Subject: [PATCH 1/2] Arguments to Options --- src/Microsoft.ML.PCA/AssemblyInfo.cs | 8 ++++++++ src/Microsoft.ML.PCA/PcaTransformer.cs | 26 ++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 src/Microsoft.ML.PCA/AssemblyInfo.cs diff --git a/src/Microsoft.ML.PCA/AssemblyInfo.cs b/src/Microsoft.ML.PCA/AssemblyInfo.cs new file mode 100644 index 0000000000..5e0229442c --- /dev/null +++ b/src/Microsoft.ML.PCA/AssemblyInfo.cs @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] \ No newline at end of file diff --git a/src/Microsoft.ML.PCA/PcaTransformer.cs b/src/Microsoft.ML.PCA/PcaTransformer.cs index 15918c4031..84552553e9 100644 --- a/src/Microsoft.ML.PCA/PcaTransformer.cs +++ b/src/Microsoft.ML.PCA/PcaTransformer.cs @@ -17,7 +17,7 @@ using Microsoft.ML.Numeric; using Microsoft.ML.Transforms.Projections; -[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), typeof(PcaTransformer.Arguments), typeof(SignatureDataTransform), +[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), typeof(PcaTransformer.Options), typeof(SignatureDataTransform), PcaTransformer.UserName, PcaTransformer.LoaderSignature, PcaTransformer.ShortName)] [assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), null, typeof(SignatureLoadDataTransform), @@ -36,10 +36,10 @@ namespace Microsoft.ML.Transforms.Projections /// public sealed class PcaTransformer : OneToOneTransformerBase { - public sealed class Arguments : TransformInputBase + internal sealed class Options : TransformInputBase { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)] - public Column[] Columns; + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] + public Column[] Column; [Argument(ArgumentType.Multiple, HelpText = "The name of the weight column", ShortName = "weight", Purpose = SpecialPurpose.ColumnName)] public string WeightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn; @@ -287,13 +287,13 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Sch => Create(env, ctx).MakeRowMapper(inputSchema); // Factory method for SignatureDataTransform. - private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) + private static IDataTransform Create(IHostEnvironment env, Options args, IDataView input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(args, nameof(args)); env.CheckValue(input, nameof(input)); - env.CheckValue(args.Columns, nameof(args.Columns)); - var cols = args.Columns.Select(item => new ColumnInfo( + env.CheckValue(args.Column, nameof(args.Column)); + var cols = args.Column.Select(item => new ColumnInfo( item.Name, item.Source, item.WeightColumn, @@ -645,7 +645,7 @@ private static void TransformFeatures(IExceptionContext ectx, in VBuffer Desc = Summary, UserName = UserName, ShortName = ShortName)] - internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Arguments input) + internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Options input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "Pca", input); var view = PcaTransformer.Create(h, input, input.Data); @@ -683,7 +683,7 @@ internal static class Defaults /// Oversampling parameter for randomized PCA training. /// If enabled, data is centered to be zero mean. /// The seed for random number generation. - public PrincipalComponentAnalysisEstimator(IHostEnvironment env, + internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, string weightColumn = Defaults.WeightColumn, int rank = Defaults.Rank, @@ -696,15 +696,21 @@ public PrincipalComponentAnalysisEstimator(IHostEnvironment env, /// /// The environment to use. /// The dataset columns to use, and their specific settings. - public PrincipalComponentAnalysisEstimator(IHostEnvironment env, params PcaTransformer.ColumnInfo[] columns) + internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, params PcaTransformer.ColumnInfo[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(PrincipalComponentAnalysisEstimator)); _columns = columns; } + /// + /// Train and return a transformer. + /// public PcaTransformer Fit(IDataView input) => new PcaTransformer(_host, input, _columns); + /// + /// Returns the output schema shape of the estimator, if the input schema shape is like the one provided. + /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { _host.CheckValue(inputSchema, nameof(inputSchema)); From b54ce70fcf10eb21035eddab332910e4ef01d51a Mon Sep 17 00:00:00 2001 From: Gani Nazirov Date: Fri, 1 Feb 2019 13:03:52 -0800 Subject: [PATCH 2/2] fix tests --- test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 9df43c0e84..dd69ec08ba 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -115,7 +115,7 @@ Transforms.ModelCombiner Combines a sequence of TransformModels into a single mo Transforms.NGramTranslator Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. Microsoft.ML.Transforms.Text.TextAnalytics NGramTransform Microsoft.ML.Transforms.Text.NgramExtractingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.NoOperation Does nothing. Microsoft.ML.Data.NopTransform Nop Microsoft.ML.Data.NopTransform+NopInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.OptionalColumnCreator If the source column does not exist after deserialization, create a column with the right type and default values. Microsoft.ML.Transforms.OptionalColumnTransform MakeOptional Microsoft.ML.Transforms.OptionalColumnTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput -Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.Projections.PcaTransformer Calculate Microsoft.ML.Transforms.Projections.PcaTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.Projections.PcaTransformer Calculate Microsoft.ML.Transforms.Projections.PcaTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.PredictedLabelColumnOriginalValueConverter Transforms a predicted label column to its original values, unless it is of type bool. Microsoft.ML.EntryPoints.FeatureCombiner ConvertPredictedLabel Microsoft.ML.EntryPoints.FeatureCombiner+PredictedLabelInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.RandomNumberGenerator Adds a column with a generated number sequence. Microsoft.ML.Transforms.RandomNumberGenerator Generate Microsoft.ML.Transforms.GenerateNumberTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.RowRangeFilter Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values. Microsoft.ML.EntryPoints.SelectRows FilterByRange Microsoft.ML.Transforms.RangeFilter+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput