Skip to content

Creation of components through MLContext and cleanup (Convert, DropSlots, FeatureSelection) #2365

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ public static class ConversionsExtensionsCatalog
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> /// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
Expand Down Expand Up @@ -55,7 +56,7 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Description of dataset columns and how to process them.</param>
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingTransformer.ColumnInfo[] columns)
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnInfo[] columns)
=> new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns);

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ namespace Microsoft.ML.Data
/// </example>
public sealed class FeatureContributionCalculatingTransformer : OneToOneTransformerBase
{
public sealed class Arguments : TransformInputBase
internal sealed class Options : TransformInputBase
{
[Argument(ArgumentType.Required, HelpText = "The predictor model to apply to data", SortOrder = 1)]
public PredictorModel PredictorModel;
Expand All @@ -99,9 +99,9 @@ public sealed class Arguments : TransformInputBase
internal const string FriendlyName = "Feature Contribution Calculation";
internal const string LoaderSignature = "FeatureContribution";

public readonly int Top;
public readonly int Bottom;
public readonly bool Normalize;
internal readonly int Top;
internal readonly int Bottom;
internal readonly bool Normalize;

private readonly IFeatureContributionMapper _predictor;

Expand All @@ -128,7 +128,7 @@ private static VersionInfo GetVersionInfo()
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
public FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
internal FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
string featureColumn = DefaultColumnNames.Features,
int numPositiveContributions = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions,
int numNegativeContributions = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions,
Expand Down Expand Up @@ -281,7 +281,7 @@ public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator<F
private readonly string _featureColumn;
private readonly ICalculateFeatureContribution _predictor;

public static class Defaults
internal static class Defaults
{
public const int NumPositiveContributions = 10;
public const int NumNegativeContributions = 10;
Expand All @@ -300,7 +300,7 @@ public static class Defaults
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
public FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
internal FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
string featureColumn = DefaultColumnNames.Features,
int numPositiveContributions = Defaults.NumPositiveContributions,
int numNegativeContributions = Defaults.NumNegativeContributions,
Expand All @@ -312,6 +312,10 @@ public FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateF
_predictor = modelParameters;
}

/// <summary>
/// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
/// Used for schema propagation and verification in a pipeline.
/// </summary>
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
// Check that the featureColumn is present.
Expand Down Expand Up @@ -341,20 +345,20 @@ internal static class FeatureContributionEntryPoint
[TlcModule.EntryPoint(Name = "Transforms.FeatureContributionCalculationTransformer",
Desc = FeatureContributionCalculatingTransformer.Summary,
UserName = FeatureContributionCalculatingTransformer.FriendlyName)]
public static CommonOutputs.TransformOutput FeatureContributionCalculation(IHostEnvironment env, FeatureContributionCalculatingTransformer.Arguments args)
public static CommonOutputs.TransformOutput FeatureContributionCalculation(IHostEnvironment env, FeatureContributionCalculatingTransformer.Options options)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register(nameof(FeatureContributionCalculatingTransformer));
host.CheckValue(args, nameof(args));
EntryPointUtils.CheckInputArgs(host, args);
host.CheckValue(args.PredictorModel, nameof(args.PredictorModel));
host.CheckValue(options, nameof(options));
EntryPointUtils.CheckInputArgs(host, options);
host.CheckValue(options.PredictorModel, nameof(options.PredictorModel));

var predictor = args.PredictorModel.Predictor as ICalculateFeatureContribution;
var predictor = options.PredictorModel.Predictor as ICalculateFeatureContribution;
if (predictor == null)
throw host.ExceptUserArg(nameof(predictor), "The provided model parameters do not support feature contribution calculation.");
var outData = new FeatureContributionCalculatingTransformer(host, predictor, args.FeatureColumn, args.Top, args.Bottom, args.Normalize).Transform(args.Data);
var outData = new FeatureContributionCalculatingTransformer(host, predictor, options.FeatureColumn, options.Top, options.Bottom, options.Normalize).Transform(options.Data);

return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, outData, args.Data), OutputData = outData};
return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, outData, options.Data), OutputData = outData};
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
using Microsoft.ML.Model;
using Microsoft.ML.Transforms.FeatureSelection;

[assembly: LoadableClass(SlotsDroppingTransformer.Summary, typeof(IDataTransform), typeof(SlotsDroppingTransformer), typeof(SlotsDroppingTransformer.Arguments), typeof(SignatureDataTransform),
[assembly: LoadableClass(SlotsDroppingTransformer.Summary, typeof(IDataTransform), typeof(SlotsDroppingTransformer), typeof(SlotsDroppingTransformer.Options), typeof(SignatureDataTransform),
SlotsDroppingTransformer.FriendlyName, SlotsDroppingTransformer.LoaderSignature, "DropSlots")]

[assembly: LoadableClass(SlotsDroppingTransformer.Summary, typeof(IDataTransform), typeof(SlotsDroppingTransformer), null, typeof(SignatureLoadDataTransform),
Expand All @@ -37,14 +37,15 @@ namespace Microsoft.ML.Transforms.FeatureSelection
/// </summary>
public sealed class SlotsDroppingTransformer : OneToOneTransformerBase
{
public sealed class Arguments
internal sealed class Options
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to drop the slots for",
Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
}

public sealed class Column : OneToOneColumn
[BestFriend]
internal sealed class Column : OneToOneColumn
{
[Argument(ArgumentType.Multiple, HelpText = "Source slot index range(s) of the column to drop")]
public Range[] Slots;
Expand Down Expand Up @@ -112,7 +113,7 @@ internal bool TryUnparse(StringBuilder sb)
}
}

public sealed class Range
internal sealed class Range
{
[Argument(ArgumentType.Required, HelpText = "First index in the range")]
public int Min;
Expand Down Expand Up @@ -191,7 +192,8 @@ public bool IsValid()
/// <summary>
/// Describes how the transformer handles one input-output column pair.
/// </summary>
public sealed class ColumnInfo
[BestFriend]
internal sealed class ColumnInfo
{
public readonly string Name;
public readonly string InputColumnName;
Expand Down Expand Up @@ -258,7 +260,7 @@ private static VersionInfo GetVersionInfo()
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="min">Specifies the lower bound of the range of slots to be dropped. The lower bound is inclusive. </param>
/// <param name="max">Specifies the upper bound of the range of slots to be dropped. The upper bound is exclusive.</param>
public SlotsDroppingTransformer(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int min = default, int? max = null)
internal SlotsDroppingTransformer(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int min = default, int? max = null)
: this(env, new ColumnInfo(outputColumnName, inputColumnName, (min, max)))
{
}
Expand All @@ -268,7 +270,7 @@ public SlotsDroppingTransformer(IHostEnvironment env, string outputColumnName, s
/// </summary>
/// <param name="env">The environment to use.</param>
/// <param name="columns">Specifies the ranges of slots to drop for each column pair.</param>
public SlotsDroppingTransformer(IHostEnvironment env, params ColumnInfo[] columns)
internal SlotsDroppingTransformer(IHostEnvironment env, params ColumnInfo[] columns)
: base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns))
{
Host.AssertNonEmpty(ColumnPairs);
Expand Down Expand Up @@ -308,9 +310,9 @@ private static SlotsDroppingTransformer Create(IHostEnvironment env, ModelLoadCo
}

// Factory method for SignatureDataTransform.
private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
var columns = args.Columns.Select(column => new ColumnInfo(column)).ToArray();
var columns = options.Columns.Select(column => new ColumnInfo(column)).ToArray();
return new SlotsDroppingTransformer(env, columns).MakeDataTransform(input);
}

Expand Down
Loading