Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/Microsoft.ML.Transforms/GroupTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ public sealed class Arguments : TransformInputBase

private readonly GroupSchema _schema;

/// <summary>
/// Convenience constructor for public facing API.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="groupKey">Columns to group by.</param>
/// <param name="columns">Columns to group together.</param>
public GroupTransform(IHostEnvironment env, IDataView input, string[] groupKey, params string[] columns)
Copy link
Contributor

@TomFinley TomFinley Jul 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

string[] groupKey [](start = 69, length = 17)

Considering the cases I'm aware of where it is used, I have to think that a single string would be preferable for the convenience constructor. #Closed

: this(env, new Arguments() { GroupKey = groupKey, Column = columns }, input)
{
}

public GroupTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, RegistrationName, input)
{
Expand Down
35 changes: 31 additions & 4 deletions src/Microsoft.ML.Transforms/HashJoinTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ public sealed class HashJoinTransform : OneToOneTransformBase
public const int NumBitsMin = 1;
public const int NumBitsLim = 32;

// Default argument values. They are referenced by the Arguments field initializers,
// and Join/HashBits are also used as optional-parameter defaults of the convenience constructor.
private static class Defaults
{
    // Combine the values into a single hash.
    public const bool Join = true;

    // One less than NumBitsLim.
    public const int HashBits = NumBitsLim - 1;

    // Hashing seed.
    public const uint Seed = 314489979;

    // Include the position of each term in the hash.
    public const bool Ordered = true;
}

public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)",
Expand All @@ -45,17 +53,17 @@ public sealed class Arguments : TransformInputBase
public Column[] Column;

[Argument(ArgumentType.AtMostOnce, HelpText = "Whether the values need to be combined for a single hash")]
public bool Join = true;
public bool Join = Defaults.Join;

[Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 31, inclusive.",
ShortName = "bits", SortOrder = 2)]
public int HashBits = NumBitsLim - 1;
public int HashBits = Defaults.HashBits;

[Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")]
public uint Seed = 314489979;
public uint Seed = Defaults.Seed;

[Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each term should be included in the hash", ShortName = "ord")]
public bool Ordered = true;
public bool Ordered = Defaults.Ordered;
}

public sealed class Column : OneToOneColumn
Expand Down Expand Up @@ -166,6 +174,25 @@ private static VersionInfo GetVersionInfo()

private readonly ColumnInfoEx[] _exes;

/// <summary>
/// Convenience constructor for the public facing API. Builds an <see cref="Arguments"/> instance
/// describing a single column and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
/// <param name="join">Whether the values need to be combined for a single hash.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
public HashJoinTransform(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null,
    bool join = Defaults.Join,
    int hashBits = Defaults.HashBits)
    : this(
        env,
        new Arguments
        {
            Column = new[]
            {
                // Fall back to the output name when no source column is given.
                new Column { Source = source ?? name, Name = name }
            },
            Join = join,
            HashBits = hashBits
        },
        input)
{
}

public HashJoinTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestColumnType)
{
Expand Down
12 changes: 12 additions & 0 deletions src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@ private static VersionInfo GetVersionInfo()

private readonly VectorType[] _types;

/// <summary>
/// Convenience constructor for the public facing API. Builds an <see cref="Arguments"/> instance
/// describing a single column and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
public KeyToBinaryVectorTransform(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null)
    : this(
        env,
        new Arguments
        {
            Column = new[]
            {
                // Fall back to the output name when no source column is given.
                new KeyToVectorTransform.Column { Source = source ?? name, Name = name }
            }
        },
        input)
{
}

/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
Expand Down
19 changes: 19 additions & 0 deletions src/Microsoft.ML.Transforms/LoadTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@ public class Arguments

internal const string Summary = "Loads specified transforms from the model file and applies them to current data.";

/// <summary>
/// A helper method to create <see cref="LoadTransform"/> for public facing API.
/// Packs the parameters into an <see cref="Arguments"/> instance and delegates to the
/// <see cref="Arguments"/>-based Create overload.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="modelFile">Model file to load the transforms from.</param>
/// <param name="tag">The tags of the transforms to be loaded (or omitted, if <paramref name="complement"/> is true).</param>
/// <param name="complement">Whether to load all transforms except those marked by tags.</param>
public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, string[] tag, bool complement = false)
{
    return Create(
        env,
        new Arguments
        {
            ModelFile = modelFile,
            Tag = tag,
            Complement = complement
        },
        input);
}

public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
Expand Down
12 changes: 12 additions & 0 deletions src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ private static VersionInfo GetVersionInfo()
// The output column types, parallel to Infos.
private readonly VectorType[] _types;

/// <summary>
/// Convenience constructor for the public facing API. Builds an <see cref="Arguments"/> instance
/// describing a single column and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
public MissingValueIndicatorTransform(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null)
    : this(
        env,
        new Arguments
        {
            Column = new[]
            {
                // Fall back to the output name when no source column is given.
                new Column { Source = source ?? name, Name = name }
            }
        },
        input)
{
}

/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
Expand Down
39 changes: 36 additions & 3 deletions src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ public static class MutualInformationFeatureSelectionTransform
public const string UserName = "Mutual Information Feature Selection Transform";
public const string ShortName = "MIFeatureSelection";

// Default argument values. They are referenced by the Arguments field initializers
// and by the optional parameters of the Create helper.
private static class Defaults
{
    // Column to use for labels.
    public const string LabelColumn = DefaultColumnNames.Label;

    // The maximum number of slots to preserve in output.
    public const int SlotsInOutput = 1000;

    // Max number of bins for R4/R8 columns.
    public const int NumBins = 256;
}

public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to use for feature selection", ShortName = "col",
Expand All @@ -41,19 +48,45 @@ public sealed class Arguments : TransformInputBase

[Argument(ArgumentType.LastOccurenceWins, HelpText = "Column to use for labels", ShortName = "lab",
SortOrder = 4, Purpose = SpecialPurpose.ColumnName)]
public string LabelColumn = DefaultColumnNames.Label;
public string LabelColumn = Defaults.LabelColumn;

[Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of slots to preserve in output", ShortName = "topk,numSlotsToKeep",
SortOrder = 1)]
public int SlotsInOutput = 1000;
public int SlotsInOutput = Defaults.SlotsInOutput;

[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins for R4/R8 columns, power of 2 recommended",
ShortName = "bins")]
public int NumBins = 256;
public int NumBins = Defaults.NumBins;
}

internal static string RegistrationName = "MutualInformationFeatureSelectionTransform";

/// <summary>
/// A helper method to create an <see cref="IDataTransform"/> that performs mutual information feature selection, for public facing API.
Copy link
Contributor

@TomFinley TomFinley Jul 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MutualInformationFeatureSelectionTransform [](start = 49, length = 42)

You can't be creating it, it's a static class. #Closed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar comments as before, when documenting we ought to describe what it is actually useful for, not just describing its return values (which are part of the method signature, and we aren't even doing that quite right).


In reply to: 201912038 [](ancestors = 201912038)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add detailed comments as part of #524.


In reply to: 201912123 [](ancestors = 201912123,201912038)

/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="labelColumn">Column to use for labels.</param>
/// <param name="slotsInOutput">The maximum number of slots to preserve in output.</param>
/// <param name="numBins">Max number of bins for R4/R8 columns, power of 2 recommended.</param>
/// <param name="columns">Columns to use for feature selection.</param>
public static IDataTransform Create(IHostEnvironment env,
    IDataView input,
    string labelColumn = Defaults.LabelColumn,
    int slotsInOutput = Defaults.SlotsInOutput,
    int numBins = Defaults.NumBins,
    params string[] columns)
{
    // Pack the parameters into an Arguments instance and delegate to the Arguments-based overload.
    return Create(
        env,
        new Arguments
        {
            Column = columns,
            LabelColumn = labelColumn,
            SlotsInOutput = slotsInOutput,
            NumBins = numBins
        },
        input);
}

/// <summary>
/// Create method corresponding to SignatureDataTransform.
/// </summary>
Expand Down
12 changes: 12 additions & 0 deletions src/Microsoft.ML.Transforms/NADropTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ private static VersionInfo GetVersionInfo()
// The isNA delegates, parallel to Infos.
private readonly Delegate[] _isNAs;

/// <summary>
/// Convenience constructor for the public facing API. Builds an <see cref="Arguments"/> instance
/// describing a single column and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
public NADropTransform(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null)
    : this(
        env,
        new Arguments
        {
            Column = new[]
            {
                // Fall back to the output name when no source column is given.
                new Column { Source = source ?? name, Name = name }
            }
        },
        input)
{
}

public NADropTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestType)
{
Expand Down
21 changes: 21 additions & 0 deletions src/Microsoft.ML.Transforms/NAHandleTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,27 @@ public bool TryUnparse(StringBuilder sb)
internal const string FriendlyName = "NA Handle Transform";
internal const string ShortName = "NAHandle";

/// <summary>
/// A helper method to create <see cref="NAHandleTransform"/> for public facing API.
/// Packs the parameters into an <see cref="Arguments"/> instance describing a single column
/// and delegates to the <see cref="Arguments"/>-based Create overload.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
/// <param name="replaceWith">The replacement method to utilize.</param>
public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replaceWith = ReplacementKind.DefaultValue)
{
    return Create(
        env,
        new Arguments
        {
            // Fall back to the output name when no source column is given.
            Column = new[] { new Column { Source = source ?? name, Name = name } },
            ReplaceWith = replaceWith
        },
        input);
}

public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
Expand Down
12 changes: 12 additions & 0 deletions src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,18 @@ private static string TestType(ColumnType type)
// The output column types, parallel to Infos.
private readonly ColumnType[] _types;

/// <summary>
/// Convenience constructor for the public facing API. Builds an <see cref="Arguments"/> instance
/// describing a single column and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
public NAIndicatorTransform(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null)
    : this(
        env,
        new Arguments
        {
            Column = new[]
            {
                // Fall back to the output name when no source column is given.
                new Column { Source = source ?? name, Name = name }
            }
        },
        input)
{
}

/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
Expand Down
13 changes: 13 additions & 0 deletions src/Microsoft.ML.Transforms/NAReplaceTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,19 @@ private static string TestType<T>(ColumnType type)

public override bool CanSaveOnnx => true;

/// <summary>
/// Convenience constructor for the public facing API. Builds an <see cref="Arguments"/> instance
/// describing a single column and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. When null, <paramref name="name"/> is used instead.</param>
/// <param name="replacementKind">The replacement method to utilize.</param>
public NAReplaceTransform(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null,
    ReplacementKind replacementKind = ReplacementKind.DefaultValue)
    : this(
        env,
        new Arguments
        {
            Column = new[]
            {
                // Fall back to the output name when no source column is given.
                new Column { Source = source ?? name, Name = name }
            },
            ReplacementKind = replacementKind
        },
        input)
{
}

/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
Expand Down
11 changes: 11 additions & 0 deletions src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,17 @@ private static VersionInfo GetVersionInfo()

private const string RegistrationName = "OptionalColumn";

/// <summary>
/// Convenience constructor for the public facing API. Wraps <paramref name="columns"/> in an
/// <see cref="Arguments"/> instance and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="columns">Columns to transform.</param>
public OptionalColumnTransform(
    IHostEnvironment env,
    IDataView input,
    params string[] columns)
    : this(
        env,
        new Arguments
        {
            Column = columns
        },
        input)
{
}

/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// </summary>
Expand Down
18 changes: 17 additions & 1 deletion src/Microsoft.ML.Transforms/ProduceIdTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,15 @@ namespace Microsoft.ML.Runtime.Data
/// </summary>
public sealed class ProduceIdTransform : RowToRowTransformBase
{
// Default argument values. Referenced by the Arguments field initializer
// and by the optional parameter of the convenience constructor.
private static class Defaults
{
    // Name of the column to produce.
    public const string Column = "Id";
}
Copy link
Contributor

@TomFinley TomFinley Jul 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we probably don't need this Defaults class any longer. #Closed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed we might as well revert the whole file probably.


In reply to: 202426903 [](ancestors = 202426903)


public sealed class Arguments
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Name of the column to produce", ShortName = "col", SortOrder = 1)]
public string Column = "Id";
public string Column = Defaults.Column;
}

private sealed class Bindings : ColumnBindingsBase
Expand Down Expand Up @@ -93,6 +98,17 @@ private static VersionInfo GetVersionInfo()

public override bool CanShuffle { get { return Source.CanShuffle; } }

/// <summary>
/// Convenience constructor for the public facing API. Wraps <paramref name="column"/> in an
/// <see cref="Arguments"/> instance and forwards it to the <see cref="Arguments"/>-based constructor.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="column">Name of the column to produce.</param>
public ProduceIdTransform(
    IHostEnvironment env,
    IDataView input,
    string column = Defaults.Column)
    : this(
        env,
        new Arguments
        {
            Column = column
        },
        input)
{
}

public ProduceIdTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, LoaderSignature, input)
{
Expand Down
Loading