Skip to content

Microsoft.ML.Transforms assembly lockdown #2648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Transforms/Hashing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ private static VersionInfo GetVersionInfo()
private readonly VBuffer<ReadOnlyMemory<char>>[] _keyValues;
private readonly VectorType[] _kvTypes;

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var type = inputSchema[srcCol].Type;
if (!HashingEstimator.IsColumnTypeValid(type))
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Transforms/KeyToVector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ private string TestIsKey(DataViewType type)
return "key type of known cardinality";
}

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var type = inputSchema[srcCol].Type;
string reason = TestIsKey(type);
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Transforms/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ private protected override void SaveModel(ModelSaveContext ctx)
}
}

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
const string expectedType = "scalar or known-size vector of R4";

Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/Transforms/OneToOneTransformerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ private void CheckInput(DataViewSchema inputSchema, int col, out int srcCol)
CheckInputColumn(inputSchema, col, srcCol);
}

protected virtual void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
[BestFriend]
private protected virtual void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
// By default, there are no extra checks.
}
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.HalLearners/VectorWhitening.cs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ internal static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Da
private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorWhiteningEstimator.ColumnInfo[] columns)
=> columns.Select(c => (c.Name, c.InputColumnName ?? c.Name)).ToArray();

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var inType = inputSchema[srcCol].Type;
var reason = TestColumn(inType);
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.ImageAnalytics/ImageGrayscale.cs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ private protected override void SaveModel(ModelSaveContext ctx)

private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema);

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
if (!(inputSchema[srcCol].Type is ImageType))
throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, "image", inputSchema[srcCol].Type.ToString());
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.ImageAnalytics/ImageLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx,
private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema)
=> Create(env, ctx).MakeRowMapper(inputSchema);

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
if (!(inputSchema[srcCol].Type is TextDataViewType))
throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, TextDataViewType.Instance.ToString(), inputSchema[srcCol].Type.ToString());
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ private protected override void SaveModel(ModelSaveContext ctx)

private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema);

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var inputColName = _columns[col].InputColumnName;
var imageType = inputSchema[srcCol].Type as ImageType;
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.ImageAnalytics/ImageResizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ private protected override void SaveModel(ModelSaveContext ctx)

private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema);

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
if (!(inputSchema[srcCol].Type is ImageType))
throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", _columns[col].InputColumnName, "image", inputSchema[srcCol].Type.ToString());
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.PCA/PcaTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ private float[][] PostProcess(float[][] y, float[] sigma, float[] z, int d, int

private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema);

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
ValidatePcaInput(Host, inputSchema[srcCol].Name, inputSchema[srcCol].Type);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/CategoricalCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
namespace Microsoft.ML
{
/// <summary>
/// Static extensions for categorical transforms.
/// The catalog of categorical transformations.
Copy link
Member

@codemzs codemzs Feb 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

transformations [](start = 35, length = 15)

transformers? #Pending

/// </summary>
public static class CategoricalCatalog
{
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/CompositeTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

namespace Microsoft.ML.Transforms
{
public static class CompositeTransformer
internal static class CompositeTransformer
{
private const string RegistrationName = "CompositeTransform";

Expand Down
23 changes: 13 additions & 10 deletions src/Microsoft.ML.Transforms/FourierDistributionSampler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ namespace Microsoft.ML.Transforms
/// <summary>
/// Signature for an IFourierDistributionSampler constructor.
/// </summary>
public delegate void SignatureFourierDistributionSampler(float avgDist);
[BestFriend]
internal delegate void SignatureFourierDistributionSampler(float avgDist);

public interface IFourierDistributionSampler : ICanSaveModel
{
float Next(Random rand);
}

[TlcModule.ComponentKind("FourierDistributionSampler")]
public interface IFourierDistributionSamplerFactory : IComponentFactory<float, IFourierDistributionSampler>
internal interface IFourierDistributionSamplerFactory : IComponentFactory<float, IFourierDistributionSampler>
{
}

Expand All @@ -51,10 +52,11 @@ public sealed class Options : IFourierDistributionSamplerFactory
[Argument(ArgumentType.AtMostOnce, HelpText = "gamma in the kernel definition: exp(-gamma*||x-y||^2 / r^2). r is an estimate of the average intra-example distance", ShortName = "g")]
public float Gamma = 1;

public IFourierDistributionSampler CreateComponent(IHostEnvironment env, float avgDist) => new GaussianFourierSampler(env, this, avgDist);
IFourierDistributionSampler IComponentFactory<float, IFourierDistributionSampler>.CreateComponent(IHostEnvironment env, float avgDist)
=> new GaussianFourierSampler(env, this, avgDist);
}

public const string LoaderSignature = "RandGaussFourierExec";
internal const string LoaderSignature = "RandGaussFourierExec";
private static VersionInfo GetVersionInfo()
{
return new VersionInfo(
Expand All @@ -66,7 +68,7 @@ private static VersionInfo GetVersionInfo()
loaderAssemblyName: typeof(GaussianFourierSampler).Assembly.FullName);
}

public const string LoadName = "GaussianRandom";
internal const string LoadName = "GaussianRandom";

private readonly float _gamma;

Expand All @@ -79,7 +81,7 @@ public GaussianFourierSampler(IHostEnvironment env, Options args, float avgDist)
_gamma = args.Gamma / avgDist;
}

public static GaussianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx)
private static GaussianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
Expand Down Expand Up @@ -130,7 +132,8 @@ public sealed class Options : IFourierDistributionSamplerFactory
[Argument(ArgumentType.AtMostOnce, HelpText = "a in the term exp(-a|x| / r). r is an estimate of the average intra-example L1 distance")]
public float A = 1;

public IFourierDistributionSampler CreateComponent(IHostEnvironment env, float avgDist) => new LaplacianFourierSampler(env, this, avgDist);
IFourierDistributionSampler IComponentFactory<float, IFourierDistributionSampler>.CreateComponent(IHostEnvironment env, float avgDist)
=> new LaplacianFourierSampler(env, this, avgDist);
}

private static VersionInfo GetVersionInfo()
Expand All @@ -144,8 +147,8 @@ private static VersionInfo GetVersionInfo()
loaderAssemblyName: typeof(LaplacianFourierSampler).Assembly.FullName);
}

public const string LoaderSignature = "RandLaplacianFourierExec";
public const string RegistrationName = "LaplacianRandom";
internal const string LoaderSignature = "RandLaplacianFourierExec";
internal const string RegistrationName = "LaplacianRandom";

private readonly IHost _host;
private readonly float _a;
Expand All @@ -159,7 +162,7 @@ public LaplacianFourierSampler(IHostEnvironment env, Options args, float avgDist
_a = args.A / avgDist;
}

public static LaplacianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx)
private static LaplacianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/GcnTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair
return columns.Select(x => (x.Name, x.InputColumnName)).ToArray();
}

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var inType = inputSchema[srcCol].Type;
if (!LpNormalizingEstimatorBase.IsColumnTypeValid(inType))
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/KeyToVectorMapping.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ private string TestIsKey(DataViewType type)
return "key type of known cardinality";
}

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var type = inputSchema[srcCol].Type;
string reason = TestIsKey(type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ private MissingValueDroppingTransformer(IHostEnvironment env, ModelLoadContext c
private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(Column[] columns)
=> columns.Select(c => (c.Name, c.Source ?? c.Name)).ToArray();

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var inType = inputSchema[srcCol].Type;
if (!(inType is VectorType))
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/MissingValueReplacing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair
// REVIEW: Currently these arrays are constructed on load but could be changed to being constructed lazily.
private readonly BitArray[] _repIsDefault;

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var type = inputSchema[srcCol].Type;
string reason = TestType(type);
Expand Down
3 changes: 3 additions & 0 deletions src/Microsoft.ML.Transforms/ProjectionCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

namespace Microsoft.ML
{
/// <summary>
/// The catalog of projection transformations.
Copy link
Member

@codemzs codemzs Feb 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

transformations [](start = 34, length = 15)

transformers? #Pending

Copy link
Contributor

@TomFinley TomFinley Feb 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is some troublesome language usage that we need to think about how to do. We are talking about transforming usage. And the way we do that is via ITransformer, but what is actually being returned through these catalogs? IEstimator! I don't object to this preference for transformer, per se, but I would not insist on it if it is unnatural in context. The context here is how we describe the process by which we transform data, which includes ITransformer, but is not specifically related to it (it can't be, what is returned here is not ITransformer implementors at all!). #Pending

Copy link
Contributor

@TomFinley TomFinley Feb 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I don't disagree with your suggestion, but I wouldn't insist on it, and I don't know that I see a strong reason for it, since the term usage seems appropriate in the (deliberately) vague context of this catalog. #Pending

Copy link
Contributor Author

@artidoro artidoro Feb 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is part of the .Transforms section of MLContext. I preferred not to use the contraction "transforms" and use the full word "transformations". My choice is more open ended and not reference ML.NET objects.

If we want to be more specific, since these are returning estimators, we should probably use that word instead.

So in conclusion, if we want to be specific I would use "estimators" alternatively I would stick to "transformations". I would personally keep the word "transformations". #Pending

/// </summary>
public static class ProjectionCatalog
{
/// <summary>
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair
return columns.Select(x => (x.Name, x.InputColumnName)).ToArray();
}

protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol)
{
var type = inputSchema[srcCol].Type;
string reason = TestColumnType(type);
Expand Down
23 changes: 20 additions & 3 deletions src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -875,15 +875,31 @@ public sealed class NgramHashingEstimator : IEstimator<NgramHashingTransformer>
/// </summary>
public sealed class ColumnInfo
{
/// <summary>Name of the column resulting from the transformation of <see cref="InputColumnNames"/>.</summary>
public readonly string Name;
/// <summary>Names of the columns to transform.</summary>
public readonly string[] InputColumnNames;
/// <summary>Maximum ngram length.</summary>
public readonly int NgramLength;
/// <summary>Maximum number of tokens to skip when constructing an ngram.</summary>
public readonly int SkipLength;
/// <summary>Whether to store all ngram lengths up to <see cref="NgramLength"/>, or only <see cref="NgramLength"/>.</summary>
public readonly bool AllLengths;
/// <summary>Number of bits to hash into. Must be between 1 and 31, inclusive.</summary>
public readonly int HashBits;
/// <summary>Hashing seed.</summary>
public readonly uint Seed;
/// <summary>Whether the position of each term should be included in the hash.</summary>
public readonly bool Ordered;
/// <summary>
/// During hashing we constuct mappings between original values and the produced hash values.
/// Text representation of original values are stored in the slot names of the metadata for the new column.
/// Hashing, as such, can map many initial values to one.
/// <see cref="InvertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.
/// </summary>
public readonly int InvertHash;
/// <summary>Whether to rehash unigrams.</summary>
public readonly bool RehashUnigrams;
// For all source columns, use these friendly names for the source
// column names instead of the real column names.
Expand All @@ -893,15 +909,16 @@ public sealed class ColumnInfo
/// Describes how the transformer handles one column pair.
/// </summary>
/// <param name="name">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.</param>
/// <param name="inputColumnNames">Name of the columns to transform. </param>
/// <param name="inputColumnNames">Names of the columns to transform. </param>
/// <param name="ngramLength">Maximum ngram length.</param>
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
/// <param name="allLengths">"Whether to store all ngram lengths up to ngramLength, or only ngramLength.</param>
/// <param name="allLengths">Whether to store all ngram lengths up to <paramref name="ngramLength"/>, or only <paramref name="ngramLength"/>.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="seed">Hashing seed.</param>
/// <param name="ordered">Whether the position of each term should be included in the hash.</param>
/// <param name="invertHash">During hashing we constuct mappings between original values and the produced hash values.
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one.
/// Text representation of original values are stored in the slot names of the metadata for the new column.
/// Hashing, as such, can map many initial values to one.
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
/// <param name="rehashUnigrams">Whether to rehash unigrams.</param>
Expand Down
Loading