From 596a388bb80f50079ed26d7bb180837248ae001d Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Tue, 19 Feb 2019 16:59:13 -0800 Subject: [PATCH 1/3] internalization and some commenting for transforms assembly --- .../CategoricalCatalog.cs | 2 +- .../CompositeTransformer.cs | 2 +- .../FourierDistributionSampler.cs | 27 ++++++++++-------- .../ProjectionCatalog.cs | 3 ++ .../Text/NgramHashingTransformer.cs | 19 +++++++++++-- .../Text/NgramTransform.cs | 28 +++++++++++-------- .../Text/StopWordsRemovingTransformer.cs | 12 ++++++-- .../Text/TextCatalog.cs | 3 ++ .../Text/TextFeaturizingEstimator.cs | 2 +- .../Text/TextNormalizing.cs | 4 +++ .../Text/TokenizingByCharacters.cs | 3 ++ .../Text/WordEmbeddingsExtractor.cs | 7 +++++ .../Text/WrappedTextTransformers.cs | 2 ++ 13 files changed, 84 insertions(+), 30 deletions(-) diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 06ad7db7df..24871f763d 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -9,7 +9,7 @@ namespace Microsoft.ML { /// - /// Static extensions for categorical transforms. + /// The catalog of categorical transformations. /// public static class CategoricalCatalog { diff --git a/src/Microsoft.ML.Transforms/CompositeTransformer.cs b/src/Microsoft.ML.Transforms/CompositeTransformer.cs index 45eb4e1441..3d822fa76c 100644 --- a/src/Microsoft.ML.Transforms/CompositeTransformer.cs +++ b/src/Microsoft.ML.Transforms/CompositeTransformer.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.Transforms { - public static class CompositeTransformer + internal static class CompositeTransformer { private const string RegistrationName = "CompositeTransform"; diff --git a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs index d64150c78d..19bdeb9dbe 100644 --- a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs +++ b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs @@ -30,7 +30,8 @@ namespace Microsoft.ML.Transforms /// /// Signature for an IFourierDistributionSampler constructor. /// - public delegate void SignatureFourierDistributionSampler(float avgDist); + [BestFriend] + internal delegate void SignatureFourierDistributionSampler(float avgDist); public interface IFourierDistributionSampler : ICanSaveModel { @@ -38,7 +39,7 @@ public interface IFourierDistributionSampler : ICanSaveModel } [TlcModule.ComponentKind("FourierDistributionSampler")] - public interface IFourierDistributionSamplerFactory : IComponentFactory + internal interface IFourierDistributionSamplerFactory : IComponentFactory { } @@ -51,10 +52,11 @@ public sealed class Options : IFourierDistributionSamplerFactory [Argument(ArgumentType.AtMostOnce, HelpText = "gamma in the kernel definition: exp(-gamma*||x-y||^2 / r^2). r is an estimate of the average intra-example distance", ShortName = "g")] public float Gamma = 1; - public IFourierDistributionSampler CreateComponent(IHostEnvironment env, float avgDist) => new GaussianFourierSampler(env, this, avgDist); + IFourierDistributionSampler IComponentFactory.CreateComponent(IHostEnvironment env, float avgDist) + => new GaussianFourierSampler(env, this, avgDist); } - public const string LoaderSignature = "RandGaussFourierExec"; + internal const string LoaderSignature = "RandGaussFourierExec"; private static VersionInfo GetVersionInfo() { return new VersionInfo( @@ -66,7 +68,7 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(GaussianFourierSampler).Assembly.FullName); } - public const string LoadName = "GaussianRandom"; + internal const string LoadName = "GaussianRandom"; private readonly float _gamma; @@ -79,7 +81,7 @@ public GaussianFourierSampler(IHostEnvironment env, Options args, float avgDist) _gamma = args.Gamma / avgDist; } - public static GaussianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx) + private static GaussianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); @@ -117,7 +119,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx) ctx.Writer.Write(_gamma); } - public float Next(Random rand) + float IFourierDistributionSampler.Next(Random rand) { return (float)Stats.SampleFromGaussian(rand) * MathUtils.Sqrt(2 * _gamma); } @@ -130,7 +132,8 @@ public sealed class Options : IFourierDistributionSamplerFactory [Argument(ArgumentType.AtMostOnce, HelpText = "a in the term exp(-a|x| / r). r is an estimate of the average intra-example L1 distance")] public float A = 1; - public IFourierDistributionSampler CreateComponent(IHostEnvironment env, float avgDist) => new LaplacianFourierSampler(env, this, avgDist); + IFourierDistributionSampler IComponentFactory.CreateComponent(IHostEnvironment env, float avgDist) + => new LaplacianFourierSampler(env, this, avgDist); } private static VersionInfo GetVersionInfo() @@ -144,8 +147,8 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(LaplacianFourierSampler).Assembly.FullName); } - public const string LoaderSignature = "RandLaplacianFourierExec"; - public const string RegistrationName = "LaplacianRandom"; + internal const string LoaderSignature = "RandLaplacianFourierExec"; + internal const string RegistrationName = "LaplacianRandom"; private readonly IHost _host; private readonly float _a; @@ -159,7 +162,7 @@ public LaplacianFourierSampler(IHostEnvironment env, Options args, float avgDist _a = args.A / avgDist; } - public static LaplacianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx) + private static LaplacianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); @@ -198,7 +201,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx) ctx.Writer.Write(_a); } - public float Next(Random rand) + float IFourierDistributionSampler.Next(Random rand) { return _a * Stats.SampleFromCauchy(rand); } diff --git a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs index 1da466ec24..627160b5c5 100644 --- a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs @@ -7,6 +7,9 @@ namespace Microsoft.ML { + /// + /// The catalog of projection transformations. + /// public static class ProjectionCatalog { /// diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 434f002f84..8f308c5801 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -875,15 +875,30 @@ public sealed class NgramHashingEstimator : IEstimator /// public sealed class ColumnInfo { + /// Name of the column resulting from the transformation of . public readonly string Name; + /// Names of the columns to transform. public readonly string[] InputColumnNames; + /// Maximum ngram length. public readonly int NgramLength; + /// Maximum number of tokens to skip when constructing an ngram. public readonly int SkipLength; + /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. public readonly bool AllLengths; + /// Number of bits to hash into. Must be between 1 and 31, inclusive. public readonly int HashBits; + /// Hashing seed. public readonly uint Seed; + /// Whether the position of each term should be included in the hash. public readonly bool Ordered; + /// + /// During hashing we constuct mappings between original values and the produced hash values. + /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. + /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. + /// 0 does not retain any input values. -1 retains all input values mapping to each hash. + /// public readonly int InvertHash; + /// Whether to rehash unigrams. public readonly bool RehashUnigrams; // For all source columns, use these friendly names for the source // column names instead of the real column names. @@ -893,10 +908,10 @@ public sealed class ColumnInfo /// Describes how the transformer handles one column pair. /// /// Name of the column resulting from the transformation of . - /// Name of the columns to transform. + /// Names of the columns to transform. /// Maximum ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// "Whether to store all ngram lengths up to ngramLength, or only ngramLength. + /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Hashing seed. /// Whether the position of each term should be included in the hash. diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 3cadd72dc9..13999755b4 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -676,7 +676,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func { /// - /// Weighting criteria: a statistical measure used to evaluate how important a word is to a document in a corpus. + /// A statistical measure used to evaluate how important a word is to a document in a corpus. /// This enumeration is serialized. /// public enum WeightingCriteria @@ -797,12 +797,18 @@ internal static bool IsSchemaColumnValid(SchemaShape.Column col) /// public sealed class ColumnInfo { + /// Name of the column resulting from the transformation of . public readonly string Name; + /// Name of column to transform. public readonly string InputColumnName; + /// Maximum ngram length. public readonly int NgramLength; + /// Maximum number of tokens to skip when constructing an ngram. public readonly int SkipLength; + /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. public readonly bool AllLengths; - public readonly NgramExtractingEstimator.WeightingCriteria Weighting; + /// The weighting criteria. + public readonly WeightingCriteria Weighting; /// /// Contains the maximum number of grams to store in the dictionary, for each level of ngrams, /// from 1 (in position 0) up to ngramLength (in position ngramLength-1) @@ -816,15 +822,15 @@ public sealed class ColumnInfo /// Name of column to transform. If set to , the value of the will be used as source. /// Maximum ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// "Whether to store all ngram lengths up to ngramLength, or only ngramLength. + /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. /// The weighting criteria. /// Maximum number of ngrams to store in the dictionary. public ColumnInfo(string name, string inputColumnName = null, - int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, - int skipLength = NgramExtractingEstimator.Defaults.SkipLength, - bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaxNumTerms) + int ngramLength = Defaults.NgramLength, + int skipLength = Defaults.SkipLength, + bool allLengths = Defaults.AllLengths, + WeightingCriteria weighting = Defaults.Weighting, + int maxNumTerms = Defaults.MaxNumTerms) : this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maxNumTerms }, inputColumnName ?? name) { } @@ -833,7 +839,7 @@ internal ColumnInfo(string name, int ngramLength, int skipLength, bool allLengths, - NgramExtractingEstimator.WeightingCriteria weighting, + WeightingCriteria weighting, int[] maxNumTerms, string inputColumnName = null) { @@ -854,13 +860,13 @@ internal ColumnInfo(string name, { Contracts.CheckUserArg(Utils.Size(maxNumTerms) == 0 || Utils.Size(maxNumTerms) == 1 && maxNumTerms[0] > 0, nameof(maxNumTerms)); - limits[ngramLength - 1] = Utils.Size(maxNumTerms) == 0 ? NgramExtractingEstimator.Defaults.MaxNumTerms : maxNumTerms[0]; + limits[ngramLength - 1] = Utils.Size(maxNumTerms) == 0 ? Defaults.MaxNumTerms : maxNumTerms[0]; } else { Contracts.CheckUserArg(Utils.Size(maxNumTerms) <= ngramLength, nameof(maxNumTerms)); Contracts.CheckUserArg(Utils.Size(maxNumTerms) == 0 || maxNumTerms.All(i => i >= 0) && maxNumTerms[maxNumTerms.Length - 1] > 0, nameof(maxNumTerms)); - var extend = Utils.Size(maxNumTerms) == 0 ? NgramExtractingEstimator.Defaults.MaxNumTerms : maxNumTerms[maxNumTerms.Length - 1]; + var extend = Utils.Size(maxNumTerms) == 0 ? Defaults.MaxNumTerms : maxNumTerms[maxNumTerms.Length - 1]; limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maxNumTerms) ? maxNumTerms[i] : extend); } Limits = ImmutableArray.Create(limits); diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index 46362653cd..87f6716731 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -130,6 +130,9 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(StopWordsRemovingTransformer).Assembly.FullName); } + /// + /// Defines the behavior of the transformer. + /// public IReadOnlyCollection Columns => _columns.AsReadOnly(); private readonly StopWordsRemovingEstimator.ColumnInfo[] _columns; @@ -492,9 +495,13 @@ public sealed class StopWordsRemovingEstimator : TrivialEstimator public sealed class ColumnInfo { + /// Name of the column resulting from the transformation of . public readonly string Name; + /// Name of the column to transform. public readonly string InputColumnName; - public readonly StopWordsRemovingEstimator.Language Language; + /// Language-specific stop words list. + public readonly Language Language; + /// Optional column to use for languages. This overrides language value. public readonly string LanguageColumn; /// @@ -506,7 +513,7 @@ public sealed class ColumnInfo /// Optional column to use for languages. This overrides language value. public ColumnInfo(string name, string inputColumnName = null, - StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Defaults.DefaultLanguage, + Language language = Defaults.DefaultLanguage, string languageColumn = null) { Contracts.CheckNonWhiteSpace(name, nameof(name)); @@ -517,6 +524,7 @@ public ColumnInfo(string name, LanguageColumn = languageColumn; } } + /// /// Stopwords language. This enumeration is serialized. /// diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 5d5f0a0144..418d7cafff 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -12,6 +12,9 @@ namespace Microsoft.ML using CharTokenizingDefaults = TokenizingByCharactersEstimator.Defaults; using TextNormalizeDefaults = TextNormalizingEstimator.Defaults; + /// + /// The catalog of text related transformations. + /// public static class TextCatalog { /// diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index 5b74b78a69..3f91e73ebf 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -320,7 +320,7 @@ internal TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable } /// - /// Trains and returns a . + /// Trains and returns a . /// public ITransformer Fit(IDataView input) { diff --git a/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs b/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs index 2275aefd59..a695a1da56 100644 --- a/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs +++ b/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs @@ -88,6 +88,10 @@ private static VersionInfo GetVersionInfo() } private const string RegistrationName = "TextNormalizer"; + + /// + /// The names of the output and input column pairs on which the transformation is applied. + /// public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); private readonly TextNormalizingEstimator.CaseNormalizationMode _textCase; diff --git a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs index 301070a127..f1171d0369 100644 --- a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs +++ b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs @@ -112,6 +112,9 @@ internal TokenizingByCharactersTransformer(IHostEnvironment env, bool useMarkerC _useMarkerChars = useMarkerCharacters; } + /// + /// The names of the output and input column pairs on which the transformation is applied. + /// public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs index 696771be97..d5dafe1276 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs @@ -93,6 +93,10 @@ internal static VersionInfo GetVersionInfo() private readonly int _linesToSkip; private readonly Model _currentVocab; private static Dictionary> _vocab = new Dictionary>(); + + /// + /// The names of the output and input column pairs on which the transformation is applied. + /// public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); private sealed class Model @@ -799,6 +803,9 @@ internal WordEmbeddingsExtractingEstimator(IHostEnvironment env, string customMo _columns = columns; } + /// + /// Specifies which word embeddings to use. + /// public enum PretrainedModelKind { [TGUI(Label = "GloVe 50D")] diff --git a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs index e57a6c7b8b..ff0ec2631c 100644 --- a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs +++ b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs @@ -105,6 +105,7 @@ internal WordBagEstimator(IHostEnvironment env, _weighting = weighting; } + /// Trains and returns a . public override TransformWrapper Fit(IDataView input) { // Create arguments. @@ -242,6 +243,7 @@ internal WordHashBagEstimator(IHostEnvironment env, _invertHash = invertHash; } + /// Trains and returns a . public override TransformWrapper Fit(IDataView input) { // Create arguments. From 51099a4a1b3d2158585b238a1fe351cd2bc8214a Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Tue, 19 Feb 2019 17:19:33 -0800 Subject: [PATCH 2/3] second pass using the api review tool, internalized checkinputcolumn --- src/Microsoft.ML.Data/Transforms/Hashing.cs | 2 +- src/Microsoft.ML.Data/Transforms/KeyToVector.cs | 2 +- src/Microsoft.ML.Data/Transforms/Normalizer.cs | 2 +- src/Microsoft.ML.Data/Transforms/OneToOneTransformerBase.cs | 3 ++- src/Microsoft.ML.HalLearners/VectorWhitening.cs | 2 +- src/Microsoft.ML.ImageAnalytics/ImageGrayscale.cs | 2 +- src/Microsoft.ML.ImageAnalytics/ImageLoader.cs | 2 +- src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs | 2 +- src/Microsoft.ML.ImageAnalytics/ImageResizer.cs | 2 +- src/Microsoft.ML.PCA/PcaTransformer.cs | 2 +- src/Microsoft.ML.Transforms/GcnTransform.cs | 2 +- src/Microsoft.ML.Transforms/KeyToVectorMapping.cs | 2 +- src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs | 2 +- src/Microsoft.ML.Transforms/MissingValueReplacing.cs | 2 +- src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs | 2 +- src/Microsoft.ML.Transforms/Text/NgramTransform.cs | 2 +- .../Text/StopWordsRemovingTransformer.cs | 2 +- src/Microsoft.ML.Transforms/Text/TextNormalizing.cs | 2 +- src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs | 2 +- src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs | 2 +- src/Microsoft.ML.Transforms/Text/WordTokenizing.cs | 2 +- 21 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 64144b6f9e..5765b6a2d0 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -136,7 +136,7 @@ private static VersionInfo GetVersionInfo() private readonly VBuffer>[] _keyValues; private readonly VectorType[] _kvTypes; - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; if (!HashingEstimator.IsColumnTypeValid(type)) diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs index ed841a8ecf..24d45bcf86 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs @@ -112,7 +112,7 @@ private string TestIsKey(DataViewType type) return "key type of known cardinality"; } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; string reason = TestIsKey(type); diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 3dedb6e306..d48f6c686c 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -561,7 +561,7 @@ private protected override void SaveModel(ModelSaveContext ctx) } } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { const string expectedType = "scalar or known-size vector of R4"; diff --git a/src/Microsoft.ML.Data/Transforms/OneToOneTransformerBase.cs b/src/Microsoft.ML.Data/Transforms/OneToOneTransformerBase.cs index 5fe1f8f8c8..e927f57eb8 100644 --- a/src/Microsoft.ML.Data/Transforms/OneToOneTransformerBase.cs +++ b/src/Microsoft.ML.Data/Transforms/OneToOneTransformerBase.cs @@ -78,7 +78,8 @@ private void CheckInput(DataViewSchema inputSchema, int col, out int srcCol) CheckInputColumn(inputSchema, col, srcCol); } - protected virtual void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + [BestFriend] + private protected virtual void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { // By default, there are no extra checks. } diff --git a/src/Microsoft.ML.HalLearners/VectorWhitening.cs b/src/Microsoft.ML.HalLearners/VectorWhitening.cs index eed1565275..0f19730c63 100644 --- a/src/Microsoft.ML.HalLearners/VectorWhitening.cs +++ b/src/Microsoft.ML.HalLearners/VectorWhitening.cs @@ -209,7 +209,7 @@ internal static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Da private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorWhiteningEstimator.ColumnInfo[] columns) => columns.Select(c => (c.Name, c.InputColumnName ?? c.Name)).ToArray(); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var inType = inputSchema[srcCol].Type; var reason = TestColumn(inType); diff --git a/src/Microsoft.ML.ImageAnalytics/ImageGrayscale.cs b/src/Microsoft.ML.ImageAnalytics/ImageGrayscale.cs index 35b318e41a..a4feb85ce8 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageGrayscale.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageGrayscale.cs @@ -164,7 +164,7 @@ private protected override void SaveModel(ModelSaveContext ctx) private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { if (!(inputSchema[srcCol].Type is ImageType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, "image", inputSchema[srcCol].Type.ToString()); diff --git a/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs b/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs index 6906af7c61..e5429ac8bf 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs @@ -131,7 +131,7 @@ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) => Create(env, ctx).MakeRowMapper(inputSchema); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { if (!(inputSchema[srcCol].Type is TextDataViewType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, TextDataViewType.Instance.ToString(), inputSchema[srcCol].Type.ToString()); diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs index 158e888310..8a2cf2b0ab 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs @@ -272,7 +272,7 @@ private protected override void SaveModel(ModelSaveContext ctx) private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var inputColName = _columns[col].InputColumnName; var imageType = inputSchema[srcCol].Type as ImageType; diff --git a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs index a48a80af4e..caaf20f29c 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs @@ -265,7 +265,7 @@ private protected override void SaveModel(ModelSaveContext ctx) private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { if (!(inputSchema[srcCol].Type is ImageType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", _columns[col].InputColumnName, "image", inputSchema[srcCol].Type.ToString()); diff --git a/src/Microsoft.ML.PCA/PcaTransformer.cs b/src/Microsoft.ML.PCA/PcaTransformer.cs index 1907e39912..644a581317 100644 --- a/src/Microsoft.ML.PCA/PcaTransformer.cs +++ b/src/Microsoft.ML.PCA/PcaTransformer.cs @@ -499,7 +499,7 @@ private float[][] PostProcess(float[][] y, float[] sigma, float[] z, int d, int private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { ValidatePcaInput(Host, inputSchema[srcCol].Name, inputSchema[srcCol].Type); } diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 861d1bcbbc..0b86500967 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -202,7 +202,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var inType = inputSchema[srcCol].Type; if (!LpNormalizingEstimatorBase.IsColumnTypeValid(inType)) diff --git a/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs b/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs index b412296918..e289b47f16 100644 --- a/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs +++ b/src/Microsoft.ML.Transforms/KeyToVectorMapping.cs @@ -69,7 +69,7 @@ private string TestIsKey(DataViewType type) return "key type of known cardinality"; } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; string reason = TestIsKey(type); diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index 2b5181e700..d4e854dcde 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -103,7 +103,7 @@ private MissingValueDroppingTransformer(IHostEnvironment env, ModelLoadContext c private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(Column[] columns) => columns.Select(c => (c.Name, c.Source ?? c.Name)).ToArray(); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var inType = inputSchema[srcCol].Type; if (!(inType is VectorType)) diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index c6906e6b94..88e42d1543 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -194,7 +194,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair // REVIEW: Currently these arrays are constructed on load but could be changed to being constructed lazily. private readonly BitArray[] _repIsDefault; - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; string reason = TestType(type); diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 96be9fab0e..693b0263b1 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -240,7 +240,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; string reason = TestColumnType(type); diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 13999755b4..2eb34afec9 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -197,7 +197,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; if (!NgramExtractingEstimator.IsColumnTypeValid(type)) diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index 87f6716731..a05255da22 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -187,7 +187,7 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; if (!StopWordsRemovingEstimator.IsColumnTypeValid(type)) diff --git a/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs b/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs index a695a1da56..1aec2825de 100644 --- a/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs +++ b/src/Microsoft.ML.Transforms/Text/TextNormalizing.cs @@ -114,7 +114,7 @@ internal TextNormalizingTransformer(IHostEnvironment env, } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; if (!TextNormalizingEstimator.IsColumnTypeValid(type)) diff --git a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs index f1171d0369..0548bf50ba 100644 --- a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs +++ b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs @@ -117,7 +117,7 @@ internal TokenizingByCharactersTransformer(IHostEnvironment env, bool useMarkerC /// public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; if (!TokenizingByCharactersEstimator.IsColumnTypeValid(type)) diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs index d5dafe1276..53b1018934 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs @@ -303,7 +303,7 @@ private protected override void SaveModel(ModelSaveContext ctx) private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var colType = inputSchema[srcCol].Type; if (!(colType is VectorType vectorType && vectorType.ItemType is TextDataViewType)) diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index d1f54de979..6bb47ce1bb 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -121,7 +121,7 @@ internal WordTokenizingTransformer(IHostEnvironment env, params WordTokenizingEs _columns = columns.ToArray(); } - protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var type = inputSchema[srcCol].Type; if (!WordTokenizingEstimator.IsColumnTypeValid(type)) From e01fde19068b85f4dd94f834105452fad12ffe1d Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Wed, 20 Feb 2019 11:29:08 -0800 Subject: [PATCH 3/3] resolved review comments --- .../FourierDistributionSampler.cs | 4 ++-- .../Text/NgramHashingTransformer.cs | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs index 19bdeb9dbe..0466c3d5bf 100644 --- a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs +++ b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs @@ -119,7 +119,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx) ctx.Writer.Write(_gamma); } - float IFourierDistributionSampler.Next(Random rand) + public float Next(Random rand) { return (float)Stats.SampleFromGaussian(rand) * MathUtils.Sqrt(2 * _gamma); } @@ -201,7 +201,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx) ctx.Writer.Write(_a); } - float IFourierDistributionSampler.Next(Random rand) + public float Next(Random rand) { return _a * Stats.SampleFromCauchy(rand); } diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 8f308c5801..1e8608567a 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -883,7 +883,7 @@ public sealed class ColumnInfo public readonly int NgramLength; /// Maximum number of tokens to skip when constructing an ngram. public readonly int SkipLength; - /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. + /// Whether to store all ngram lengths up to , or only . public readonly bool AllLengths; /// Number of bits to hash into. Must be between 1 and 31, inclusive. public readonly int HashBits; @@ -893,7 +893,8 @@ public sealed class ColumnInfo public readonly bool Ordered; /// /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. + /// Text representation of original values are stored in the slot names of the metadata for the new column. + /// Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. /// @@ -911,12 +912,13 @@ public sealed class ColumnInfo /// Names of the columns to transform. /// Maximum ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. + /// Whether to store all ngram lengths up to , or only . /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Hashing seed. /// Whether the position of each term should be included in the hash. /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. + /// Text representation of original values are stored in the slot names of the metadata for the new column. + /// Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. /// Whether to rehash unigrams.