diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs index 6d3deb9f84..db09018ffe 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs @@ -12,11 +12,11 @@ public static void KeyToValue_Term() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. - var ml = new MLContext(); + var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. IEnumerable data = SamplesUtils.DatasetUtils.GetTopicsData(); - var trainData = ml.CreateStreamingDataView(data); + var trainData = mlContext.CreateStreamingDataView(data); // Preview of one of the columns of the the topics data. // The Review column contains the keys associated with a particular body of text. @@ -31,16 +31,16 @@ public static void KeyToValue_Term() // making use of default settings. string defaultColumnName = "DefaultKeys"; // REVIEW create through the catalog extension - var default_pipeline = new WordTokenizingEstimator(ml, "Review") - .Append(new ValueToKeyMappingEstimator(ml, "Review", defaultColumnName)); + var default_pipeline = mlContext.Transforms.Text.TokenizeWords("Review") + .Append(mlContext.Transforms.Conversion.MapValueToKey(defaultColumnName, "Review")); // Another pipeline, that customizes the advanced settings of the TermEstimator. // We can change the maxNumTerm to limit how many keys will get generated out of the set of words, // and condition the order in which they get evaluated by changing sort from the default Occurence (order in which they get encountered) // to value/alphabetically. string customizedColumnName = "CustomizedKeys"; - var customized_pipeline = new WordTokenizingEstimator(ml, "Review") - .Append(new ValueToKeyMappingEstimator(ml, "Review", customizedColumnName, maxNumTerms: 10, sort: ValueToKeyMappingTransformer.SortOrder.Value)); + var customized_pipeline = mlContext.Transforms.Text.TokenizeWords("Review") + .Append(mlContext.Transforms.Conversion.MapValueToKey(customizedColumnName, "Review", maxNumTerms: 10, sort: ValueToKeyMappingTransformer.SortOrder.Value)); // The transformed data. var transformedData_default = default_pipeline.Fit(trainData).Transform(trainData); @@ -61,7 +61,7 @@ public static void KeyToValue_Term() }; // Preview of the DefaultKeys column obtained after processing the input. - var defaultColumn = transformedData_default.GetColumn>(ml, defaultColumnName); + var defaultColumn = transformedData_default.GetColumn>(mlContext, defaultColumnName); printHelper(defaultColumnName, defaultColumn); // DefaultKeys column obtained post-transformation. @@ -72,7 +72,7 @@ public static void KeyToValue_Term() // 9 10 11 12 13 6 // Previewing the CustomizedKeys column obtained after processing the input. - var customizedColumn = transformedData_customized.GetColumn>(ml, customizedColumnName); + var customizedColumn = transformedData_customized.GetColumn>(mlContext, customizedColumnName); printHelper(customizedColumnName, customizedColumn); // CustomizedKeys column obtained post-transformation. @@ -84,11 +84,11 @@ public static void KeyToValue_Term() // Retrieve the original values, by appending the KeyToValue etimator to the existing pipelines // to convert the keys back to the strings. 
- var pipeline = default_pipeline.Append(new KeyToValueMappingEstimator(ml, defaultColumnName)); + var pipeline = default_pipeline.Append(new KeyToValueMappingEstimator(mlContext, defaultColumnName)); transformedData_default = pipeline.Fit(trainData).Transform(trainData); // Preview of the DefaultColumnName column obtained. - var originalColumnBack = transformedData_default.GetColumn>>(ml, defaultColumnName); + var originalColumnBack = transformedData_default.GetColumn>>(mlContext, defaultColumnName); foreach (var row in originalColumnBack) { diff --git a/src/Microsoft.ML.Data/Commands/CrossValidationCommand.cs b/src/Microsoft.ML.Data/Commands/CrossValidationCommand.cs index 42c5a3fe99..6411002e2e 100644 --- a/src/Microsoft.ML.Data/Commands/CrossValidationCommand.cs +++ b/src/Microsoft.ML.Data/Commands/CrossValidationCommand.cs @@ -330,7 +330,7 @@ private string GetSplitColumn(IChannel ch, IDataView input, ref IDataView output int inc = 0; while (input.Schema.TryGetColumnIndex(stratificationColumn, out tmp)) stratificationColumn = string.Format("{0}_{1:000}", origStratCol, ++inc); - output = new HashingEstimator(Host, origStratCol, stratificationColumn, 30).Fit(input).Transform(input); + output = new HashingEstimator(Host, stratificationColumn, origStratCol, 30).Fit(input).Transform(input); } } diff --git a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs index abfaa6d2f9..f655581577 100644 --- a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs @@ -5,9 +5,11 @@ using System.Runtime.CompilerServices; using Microsoft.ML; +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Benchmarks" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TestFramework" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Core.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CpuMath.PerformanceTests" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.InferenceTesting" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.OnnxTransformTest" + PublicKey.TestValue)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Predictor.Tests" + PublicKey.TestValue)] @@ -20,6 +22,10 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Api" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Ensemble" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.FastTree" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.AlexNet" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet101" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet18" + PublicKey.Value)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet50" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.HalLearners" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.KMeansClustering" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.LightGBM" + PublicKey.Value)] diff --git a/src/Microsoft.ML.Data/TrainContext.cs b/src/Microsoft.ML.Data/TrainContext.cs index b0deb9b24b..18e71bf640 100644 --- 
a/src/Microsoft.ML.Data/TrainContext.cs +++ b/src/Microsoft.ML.Data/TrainContext.cs @@ -151,7 +151,7 @@ private void EnsureStratificationColumn(ref IDataView data, ref string stratific // Generate a new column with the hashed stratification column. while (data.Schema.TryGetColumnIndex(stratificationColumn, out tmp)) stratificationColumn = string.Format("{0}_{1:000}", origStratCol, ++inc); - data = new HashingEstimator(Host, origStratCol, stratificationColumn, 30).Fit(data).Transform(data); + data = new HashingEstimator(Host, stratificationColumn, origStratCol, 30).Fit(data).Transform(data); } } } diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index d2481fb4be..ec6afaa74a 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -20,16 +20,16 @@ public static class ConversionsExtensionsCatalog /// Hashes the values in the input column. /// /// The transform's catalog. - /// Name of the input column. - /// Name of the column to be transformed. If this is null '' will be used. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null, + public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string name, string source = null, int hashBits = HashDefaults.HashBits, int invertHash = HashDefaults.InvertHash) - => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, hashBits, invertHash); + => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), name, source ?? name, hashBits, invertHash); /// /// Hashes the values in the input column. @@ -43,12 +43,12 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms /// Changes column type of the input column. /// /// The transform's catalog. - /// Name of the input column. - /// Name of the column to be transformed. If this is null '' will be used. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Number of bits to hash into. Must be between 1 and 31, inclusive. 
- public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null, + public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string name, string source = null, DataKind outputKind = ConvertDefaults.DefaultOutputKind) - => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, outputKind); + => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), name, source, outputKind); /// /// Changes column type of the input column. @@ -62,9 +62,9 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers /// Convert the key types back to their original values. /// /// The categorical transform's catalog. - /// Name of the input column. - public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string inputColumn) - => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn); + /// Name of the input column. + public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string source) + => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), source); /// /// Convert the key types (name of the column specified in the first item of the tuple) back to their original values @@ -88,43 +88,28 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// Convert the key types back to their original vectors. /// /// The categorical transform's catalog. - /// The name of the input column. - /// The name of the output column. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Whether bagging is used for the conversion. public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, - string inputColumn, string outputColumn = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag) - => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, bag); + string name, string source = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag) + => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), name, source ?? name, bag); /// /// Converts value types into . /// /// The categorical transform's catalog. - /// Name of the column to be transformed. - /// Name of the output column. If this is null '' will be used. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Maximum number of keys to keep per column when auto-training. /// How items should be ordered when vectorized. If choosen they will be in the order encountered. /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). 
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, - string inputColumn, - string outputColumn = null, + string name, + string source = null, int maxNumTerms = ValueToKeyMappingEstimator.Defaults.MaxNumTerms, ValueToKeyMappingTransformer.SortOrder sort = ValueToKeyMappingEstimator.Defaults.Sort) - => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, maxNumTerms, sort); - - /// - /// Converts value types into loading the keys to use from . - /// - /// The categorical transform's catalog. - /// The data columns to map to keys. - /// The path of the file containing the terms. - /// - /// - public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, - ValueToKeyMappingTransformer.ColumnInfo[] columns, - string file = null, - string termsColumn = null, - IComponentFactory loaderFactory = null) - => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, file, termsColumn, loaderFactory); + => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), name, source, maxNumTerms, sort); /// /// Maps specified keys to specified values @@ -141,7 +126,7 @@ public static ValueMappingEstimator ValueMap keys, IEnumerable values, - params (string source, string name)[] columns) + params (string inputColumn, string outputColumn)[] columns) => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, columns); } } diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 95c7e855b5..88de9aa32c 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -116,8 +116,8 @@ public bool TryUnparse(StringBuilder sb) public sealed class ColumnInfo { - public readonly string Input; - public readonly string Output; + public readonly string Name; + public readonly string Source; public readonly int HashBits; public readonly uint Seed; public readonly bool Ordered; @@ -126,8 +126,8 @@ public sealed class ColumnInfo /// /// Describes how the transformer handles one column pair. /// - /// Name of input column. - /// Name of the column resulting from the transformation of . Null means is replaced. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Hashing seed. /// Whether the position of each term should be included in the hash. @@ -135,8 +135,8 @@ public sealed class ColumnInfo /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. 
- public ColumnInfo(string input, - string output = null, + public ColumnInfo(string name, + string source = null, int hashBits = HashingEstimator.Defaults.HashBits, uint seed = HashingEstimator.Defaults.Seed, bool ordered = HashingEstimator.Defaults.Ordered, @@ -146,19 +146,21 @@ public ColumnInfo(string input, throw Contracts.ExceptParam(nameof(invertHash), "Value too small, must be -1 or larger"); if (invertHash != 0 && hashBits >= 31) throw Contracts.ExceptParam(nameof(hashBits), $"Cannot support invertHash for a {0} bit hash. 30 is the maximum possible.", hashBits); - Contracts.CheckNonWhiteSpace(input, nameof(input)); - Input = input; - Output = output ?? input; + Contracts.CheckNonWhiteSpace(name, nameof(name)); + Source = source; + Name = name ?? source; HashBits = hashBits; Seed = seed; Ordered = ordered; InvertHash = invertHash; } - internal ColumnInfo(string input, string output, ModelLoadContext ctx) + internal ColumnInfo(string source, string name, ModelLoadContext ctx) { - Input = input; - Output = output; + Contracts.CheckNonWhiteSpace(name, nameof(name)); + + Source = source; + Name = name; // *** Binary format *** // int: HashBits // uint: HashSeed @@ -213,16 +215,16 @@ protected override void CheckInputColumn(Schema inputSchema, int col, int srcCol throw Host.ExceptParam(nameof(inputSchema), HashingEstimator.ExpectedColumnType); } - private static (string input, string output)[] GetColumnPairs(ColumnInfo[] columns) + private static (string source, string name)[] GetColumnPairs(ColumnInfo[] columns) { Contracts.CheckNonEmpty(columns, nameof(columns)); - return columns.Select(x => (x.Input, x.Output)).ToArray(); + return columns.Select(x => (x.Source, x.Name)).ToArray(); } private ColumnType GetOutputType(Schema inputSchema, ColumnInfo column) { var keyCount = column.HashBits < 31 ? 1 << column.HashBits : 0; - inputSchema.TryGetColumnIndex(column.Input, out int srcCol); + inputSchema.TryGetColumnIndex(column.Source, out int srcCol); var itemType = new KeyType(DataKind.U4, 0, keyCount, keyCount > 0); var srcType = inputSchema[srcCol].Type; if (srcType is VectorType vectorType) @@ -315,7 +317,7 @@ private Delegate GetGetterCore(Row input, int iinfo, out Action disposer) Host.AssertValue(input); Host.Assert(0 <= iinfo && iinfo < _columns.Length); disposer = null; - input.Schema.TryGetColumnIndex(_columns[iinfo].Input, out int srcCol); + input.Schema.TryGetColumnIndex(_columns[iinfo].Source, out int srcCol); var srcType = input.Schema[srcCol].Type; if (!(srcType is VectorType vectorType)) return ComposeGetterOne(input, iinfo, srcCol, srcType); @@ -386,8 +388,8 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData { var item = args.Column[i]; var kind = item.InvertHash ?? args.InvertHash; - cols[i] = new ColumnInfo(item.Source ?? item.Name, - item.Name, + cols[i] = new ColumnInfo(item.Name, + item.Source ?? item.Name, item.HashBits ?? args.HashBits, item.Seed ?? args.Seed, item.Ordered ?? args.Ordered, @@ -930,7 +932,7 @@ private InvertHashHelper(Row row, ColumnInfo ex) { Contracts.AssertValue(row); Row = row; - row.Schema.TryGetColumnIndex(ex.Input, out int srcCol); + row.Schema.TryGetColumnIndex(ex.Source, out int srcCol); _srcCol = srcCol; _srcType = row.Schema[srcCol].Type; _ex = ex; @@ -949,8 +951,7 @@ private InvertHashHelper(Row row, ColumnInfo ex) /// A hash getter, built on top of . 
public static InvertHashHelper Create(Row row, ColumnInfo ex, int invertHashMaxCount, Delegate dstGetter) { - row.Schema.TryGetColumnIndex(ex.Input, out int srcCol); - + row.Schema.TryGetColumnIndex(ex.Source, out int srcCol); ColumnType typeSrc = row.Schema[srcCol].Type; VectorType vectorTypeSrc = typeSrc as VectorType; @@ -1215,16 +1216,16 @@ internal static bool IsColumnTypeValid(ColumnType type) /// Initializes a new instance of . /// /// Host Environment. - /// Name of the column to be transformed. - /// Name of the output column. If this is null '' will be used. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public HashingEstimator(IHostEnvironment env, string inputColumn, string outputColumn = null, + internal HashingEstimator(IHostEnvironment env, string name, string source = null, int hashBits = Defaults.HashBits, int invertHash = Defaults.InvertHash) - : this(env, new HashingTransformer.ColumnInfo(inputColumn, outputColumn ?? inputColumn, hashBits: hashBits, invertHash: invertHash)) + : this(env, new HashingTransformer.ColumnInfo(name, source ?? name, hashBits: hashBits, invertHash: invertHash)) { } @@ -1233,7 +1234,7 @@ public HashingEstimator(IHostEnvironment env, string inputColumn, string outputC /// /// Host Environment. /// Description of dataset columns and how to process them. - public HashingEstimator(IHostEnvironment env, params HashingTransformer.ColumnInfo[] columns) + internal HashingEstimator(IHostEnvironment env, params HashingTransformer.ColumnInfo[] columns) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(nameof(HashingEstimator)); @@ -1248,8 +1249,8 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) var result = inputSchema.ToDictionary(x => x.Name); foreach (var colInfo in _columns) { - if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) - throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); + if (!inputSchema.TryFindColumn(colInfo.Source, out var col)) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Source); if (!IsColumnTypeValid(col.ItemType)) throw _host.ExceptParam(nameof(inputSchema), ExpectedColumnType); var metadata = new List(); @@ -1257,7 +1258,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) metadata.Add(slotMeta); if (colInfo.InvertHash != 0) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, TextType.Instance, false)); - result[colInfo.Output] = new SchemaShape.Column(colInfo.Output, col.ItemType is VectorType ? SchemaShape.Column.VectorKind.Vector : SchemaShape.Column.VectorKind.Scalar, NumberType.U4, true, new SchemaShape(metadata)); + result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, col.ItemType is VectorType ? 
SchemaShape.Column.VectorKind.Vector : SchemaShape.Column.VectorKind.Scalar, NumberType.U4, true, new SchemaShape(metadata)); } return new SchemaShape(result.Values); } diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs index c918dc6c5c..4f3ceaf279 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs @@ -90,21 +90,21 @@ public sealed class Arguments /// public sealed class ColumnInfo { - public readonly string Input; - public readonly string Output; + public readonly string Name; + public readonly string Source; public readonly bool Bag; /// /// Describes how the transformer handles one column pair. /// - /// Name of input column. - /// Name of the column resulting from the transformation of . Null means is replaced. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Whether to combine multiple indicator vectors into a single bag vector instead of concatenating them. This is only relevant when the input column is a vector. - public ColumnInfo(string input, string output = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag) + public ColumnInfo(string name, string source = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag) { - Contracts.CheckNonWhiteSpace(input, nameof(input)); - Input = input; - Output = output ?? input; + Contracts.CheckNonWhiteSpace(name, nameof(name)); + Source = source ?? name; + Name = name; Bag = bag; } } @@ -114,10 +114,10 @@ public ColumnInfo(string input, string output = null, bool bag = KeyToVectorMapp public IReadOnlyCollection Columns => _columns.AsReadOnly(); private readonly ColumnInfo[] _columns; - private static (string input, string output)[] GetColumnPairs(ColumnInfo[] columns) + private static (string source, string name)[] GetColumnPairs(ColumnInfo[] columns) { Contracts.CheckValue(columns, nameof(columns)); - return columns.Select(x => (x.Input, x.Output)).ToArray(); + return columns.Select(x => (x.Source, x.Name)).ToArray(); } private string TestIsKey(ColumnType type) @@ -202,7 +202,7 @@ private KeyToVectorMappingTransformer(IHost host, ModelLoadContext ctx) _columns = new ColumnInfo[columnsLength]; for (int i = 0; i < columnsLength; i++) - _columns[i] = new ColumnInfo(ColumnPairs[i].input, ColumnPairs[i].output, bags[i]); + _columns[i] = new ColumnInfo(ColumnPairs[i].output, ColumnPairs[i].input, bags[i]); } // Factory method for SignatureDataTransform. @@ -218,8 +218,9 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData { var item = args.Column[i]; - cols[i] = new ColumnInfo(item.Source ?? item.Name, + cols[i] = new ColumnInfo( item.Name, + item.Source ?? item.Name, item.Bag ?? 
args.Bag); }; return new KeyToVectorMappingTransformer(env, cols).MakeDataTransform(input); @@ -744,13 +745,20 @@ internal static class Defaults public const bool Bag = false; } - public KeyToVectorMappingEstimator(IHostEnvironment env, params KeyToVectorMappingTransformer.ColumnInfo[] columns) + internal KeyToVectorMappingEstimator(IHostEnvironment env, params KeyToVectorMappingTransformer.ColumnInfo[] columns) : this(env, new KeyToVectorMappingTransformer(env, columns)) { } - public KeyToVectorMappingEstimator(IHostEnvironment env, string inputColumn, string outputColumn = null, bool bag = Defaults.Bag) - : this(env, new KeyToVectorMappingTransformer(env, new KeyToVectorMappingTransformer.ColumnInfo(inputColumn, outputColumn ?? inputColumn, bag))) + /// + /// Convert the key types back to their original vectors. + /// + /// The environmnet to use. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. + /// Whether bagging is used for the conversion. + internal KeyToVectorMappingEstimator(IHostEnvironment env, string name, string source = null, bool bag = Defaults.Bag) + : this(env, new KeyToVectorMappingTransformer(env, new KeyToVectorMappingTransformer.ColumnInfo(name, source ?? name, bag))) { } @@ -765,10 +773,10 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var result = inputSchema.ToDictionary(x => x.Name); foreach (var colInfo in Transformer.Columns) { - if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); + if (!inputSchema.TryFindColumn(colInfo.Source, out var col)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Source); if ((col.ItemType.GetItemType().RawKind == default) || !(col.ItemType is VectorType || col.ItemType is PrimitiveType)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Source); var metadata = new List(); if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.KeyValues, out var keyMeta)) @@ -779,7 +787,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) if (!colInfo.Bag || (col.Kind == SchemaShape.Column.VectorKind.Scalar)) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)); - result[colInfo.Output] = new SchemaShape.Column(colInfo.Output, SchemaShape.Column.VectorKind.Vector, NumberType.R4, false, new SchemaShape(metadata)); + result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, SchemaShape.Column.VectorKind.Vector, NumberType.R4, false, new SchemaShape(metadata)); } return new SchemaShape(result.Values); diff --git a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs index 3b9a3b372b..a83bba0fff 100644 --- a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs +++ b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs @@ -173,22 +173,24 @@ private static VersionInfo GetVersionInfo() /// public sealed class ColumnInfo { - public readonly string Input; - public readonly string Output; + public readonly string Name; + public readonly string Source; public readonly DataKind OutputKind; public readonly KeyRange OutputKeyRange; /// /// Describes how the transformer handles one column pair. /// - /// Name of input column. - /// Name of output column. 
+ /// Name of the column resulting from the transformation of . + /// Name of column to transform. /// The expected kind of the converted column. /// New key range, if we work with key type. - public ColumnInfo(string input, string output, DataKind outputKind, KeyRange outputKeyRange = null) + public ColumnInfo(string name, string source, DataKind outputKind, KeyRange outputKeyRange = null) { - Input = input; - Output = output; + Contracts.CheckNonWhiteSpace(name, nameof(name)); + + Source = source; + Name = name; OutputKind = outputKind; OutputKeyRange = outputKeyRange; } @@ -196,22 +198,22 @@ public ColumnInfo(string input, string output, DataKind outputKind, KeyRange out private readonly ColumnInfo[] _columns; - private static (string input, string output)[] GetColumnPairs(ColumnInfo[] columns) + private static (string source, string name)[] GetColumnPairs(ColumnInfo[] columns) { Contracts.CheckNonEmpty(columns, nameof(columns)); - return columns.Select(x => (x.Input, x.Output)).ToArray(); + return columns.Select(x => (x.Source, x.Name)).ToArray(); } /// /// Convinence constructor for simple one column case. /// /// Host Environment. - /// Name of the output column. - /// Name of the column to be transformed. If this is null '' will be used. + /// Name of the column produced. + /// Name of the column to transform. If this is null '' will be used. /// The expected type of the converted column. /// New key range if we work with key type. - public TypeConvertingTransformer(IHostEnvironment env, string inputColumn, string outputColumn, DataKind outputKind, KeyRange outputKeyRange = null) - : this(env, new ColumnInfo(inputColumn, outputColumn, outputKind, outputKeyRange)) + public TypeConvertingTransformer(IHostEnvironment env, string source, string name, DataKind outputKind, KeyRange outputKeyRange = null) + : this(env, new ColumnInfo(name, source, outputKind, outputKeyRange)) { } @@ -296,7 +298,7 @@ private TypeConvertingTransformer(IHost host, ModelLoadContext ctx) range.Max = count; range.Contiguous = ctx.Reader.ReadBoolByte(); } - _columns[i] = new ColumnInfo(ColumnPairs[i].input, ColumnPairs[i].output, kind, range); + _columns[i] = new ColumnInfo(ColumnPairs[i].output, ColumnPairs[i].input, kind, range); } } @@ -344,7 +346,7 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat { kind = tempResultType.Value; } - cols[i] = new ColumnInfo(item.Source ?? item.Name, item.Name, kind, range); + cols[i] = new ColumnInfo(item.Name, item.Source ?? 
item.Name, kind, range); }; return new TypeConvertingTransformer(env, cols).MakeDataTransform(input); } @@ -412,7 +414,7 @@ public Mapper(TypeConvertingTransformer parent, Schema inputSchema) { throw Host.ExceptParam(nameof(inputSchema), "source column '{0}' with item type '{1}' is not compatible with destination type '{2}'", - _parent._columns[i].Input, srcCol.Type, itemType); + _parent._columns[i].Source, srcCol.Type, itemType); } } } @@ -467,7 +469,7 @@ protected override Schema.DetachedColumn[] GetOutputColumnsCore() ValueGetter getter = (ref bool dst) => dst = true; builder.Add(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, getter); } - result[i] = new Schema.DetachedColumn(_parent._columns[i].Output, _types[i], builder.GetMetadata()); + result[i] = new Schema.DetachedColumn(_parent._columns[i].Name, _types[i], builder.GetMetadata()); } return result; } @@ -488,17 +490,17 @@ public void SaveAsOnnx(OnnxContext ctx) for (int iinfo = 0; iinfo < _parent._columns.Length; ++iinfo) { - string sourceColumnName = _parent._columns[iinfo].Input; + string sourceColumnName = _parent._columns[iinfo].Source; if (!ctx.ContainsColumn(sourceColumnName)) { - ctx.RemoveColumn(_parent._columns[iinfo].Output, false); + ctx.RemoveColumn(_parent._columns[iinfo].Name, false); continue; } if (!SaveAsOnnxCore(ctx, iinfo, ctx.GetVariableName(sourceColumnName), - ctx.AddIntermediateVariable(_types[iinfo], _parent._columns[iinfo].Output))) + ctx.AddIntermediateVariable(_types[iinfo], _parent._columns[iinfo].Name))) { - ctx.RemoveColumn(_parent._columns[iinfo].Output, true); + ctx.RemoveColumn(_parent._columns[iinfo].Name, true); } } } @@ -535,20 +537,20 @@ internal sealed class Defaults /// Convinence constructor for simple one column case. /// /// Host Environment. - /// Name of the input column. - /// Name of the output column. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// The expected type of the converted column. - public TypeConvertingEstimator(IHostEnvironment env, - string inputColumn, string outputColumn = null, + internal TypeConvertingEstimator(IHostEnvironment env, + string name, string source = null, DataKind outputKind = Defaults.DefaultOutputKind) - : this(env, new TypeConvertingTransformer.ColumnInfo(inputColumn, outputColumn ?? inputColumn, outputKind)) + : this(env, new TypeConvertingTransformer.ColumnInfo(name, source ?? name, outputKind)) { } /// /// Create a that takes multiple pairs of columns. 
/// - public TypeConvertingEstimator(IHostEnvironment env, params TypeConvertingTransformer.ColumnInfo[] columns) : + internal TypeConvertingEstimator(IHostEnvironment env, params TypeConvertingTransformer.ColumnInfo[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(TypeConvertingEstimator)), new TypeConvertingTransformer(env, columns)) { } @@ -559,12 +561,12 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var result = inputSchema.ToDictionary(x => x.Name); foreach (var colInfo in Transformer.Columns) { - if (!inputSchema.TryFindColumn(colInfo.Input, out var col)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Input); + if (!inputSchema.TryFindColumn(colInfo.Source, out var col)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.Source); if (!TypeConvertingTransformer.GetNewType(Host, col.ItemType, colInfo.OutputKind, colInfo.OutputKeyRange, out PrimitiveType newType)) - throw Host.ExceptParam(nameof(inputSchema), $"Can't convert {colInfo.Input} into {newType.ToString()}"); + throw Host.ExceptParam(nameof(inputSchema), $"Can't convert {colInfo.Source} into {newType.ToString()}"); if (!Data.Conversion.Conversions.Instance.TryGetStandardConversion(col.ItemType, newType, out Delegate del, out bool identity)) - throw Host.ExceptParam(nameof(inputSchema), $"Don't know how to convert {colInfo.Input} into {newType.ToString()}"); + throw Host.ExceptParam(nameof(inputSchema), $"Don't know how to convert {colInfo.Source} into {newType.ToString()}"); var metadata = new List(); if (col.ItemType is BoolType && newType is NumberType) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)); @@ -577,7 +579,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) if (col.Metadata.TryFindColumn(MetadataUtils.Kinds.IsNormalized, out var normMeta)) if (col.ItemType is NumberType && newType is NumberType) metadata.Add(new SchemaShape.Column(MetadataUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, normMeta.ItemType, false)); - result[colInfo.Output] = new SchemaShape.Column(colInfo.Output, col.Kind, newType, false, col.Metadata); + result[colInfo.Name] = new SchemaShape.Column(colInfo.Name, col.Kind, newType, false, col.Metadata); } return new SchemaShape(result.Values); } diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index ea71b7a45b..193409b4b2 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -27,17 +27,17 @@ public static class Defaults /// Initializes a new instance of . /// /// Host Environment. - /// Name of the column to be transformed. - /// Name of the output column. If this is null '' will be used. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. /// Maximum number of keys to keep per column when auto-training. /// How items should be ordered when vectorized. If choosen they will be in the order encountered. /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). 
- public ValueToKeyMappingEstimator(IHostEnvironment env, string inputColumn, string outputColumn = null, int maxNumTerms = Defaults.MaxNumTerms, ValueToKeyMappingTransformer.SortOrder sort = Defaults.Sort) : - this(env, new [] { new ValueToKeyMappingTransformer.ColumnInfo(inputColumn, outputColumn ?? inputColumn, maxNumTerms, sort) }) + internal ValueToKeyMappingEstimator(IHostEnvironment env, string name, string source = null, int maxNumTerms = Defaults.MaxNumTerms, ValueToKeyMappingTransformer.SortOrder sort = Defaults.Sort) : + this(env, new [] { new ValueToKeyMappingTransformer.ColumnInfo(source ?? name, name, maxNumTerms, sort) }) { } - public ValueToKeyMappingEstimator(IHostEnvironment env, ValueToKeyMappingTransformer.ColumnInfo[] columns, + internal ValueToKeyMappingEstimator(IHostEnvironment env, ValueToKeyMappingTransformer.ColumnInfo[] columns, string file = null, string termsColumn = null, IComponentFactory loaderFactory = null) { diff --git a/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs b/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs index 1f0fe94f1b..91b65e2779 100644 --- a/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs +++ b/src/Microsoft.ML.EntryPoints/FeatureCombiner.cs @@ -94,19 +94,19 @@ private static IDataView ApplyKeyToVec(List (x.Input, x.Output)).ToArray()) + viewTrain = new KeyToValueMappingTransformer(host, ktv.Select(x => (x.Source, x.Name)).ToArray()) .Transform(viewTrain); viewTrain = ValueToKeyMappingTransformer.Create(host, new ValueToKeyMappingTransformer.Arguments() { Column = ktv - .Select(c => new ValueToKeyMappingTransformer.Column() { Name = c.Output, Source = c.Output, Terms = GetTerms(viewTrain, c.Input) }) + .Select(c => new ValueToKeyMappingTransformer.Column() { Name = c.Name, Source = c.Name, Terms = GetTerms(viewTrain, c.Source) }) .ToArray(), TextKeyValues = true }, viewTrain); - viewTrain = new KeyToVectorMappingTransformer(host, ktv.Select(c => new KeyToVectorMappingTransformer.ColumnInfo(c.Output, c.Output)).ToArray()).Transform(viewTrain); + viewTrain = new KeyToVectorMappingTransformer(host, ktv.Select(c => new KeyToVectorMappingTransformer.ColumnInfo(c.Name, c.Name)).ToArray()).Transform(viewTrain); } return viewTrain; } @@ -173,7 +173,7 @@ private static IDataView ApplyConvert(List { var colName = GetUniqueName(); concatNames.Add(new KeyValuePair(col.Name, colName)); - Utils.Add(ref ktv, new KeyToVectorMappingTransformer.ColumnInfo(col.Name, colName)); + Utils.Add(ref ktv, new KeyToVectorMappingTransformer.ColumnInfo(colName, col.Name)); continue; } } @@ -184,7 +184,7 @@ private static IDataView ApplyConvert(List // This happens when the training is done on an XDF and the scoring is done on a data frame. 
var colName = GetUniqueName(); concatNames.Add(new KeyValuePair(col.Name, colName)); - Utils.Add(ref cvt, new TypeConvertingTransformer.ColumnInfo(col.Name, colName, DataKind.R4)); + Utils.Add(ref cvt, new TypeConvertingTransformer.ColumnInfo(colName, col.Name, DataKind.R4)); continue; } } diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 9cd4091fda..aed8dc609a 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -582,7 +582,7 @@ public override IEstimator Reconcile(IHostEnvironment env, for (int i = 0; i < toOutput.Length; ++i) { var col = (IColInput)toOutput[i]; - infos[i] = new KeyToVectorMappingTransformer.ColumnInfo(inputNames[col.Input], outputNames[toOutput[i]], col.Bag); + infos[i] = new KeyToVectorMappingTransformer.ColumnInfo(outputNames[toOutput[i]], inputNames[col.Input], col.Bag); } return new KeyToVectorMappingEstimator(env, infos); } @@ -937,7 +937,7 @@ public override IEstimator Reconcile(IHostEnvironment env, Pipelin for (int i = 0; i < toOutput.Length; ++i) { var tcol = (IConvertCol)toOutput[i]; - infos[i] = new TypeConvertingTransformer.ColumnInfo(inputNames[tcol.Input], outputNames[toOutput[i]], tcol.Kind); + infos[i] = new TypeConvertingTransformer.ColumnInfo(outputNames[toOutput[i]], inputNames[tcol.Input], tcol.Kind); } return new TypeConvertingEstimator(env, infos); } diff --git a/src/Microsoft.ML.Transforms/OneHotEncoding.cs b/src/Microsoft.ML.Transforms/OneHotEncoding.cs index faf682da4c..74d99856b0 100644 --- a/src/Microsoft.ML.Transforms/OneHotEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotEncoding.cs @@ -255,7 +255,7 @@ public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns, if (binaryCols.Count > 0) toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryCols.Select(x => new KeyToBinaryVectorMappingTransformer.ColumnInfo(x.input, x.output)).ToArray()); if (cols.Count > 0) - toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingTransformer.ColumnInfo(x.input, x.output, x.bag)).ToArray()); + toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingTransformer.ColumnInfo(x.output, x.input, x.bag)).ToArray()); if (toBinVector != null && toVector != null) _toSomething = toVector.Append(toBinVector); diff --git a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs index 05478790d9..d4bbb764d4 100644 --- a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs @@ -234,7 +234,7 @@ public ColumnInfo(string input, string output, bool ordered = Defaults.Ordered, int invertHash = Defaults.InvertHash) { - HashInfo = new HashingTransformer.ColumnInfo(input, output, hashBits, seed, ordered, invertHash); + HashInfo = new HashingTransformer.ColumnInfo(output, input, hashBits, seed, ordered, invertHash); OutputKind = outputKind; } } @@ -285,13 +285,13 @@ public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] col case OneHotEncodingTransformer.OutputKind.Bin: if ((column.HashInfo.InvertHash) != 0) ch.Warning("Invert hashing is being used with binary encoding."); - binaryCols.Add((column.HashInfo.Output, column.HashInfo.Output)); + binaryCols.Add((column.HashInfo.Name, column.HashInfo.Name)); break; case OneHotEncodingTransformer.OutputKind.Ind: - cols.Add((column.HashInfo.Output, column.HashInfo.Output, false)); + 
cols.Add((column.HashInfo.Name, column.HashInfo.Name, false)); break; case OneHotEncodingTransformer.OutputKind.Bag: - cols.Add((column.HashInfo.Output, column.HashInfo.Output, true)); + cols.Add((column.HashInfo.Name, column.HashInfo.Name, true)); break; } } @@ -300,7 +300,7 @@ public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] col if (binaryCols.Count > 0) toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryCols.Select(x => new KeyToBinaryVectorMappingTransformer.ColumnInfo(x.input, x.output)).ToArray()); if (cols.Count > 0) - toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingTransformer.ColumnInfo(x.input, x.output, x.bag)).ToArray()); + toVector = new KeyToVectorMappingEstimator(_host, cols.Select(x => new KeyToVectorMappingTransformer.ColumnInfo(x.output, x.input, x.bag)).ToArray()); if (toBinVector != null && toVector != null) _toSomething = toVector.Append(toBinVector); diff --git a/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs b/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs index e911dbbafe..c644dd02a6 100644 --- a/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs @@ -359,8 +359,8 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV }); } - hashColumns.Add(new HashingTransformer.ColumnInfo(termLoaderArgs == null ? column.Source[isrc] : tmpName, - tmpName, 30, column.Seed ?? args.Seed, false, column.InvertHash ?? args.InvertHash)); + hashColumns.Add(new HashingTransformer.ColumnInfo(tmpName, termLoaderArgs == null ? column.Source[isrc] : tmpName, + 30, column.Seed ?? args.Seed, false, column.InvertHash ?? args.InvertHash)); } ngramHashColumns[iinfo] = diff --git a/test/Microsoft.ML.Benchmarks/HashBench.cs b/test/Microsoft.ML.Benchmarks/HashBench.cs index ca7f70baca..5a9a6f031e 100644 --- a/test/Microsoft.ML.Benchmarks/HashBench.cs +++ b/test/Microsoft.ML.Benchmarks/HashBench.cs @@ -72,7 +72,7 @@ private void InitMap(T val, ColumnType type, int hashBits = 20, ValueGetter dst = val; _inRow = RowImpl.Create(type, getter); // One million features is a nice, typical number. 
- var info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: hashBits); + var info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: hashBits); var xf = new HashingTransformer(_env, new[] { info }); var mapper = xf.GetRowToRowMapper(_inRow.Schema); var column = mapper.OutputSchema["Bar"]; diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 4660a8d2a4..3c64969469 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -748,7 +748,7 @@ public void EntryPointPipelineEnsemble() }).Fit(data).Transform(data); data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data); - data = new ValueToKeyMappingEstimator(Env, "Label", "Label", sort: ValueToKeyMappingTransformer.SortOrder.Value).Fit(data).Transform(data); + data = new ValueToKeyMappingEstimator(Env, "Label", sort: ValueToKeyMappingTransformer.SortOrder.Value).Fit(data).Transform(data); var lrInput = new LogisticRegression.Arguments { diff --git a/test/Microsoft.ML.Tests/RangeFilterTests.cs b/test/Microsoft.ML.Tests/RangeFilterTests.cs index 131bde9fa7..5c440b7d9f 100644 --- a/test/Microsoft.ML.Tests/RangeFilterTests.cs +++ b/test/Microsoft.ML.Tests/RangeFilterTests.cs @@ -28,7 +28,7 @@ public void RangeFilterTest() var cnt = data1.GetColumn(ML, "Floats").Count(); Assert.Equal(2L, cnt); - data = ML.Transforms.Conversion.Hash("Strings", "Key", hashBits: 20).Fit(data).Transform(data); + data = ML.Transforms.Conversion.Hash("Key", "Strings", hashBits: 20).Fit(data).Transform(data); var data2 = ML.Data.FilterByKeyColumnFraction(data, "Key", upperBound: 0.5); cnt = data2.GetColumn(ML, "Floats").Count(); Assert.Equal(1L, cnt); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index ff38fbebe5..7fcc88397c 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -34,7 +34,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() // Create Estimator var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Append(mlContext.Transforms.Normalize("Features")) - .Append(mlContext.Transforms.Conversion.MapValueToKey("IrisPlantType", "Label"), TransformerScope.TrainTest) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label","IrisPlantType"), TransformerScope.TrainTest) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)) .Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Plant"))); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 441fcc4fc1..18a53becf1 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -444,7 +444,7 @@ public void TensorFlowTransformMNISTLRTrainingTest() ReTrain = true })) .Append(mlContext.Transforms.Concatenate("Features", "Prediction")) - 
.Append(mlContext.Transforms.Conversion.MapValueToKey("Label", "KeyLabel", maxNumTerms: 10)) + .Append(mlContext.Transforms.Conversion.MapValueToKey("KeyLabel", "Label", maxNumTerms: 10)) .Append(mlContext.MulticlassClassification.Trainers.LightGbm("KeyLabel", "Features")); var trainedModel = pipe.Fit(trainData); diff --git a/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs b/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs index e2a23374b4..eb2a44d31d 100644 --- a/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs @@ -73,8 +73,8 @@ public void TestConvertWorkout() var data = new[] { new TestClass() { A = 1, B = new int[2] { 1,4 } }, new TestClass() { A = 2, B = new int[2] { 3,4 } }}; var dataView = ComponentCreation.CreateDataView(Env, data); - var pipe = new TypeConvertingEstimator(Env, columns: new[] {new TypeConvertingTransformer.ColumnInfo("A", "ConvA", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("B", "ConvB", DataKind.R4)}); + var pipe = new TypeConvertingEstimator(Env, columns: new[] {new TypeConvertingTransformer.ColumnInfo("ConvA", "A", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvB", "B", DataKind.R4)}); TestEstimatorCore(pipe, dataView); var allTypesData = new[] @@ -113,18 +113,18 @@ public void TestConvertWorkout() var allTypesDataView = ComponentCreation.CreateDataView(Env, allTypesData); var allTypesPipe = new TypeConvertingEstimator(Env, columns: new[] { - new TypeConvertingTransformer.ColumnInfo("AA", "ConvA", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AB", "ConvB", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AC", "ConvC", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AD", "ConvD", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AE", "ConvE", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AF", "ConvF", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AG", "ConvG", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AH", "ConvH", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AK", "ConvK", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AL", "ConvL", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AM", "ConvM", DataKind.R4), - new TypeConvertingTransformer.ColumnInfo("AN", "ConvN", DataKind.R4)} + new TypeConvertingTransformer.ColumnInfo("ConvA", "AA", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvB", "AB", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvC", "AC", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvD", "AD", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvE", "AE", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvF", "AF", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvG", "AG", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvH", "AH", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvK", "AK", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvL", "AL", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvM", "AM", DataKind.R4), + new TypeConvertingTransformer.ColumnInfo("ConvN", "AN", DataKind.R4)} ); TestEstimatorCore(allTypesPipe, allTypesDataView); @@ -153,8 +153,8 @@ public void TestOldSavingAndLoading() var data = new[] { new TestClass() { A = 1, B = new int[2] { 1,4 } }, new TestClass() { A = 2, B = new int[2] { 3,4 } }}; var dataView = ComponentCreation.CreateDataView(Env, data); - var pipe = new 
TypeConvertingEstimator(Env, columns: new[] {new TypeConvertingTransformer.ColumnInfo("A", "ConvA", DataKind.R8),
-                new TypeConvertingTransformer.ColumnInfo("B", "ConvB", DataKind.R8)});
+            var pipe = new TypeConvertingEstimator(Env, columns: new[] {new TypeConvertingTransformer.ColumnInfo("ConvA", "A", DataKind.R8),
+                new TypeConvertingTransformer.ColumnInfo("ConvB", "B", DataKind.R8)});
             var result = pipe.Fit(dataView).Transform(dataView);
             var resultRoles = new RoleMappedData(result);
@@ -175,8 +175,8 @@ public void TestMetadata()
                 new OneHotEncodingEstimator.ColumnInfo("A", "CatA", OneHotEncodingTransformer.OutputKind.Ind),
                 new OneHotEncodingEstimator.ColumnInfo("B", "CatB", OneHotEncodingTransformer.OutputKind.Key)
             }).Append(new TypeConvertingEstimator(Env, new[] {
-                new TypeConvertingTransformer.ColumnInfo("CatA", "ConvA", DataKind.R8),
-                new TypeConvertingTransformer.ColumnInfo("CatB", "ConvB", DataKind.U2)
+                new TypeConvertingTransformer.ColumnInfo("ConvA", "CatA", DataKind.R8),
+                new TypeConvertingTransformer.ColumnInfo("ConvB", "CatB", DataKind.U2)
             }));
             var dataView = ComponentCreation.CreateDataView(Env, data);
             dataView = pipe.Fit(dataView).Transform(dataView);
diff --git a/test/Microsoft.ML.Tests/Transformers/HashTests.cs b/test/Microsoft.ML.Tests/Transformers/HashTests.cs
index 6104b92fca..04a6a15af1 100644
--- a/test/Microsoft.ML.Tests/Transformers/HashTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/HashTests.cs
@@ -46,10 +46,10 @@ public void HashWorkout()
             var dataView = ComponentCreation.CreateDataView(Env, data);
             var pipe = new HashingEstimator(Env, new[]{
-                    new HashingTransformer.ColumnInfo("A", "HashA", hashBits:4, invertHash:-1),
-                    new HashingTransformer.ColumnInfo("B", "HashB", hashBits:3, ordered:true),
-                    new HashingTransformer.ColumnInfo("C", "HashC", seed:42),
-                    new HashingTransformer.ColumnInfo("A", "HashD"),
+                    new HashingTransformer.ColumnInfo("HashA", "A", hashBits:4, invertHash:-1),
+                    new HashingTransformer.ColumnInfo("HashB", "B", hashBits:3, ordered:true),
+                    new HashingTransformer.ColumnInfo("HashC", "C", seed:42),
+                    new HashingTransformer.ColumnInfo("HashD", "A"),
                 });
             TestEstimatorCore(pipe, dataView);
@@ -68,9 +68,9 @@ public void TestMetadata()
             var dataView = ComponentCreation.CreateDataView(Env, data);
             var pipe = new HashingEstimator(Env, new[] {
-                new HashingTransformer.ColumnInfo("A", "HashA", invertHash:1, hashBits:10),
-                new HashingTransformer.ColumnInfo("A", "HashAUnlim", invertHash:-1, hashBits:10),
-                new HashingTransformer.ColumnInfo("A", "HashAUnlimOrdered", invertHash:-1, hashBits:10, ordered:true)
+                new HashingTransformer.ColumnInfo("HashA", "A", invertHash:1, hashBits:10),
+                new HashingTransformer.ColumnInfo("HashAUnlim", "A", invertHash:-1, hashBits:10),
+                new HashingTransformer.ColumnInfo("HashAUnlimOrdered", "A", invertHash:-1, hashBits:10, ordered:true)
             });
             var result = pipe.Fit(dataView).Transform(dataView);
             ValidateMetadata(result);
@@ -108,10 +108,10 @@ public void TestOldSavingAndLoading()
             var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } };
             var dataView = ComponentCreation.CreateDataView(Env, data);
             var pipe = new HashingEstimator(Env, new[]{
-                    new HashingTransformer.ColumnInfo("A", "HashA", hashBits:4, invertHash:-1),
-                    new HashingTransformer.ColumnInfo("B", "HashB", hashBits:3, ordered:true),
-                    new HashingTransformer.ColumnInfo("C", "HashC", seed:42),
-                    new HashingTransformer.ColumnInfo("A", "HashD"),
+                    new HashingTransformer.ColumnInfo("HashA", "A", hashBits:4, invertHash:-1),
+                    new HashingTransformer.ColumnInfo("HashB", "B", hashBits:3, ordered:true),
+                    new HashingTransformer.ColumnInfo("HashC", "C", seed:42),
+                    new HashingTransformer.ColumnInfo("HashD", "A"),
                 });
             var result = pipe.Fit(dataView).Transform(dataView);
             var resultRoles = new RoleMappedData(result);
@@ -132,7 +132,7 @@ private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expe
             var inRow = MetadataUtils.MetadataAsRow(builder.GetMetadata());
             // First do an unordered hash.
-            var info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits);
+            var info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: bits);
             var xf = new HashingTransformer(Env, new[] { info });
             var mapper = xf.GetRowToRowMapper(inRow.Schema);
             mapper.OutputSchema.TryGetColumnIndex("Bar", out int outCol);
@@ -144,7 +144,7 @@ private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expe
             Assert.Equal(expected, result);
             // Next do an ordered hash.
-            info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true);
+            info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
             xf = new HashingTransformer(Env, new[] { info });
             mapper = xf.GetRowToRowMapper(inRow.Schema);
             mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
@@ -162,7 +162,7 @@ private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expe
             builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer<T> dst) => denseVec.CopyTo(ref dst));
             inRow = MetadataUtils.MetadataAsRow(builder.GetMetadata());
-            info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: false);
+            info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false);
             xf = new HashingTransformer(Env, new[] { info });
             mapper = xf.GetRowToRowMapper(inRow.Schema);
             mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
@@ -177,7 +177,7 @@ private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expe
             Assert.All(vecResult.DenseValues(), v => Assert.Equal(expected, v));
             // Now do ordered with the dense vector.
-            info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true);
+            info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
             xf = new HashingTransformer(Env, new[] { info });
             mapper = xf.GetRowToRowMapper(inRow.Schema);
             mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
@@ -196,7 +196,7 @@ private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expe
             builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer<T> dst) => sparseVec.CopyTo(ref dst));
             inRow = MetadataUtils.MetadataAsRow(builder.GetMetadata());
-            info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: false);
+            info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false);
             xf = new HashingTransformer(Env, new[] { info });
             mapper = xf.GetRowToRowMapper(inRow.Schema);
             mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
@@ -209,7 +209,7 @@ private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expe
             Assert.Equal(expected, vecResult.GetItemOrDefault(3));
             Assert.Equal(expected, vecResult.GetItemOrDefault(7));
-            info = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true);
+            info = new HashingTransformer.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true);
             xf = new HashingTransformer(Env, new[] { info });
             mapper = xf.GetRowToRowMapper(inRow.Schema);
             mapper.OutputSchema.TryGetColumnIndex("Bar", out outCol);
diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs
index 967231f604..2a228129f3 100644
--- a/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/KeyToVectorEstimatorTests.cs
@@ -57,10 +57,10 @@ public void KeyToVectorWorkout()
                 new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC", textKeyValues:true)
             }).Fit(dataView).Transform(dataView);
-            var pipe = new KeyToVectorMappingEstimator(Env, new KeyToVectorMappingTransformer.ColumnInfo("TermA", "CatA", false),
-                new KeyToVectorMappingTransformer.ColumnInfo("TermB", "CatB", true),
-                new KeyToVectorMappingTransformer.ColumnInfo("TermC", "CatC", true),
-                new KeyToVectorMappingTransformer.ColumnInfo("TermC", "CatCNonBag", false));
+            var pipe = new KeyToVectorMappingEstimator(Env, new KeyToVectorMappingTransformer.ColumnInfo("CatA", "TermA", false),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatB", "TermB", true),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatC", "TermC", true),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatCNonBag", "TermC", false));
             TestEstimatorCore(pipe, dataView);
             Done();
         }
@@ -121,14 +121,14 @@ public void TestMetadataPropagation()
             dataView = termTransformer.Transform(dataView);
             var pipe = new KeyToVectorMappingEstimator(Env,
-                new KeyToVectorMappingTransformer.ColumnInfo("TA", "CatA", true),
-                new KeyToVectorMappingTransformer.ColumnInfo("TB", "CatB", false),
-                new KeyToVectorMappingTransformer.ColumnInfo("TC", "CatC", false),
-                new KeyToVectorMappingTransformer.ColumnInfo("TD", "CatD", true),
-                new KeyToVectorMappingTransformer.ColumnInfo("TE", "CatE", false),
-                new KeyToVectorMappingTransformer.ColumnInfo("TF", "CatF", true),
-                new KeyToVectorMappingTransformer.ColumnInfo("TG", "CatG", true),
-                new KeyToVectorMappingTransformer.ColumnInfo("TH", "CatH", false)
+                new KeyToVectorMappingTransformer.ColumnInfo("CatA", "TA", true),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatB", "TB", false),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatC", "TC", false),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatD", "TD", true),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatE", "TE", false),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatF", "TF", true),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatG", "TG", true),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatH", "TH", false)
                 );
             var result = pipe.Fit(dataView).Transform(dataView);
@@ -221,8 +221,8 @@ public void TestOldSavingAndLoading()
             var transformer = est.Fit(dataView);
             dataView = transformer.Transform(dataView);
             var pipe = new KeyToVectorMappingEstimator(Env,
-                new KeyToVectorMappingTransformer.ColumnInfo("TermA", "CatA", false),
-                new KeyToVectorMappingTransformer.ColumnInfo("TermB", "CatB", true)
+                new KeyToVectorMappingTransformer.ColumnInfo("CatA", "TermA", false),
+                new KeyToVectorMappingTransformer.ColumnInfo("CatB", "TermB", true)
                 );
             var result = pipe.Fit(dataView).Transform(dataView);
             var resultRoles = new RoleMappedData(result);
diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
index 432bbb8291..06743db102 100644
--- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
@@ -249,7 +249,7 @@ public void NgramWorkout()
                 .Read(sentimentDataPath);
             var est = new WordTokenizingEstimator(Env, "text", "text")
-                .Append(new ValueToKeyMappingEstimator(Env, "text", "terms"))
+                .Append(new ValueToKeyMappingEstimator(Env, "terms", "text"))
                 .Append(new NgramExtractingEstimator(Env, "terms", "ngrams"))
                 .Append(new NgramHashingEstimator(Env, "terms", "ngramshash"));
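The common pattern across these test updates is that the ColumnInfo constructors now take the output column name first and the source column name second. As a quick reference, here is a minimal sketch of that pattern (not part of the diff), reusing only the HashingEstimator, Env, TestClass, and ComponentCreation helpers that already appear in the tests above:

// Sketch only: output column name first ("HashA"), then the source column ("A").
// Assumes the same test fixtures used above (Env, TestClass, ComponentCreation).
var data = new[] { new TestClass() { A = 1, B = 2, C = 3 }, new TestClass() { A = 4, B = 5, C = 6 } };
var dataView = ComponentCreation.CreateDataView(Env, data);
var pipe = new HashingEstimator(Env, new[]
{
    new HashingTransformer.ColumnInfo("HashA", "A", hashBits: 4, invertHash: -1),
    new HashingTransformer.ColumnInfo("HashB", "B", hashBits: 3, ordered: true),
});
var hashed = pipe.Fit(dataView).Transform(dataView);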