Skip to content

Metadata fixes for the ValueMappingEstimator #2098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 6 commits on Jan 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,14 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
var isKey = Transformer.ValueColumnType is KeyType;
var columnType = (isKey) ? PrimitiveType.FromKind(DataKind.U4) :
Transformer.ValueColumnType;
var metadataShape = SchemaShape.Create(Transformer.ValueColumnMetadata.Schema);
foreach (var (Input, Output) in _columns)
{
if (!inputSchema.TryFindColumn(Input, out var originalColumn))
throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", Input);

// Get the type from TOutputType
var col = new SchemaShape.Column(Output, vectorKind, columnType, isKey, originalColumn.Metadata);
// Create the Value column
var col = new SchemaShape.Column(Output, vectorKind, columnType, isKey, metadataShape);
resultDic[Output] = col;
}
return new SchemaShape(resultDic.Values);
Expand Down Expand Up @@ -191,18 +192,14 @@ internal static IDataView CreateDataView<TKey, TValue>(IHostEnvironment env,
// set of values. This is used for generating the metadata of
// the column.
HashSet<TValue> valueSet = new HashSet<TValue>();
HashSet<TKey> keySet = new HashSet<TKey>();
for (int i = 0; i < values.Count(); ++i)
foreach (var v in values)
{
var v = values.ElementAt(i);
if (valueSet.Contains(v))
continue;
valueSet.Add(v);

var k = keys.ElementAt(i);
keySet.Add(k);
}
var metaKeys = keySet.ToArray();

var metaKeys = valueSet.ToArray();

// Key Values are treated in one of two ways:
// If the values are of type uint or ulong, these values are used directly as the key types and no new keys are created.
Expand Down Expand Up @@ -387,7 +384,7 @@ protected ValueMappingTransformer(IHostEnvironment env, IDataView lookupMap,
Host.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing in an IDataView for the value mapping");
_valueMap = CreateValueMapFromDataView(lookupMap, keyColumn, valueColumn);
int valueColumnIdx = 0;
Host.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out valueColumnIdx));
Host.Check(lookupMap.Schema.TryGetColumnIndex(valueColumn, out valueColumnIdx));
_valueMetadata = lookupMap.Schema[valueColumnIdx].Metadata;

// Create the byte array of the original IDataView, this is used for saving out the data.
Expand Down
Loading