Skip to content

Commit f9c1a9b

Browse files
committed
added test for column property and some review comments
1 parent 18ed85a commit f9c1a9b

File tree

8 files changed

+49
-14
lines changed

8 files changed

+49
-14
lines changed

src/Microsoft.ML.Data/Transforms/ColumnConcatenatingEstimator.cs

+6-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111

1212
namespace Microsoft.ML.Transforms
1313
{
14-
public sealed class ColumnConcatenatingEstimator : IEstimator<ITransformer>
14+
/// <summary>
15+
/// Concatenates columns in an <see cref="IDataView"/> together.
16+
/// </summary>
17+
public sealed class ColumnConcatenatingEstimator : IEstimator<ColumnConcatenatingTransformer>
1518
{
1619
private readonly IHost _host;
1720
private readonly string _name;
@@ -38,9 +41,9 @@ internal ColumnConcatenatingEstimator(IHostEnvironment env, string outputColumnN
3841
}
3942

4043
/// <summary>
41-
/// Train and return a transformer.
44+
/// Trains and returns a <see cref="ColumnConcatenatingTransformer"/>.
4245
/// </summary>
43-
public ITransformer Fit(IDataView input)
46+
public ColumnConcatenatingTransformer Fit(IDataView input)
4447
{
4548
_host.CheckValue(input, nameof(input));
4649
return new ColumnConcatenatingTransformer(_host, _name, _source);

src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs

+5-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ namespace Microsoft.ML.Data
3333
{
3434
using PfaType = PfaUtils.Type;
3535

36+
/// <summary>
37+
/// Concatenates columns in an <see cref="IDataView"/> together.
38+
/// </summary>
3639
public sealed class ColumnConcatenatingTransformer : RowToRowTransformerBase
3740
{
3841
internal const string Summary = "Concatenates one or more columns of the same item type.";
@@ -215,7 +218,8 @@ internal ColumnInfo(ModelLoadContext ctx)
215218

216219
private readonly ColumnInfo[] _columns;
217220

218-
internal IReadOnlyCollection<ColumnInfo> Columns => _columns.AsReadOnly();
221+
public IReadOnlyCollection<(string outputColumnName, string[] inputColumnNames)> Columns
222+
=> _columns.Select(col => (outputColumnName: col.Name, inputColumnNames: col.Sources.Select(source => source.name).ToArray())).ToArray().AsReadOnly();
219223

220224
/// <summary>
221225
/// Concatenates columns in <paramref name="inputColumnNames"/> into one column <paramref name="outputColumnName"/>.

src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
4545
=> new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
4646

4747
/// <summary>
48-
/// Concatenates two columns together.
48+
/// Concatenates columns together.
4949
/// </summary>
5050
/// <param name="catalog">The transform's catalog.</param>
5151
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.</param>

src/Microsoft.ML.Data/Transforms/Normalizer.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ internal NormalizingEstimator(IHostEnvironment env, params ColumnBase[] columns)
239239
}
240240

241241
/// <summary>
242-
/// Train and return a transformer.
242+
/// Trains and returns a <see cref="NormalizingTransformer"/>.
243243
/// </summary>
244244
public NormalizingTransformer Fit(IDataView input)
245245
{

src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
namespace Microsoft.ML.Transforms
2222
{
2323
/// <include file='doc.xml' path='doc/members/member[@name="NAHandle"]'/>
24-
public static class MissingValueHandlingTransformer
24+
internal static class MissingValueHandlingTransformer
2525
{
26-
internal enum ReplacementKind : byte
26+
public enum ReplacementKind : byte
2727
{
2828
/// <summary>
2929
/// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns.
@@ -56,7 +56,7 @@ internal enum ReplacementKind : byte
5656
Max = Maximum,
5757
}
5858

59-
internal sealed class Options : TransformInputBase
59+
public sealed class Options : TransformInputBase
6060
{
6161
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:rep:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
6262
public Column[] Columns;
@@ -74,7 +74,7 @@ internal sealed class Options : TransformInputBase
7474
public bool Concat = true;
7575
}
7676

77-
internal sealed class Column : OneToOneColumn
77+
public sealed class Column : OneToOneColumn
7878
{
7979
[Argument(ArgumentType.AtMostOnce, HelpText = "The replacement method to utilize")]
8080
public ReplacementKind? Kind;

src/Microsoft.ML.Transforms/MissingValueReplacing.cs

+18-3
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,8 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
893893

894894
public sealed class MissingValueReplacingEstimator : IEstimator<MissingValueReplacingTransformer>
895895
{
896-
public static class Defaults
896+
[BestFriend]
897+
internal static class Defaults
897898
{
898899
public const ColumnInfo.ReplacementMode ReplacementMode = ColumnInfo.ReplacementMode.DefaultValue;
899900
public const bool ImputeBySlot = true;
@@ -912,9 +913,23 @@ public enum ReplacementMode : byte
912913
Maximum = 3,
913914
}
914915

916+
/// <summary>
917+
/// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
918+
/// </summary>
915919
public readonly string Name;
920+
/// <summary>
921+
/// Name of column to transform.
922+
/// </summary>
916923
public readonly string InputColumnName;
924+
/// <summary>
925+
/// If true, per-slot imputation of replacement is performed.
926+
/// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors,
927+
/// where imputation is always for the entire column.
928+
/// </summary>
917929
public readonly bool ImputeBySlot;
930+
/// <summary>
931+
/// What to replace the missing value with.
932+
/// </summary>
918933
public readonly ReplacementMode Replacement;
919934

920935
/// <summary>
@@ -927,7 +942,7 @@ public enum ReplacementMode : byte
927942
/// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors,
928943
/// where imputation is always for the entire column.</param>
929944
public ColumnInfo(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.ReplacementMode,
930-
bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot)
945+
bool imputeBySlot = Defaults.ImputeBySlot)
931946
{
932947
Contracts.CheckNonWhiteSpace(name, nameof(name));
933948
Name = name;
@@ -985,7 +1000,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
9851000
}
9861001

9871002
/// <summary>
988-
/// Train and return a transformer.
1003+
/// Trains and returns a <see cref="MissingValueReplacingTransformer"/>.
9891004
/// </summary>
9901005
public MissingValueReplacingTransformer Fit(IDataView input) => new MissingValueReplacingTransformer(_host, input, _columns);
9911006
}

test/Microsoft.ML.Benchmarks/Helpers/EnvironmentFactory.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ internal static MLContext CreateRankingEnvironment<TEvaluator, TLoader, TTransfo
3939
environment.ComponentCatalog.RegisterAssembly(typeof(TTransformer).Assembly);
4040
environment.ComponentCatalog.RegisterAssembly(typeof(TTrainer).Assembly);
4141

42-
environment.ComponentCatalog.RegisterAssembly(typeof(MissingValueHandlingTransformer).Assembly);
42+
environment.ComponentCatalog.RegisterAssembly(typeof(MissingValueDroppingTransformer).Assembly);
4343

4444
return ctx;
4545
}

test/Microsoft.ML.Tests/Transformers/ConcatTests.cs

+13
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using Microsoft.Data.DataView;
77
using Microsoft.ML.Data;
88
using Microsoft.ML.Data.IO;
9+
using Microsoft.ML.Internal.Utilities;
910
using Microsoft.ML.RunTests;
1011
using Microsoft.ML.Transforms;
1112
using Xunit;
@@ -107,6 +108,18 @@ ColumnType GetType(Schema schema, string name)
107108
new ColumnConcatenatingTransformer.ColumnInfo("f3", new[] { ("float4", "FLOAT4"), ("float1", "FLOAT1") }));
108109
data = concater.Transform(data);
109110

111+
// Test Columns property.
112+
var columns = concater.Columns;
113+
var colEnumerator = columns.GetEnumerator();
114+
colEnumerator.MoveNext();
115+
Assert.True(colEnumerator.Current.outputColumnName == "f2" &&
116+
colEnumerator.Current.inputColumnNames[0] == "float1" &&
117+
colEnumerator.Current.inputColumnNames[1] == "float1");
118+
colEnumerator.MoveNext();
119+
Assert.True(colEnumerator.Current.outputColumnName == "f3" &&
120+
colEnumerator.Current.inputColumnNames[0] == "float4" &&
121+
colEnumerator.Current.inputColumnNames[1] == "float1");
122+
110123
ColumnType t;
111124
t = GetType(data.Schema, "f2");
112125
Assert.True(t is VectorType vt2 && vt2.ItemType == NumberType.R4 && vt2.Size == 2);

0 commit comments

Comments
 (0)