Skip to content

Commit e0e36af

Browse files
authored
Add a sample to SelectColumns (#2380)
* Adding a sample for SelectColumn.
1 parent b3509bd commit e0e36af

File tree

2 files changed

+85
-2
lines changed

2 files changed

+85
-2
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML.Data;
4+
5+
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to keep only a chosen subset of columns with the SelectColumns transform.
    /// </summary>
    public class SelectColumns
    {
        /// <summary>
        /// Reads the infert sample data, keeps only the "Age" and "Education" columns,
        /// and writes the resulting column count and rows to the console.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Preview of the data.
            //
            // Age    Case  Education  induced  parity  pooled.stratum  row_num  ...
            // 26.0   1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
            // 42.0   1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
            // 39.0   1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
            // 34.0   1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
            // 35.0   1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

            // Select a subset of columns to keep.
            var pipeline = mlContext.Transforms.SelectColumns(new string[] { "Age", "Education" });

            // Now we can transform the data and look at the output to confirm the behavior of SelectColumns.
            // Don't forget that this operation doesn't actually evaluate data until we read the data below,
            // as transformations are lazy in ML.NET.
            var transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Print the number of columns in the schema.
            Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset.");

            // Expected output:
            //  There are 2 columns in the dataset.

            // We can extract the remaining columns as an IEnumerable of SampleInfertDataTransformed, the class we define below.
            var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

            // And finally, we can write out the rows of the dataset, looking at the columns of interest.
            // The header text matches the expected output documented below; no interpolation is needed here.
            Console.WriteLine("Age and Education columns obtained post-transformation.");
            foreach (var row in rowEnumerable)
            {
                Console.WriteLine($"Age: {row.Age} Education: {row.Education}");
            }

            // Expected output (Education is printed verbatim, as it appears in the data preview above):
            //  Age and Education columns obtained post-transformation.
            //  Age: 26 Education: 0-5yrs
            //  Age: 42 Education: 0-5yrs
            //  Age: 39 Education: 0-5yrs
            //  Age: 34 Education: 0-5yrs
            //  Age: 35 Education: 6-11yrs
        }

        /// <summary>
        /// Row type used to read the transformed data back as an IEnumerable;
        /// it declares only the two columns kept by the pipeline.
        /// </summary>
        private class SampleInfertDataTransformed
        {
            public float Age { get; set; }
            public string Education { get; set; }
        }
    }
}

src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
5353
/// <example>
5454
/// <format type="text/markdown">
5555
/// <![CDATA[
56-
/// [!code-csharp[Concat](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ConcatTransform.cs)]
56+
/// [!code-csharp[Concat](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs)]
5757
/// ]]>
5858
/// </format>
5959
/// </example>
@@ -84,11 +84,27 @@ public static ColumnSelectingEstimator DropColumns(this TransformsCatalog catalo
8484
=> ColumnSelectingEstimator.DropColumns(CatalogUtils.GetEnvironment(catalog), columnsToDrop);
8585

8686
/// <summary>
87-
/// ColumnSelectingEstimator is used to select a list of columns that user wants to drop from a given input.
87+
/// ColumnSelectingEstimator is used to select a list of columns that the user wants to keep from a given input.
8888
/// </summary>
89+
/// <remarks>
90+
/// <format type="text/markdown">
91+
/// <see cref="SelectColumns"/> operates on the schema of an input IDataView,
92+
/// either dropping unselected columns from the schema or keeping them but marking them as hidden in the schema. Keeping columns hidden
93+
/// is recommended when it is necessary to understand how the inputs of a pipeline map to outputs of the pipeline. This feature
94+
/// is useful, for example, in debugging a pipeline of transforms by allowing you to print out results from the middle of the pipeline.
95+
/// For more information on hidden columns, please refer to [IDataView Design Principles](~/../docs/samples/docs/code/IDataViewDesignPrinciples.md).
96+
/// </format>
97+
/// </remarks>
8998
/// <param name="catalog">The transform's catalog.</param>
9099
/// <param name="keepColumns">The array of column names to keep.</param>
91100
/// <param name="keepHidden">If true will keep hidden columns and false will remove hidden columns.</param>
101+
/// <example>
102+
/// <format type="text/markdown">
103+
/// <![CDATA[
104+
/// [!code-csharp[SelectColumns](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs)]
105+
/// ]]>
106+
/// </format>
107+
/// </example>
92108
public static ColumnSelectingEstimator SelectColumns(this TransformsCatalog catalog,
93109
string[] keepColumns,
94110
bool keepHidden = ColumnSelectingTransformer.Defaults.KeepHidden)

0 commit comments

Comments
 (0)