|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using Microsoft.ML.Data; |
| 4 | + |
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample demonstrating the SelectColumns transform, which keeps only a chosen
    /// subset of the input columns.
    /// </summary>
    public class SelectColumns
    {
        /// <summary>
        /// Loads the sample infert dataset, keeps only the "Age" and "Education" columns,
        /// and writes the resulting column count and rows to the console.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Preview of the data.
            //
            // Age    Case  Education  induced  parity  pooled.stratum  row_num  ...
            // 26.0   1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
            // 42.0   1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
            // 39.0   1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
            // 34.0   1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
            // 35.0   1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

            // Select a subset of columns to keep.
            var pipeline = mlContext.Transforms.SelectColumns(new string[] { "Age", "Education" });

            // Now we can transform the data and look at the output to confirm the behavior of SelectColumns.
            // Don't forget that this operation doesn't actually evaluate data until we read the data below,
            // as transformations are lazy in ML.NET.
            var transformedData = pipeline.Fit(trainData).Transform(trainData);

            // Print the number of columns in the schema.
            Console.WriteLine($"There are {transformedData.Schema.Count} columns in the dataset.");

            // Expected output:
            //  There are 2 columns in the dataset.

            // We can extract the selected columns as an IEnumerable of SampleInfertDataTransformed,
            // the class we define below.
            var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

            // And finally, we can write out the rows of the dataset, looking at the columns of interest.
            Console.WriteLine("Age and Education columns obtained post-transformation.");
            foreach (var row in rowEnumerable)
            {
                Console.WriteLine($"Age: {row.Age} Education: {row.Education}");
            }

            // Expected output:
            //  Age and Education columns obtained post-transformation.
            //  Age: 26 Education: 0-5yrs
            //  Age: 42 Education: 0-5yrs
            //  Age: 39 Education: 0-5yrs
            //  Age: 34 Education: 0-5yrs
            //  Age: 35 Education: 6-11yrs
        }

        /// <summary>
        /// Row type used to read the transformed data back as objects; it declares only
        /// the two columns kept by the pipeline in <see cref="Example"/>.
        /// </summary>
        private class SampleInfertDataTransformed
        {
            public float Age { get; set; }
            public string Education { get; set; }
        }
    }
}
0 commit comments