Skip to content

Commit f4b4dd9

Browse files
committed
Adding in samples and documentation. General code cleanup. Made the RowToRowMapperTransform create a new mapper if possible for each cursor.
1 parent 87071c8 commit f4b4dd9

File tree

19 files changed

+903
-490
lines changed

19 files changed

+903
-490
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample showing how the category imputer fills missing (NaN) entries of a
/// column with that column's most frequent value.
/// </summary>
public static class CategoryImputer
{
    /// <summary>
    /// Builds a five-row in-memory dataset, fits the imputer on "Feature1"
    /// and prints the transformed column to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the starting point for all ML.NET operations. It
        // also provides exception tracking, logging and the source of
        // randomness.
        var mlContext = new MLContext();

        // Small in-memory dataset: two of the five rows are missing (NaN),
        // and 1 is the most frequent of the remaining values.
        var rows = new List<InputData>
        {
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = float.NaN },
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = float.NaN },
            new InputData { Feature1 = 9f },
        };

        // Wrap the list in an IDataView so it can be fed to the estimator.
        var data = mlContext.Data.LoadFromEnumerable(rows);

        // Estimator that fills in the missing values of the "Feature1" column.
        var imputer = mlContext.Transforms.CatagoryImputerTransformer("Feature1");

        // Fit on the data, then apply the fitted transformer to the same data.
        var fitted = imputer.Fit(data);
        var imputed = fitted.Transform(data);

        // Read the result back as strongly typed rows. The NaN values should
        // have been replaced by the most frequent value, 1.
        var outputRows = mlContext.Data.CreateEnumerable<TransformedData>(
            imputed, reuseRowObject: false);

        // Print every transformed row.
        Console.WriteLine($"Features column obtained post-transformation.");
        foreach (var row in outputRows)
        {
            Console.WriteLine(row.Feature1);
        }

        // Expected output:
        //  Features column obtained post-transformation.
        //  1
        //  1
        //  1
        //  1
        //  9
    }

    // Shape of one input row.
    private class InputData
    {
        public float Feature1;
    }

    // Shape of one row after the transformation.
    private sealed class TransformedData
    {
        public float Feature1 { get; set; }
    }
}
69+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample showing the robust scaler applied to the "Feature1" column with its
/// default options, which center and scale the values.
/// </summary>
public static class RobustScaler
{
    /// <summary>
    /// Fits the robust scaler on a five-row in-memory dataset and prints the
    /// transformed "Feature1" values to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for
        // exception tracking and logging, as well as the source of randomness.
        var mlContext = new MLContext();

        // Create a small dataset as an IEnumerable.
        var samples = new List<InputData>
        {
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = 3f },
            new InputData { Feature1 = 5f },
            new InputData { Feature1 = 7f },
            new InputData { Feature1 = 9f },
        };

        // Convert training data to IDataView.
        var dataview = mlContext.Data.LoadFromEnumerable(samples);

        // A pipeline for centering and scaling the "Feature1" column.
        var pipeline = mlContext.Transforms.RobustScalerTransformer("Feature1");

        // The transformed data.
        var transformedData = pipeline.Fit(dataview).Transform(dataview);

        // Now let's take a look at what this did. The values should be
        // centered around 0 and scaled. We can extract the newly created
        // column as an IEnumerable of TransformedData.
        var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(
            transformedData, reuseRowObject: false);

        // And we can write out the rows.
        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var featureRow in featuresColumn)
        {
            Console.WriteLine(featureRow.Feature1);
        }

        // Expected output (as printed with a '.' decimal separator):
        //  Features column obtained post-transformation.
        //  -1
        //  -0.5
        //  0
        //  0.5
        //  1
    }

    // Shape of one input row.
    private sealed class InputData
    {
        public float Feature1;
    }

    // Shape of one row after the transformation.
    private sealed class TransformedData
    {
        public float Feature1 { get; set; }
    }
}
69+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample showing the robust scaler with scaling switched off, so the
/// "Feature1" column is only centered.
/// </summary>
public static class RobustScalerWithCenter
{
    /// <summary>
    /// Fits the center-only robust scaler on a five-row in-memory dataset and
    /// prints the transformed "Feature1" values to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the starting point for all ML.NET operations. It
        // also provides exception tracking, logging and the source of
        // randomness.
        var mlContext = new MLContext();

        // Small in-memory dataset of five evenly spaced values.
        var rows = new List<InputData>
        {
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = 3f },
            new InputData { Feature1 = 5f },
            new InputData { Feature1 = 7f },
            new InputData { Feature1 = 9f },
        };

        // Wrap the list in an IDataView so it can be fed to the estimator.
        var data = mlContext.Data.LoadFromEnumerable(rows);

        // Estimator that centers the "Feature1" column; scaling is disabled.
        var centerer = mlContext.Transforms.RobustScalerTransformer("Feature1", scale: false);

        // Fit on the data, then apply the fitted transformer to the same data.
        var fitted = centerer.Fit(data);
        var centered = fitted.Transform(data);

        // Read the result back as strongly typed rows. The values should now
        // be centered around 0.
        var outputRows = mlContext.Data.CreateEnumerable<TransformedData>(
            centered, reuseRowObject: false);

        // Print every transformed row.
        Console.WriteLine($"Features column obtained post-transformation.");
        foreach (var row in outputRows)
        {
            Console.WriteLine(row.Feature1);
        }

        // Expected output:
        //  Features column obtained post-transformation.
        //  -4
        //  -2
        //  0
        //  2
        //  4
    }

    // Shape of one input row.
    private class InputData
    {
        public float Feature1;
    }

    // Shape of one row after the transformation.
    private sealed class TransformedData
    {
        public float Feature1 { get; set; }
    }
}
69+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample showing the robust scaler with centering switched off, so the
/// "Feature1" column is only scaled.
/// </summary>
public static class RobustScalerWithScale
{
    /// <summary>
    /// Fits the scale-only robust scaler on a five-row in-memory dataset and
    /// prints the transformed "Feature1" values to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for
        // exception tracking and logging, as well as the source of randomness.
        var mlContext = new MLContext();

        // Create a small dataset as an IEnumerable.
        var samples = new List<InputData>
        {
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = 3f },
            new InputData { Feature1 = 5f },
            new InputData { Feature1 = 7f },
            new InputData { Feature1 = 9f },
        };

        // Convert training data to IDataView.
        var dataview = mlContext.Data.LoadFromEnumerable(samples);

        // A pipeline for scaling the "Feature1" column; centering is disabled.
        var pipeline = mlContext.Transforms.RobustScalerTransformer("Feature1", center: false);

        // The transformed data.
        var transformedData = pipeline.Fit(dataview).Transform(dataview);

        // Now let's take a look at what this did. The values are scaled but
        // not shifted; for this dataset each value ends up divided by 4 (see
        // the expected output below). We can extract the newly created column
        // as an IEnumerable of TransformedData.
        var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(
            transformedData, reuseRowObject: false);

        // And we can write out the rows.
        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var featureRow in featuresColumn)
        {
            Console.WriteLine(featureRow.Feature1);
        }

        // Expected output (as printed with a '.' decimal separator):
        //  Features column obtained post-transformation.
        //  0.25
        //  0.75
        //  1.25
        //  1.75
        //  2.25
    }

    // Shape of one input row.
    private sealed class InputData
    {
        public float Feature1;
    }

    // Shape of one row after the transformation.
    private sealed class TransformedData
    {
        public float Feature1 { get; set; }
    }
}
69+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample showing how the to-string transformer converts several numeric
/// columns ("Feature1", "Feature2" and "Feature3") into their string
/// representations in one pipeline.
/// </summary>
public static class ToStringMultipleColumns
{
    /// <summary>
    /// Fits the to-string transformer on a six-row in-memory dataset and
    /// prints the three converted columns of each row to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for
        // exception tracking and logging, as well as the source of randomness.
        var mlContext = new MLContext();

        // Create a small dataset as an IEnumerable. The three columns have
        // different numeric types: float, double and int.
        var samples = new List<InputData>
        {
            new InputData { Feature1 = 0.1f, Feature2 = 1.1, Feature3 = 1 },
            new InputData { Feature1 = 0.2f, Feature2 = 1.2, Feature3 = 2 },
            new InputData { Feature1 = 0.3f, Feature2 = 1.3, Feature3 = 3 },
            new InputData { Feature1 = 0.4f, Feature2 = 1.4, Feature3 = 4 },
            new InputData { Feature1 = 0.5f, Feature2 = 1.5, Feature3 = 5 },
            new InputData { Feature1 = 0.6f, Feature2 = 1.6, Feature3 = 6 },
        };

        // Convert training data to IDataView.
        var dataview = mlContext.Data.LoadFromEnumerable(samples);

        // A pipeline for converting the "Feature1", "Feature2" and
        // "Feature3" columns into their string representations.
        var pipeline = mlContext.Transforms.ToStringTransformer(
            new InputOutputColumnPair("Feature1"),
            new InputOutputColumnPair("Feature2"),
            new InputOutputColumnPair("Feature3"));

        // The transformed data.
        var transformedData = pipeline.Fit(dataview).Transform(dataview);

        // Now let's take a look at what this did.
        // We can extract the newly created columns as an IEnumerable of
        // TransformedData.
        var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(
            transformedData, reuseRowObject: false);

        // And we can write out the rows, one line per row with the three
        // string columns separated by single spaces.
        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var featureRow in featuresColumn)
        {
            Console.WriteLine($"{featureRow.Feature1} {featureRow.Feature2} {featureRow.Feature3}");
        }

        // Expected output:
        //  Features column obtained post-transformation.
        //  0.100000 1.100000 1
        //  0.200000 1.200000 2
        //  0.300000 1.300000 3
        //  0.400000 1.400000 4
        //  0.500000 1.500000 5
        //  0.600000 1.600000 6
    }

    // Shape of one input row.
    private sealed class InputData
    {
        public float Feature1;
        public double Feature2;
        public int Feature3;
    }

    // Shape of one row after the transformation: every column is now a string.
    private sealed class TransformedData
    {
        public string Feature1 { get; set; }
        public string Feature2 { get; set; }
        public string Feature3 { get; set; }
    }
}
80+
}

0 commit comments

Comments
 (0)