Skip to content

Commit 78b161b

Browse files
authored
Lockdown HAL Project (#2497)
1 parent 2b79fdb commit 78b161b

File tree

22 files changed

+515
-165
lines changed

22 files changed

+515
-165
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public static void Example()
2323
"PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling", "PercentPre40s",
2424
"EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio");
2525
var learner = mlContext.Regression.Trainers.OrdinaryLeastSquares(
26-
labelColumn: "MedianHomeValue", featureColumn: "Features");
26+
labelColumnName: "MedianHomeValue", featureColumnName: "Features");
2727

2828
var transformedData = transformPipeline.Fit(data).Transform(data);
2929

@@ -40,7 +40,7 @@ public static void Example()
4040
// FeatureContributionCalculatingEstimator can be used as an intermediary step in a pipeline.
4141
// The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
4242
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11)
43-
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions"));
43+
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumnName: "FeatureContributions"));
4444
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);
4545

4646
// Let's extract the weights from the linear model to use as a comparison

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public static void Example()
2121
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
2222
.Append(mlContext.Transforms.Normalize("Features"))
2323
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(
24-
labelColumn: labelName, featureColumn: "Features"));
24+
labelColumnName: labelName, featureColumnName: "Features"));
2525
var model = pipeline.Fit(data);
2626

2727
// Extract the model from the pipeline

docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,24 +54,6 @@ public static void Example()
5454
//0.181 0.361 -0.335 -0.157
5555
//0.165 0.117 -0.547 0.014
5656

57-
// A pipeline to project Features column into white noise vector.
58-
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.Projections.WhiteningKind.Zca);
59-
// The transformed (projected) data.
60-
transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
61-
// Getting the data of the newly created column, so we can preview it.
62-
var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));
63-
64-
printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);
65-
66-
// Features column obtained post-transformation.
67-
//
68-
//-0.394 -0.318 -0.243 -0.168 0.209 0.358 0.433 0.589 0.873 2.047
69-
//-0.034 0.030 0.094 0.159 0.298 0.427 0.492 0.760 1.855 -1.197
70-
// 0.099 0.161 0.223 0.286 0.412 0.603 0.665 1.797 -1.265 -0.172
71-
// 0.211 0.277 0.344 0.410 0.606 1.267 1.333 -1.340 -0.205 0.065
72-
// 0.454 0.523 0.593 0.664 1.886 -0.757 -0.687 -0.022 0.176 0.310
73-
// 0.863 0.938 1.016 1.093 -1.326 -0.096 -0.019 0.189 0.330 0.483
74-
7557
// A pipeline to project Features column into L-p normalized vector.
7658
var lpNormalizePipeline = ml.Transforms.Projection.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.Projections.LpNormalizingEstimatorBase.NormalizerKind.L1Norm);
7759
// The transformed (projected) data.
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
namespace Microsoft.ML.Samples.Dynamic
2+
{
3+
public static class SymbolicStochasticGradientDescent
4+
{
5+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
6+
// In this example we will use the adult income dataset. The goal is to predict
7+
// if a person's income is above $50K or not, based on different pieces of information about that person.
8+
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
9+
public static void Example()
10+
{
11+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
12+
// as a catalog of available operations and as the source of randomness.
13+
// Setting the seed to a fixed number in this example to make outputs deterministic.
14+
var mlContext = new MLContext(seed: 0);
15+
16+
// Download and featurize the dataset.
17+
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
18+
19+
// Leave out 10% of data for testing.
20+
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
21+
// Create data training pipeline.
22+
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(labelColumnName: "IsOver50K", numberOfIterations: 25);
23+
var model = pipeline.Fit(split.TrainSet);
24+
25+
// Evaluate how the model is doing on the test data.
26+
var dataWithPredictions = model.Transform(split.TestSet);
27+
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
28+
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
29+
// Accuracy: 0.85
30+
// AUC: 0.90
31+
// F1 Score: 0.64
32+
// Negative Precision: 0.88
33+
// Negative Recall: 0.93
34+
// Positive Precision: 0.72
35+
// Positive Recall: 0.58
36+
}
37+
}
38+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
namespace Microsoft.ML.Samples.Dynamic
2+
{
3+
public static class SymbolicStochasticGradientDescentWithOptions
4+
{
5+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
6+
// In this example we will use the adult income dataset. The goal is to predict
7+
// if a person's income is above $50K or not, based on different pieces of information about that person.
8+
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
9+
public static void Example()
10+
{
11+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
12+
// as a catalog of available operations and as the source of randomness.
13+
// Setting the seed to a fixed number in this example to make outputs deterministic.
14+
var mlContext = new MLContext(seed: 0);
15+
16+
// Download and featurize the dataset.
17+
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
18+
19+
// Leave out 10% of data for testing.
20+
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
21+
// Create data training pipeline
22+
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
23+
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
24+
{
25+
LabelColumn = "IsOver50K",
26+
LearningRate = 0.2f,
27+
NumberOfIterations = 10,
28+
NumberOfThreads = 1,
29+
30+
});
31+
32+
var model = pipeline.Fit(split.TrainSet);
33+
34+
// Evaluate how the model is doing on the test data.
35+
var dataWithPredictions = model.Transform(split.TestSet);
36+
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
37+
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
38+
// Accuracy: 0.84
39+
// AUC: 0.88
40+
// F1 Score: 0.60
41+
// Negative Precision: 0.87
42+
// Negative Recall: 0.93
43+
// Positive Precision: 0.69
44+
// Positive Recall: 0.53
45+
}
46+
}
47+
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@
55

66
namespace Microsoft.ML.Samples.Dynamic
77
{
8-
public static partial class MatrixFactorization
8+
public static class MatrixFactorization
99
{
10-
// This example first creates in-memory data and then use it to train a matrix factorization mode with default parameters. Afterward, quality metrics are reported.
10+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.Recommender/">Microsoft.ML.Recommender</a>.
11+
// In this example we will create in-memory data and then use it to train
12+
// a matrix factorization model with default parameters. Afterward, quality metrics are reported.
13+
1114
public static void Example()
1215
{
1316
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
@@ -35,13 +38,13 @@ public static void Example()
3538
// Calculate regression metrics for the prediction result.
3639
var metrics = mlContext.Recommendation().Evaluate(prediction,
3740
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));
38-
3941
// Print out some metrics for checking the model's quality.
40-
Console.WriteLine($"L1 - {metrics.L1}"); // 0.17208
41-
Console.WriteLine($"L2 - {metrics.L2}"); // 0.04766
42-
Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 0.04766
43-
Console.WriteLine($"RMS - {metrics.Rms}"); //0.21831
44-
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.97616
42+
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
43+
// L1: 0.17
44+
// L2: 0.05
45+
// LossFunction: 0.05
46+
// RMS: 0.22
47+
// RSquared: 0.98
4548

4649
// Create two entries for making predictions. Of course, the prediction value, Score, is unknown so it can be anything
4750
// (here we use Score=0 and it will be overwritten by the true prediction). If any of row and column indexes are out-of-range

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66

77
namespace Microsoft.ML.Samples.Dynamic
88
{
9-
public static partial class MatrixFactorization
9+
public static class MatrixFactorizationWithOptions
1010
{
11-
// This example first creates in-memory data and then use it to train a matrix factorization model. Afterward, quality metrics are reported.
12-
public static void ExampleWithOptions()
11+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.Recommender/">Microsoft.ML.Recommender</a>.
12+
// In this example we will create in-memory data and then use it to train a matrix factorization model.
13+
// Afterward, quality metrics are reported.
14+
public static void Example()
1315
{
1416
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
1517
// as a catalog of available operations and as the source of randomness.
@@ -29,8 +31,8 @@ public static void ExampleWithOptions()
2931
MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex),
3032
MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex),
3133
LabelColumnName = nameof(MatrixElement.Value),
32-
NumIterations = 10,
33-
NumThreads = 1,
34+
NumberOfIterations = 10,
35+
NumberOfThreads = 1,
3436
ApproximationRank = 32,
3537
LearningRate = 0.3
3638
};
@@ -46,13 +48,13 @@ public static void ExampleWithOptions()
4648
// Calculate regression metrics for the prediction result.
4749
var metrics = mlContext.Recommendation().Evaluate(prediction,
4850
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));
49-
5051
// Print out some metrics for checking the model's quality.
51-
Console.WriteLine($"L1 - {metrics.L1}"); // 0.16375
52-
Console.WriteLine($"L2 - {metrics.L2}"); // 0.04407
53-
Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 0.04407
54-
Console.WriteLine($"RMS - {metrics.Rms}"); // 0.2099
55-
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.97797
52+
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
53+
// L1: 0.16
54+
// L2: 0.04
55+
// LossFunction: 0.04
56+
// RMS: 0.21
57+
// RSquared: 0.98
5658

5759
// Create two entries for making predictions. Of course, the prediction value, Score, is unknown so it can be anything
5860
// (here we use Score=0 and it will be overwritten by the true prediction). If any of row and column indexes are out-of-range
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
using System;
2+
using Microsoft.ML.Data;
3+
using Microsoft.ML.SamplesUtils;
4+
5+
namespace Microsoft.ML.Samples.Dynamic
6+
{
7+
public static class OrdinaryLeastSquares
8+
{
9+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
10+
// In this example we will use the housing price dataset. The goal is to predict median home value.
11+
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/machine-learning-databases/housing/
12+
public static void Example()
13+
{
14+
// Downloading a regression dataset from github.com/dotnet/machinelearning
15+
string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();
16+
17+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
18+
// as well as the source of randomness.
19+
var mlContext = new MLContext(seed: 3);
20+
21+
// Creating a data reader, based on the format of the data
22+
// The data is tab separated with all numeric columns.
23+
// The first column being the label and rest are numeric features
24+
// Here only six numeric columns (indices 1 through 6) are used as features
25+
var dataView = mlContext.Data.ReadFromTextFile(dataFile, new TextLoader.Arguments
26+
{
27+
Separators = new[] { '\t' },
28+
HasHeader = true,
29+
Columns = new[]
30+
{
31+
new TextLoader.Column("Label", DataKind.R4, 0),
32+
new TextLoader.Column("Features", DataKind.R4, 1, 6)
33+
}
34+
});
35+
36+
//////////////////// Data Preview ////////////////////
37+
// MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s
38+
// 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20
39+
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90
40+
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10
41+
42+
var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.2);
43+
44+
// Create the estimator, here we only need OrdinaryLeastSquares trainer
45+
// as data is already processed in a form consumable by the trainer
46+
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares();
47+
48+
var model = pipeline.Fit(split.TrainSet);
49+
50+
// Check the weights that the model learned
51+
var weightsValues = model.Model.Weights;
52+
Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = -0.1682112
53+
Console.WriteLine($"weight 3 - {weightsValues[3]}"); // CharlesRiver (weight 3) = 3.663493
54+
var dataWithPredictions = model.Transform(split.TestSet);
55+
var metrics = mlContext.Regression.Evaluate(dataWithPredictions);
56+
57+
ConsoleUtils.PrintMetrics(metrics);
58+
// L1: 4.15
59+
// L2: 31.98
60+
// LossFunction: 31.98
61+
// RMS: 5.65
62+
// RSquared: 0.56
63+
}
64+
}
65+
}

0 commit comments

Comments
 (0)