Skip to content

Lockdown HAL Project #2497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public static void Example()
"PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling", "PercentPre40s",
"EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio");
var learner = mlContext.Regression.Trainers.OrdinaryLeastSquares(
labelColumn: "MedianHomeValue", featureColumn: "Features");
labelColumnName: "MedianHomeValue", featureColumnName: "Features");

var transformedData = transformPipeline.Fit(data).Transform(data);

Expand All @@ -40,7 +40,7 @@ public static void Example()
// FeatureContributionCalculatingEstimator can be used as an intermediary step in a pipeline.
// The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11)
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions"));
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumnName: "FeatureContributions"));
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

// Let's extract the weights from the linear model to use as a comparison
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static void Example()
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(
labelColumn: labelName, featureColumn: "Features"));
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);

// Extract the model from the pipeline
Expand Down
18 changes: 0 additions & 18 deletions docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,6 @@ public static void Example()
//0.181 0.361 -0.335 -0.157
//0.165 0.117 -0.547 0.014

// A pipeline to project Features column into white noise vector.
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.Projections.WhiteningKind.Zca);
// The transformed (projected) data.
transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
// Getting the data of the newly created column, so we can preview it.
var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));

printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);

// Features column obtained post-transformation.
//
//-0.394 -0.318 -0.243 -0.168 0.209 0.358 0.433 0.589 0.873 2.047
//-0.034 0.030 0.094 0.159 0.298 0.427 0.492 0.760 1.855 -1.197
// 0.099 0.161 0.223 0.286 0.412 0.603 0.665 1.797 -1.265 -0.172
// 0.211 0.277 0.344 0.410 0.606 1.267 1.333 -1.340 -0.205 0.065
// 0.454 0.523 0.593 0.664 1.886 -0.757 -0.687 -0.022 0.176 0.310
// 0.863 0.938 1.016 1.093 -1.326 -0.096 -0.019 0.189 0.330 0.483

// A pipeline to project Features column into L-p normalized vector.
var lpNormalizePipeline = ml.Transforms.Projection.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.Projections.LpNormalizingEstimatorBase.NormalizerKind.L1Norm);
// The transformed (projected) data.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
namespace Microsoft.ML.Samples.Dynamic
{
public static class SymbolicStochasticGradientDescent
{
Copy link
Member

@sfilipi sfilipi Feb 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @shmoradims, @jwood803 so you don't work on the same. #Closed

// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
// In this example we will use the adult income dataset. The goal is to predict
// if a person's income is above $50K or not, based on different pieces of information about that person.
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
public static void Example()
{
    // Name of the label column, used both for training and for evaluation.
    const string labelColumn = "IsOver50K";

    // The MLContext is the entry point for ML.NET operations (catalog of operations, logging,
    // source of randomness). A fixed seed is used in this example to make outputs deterministic.
    var mlContext = new MLContext(seed: 0);

    // Download and featurize the adult income dataset.
    var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

    // Hold back 10% of the rows as a test set; the rest is used for training.
    var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

    // Build the trainer and fit it on the training portion.
    var trainer = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
        labelColumnName: labelColumn,
        numberOfIterations: 25);
    var model = trainer.Fit(split.TrainSet);

    // Score the held-out rows and report the (non-calibrated) binary classification metrics.
    var scoredTestSet = model.Transform(split.TestSet);
    var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(scoredTestSet, labelColumn);
    SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

    // Expected output:
    // Accuracy: 0.85
    // AUC: 0.90
    // F1 Score: 0.64
    // Negative Precision: 0.88
    // Negative Recall: 0.93
    // Positive Precision: 0.72
    // Positive Recall: 0.58
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
namespace Microsoft.ML.Samples.Dynamic
{
public static class SymbolicStochasticGradientDescentWithOptions
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
// In this example we will use the adult income dataset. The goal is to predict
// if a person's income is above $50K or not, based on different pieces of information about that person.
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
public static void Example()
Copy link
Contributor

@artidoro artidoro Feb 13, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Xml #Resolved

{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
// Setting the seed to a fixed number in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);

// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
{
LabelColumn = "IsOver50K",
LearningRate = 0.2f,
NumberOfIterations = 10,
NumberOfThreads = 1,

});

var model = pipeline.Fit(split.TrainSet);

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
// Accuracy: 0.84
// AUC: 0.88
// F1 Score: 0.60
// Negative Precision: 0.87
// Negative Recall: 0.93
// Positive Precision: 0.69
// Positive Recall: 0.53
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@

namespace Microsoft.ML.Samples.Dynamic
{
public static partial class MatrixFactorization
public static class MatrixFactorization
{
// This example first creates in-memory data and then use it to train a matrix factorization mode with default parameters. Afterward, quality metrics are reported.
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.Recommender/">Microsoft.ML.Recommender</a>.
// In this example we will create in-memory data and then use it to train
// a matrix factorization model with default parameters. Afterward, quality metrics are reported.

public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
Expand Down Expand Up @@ -35,13 +38,13 @@ public static void Example()
// Calculate regression metrics for the prediction result.
var metrics = mlContext.Recommendation().Evaluate(prediction,
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));

// Print out some metrics for checking the model's quality.
Console.WriteLine($"L1 - {metrics.L1}"); // 0.17208
Console.WriteLine($"L2 - {metrics.L2}"); // 0.04766
Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 0.04766
Console.WriteLine($"RMS - {metrics.Rms}"); //0.21831
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.97616
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
// L1: 0.17
// L2: 0.05
// LossFunction: 0.05
// RMS: 0.22
// RSquared: 0.98

// Create two entries for making predictions. Of course, the prediction value, Score, is unknown so it can be anything
// (here we use Score=0 and it will be overwritten by the true prediction). If any of row and column indexes are out-of-range
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@

namespace Microsoft.ML.Samples.Dynamic
{
public static partial class MatrixFactorization
public static class MatrixFactorizationWithOptions
{
// This example first creates in-memory data and then use it to train a matrix factorization model. Afterward, quality metrics are reported.
public static void ExampleWithOptions()
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.Recommender/">Microsoft.ML.Recommender</a>.
// In this example we will create in-memory data and then use it to train a matrix factorization model.
// Afterward, quality metrics are reported.
public static void Example()
{
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
Expand All @@ -29,8 +31,8 @@ public static void ExampleWithOptions()
MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex),
MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex),
LabelColumnName = nameof(MatrixElement.Value),
NumIterations = 10,
NumThreads = 1,
NumberOfIterations = 10,
NumberOfThreads = 1,
ApproximationRank = 32,
LearningRate = 0.3
};
Expand All @@ -46,13 +48,13 @@ public static void ExampleWithOptions()
// Calculate regression metrics for the prediction result.
var metrics = mlContext.Recommendation().Evaluate(prediction,
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));

// Print out some metrics for checking the model's quality.
Console.WriteLine($"L1 - {metrics.L1}"); // 0.16375
Console.WriteLine($"L2 - {metrics.L2}"); // 0.04407
Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 0.04407
Console.WriteLine($"RMS - {metrics.Rms}"); // 0.2099
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.97797
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
// L1: 0.16
// L2: 0.04
// LossFunction: 0.04
// RMS: 0.21
// RSquared: 0.98

// Create two entries for making predictions. Of course, the prediction value, Score, is unknown so it can be anything
// (here we use Score=0 and it will be overwritten by the true prediction). If any of row and column indexes are out-of-range
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
using System;
using Microsoft.ML.Data;
using Microsoft.ML.SamplesUtils;

namespace Microsoft.ML.Samples.Dynamic
{
public static class OrdinaryLeastSquares
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
// In this example we will use the housing price dataset. The goal is to predict median home value.
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/machine-learning-databases/housing/
public static void Example()
{
// Downloading a regression dataset from github.com/dotnet/machinelearning
string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var mlContext = new MLContext(seed: 3);

// Creating a data reader, based on the format of the data
Copy link
Contributor

@zeahmed zeahmed Feb 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

newline please. #Closed

// The data is tab separated with all numeric columns.
// The first column being the label and rest are numeric features
// Here only seven numeric columns are used as features
var dataView = mlContext.Data.ReadFromTextFile(dataFile, new TextLoader.Arguments
{
Separators = new[] { '\t' },
HasHeader = true,
Columns = new[]
{
new TextLoader.Column("Label", DataKind.R4, 0),
new TextLoader.Column("Features", DataKind.R4, 1, 6)
}
});

//////////////////// Data Preview ////////////////////
// MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s
// 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10

var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.2);

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares();

var model = pipeline.Fit(split.TrainSet);

// Check the weights that the model learned
var weightsValues = model.Model.Weights;
Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = -0.1682112
Console.WriteLine($"weight 3 - {weightsValues[3]}"); // CharlesRiver (weight 3) = 3.663493
var dataWithPredictions = model.Transform(split.TestSet);
var metrics = mlContext.Regression.Evaluate(dataWithPredictions);

ConsoleUtils.PrintMetrics(metrics);
// L1: 4.15
// L2: 31.98
// LossFunction: 31.98
// RMS: 5.65
// RSquared: 0.56
}
}
}
Loading