Skip to content

Documentation for BinaryClassification.AveragedPerceptron #2483

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
bf9d946
Updated docs for AveragedPerceptron
Feb 8, 2019
4222e85
Added a sample
Feb 8, 2019
48dffb0
Internalize and cleanup recommender project (#2451)
Ivanidzo4ka Feb 8, 2019
cdc78df
Remove dead code (#2481)
mareklinka Feb 9, 2019
6526a01
ITransformer derives from ICanSaveModel and explicit implementation f…
artidoro Feb 9, 2019
7dfadca
Add a project for functional tests without visibility into internals …
rogancarr Feb 9, 2019
07580a8
Add analyzer for detecting BestFriend usages on public declarations (…
mareklinka Feb 9, 2019
7d7ebb6
TensorFlow: Fixed shape issue where unknown shape will be induced fro…
zeahmed Feb 10, 2019
a3b6522
Towards #2326 - removing some namespaces (#2442)
sfilipi Feb 10, 2019
069abb7
Lockdown Microsoft.ML.TimeSeries public surface (#2344)
codemzs Feb 11, 2019
b863ac2
Update cookbook (#2494)
jwood803 Feb 11, 2019
8444a5a
Replace ConditionalFact usages with custom facts (#2402)
mareklinka Feb 11, 2019
f269adc
Towards 2326: Microsoft.ML.Ensemble and Microsoft.ML.TimeSeries names…
sfilipi Feb 12, 2019
f4f03ba
Addressed PR comments
Feb 12, 2019
ceb3aa2
Added sample for the second overload with trainer options.
Feb 12, 2019
56607ba
Get rid of value tuples in TrainTest and CrossValidation (#2507)
Ivanidzo4ka Feb 12, 2019
2b96f6d
Fixed the failing tests
Feb 12, 2019
87be8dc
Updated docs for AveragedPerceptron
Feb 8, 2019
ba0abff
Added a sample
Feb 8, 2019
a5538ed
Addressed PR comments
Feb 12, 2019
6b56065
Added sample for the second overload with trainer options.
Feb 12, 2019
c487733
Fixed the failing tests
Feb 12, 2019
5219d0c
Merge branch 'doc_binary_clf' of https://github.com/shmoradims/machin…
Feb 12, 2019
59673b8
Fixed breaking changes from master.
Feb 12, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
15 changes: 15 additions & 0 deletions Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TestFramework"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Predictor.Tests", "test\Microsoft.ML.Predictor.Tests\Microsoft.ML.Predictor.Tests.csproj", "{6B047E09-39C9-4583-96F3-685D84CA4117}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Functional.Tests", "test\Microsoft.ML.Functional.Tests\Microsoft.ML.Functional.Tests.csproj", "{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.ResultProcessor", "src\Microsoft.ML.ResultProcessor\Microsoft.ML.ResultProcessor.csproj", "{3769FCC3-9AFF-4C37-97E9-6854324681DF}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.FastTree", "src\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj", "{B7B593C5-FB8C-4ADA-A638-5B53B47D087E}"
Expand Down Expand Up @@ -928,6 +930,18 @@ Global
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release|Any CPU.Build.0 = Release|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -1011,6 +1025,7 @@ Global
{85D0CAFD-2FE8-496A-88C7-585D35B94243} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{31D38B21-102B-41C0-9E0A-2FE0BF68D123} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{5E920CAC-5A28-42FB-936E-49C472130953} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
2 changes: 2 additions & 0 deletions build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
<PropertyGroup>
<BenchmarkDotNetVersion>0.11.3</BenchmarkDotNetVersion>
<MicrosoftMLTestModelsPackageVersion>0.0.3-test</MicrosoftMLTestModelsPackageVersion>
<MicrosoftMLTensorFlowTestModelsVersion>0.0.10-test</MicrosoftMLTensorFlowTestModelsVersion>
<MicrosoftMLOnnxTestModelsVersion>0.0.4-test</MicrosoftMLOnnxTestModelsVersion>
</PropertyGroup>

</Project>
14 changes: 7 additions & 7 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -688,11 +688,11 @@ var catColumns = data.GetColumn<string[]>(mlContext, "CategoricalFeatures").Take
// Build several alternative featurization pipelines.
var pipeline =
// Convert each categorical feature into one-hot encoding independently.
mlContext.Transforms.Categorical.OneHotEncoding("CategoricalFeatures", "CategoricalOneHot")
mlContext.Transforms.Categorical.OneHotEncoding("CategoricalOneHot", "CategoricalFeatures")
// Convert all categorical features into indices, and build a 'word bag' of these.
.Append(mlContext.Transforms.Categorical.OneHotEncoding("CategoricalFeatures", "CategoricalBag", CategoricalTransform.OutputKind.Bag))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("CategoricalBag", "CategoricalFeatures", CategoricalTransform.OutputKind.Bag))
// One-hot encode the workclass column, then drop all the categories that have fewer than 10 instances in the train set.
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Workclass", "WorkclassOneHot"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("WorkclassOneHot", "Workclass"))
.Append(mlContext.Transforms.FeatureSelection.CountFeatureSelectingEstimator("WorkclassOneHot", "WorkclassOneHotTrimmed", count: 10));

// Let's train our pipeline, and then apply it to the same data.
Expand Down Expand Up @@ -825,20 +825,20 @@ var pipeline =
.Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

// Split the data 90:10 into train and test sets, train and evaluate.
var (trainData, testData) = mlContext.MulticlassClassification.TrainTestSplit(data, testFraction: 0.1);
var split = mlContext.MulticlassClassification.TrainTestSplit(data, testFraction: 0.1);

// Train the model.
var model = pipeline.Fit(trainData);
var model = pipeline.Fit(split.TrainSet);
// Compute quality metrics on the test set.
var metrics = mlContext.MulticlassClassification.Evaluate(model.Transform(testData));
var metrics = mlContext.MulticlassClassification.Evaluate(model.Transform(split.TestSet));
Console.WriteLine(metrics.AccuracyMicro);

// Now run the 5-fold cross-validation experiment, using the same pipeline.
var cvResults = mlContext.MulticlassClassification.CrossValidate(data, pipeline, numFolds: 5);

// The results object is an array of 5 elements. For each of the 5 folds, we have metrics, model and scored test data.
// Let's compute the average micro-accuracy.
var microAccuracies = cvResults.Select(r => r.metrics.AccuracyMicro);
var microAccuracies = cvResults.Select(r => r.Metrics.AccuracyMicro);
Console.WriteLine(microAccuracies.Average());

```
Expand Down
6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public static void Calibration()
var data = reader.Read(dataFile);

// Split the dataset into two parts: one used for training, the other to train the calibrator
var (trainData, calibratorTrainingData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

// Featurize the text column through the FeaturizeText API.
// Then append the StochasticDualCoordinateAscentBinary binary classifier, setting the "Label" column as the label of the dataset, and
Expand All @@ -56,12 +56,12 @@ public static void Calibration()
loss: new HingeLoss())); // By specifying loss: new HingeLoss(), StochasticDualCoordinateAscent will train a support vector machine (SVM).

// Fit the pipeline, and get a transformer that knows how to score new data.
var transformer = pipeline.Fit(trainData);
var transformer = pipeline.Fit(split.TrainSet);
IPredictor model = transformer.LastTransformer.Model;

// Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample
// bears positive sentiment. This estimate is relative to the numbers obtained.
var scoredData = transformer.Transform(calibratorTrainingData);
var scoredData = transformer.Transform(split.TestSet);
var scoredDataPreview = scoredData.Preview();

PrintRowViewValues(scoredDataPreview);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Data;
using Microsoft.ML.TimeSeries;
using Microsoft.ML.TimeSeriesProcessing;
using Microsoft.ML.Transforms.TimeSeries;

namespace Microsoft.ML.Samples.Dynamic
{
Expand Down Expand Up @@ -54,16 +52,9 @@ public static void IidChangePointDetectorTransform()
// Setup IidChangePointDetector arguments
string outputColumnName = nameof(ChangePointPrediction.Prediction);
string inputColumnName = nameof(IidChangePointData.Value);
var args = new IidChangePointDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score.
};

// The transformed data.
var transformedData = new IidChangePointEstimator(ml, args).Fit(dataView).Transform(dataView);
var transformedData = ml.Transforms.IidChangePointEstimator(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of ChangePointPrediction.
var predictionColumn = ml.CreateEnumerable<ChangePointPrediction>(transformedData, reuseRowObject: false);
Expand Down Expand Up @@ -119,16 +110,9 @@ public static void IidChangePointDetectorPrediction()
// Setup IidChangePointDetector arguments
string outputColumnName = nameof(ChangePointPrediction.Prediction);
string inputColumnName = nameof(IidChangePointData.Value);
var args = new IidChangePointDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score.
};

// Time Series model.
ITransformer model = new IidChangePointEstimator(ml, args).Fit(dataView);
ITransformer model = ml.Transforms.IidChangePointEstimator(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView);

// Create a time series prediction engine from the model.
var engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Data;
using Microsoft.ML.TimeSeries;
using Microsoft.ML.TimeSeriesProcessing;
using Microsoft.ML.Transforms.TimeSeries;

namespace Microsoft.ML.Samples.Dynamic
{
Expand Down Expand Up @@ -51,16 +49,9 @@ public static void IidSpikeDetectorTransform()
// Setup IidSpikeDetector arguments
string outputColumnName = nameof(IidSpikePrediction.Prediction);
string inputColumnName = nameof(IidSpikeData.Value);
var args = new IidSpikeDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
};

// The transformed data.
var transformedData = new IidSpikeEstimator(ml, args).Fit(dataView).Transform(dataView);
var transformedData = ml.Transforms.IidSpikeEstimator(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of IidSpikePrediction.
var predictionColumn = ml.CreateEnumerable<IidSpikePrediction>(transformedData, reuseRowObject: false);
Expand Down Expand Up @@ -108,16 +99,8 @@ public static void IidSpikeDetectorPrediction()
// Setup IidSpikeDetector arguments
string outputColumnName = nameof(IidSpikePrediction.Prediction);
string inputColumnName = nameof(IidSpikeData.Value);
var args = new IidSpikeDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
};

// The transformed model.
ITransformer model = new IidSpikeEstimator(ml, args).Fit(dataView);
ITransformer model = ml.Transforms.IidChangePointEstimator(outputColumnName, inputColumnName, 95, Size).Fit(dataView);

// Create a time series prediction engine from the model.
var engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public static void LogisticRegression()

IDataView data = reader.Read(dataFilePath);

var (trainData, testData) = ml.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
var split = ml.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);

var pipeline = ml.Transforms.Concatenate("Text", "workclass", "education", "marital-status",
"relationship", "ethnicity", "sex", "native-country")
Expand All @@ -66,9 +66,9 @@ public static void LogisticRegression()
"education-num", "capital-gain", "capital-loss", "hours-per-week"))
.Append(ml.BinaryClassification.Trainers.LogisticRegression());

var model = pipeline.Fit(trainData);
var model = pipeline.Fit(split.TrainSet);

var dataWithPredictions = model.Transform(testData);
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = ml.BinaryClassification.Evaluate(dataWithPredictions);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
using System;
using System.Linq;
using Microsoft.Data.DataView;
using Microsoft.ML.Learners;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML.SamplesUtils;
using Microsoft.ML.Trainers.HalLearners;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using System;
using System.Linq;
using Microsoft.ML.Learners;
using Microsoft.ML.Trainers;

namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance
{
Expand Down
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public static void SDCA_BinaryClassification()
// Step 3: Run Cross-Validation on this pipeline.
var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment");

var accuracies = cvResults.Select(r => r.metrics.Accuracy);
var accuracies = cvResults.Select(r => r.Metrics.Accuracy);
Console.WriteLine(accuracies.Average());

// If we wanted to specify more advanced parameters for the algorithm,
Expand All @@ -70,7 +70,7 @@ public static void SDCA_BinaryClassification()

// Run Cross-Validation on this second pipeline.
var cvResults_advancedPipeline = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment", numFolds: 3);
accuracies = cvResults_advancedPipeline.Select(r => r.metrics.Accuracy);
accuracies = cvResults_advancedPipeline.Select(r => r.Metrics.Accuracy);
Console.WriteLine(accuracies.Average());

}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Data;
using Microsoft.ML.TimeSeries;
using Microsoft.ML.TimeSeriesProcessing;
using Microsoft.ML.Transforms.TimeSeries;

namespace Microsoft.ML.Samples.Dynamic
{
Expand Down Expand Up @@ -49,19 +47,9 @@ public static void SsaChangePointDetectorTransform()
// Setup SsaChangePointDetector arguments
var inputColumnName = nameof(SsaChangePointData.Value);
var outputColumnName = nameof(ChangePointPrediction.Prediction);
var args = new SsaChangePointDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series."

};

// The transformed data.
var transformedData = new SsaChangePointEstimator(ml, args).Fit(dataView).Transform(dataView);
var transformedData = ml.Transforms.SsaChangePointEstimator(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView);

// Getting the data of the newly created column as an IEnumerable of ChangePointPrediction.
var predictionColumn = ml.CreateEnumerable<ChangePointPrediction>(transformedData, reuseRowObject: false);
Expand Down Expand Up @@ -120,19 +108,9 @@ public static void SsaChangePointDetectorPrediction()
// Setup SsaChangePointDetector arguments
var inputColumnName = nameof(SsaChangePointData.Value);
var outputColumnName = nameof(ChangePointPrediction.Prediction);
var args = new SsaChangePointDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series."

};

// Train the change point detector.
ITransformer model = new SsaChangePointEstimator(ml, args).Fit(dataView);
ITransformer model = ml.Transforms.SsaChangePointEstimator(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView);

// Create a prediction engine from the model for feeding new data.
var engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);
Expand Down
Loading