Skip to content

Commit 8730c87

Browse files
authored
Better names for calibrated linear classification models (#3034)
1 parent 0a78320 commit 8730c87

File tree

53 files changed

+307
-308
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+307
-308
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/LogisticRegression.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public static void Example()
6363
.Append(ml.Transforms.Text.FeaturizeText("TextFeatures", "Text"))
6464
.Append(ml.Transforms.Concatenate("Features", "TextFeatures", "age", "fnlwgt",
6565
"education-num", "capital-gain", "capital-loss", "hours-per-week"))
66-
.Append(ml.BinaryClassification.Trainers.LogisticRegression());
66+
.Append(ml.BinaryClassification.Trainers.LbfgsLogisticRegression());
6767

6868
var model = pipeline.Fit(split.TrainSet);
6969

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public static void Example()
2222
// Then append a logistic regression trainer.
2323
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
2424
.Append(mlContext.Transforms.Normalize("Features"))
25-
.Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
25+
.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
2626
labelColumnName: labelName, featureColumnName: "Features"));
2727
var model = pipeline.Fit(data);
2828

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ public static void Example()
6060
// If we wanted to specify more advanced parameters for the algorithm,
6161
// we could do so by tweaking the 'advancedSetting'.
6262
var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
63-
.Append(mlContext.BinaryClassification.Trainers.SdcaCalibrated(
64-
new SdcaCalibratedBinaryTrainer.Options {
63+
.Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
64+
new SdcaLogisticRegressionBinaryTrainer.Options {
6565
LabelColumnName = "Sentiment",
6666
FeatureColumnName = "Features",
6767
ConvergenceTolerance = 0.01f, // The tolerance used as the convergence criterion for training

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentWithOptions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public static void Example()
2222
var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
2323

2424
// Define the trainer options.
25-
var options = new SdcaCalibratedBinaryTrainer.Options()
25+
var options = new SdcaLogisticRegressionBinaryTrainer.Options()
2626
{
2727
// Make the convergence tolerance tighter.
2828
ConvergenceTolerance = 0.05f,
@@ -33,7 +33,7 @@ public static void Example()
3333
};
3434

3535
// Create data training pipeline.
36-
var pipeline = mlContext.BinaryClassification.Trainers.SdcaCalibrated(options);
36+
var pipeline = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(options);
3737

3838
// Fit this pipeline to the training data.
3939
var model = pipeline.Fit(trainTestData.TrainSet);

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static void Example()
1919
// Leave out 10% of data for testing.
2020
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
2121
// Create data training pipeline.
22-
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgd(labelColumnName: "IsOver50K", numberOfIterations: 25);
22+
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(labelColumnName: "IsOver50K", numberOfIterations: 25);
2323
var model = pipeline.Fit(split.TrainSet);
2424

2525
// Evaluate how the model is doing on the test data.

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ public static void Example()
1919
// Leave out 10% of data for testing.
2020
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
2121
// Create data training pipeline
22-
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgd(
23-
new ML.Trainers.SymbolicSgdTrainer.Options()
22+
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(
23+
new ML.Trainers.SymbolicSgdLogisticRegressionBinaryTrainer.Options()
2424
{
2525
LearningRate = 0.2f,
2626
NumberOfIterations = 10,

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public static void Example()
3030
// Convert the string labels into key types.
3131
mlContext.Transforms.Conversion.MapValueToKey("Label")
3232
// Apply StochasticDualCoordinateAscent multiclass trainer.
33-
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated());
33+
.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy());
3434

3535
// Split the data into training and test sets. Only training set is used in fitting
3636
// the created pipeline. Metrics are computed on the test.

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegression.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public static void Example()
2121
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
2222

2323
// Define the trainer.
24-
var pipeline = mlContext.Regression.Trainers.PoissonRegression();
24+
var pipeline = mlContext.Regression.Trainers.LbfgsPoissonRegression();
2525

2626
// Train the model.
2727
var model = pipeline.Fit(trainingData);

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegressionWithOptions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public static void Example()
2222
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
2323

2424
// Define trainer options.
25-
var options = new PoissonRegressionTrainer.Options
25+
var options = new LbfgsPoissonRegressionTrainer.Options
2626
{
2727
// Reduce optimization tolerance to speed up training at the cost of accuracy.
2828
OptmizationTolerance = 1e-4f,
@@ -33,7 +33,7 @@ public static void Example()
3333
};
3434

3535
// Define the trainer.
36-
var pipeline = mlContext.Regression.Trainers.PoissonRegression(options);
36+
var pipeline = mlContext.Regression.Trainers.LbfgsPoissonRegression(options);
3737

3838
// Train the model.
3939
var model = pipeline.Fit(trainingData);

src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ public Arguments()
6464
// non-default column names. Unfortunately no method of resolving this temporarily strikes me as being any
6565
// less laborious than the proper fix, which is that this "meta" component should itself be a trainer
6666
// estimator, as opposed to a regular trainer.
67-
var trainerEstimator = new LbfgsMaximumEntropyTrainer(env, LabelColumnName, FeatureColumnName);
68-
return TrainerUtils.MapTrainerEstimatorToTrainer<LbfgsMaximumEntropyTrainer,
67+
var trainerEstimator = new LbfgsMaximumEntropyMulticlassTrainer(env, LabelColumnName, FeatureColumnName);
68+
return TrainerUtils.MapTrainerEstimatorToTrainer<LbfgsMaximumEntropyMulticlassTrainer,
6969
MaximumEntropyModelParameters, MaximumEntropyModelParameters>(env, trainerEstimator);
7070
})
7171
};

src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
namespace Microsoft.ML
1111
{
1212
/// <summary>
13-
/// The trainer catalog extensions for the <see cref="OlsTrainer"/> and <see cref="SymbolicSgdTrainer"/>.
13+
/// The trainer catalog extensions for the <see cref="OlsTrainer"/> and <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>.
1414
/// </summary>
1515
public static class MklComponentsCatalog
1616
{
@@ -69,9 +69,9 @@ public static OlsTrainer Ols(
6969
}
7070

7171
/// <summary>
72-
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdTrainer"/>.
72+
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>.
7373
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
74-
/// The <see cref="SymbolicSgdTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
74+
/// The <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
7575
/// </summary>
7676
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
7777
/// <param name="labelColumnName">The name of the label column.</param>
@@ -84,45 +84,45 @@ public static OlsTrainer Ols(
8484
/// ]]>
8585
/// </format>
8686
/// </example>
87-
public static SymbolicSgdTrainer SymbolicSgd(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
87+
public static SymbolicSgdLogisticRegressionBinaryTrainer SymbolicSgdLogisticRegression(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
8888
string labelColumnName = DefaultColumnNames.Label,
8989
string featureColumnName = DefaultColumnNames.Features,
90-
int numberOfIterations = SymbolicSgdTrainer.Defaults.NumberOfIterations)
90+
int numberOfIterations = SymbolicSgdLogisticRegressionBinaryTrainer.Defaults.NumberOfIterations)
9191
{
9292
Contracts.CheckValue(catalog, nameof(catalog));
9393
var env = CatalogUtils.GetEnvironment(catalog);
9494

95-
var options = new SymbolicSgdTrainer.Options
95+
var options = new SymbolicSgdLogisticRegressionBinaryTrainer.Options
9696
{
9797
LabelColumnName = labelColumnName,
9898
FeatureColumnName = featureColumnName,
9999
};
100100

101-
return new SymbolicSgdTrainer(env, options);
101+
return new SymbolicSgdLogisticRegressionBinaryTrainer(env, options);
102102
}
103103

104104
/// <summary>
105-
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdTrainer"/>.
105+
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>.
106106
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
107-
/// The <see cref="SymbolicSgdTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
107+
/// The <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
108108
/// </summary>
109109
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
110-
/// <param name="options">Algorithm advanced options. See <see cref="SymbolicSgdTrainer.Options"/>.</param>
110+
/// <param name="options">Algorithm advanced options. See <see cref="SymbolicSgdLogisticRegressionBinaryTrainer.Options"/>.</param>
111111
/// <example>
112112
/// <format type="text/markdown">
113113
/// <![CDATA[
114114
/// [!code-csharp[SymbolicStochasticGradientDescent](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs)]
115115
/// ]]>
116116
/// </format>
117117
/// </example>
118-
public static SymbolicSgdTrainer SymbolicSgd(
118+
public static SymbolicSgdLogisticRegressionBinaryTrainer SymbolicSgdLogisticRegression(
119119
this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
120-
SymbolicSgdTrainer.Options options)
120+
SymbolicSgdLogisticRegressionBinaryTrainer.Options options)
121121
{
122122
Contracts.CheckValue(catalog, nameof(catalog));
123123
Contracts.CheckValue(options, nameof(options));
124124
var env = CatalogUtils.GetEnvironment(catalog);
125-
return new SymbolicSgdTrainer(env, options);
125+
return new SymbolicSgdLogisticRegressionBinaryTrainer(env, options);
126126
}
127127

128128
/// <summary>

src/Microsoft.ML.Mkl.Components/SymSgdClassificationTrainer.cs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,20 @@
1919
using Microsoft.ML.Trainers;
2020
using Microsoft.ML.Transforms;
2121

22-
[assembly: LoadableClass(typeof(SymbolicSgdTrainer), typeof(SymbolicSgdTrainer.Options),
22+
[assembly: LoadableClass(typeof(SymbolicSgdLogisticRegressionBinaryTrainer), typeof(SymbolicSgdLogisticRegressionBinaryTrainer.Options),
2323
new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) },
24-
SymbolicSgdTrainer.UserNameValue,
25-
SymbolicSgdTrainer.LoadNameValue,
26-
SymbolicSgdTrainer.ShortName)]
24+
SymbolicSgdLogisticRegressionBinaryTrainer.UserNameValue,
25+
SymbolicSgdLogisticRegressionBinaryTrainer.LoadNameValue,
26+
SymbolicSgdLogisticRegressionBinaryTrainer.ShortName)]
2727

28-
[assembly: LoadableClass(typeof(void), typeof(SymbolicSgdTrainer), null, typeof(SignatureEntryPointModule), SymbolicSgdTrainer.LoadNameValue)]
28+
[assembly: LoadableClass(typeof(void), typeof(SymbolicSgdLogisticRegressionBinaryTrainer), null, typeof(SignatureEntryPointModule), SymbolicSgdLogisticRegressionBinaryTrainer.LoadNameValue)]
2929

3030
namespace Microsoft.ML.Trainers
3131
{
3232
using TPredictor = CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>;
3333

3434
/// <include file='doc.xml' path='doc/members/member[@name="SymSGD"]/*' />
35-
public sealed class SymbolicSgdTrainer : TrainerEstimatorBase<BinaryPredictionTransformer<TPredictor>, TPredictor>
35+
public sealed class SymbolicSgdLogisticRegressionBinaryTrainer : TrainerEstimatorBase<BinaryPredictionTransformer<TPredictor>, TPredictor>
3636
{
3737
internal const string LoadNameValue = "SymbolicSGD";
3838
internal const string UserNameValue = "Symbolic SGD (binary)";
@@ -195,9 +195,9 @@ private protected override TPredictor TrainModelCore(TrainContext context)
195195
private protected override PredictionKind PredictionKind => PredictionKind.BinaryClassification;
196196

197197
/// <summary>
198-
/// Initializes a new instance of <see cref="SymbolicSgdTrainer"/>
198+
/// Initializes a new instance of <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>
199199
/// </summary>
200-
internal SymbolicSgdTrainer(IHostEnvironment env, Options options)
200+
internal SymbolicSgdLogisticRegressionBinaryTrainer(IHostEnvironment env, Options options)
201201
: base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(options.FeatureColumnName),
202202
TrainerUtils.MakeBoolScalarLabel(options.LabelColumnName))
203203
{
@@ -223,7 +223,7 @@ private protected override BinaryPredictionTransformer<TPredictor> MakeTransform
223223
=> new BinaryPredictionTransformer<TPredictor>(Host, model, trainSchema, FeatureColumn.Name);
224224

225225
/// <summary>
226-
/// Continues the training of <see cref="SymbolicSgdTrainer"/> using an already trained <paramref name="modelParameters"/>
226+
/// Continues the training of <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/> using an already trained <paramref name="modelParameters"/>
227227
/// and returns a <see cref="BinaryPredictionTransformer"/>.
228228
/// </summary>
229229
public BinaryPredictionTransformer<TPredictor> Fit(IDataView trainData, LinearModelParameters modelParameters)
@@ -241,8 +241,8 @@ private protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape
241241

242242
[TlcModule.EntryPoint(Name = "Trainers.SymSgdBinaryClassifier",
243243
Desc = "Train a symbolic SGD.",
244-
UserName = SymbolicSgdTrainer.UserNameValue,
245-
ShortName = SymbolicSgdTrainer.ShortName)]
244+
UserName = SymbolicSgdLogisticRegressionBinaryTrainer.UserNameValue,
245+
ShortName = SymbolicSgdLogisticRegressionBinaryTrainer.ShortName)]
246246
internal static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnvironment env, Options options)
247247
{
248248
Contracts.CheckValue(env, nameof(env));
@@ -251,7 +251,7 @@ internal static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnviro
251251
EntryPointUtils.CheckInputArgs(host, options);
252252

253253
return TrainerEntryPointsUtils.Train<Options, CommonOutputs.BinaryClassificationOutput>(host, options,
254-
() => new SymbolicSgdTrainer(host, options),
254+
() => new SymbolicSgdLogisticRegressionBinaryTrainer(host, options),
255255
() => TrainerEntryPointsUtils.FindColumn(host, options.TrainingData.Schema, options.LabelColumnName));
256256
}
257257

@@ -324,7 +324,7 @@ public void Free()
324324
// giving an array, we are at _storage[_storageIndex][_indexInCurArray].
325325
private int _indexInCurArray;
326326
// This is used to access AccelMemBudget, AccelChunkSize and UsedMemory
327-
private readonly SymbolicSgdTrainer _trainer;
327+
private readonly SymbolicSgdLogisticRegressionBinaryTrainer _trainer;
328328

329329
private readonly IChannel _ch;
330330

@@ -336,7 +336,7 @@ public void Free()
336336
/// </summary>
337337
/// <param name="trainer"></param>
338338
/// <param name="ch"></param>
339-
public ArrayManager(SymbolicSgdTrainer trainer, IChannel ch)
339+
public ArrayManager(SymbolicSgdLogisticRegressionBinaryTrainer trainer, IChannel ch)
340340
{
341341
_storage = new List<VeryLongArray>();
342342
// Setting the default value to 2^17.
@@ -500,7 +500,7 @@ private sealed class InputDataManager : IDisposable
500500
// This is the index to go over the instances in instanceProperties
501501
private int _instanceIndex;
502502
// This is used to access AccelMemBudget, AccelChunkSize and UsedMemory
503-
private readonly SymbolicSgdTrainer _trainer;
503+
private readonly SymbolicSgdLogisticRegressionBinaryTrainer _trainer;
504504
private readonly IChannel _ch;
505505

506506
// Whether memorySize was big enough to load the entire instances into the buffer
@@ -511,7 +511,7 @@ private sealed class InputDataManager : IDisposable
511511
// Tells if we have gone through the dataset entirely.
512512
public bool FinishedTheLoad => !_cursorMoveNext;
513513

514-
public InputDataManager(SymbolicSgdTrainer trainer, FloatLabelCursor.Factory cursorFactory, IChannel ch)
514+
public InputDataManager(SymbolicSgdLogisticRegressionBinaryTrainer trainer, FloatLabelCursor.Factory cursorFactory, IChannel ch)
515515
{
516516
_instIndices = new ArrayManager<int>(trainer, ch);
517517
_instValues = new ArrayManager<float>(trainer, ch);

0 commit comments

Comments
 (0)