Skip to content

Better names for calibrated linear classification models #3034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Mar 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public static void Example()
.Append(ml.Transforms.Text.FeaturizeText("TextFeatures", "Text"))
.Append(ml.Transforms.Concatenate("Features", "TextFeatures", "age", "fnlwgt",
"education-num", "capital-gain", "capital-loss", "hours-per-week"))
.Append(ml.BinaryClassification.Trainers.LogisticRegression());
.Append(ml.BinaryClassification.Trainers.LbfgsLogisticRegression());
Copy link
Contributor

@rogancarr rogancarr Mar 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you feel about making the other calibrated linear trainers, like SDCA, into XyzLogisticRegression()? #Resolved

Copy link
Contributor

@rogancarr rogancarr Mar 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can always add that at a later time. #Resolved


var model = pipeline.Fit(split.TrainSet);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static void Example()
// Then append a logistic regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ public static void Example()
// If we wanted to specify more advanced parameters for the algorithm,
// we could do so by tweaking the 'advancedSetting'.
var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(mlContext.BinaryClassification.Trainers.SdcaCalibrated(
new SdcaCalibratedBinaryTrainer.Options {
.Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
new SdcaLogisticRegressionBinaryTrainer.Options {
LabelColumnName = "Sentiment",
FeatureColumnName = "Features",
ConvergenceTolerance = 0.01f, // The learning rate for adjusting bias from being regularized
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static void Example()
var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new SdcaCalibratedBinaryTrainer.Options()
var options = new SdcaLogisticRegressionBinaryTrainer.Options()
{
// Make the convergence tolerance tighter.
ConvergenceTolerance = 0.05f,
Expand All @@ -33,7 +33,7 @@ public static void Example()
};

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.SdcaCalibrated(options);
var pipeline = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(options);

// Fit this pipeline to the training data.
var model = pipeline.Fit(trainTestData.TrainSet);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static void Example()
// Leave out 10% of data for testing.
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgd(labelColumnName: "IsOver50K", numberOfIterations: 25);
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(labelColumnName: "IsOver50K", numberOfIterations: 25);
var model = pipeline.Fit(split.TrainSet);

// Evaluate how the model is doing on the test data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ public static void Example()
// Leave out 10% of data for testing.
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgd(
new ML.Trainers.SymbolicSgdTrainer.Options()
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(
new ML.Trainers.SymbolicSgdLogisticRegressionBinaryTrainer.Options()
{
LearningRate = 0.2f,
NumberOfIterations = 10,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public static void Example()
// Convert the string labels into key types.
mlContext.Transforms.Conversion.MapValueToKey("Label")
// Apply StochasticDualCoordinateAscent multiclass trainer.
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated());
.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy());

// Split the data into training and test sets. Only training set is used in fitting
// the created pipeline. Metrics are computed on the test.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static void Example()
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define the trainer.
var pipeline = mlContext.Regression.Trainers.PoissonRegression();
var pipeline = mlContext.Regression.Trainers.LbfgsPoissonRegression();

// Train the model.
var model = pipeline.Fit(trainingData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static void Example()
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

// Define trainer options.
var options = new PoissonRegressionTrainer.Options
var options = new LbfgsPoissonRegressionTrainer.Options
{
// Reduce optimization tolerance to speed up training at the cost of accuracy.
OptmizationTolerance = 1e-4f,
Expand All @@ -33,7 +33,7 @@ public static void Example()
};

// Define the trainer.
var pipeline = mlContext.Regression.Trainers.PoissonRegression(options);
var pipeline = mlContext.Regression.Trainers.LbfgsPoissonRegression(options);

// Train the model.
var model = pipeline.Fit(trainingData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ public Arguments()
// non-default column names. Unfortunately no method of resolving this temporarily strikes me as being any
// less laborious than the proper fix, which is that this "meta" component should itself be a trainer
// estimator, as opposed to a regular trainer.
var trainerEstimator = new LbfgsMaximumEntropyTrainer(env, LabelColumnName, FeatureColumnName);
return TrainerUtils.MapTrainerEstimatorToTrainer<LbfgsMaximumEntropyTrainer,
var trainerEstimator = new LbfgsMaximumEntropyMulticlassTrainer(env, LabelColumnName, FeatureColumnName);
return TrainerUtils.MapTrainerEstimatorToTrainer<LbfgsMaximumEntropyMulticlassTrainer,
MaximumEntropyModelParameters, MaximumEntropyModelParameters>(env, trainerEstimator);
})
};
Expand Down
26 changes: 13 additions & 13 deletions src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
namespace Microsoft.ML
{
/// <summary>
/// The trainer catalog extensions for the <see cref="OlsTrainer"/> and <see cref="SymbolicSgdTrainer"/>.
/// The trainer catalog extensions for the <see cref="OlsTrainer"/> and <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>.
/// </summary>
public static class MklComponentsCatalog
{
Expand Down Expand Up @@ -69,9 +69,9 @@ public static OlsTrainer Ols(
}

/// <summary>
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdTrainer"/>.
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>.
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
/// The <see cref="SymbolicSgdTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
/// The <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="labelColumnName">The name of the label column.</param>
Expand All @@ -84,45 +84,45 @@ public static OlsTrainer Ols(
/// ]]>
/// </format>
/// </example>
public static SymbolicSgdTrainer SymbolicSgd(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
public static SymbolicSgdLogisticRegressionBinaryTrainer SymbolicSgdLogisticRegression(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
int numberOfIterations = SymbolicSgdTrainer.Defaults.NumberOfIterations)
int numberOfIterations = SymbolicSgdLogisticRegressionBinaryTrainer.Defaults.NumberOfIterations)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);

var options = new SymbolicSgdTrainer.Options
var options = new SymbolicSgdLogisticRegressionBinaryTrainer.Options
{
LabelColumnName = labelColumnName,
FeatureColumnName = featureColumnName,
};

return new SymbolicSgdTrainer(env, options);
return new SymbolicSgdLogisticRegressionBinaryTrainer(env, options);
}

/// <summary>
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdTrainer"/>.
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>.
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
/// The <see cref="SymbolicSgdTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
/// The <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/> parallelizes SGD using <a href="https://www.microsoft.com/en-us/research/project/project-parade/#!symbolic-execution">symbolic execution</a>.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="options">Algorithm advanced options. See <see cref="SymbolicSgdTrainer.Options"/>.</param>
/// <param name="options">Algorithm advanced options. See <see cref="SymbolicSgdLogisticRegressionBinaryTrainer.Options"/>.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SymbolicStochasticGradientDescent](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs)]
/// ]]>
/// </format>
/// </example>
public static SymbolicSgdTrainer SymbolicSgd(
public static SymbolicSgdLogisticRegressionBinaryTrainer SymbolicSgdLogisticRegression(
this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
SymbolicSgdTrainer.Options options)
SymbolicSgdLogisticRegressionBinaryTrainer.Options options)
{
Contracts.CheckValue(catalog, nameof(catalog));
Contracts.CheckValue(options, nameof(options));
var env = CatalogUtils.GetEnvironment(catalog);
return new SymbolicSgdTrainer(env, options);
return new SymbolicSgdLogisticRegressionBinaryTrainer(env, options);
}

/// <summary>
Expand Down
32 changes: 16 additions & 16 deletions src/Microsoft.ML.Mkl.Components/SymSgdClassificationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;

[assembly: LoadableClass(typeof(SymbolicSgdTrainer), typeof(SymbolicSgdTrainer.Options),
[assembly: LoadableClass(typeof(SymbolicSgdLogisticRegressionBinaryTrainer), typeof(SymbolicSgdLogisticRegressionBinaryTrainer.Options),
Copy link
Contributor

@rogancarr rogancarr Mar 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SymbolicSgdLogisticRegressionBinaryTrainer [](start = 32, length = 42)

LogisticRegressionBinaryTrainer => LbfgsLogisticRegressionTrainer. Shall SymbolicSgdBinaryTrainer => SymbolicSgdLogisticRegressionTrainer instead of SymbolicSgdLogisticRegressionBinaryTrainer? Right now, the namings don't line up. #Resolved

Copy link
Member

@abgoswam abgoswam Mar 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rogancarr, could you cross-check? To me the class names do seem to match up fine, with both having the word Binary in the name:

  • LbfgsLogisticRegressionBinaryTrainer
  • SymbolicSgdLogisticRegressionBinaryTrainer

In reply to: 268859710 [](ancestors = 268859710)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. Our conclusion was having LR and Binary for trainer classes and only LR for APIs.


In reply to: 268865148 [](ancestors = 268865148,268859710)

Copy link
Contributor

@rogancarr rogancarr Mar 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. I was mixing up catalogs & classes. #Resolved

new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) },
SymbolicSgdTrainer.UserNameValue,
SymbolicSgdTrainer.LoadNameValue,
SymbolicSgdTrainer.ShortName)]
SymbolicSgdLogisticRegressionBinaryTrainer.UserNameValue,
SymbolicSgdLogisticRegressionBinaryTrainer.LoadNameValue,
SymbolicSgdLogisticRegressionBinaryTrainer.ShortName)]

[assembly: LoadableClass(typeof(void), typeof(SymbolicSgdTrainer), null, typeof(SignatureEntryPointModule), SymbolicSgdTrainer.LoadNameValue)]
[assembly: LoadableClass(typeof(void), typeof(SymbolicSgdLogisticRegressionBinaryTrainer), null, typeof(SignatureEntryPointModule), SymbolicSgdLogisticRegressionBinaryTrainer.LoadNameValue)]

namespace Microsoft.ML.Trainers
{
using TPredictor = CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>;

/// <include file='doc.xml' path='doc/members/member[@name="SymSGD"]/*' />
public sealed class SymbolicSgdTrainer : TrainerEstimatorBase<BinaryPredictionTransformer<TPredictor>, TPredictor>
public sealed class SymbolicSgdLogisticRegressionBinaryTrainer : TrainerEstimatorBase<BinaryPredictionTransformer<TPredictor>, TPredictor>
{
internal const string LoadNameValue = "SymbolicSGD";
internal const string UserNameValue = "Symbolic SGD (binary)";
Expand Down Expand Up @@ -195,9 +195,9 @@ private protected override TPredictor TrainModelCore(TrainContext context)
private protected override PredictionKind PredictionKind => PredictionKind.BinaryClassification;

/// <summary>
/// Initializes a new instance of <see cref="SymbolicSgdTrainer"/>
/// Initializes a new instance of <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/>
/// </summary>
internal SymbolicSgdTrainer(IHostEnvironment env, Options options)
internal SymbolicSgdLogisticRegressionBinaryTrainer(IHostEnvironment env, Options options)
: base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(options.FeatureColumnName),
TrainerUtils.MakeBoolScalarLabel(options.LabelColumnName))
{
Expand All @@ -223,7 +223,7 @@ private protected override BinaryPredictionTransformer<TPredictor> MakeTransform
=> new BinaryPredictionTransformer<TPredictor>(Host, model, trainSchema, FeatureColumn.Name);

/// <summary>
/// Continues the training of <see cref="SymbolicSgdTrainer"/> using an already trained <paramref name="modelParameters"/>
/// Continues the training of <see cref="SymbolicSgdLogisticRegressionBinaryTrainer"/> using an already trained <paramref name="modelParameters"/>
/// and returns a <see cref="BinaryPredictionTransformer"/>.
/// </summary>
public BinaryPredictionTransformer<TPredictor> Fit(IDataView trainData, LinearModelParameters modelParameters)
Expand All @@ -241,8 +241,8 @@ private protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape

[TlcModule.EntryPoint(Name = "Trainers.SymSgdBinaryClassifier",
Desc = "Train a symbolic SGD.",
UserName = SymbolicSgdTrainer.UserNameValue,
ShortName = SymbolicSgdTrainer.ShortName)]
UserName = SymbolicSgdLogisticRegressionBinaryTrainer.UserNameValue,
ShortName = SymbolicSgdLogisticRegressionBinaryTrainer.ShortName)]
internal static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnvironment env, Options options)
{
Contracts.CheckValue(env, nameof(env));
Expand All @@ -251,7 +251,7 @@ internal static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnviro
EntryPointUtils.CheckInputArgs(host, options);

return TrainerEntryPointsUtils.Train<Options, CommonOutputs.BinaryClassificationOutput>(host, options,
() => new SymbolicSgdTrainer(host, options),
() => new SymbolicSgdLogisticRegressionBinaryTrainer(host, options),
() => TrainerEntryPointsUtils.FindColumn(host, options.TrainingData.Schema, options.LabelColumnName));
}

Expand Down Expand Up @@ -324,7 +324,7 @@ public void Free()
// giving an array, we are at _storage[_storageIndex][_indexInCurArray].
private int _indexInCurArray;
// This is used to access AccelMemBudget, AccelChunkSize and UsedMemory
private readonly SymbolicSgdTrainer _trainer;
private readonly SymbolicSgdLogisticRegressionBinaryTrainer _trainer;

private readonly IChannel _ch;

Expand All @@ -336,7 +336,7 @@ public void Free()
/// </summary>
/// <param name="trainer"></param>
/// <param name="ch"></param>
public ArrayManager(SymbolicSgdTrainer trainer, IChannel ch)
public ArrayManager(SymbolicSgdLogisticRegressionBinaryTrainer trainer, IChannel ch)
{
_storage = new List<VeryLongArray>();
// Setting the default value to 2^17.
Expand Down Expand Up @@ -500,7 +500,7 @@ private sealed class InputDataManager : IDisposable
// This is the index to go over the instances in instanceProperties
private int _instanceIndex;
// This is used to access AccelMemBudget, AccelChunkSize and UsedMemory
private readonly SymbolicSgdTrainer _trainer;
private readonly SymbolicSgdLogisticRegressionBinaryTrainer _trainer;
private readonly IChannel _ch;

// Whether memorySize was big enough to load the entire instances into the buffer
Expand All @@ -511,7 +511,7 @@ private sealed class InputDataManager : IDisposable
// Tells if we have gone through the dataset entirely.
public bool FinishedTheLoad => !_cursorMoveNext;

public InputDataManager(SymbolicSgdTrainer trainer, FloatLabelCursor.Factory cursorFactory, IChannel ch)
public InputDataManager(SymbolicSgdLogisticRegressionBinaryTrainer trainer, FloatLabelCursor.Factory cursorFactory, IChannel ch)
{
_instIndices = new ArrayManager<int>(trainer, ch);
_instValues = new ArrayManager<float>(trainer, ch);
Expand Down
Loading