-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Multiple feature columns in FFM #2205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -257,7 +257,7 @@ public static IEnumerable<BinaryLabelFloatFeatureVectorSample> GenerateBinaryLa | |
// Initialize an example with a random label and an empty feature vector. | ||
var sample = new BinaryLabelFloatFeatureVectorSample() { Label = rnd.Next() % 2 == 0, Features = new float[_simpleBinaryClassSampleFeatureLength] }; | ||
// Fill feature vector according the assigned label. | ||
for (int j = 0; j < 10; ++j) | ||
for (int j = 0; j < _simpleBinaryClassSampleFeatureLength; ++j) | ||
{ | ||
var value = (float)rnd.NextDouble(); | ||
// Positive class gets larger feature value. | ||
|
@@ -271,6 +271,58 @@ public static IEnumerable<BinaryLabelFloatFeatureVectorSample> GenerateBinaryLa | |
return data; | ||
} | ||
|
||
public class FfmExample | ||
{ | ||
public bool Label; | ||
|
||
[VectorType(_simpleBinaryClassSampleFeatureLength)] | ||
public float[] Field0; | ||
|
||
[VectorType(_simpleBinaryClassSampleFeatureLength)] | ||
public float[] Field1; | ||
|
||
[VectorType(_simpleBinaryClassSampleFeatureLength)] | ||
public float[] Field2; | ||
} | ||
|
||
public static IEnumerable<FfmExample> GenerateFfmSamples(int exampleCount) | ||
{ | ||
var rnd = new Random(0); | ||
var data = new List<FfmExample>(); | ||
for (int i = 0; i < exampleCount; ++i) | ||
{ | ||
// Initialize an example with a random label and an empty feature vector. | ||
var sample = new FfmExample() { Label = rnd.Next() % 2 == 0, | ||
Field0 = new float[_simpleBinaryClassSampleFeatureLength], | ||
Field1 = new float[_simpleBinaryClassSampleFeatureLength], | ||
Field2 = new float[_simpleBinaryClassSampleFeatureLength] }; | ||
// Fill feature vector according the assigned label. | ||
for (int j = 0; j < 10; ++j) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
_simpleBinaryClassSampleFeatureLength ? #Closed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
{ | ||
var value0 = (float)rnd.NextDouble(); | ||
// Positive class gets larger feature value. | ||
if (sample.Label) | ||
value0 += 0.2f; | ||
sample.Field0[j] = value0; | ||
|
||
var value1 = (float)rnd.NextDouble(); | ||
// Positive class gets smaller feature value. | ||
if (sample.Label) | ||
value1 -= 0.2f; | ||
sample.Field1[j] = value1; | ||
|
||
var value2 = (float)rnd.NextDouble(); | ||
// Positive class gets larger feature value. | ||
if (sample.Label) | ||
value2 += 0.8f; | ||
sample.Field2[j] = value2; | ||
} | ||
|
||
data.Add(sample); | ||
} | ||
return data; | ||
} | ||
|
||
/// <summary> | ||
/// feature vector's length in <see cref="MulticlassClassificationExample"/>. | ||
/// </summary> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,7 @@ public sealed class FieldAwareFactorizationMachineTrainer : TrainerBase<FieldAwa | |
internal const string LoadName = "FieldAwareFactorizationMachine"; | ||
internal const string ShortName = "ffm"; | ||
|
||
public sealed class Arguments : LearnerInputBaseWithLabel | ||
public sealed class Arguments : LearnerInputBaseWithWeight | ||
{ | ||
[Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate", ShortName = "lr", SortOrder = 1)] | ||
[TlcModule.SweepableFloatParam(0.001f, 1.0f, isLogScale: true)] | ||
|
@@ -65,6 +65,15 @@ public sealed class Arguments : LearnerInputBaseWithLabel | |
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", ShortName = "norm", SortOrder = 6)] | ||
public bool Norm = true; | ||
|
||
/// <summary> | ||
/// Extra feature column names. The column named <see cref="LearnerInputBase.FeatureColumn"/> stores features from the first field. | ||
/// The i-th string in <see cref="ExtraFeatureColumns"/> stores the name of the (i+1)-th field's feature column. | ||
/// </summary> | ||
[Argument(ArgumentType.Multiple, HelpText = "Extra columns to use for feature vectors. The i-th specified string denotes the column containing features form the (i+1)-th field." + | ||
" Note that the first field is specified by \"feat\" instead of \"exfeat\".", | ||
ShortName = "exfeat", SortOrder = 7)] | ||
public string[] ExtraFeatureColumns; | ||
|
||
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf", SortOrder = 90)] | ||
public bool Shuffle = true; | ||
|
||
|
@@ -122,13 +131,26 @@ public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Arguments arg | |
{ | ||
Initialize(env, args); | ||
Info = new TrainerInfo(supportValid: true, supportIncrementalTrain: true); | ||
|
||
// There can be multiple feature columns in FFM, jointly specified by args.FeatureColumn and args.ExtraFeatureColumns. | ||
FeatureColumns = new SchemaShape.Column[1 + args.ExtraFeatureColumns.Length]; | ||
|
||
// Treat the default feature column as the 1st field. | ||
FeatureColumns[0] = new SchemaShape.Column(args.FeatureColumn, SchemaShape.Column.VectorKind.Vector, NumberType.R4, false); | ||
|
||
// Add 2nd, 3rd, and other fields from a FFM-specific argument, args.ExtraFeatureColumns. | ||
for (int i = 0; args.ExtraFeatureColumns != null && i < args.ExtraFeatureColumns.Length; i++) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Could this be a simple foreach loop instead? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess it can't. The array size is pre-defined and I prefer not to create an intermediate list just for calling … In reply to: 250316802 [](ancestors = 250316802) |
||
FeatureColumns[i + 1] = new SchemaShape.Column(args.ExtraFeatureColumns[i], SchemaShape.Column.VectorKind.Vector, NumberType.R4, false); | ||
|
||
LabelColumn = new SchemaShape.Column(args.LabelColumn, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false); | ||
WeightColumn = args.WeightColumn.IsExplicit ? new SchemaShape.Column(args.WeightColumn, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false) : default; | ||
} | ||
|
||
/// <summary> | ||
/// Initializing a new instance of <see cref="FieldAwareFactorizationMachineTrainer"/>. | ||
/// </summary> | ||
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param> | ||
/// <param name="featureColumns">The name of column hosting the features.</param> | ||
/// <param name="featureColumns">The name of column hosting the features. The i-th element stores feature column of the i-th field.</param> | ||
/// <param name="labelColumn">The name of the label column.</param> | ||
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param> | ||
/// <param name="weights">The name of the optional weights' column.</param> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,15 +2,43 @@ | |
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.FactorizationMachine; | ||
using Microsoft.ML.RunTests; | ||
using Microsoft.ML.SamplesUtils; | ||
using Xunit; | ||
|
||
namespace Microsoft.ML.Tests.TrainerEstimators | ||
{ | ||
public partial class TrainerEstimators : TestDataPipeBase | ||
{ | ||
[Fact] | ||
public void FfmBinaryClassificationWithAdvancedArguments() | ||
{ | ||
var mlContext = new MLContext(seed: 0); | ||
var data = DatasetUtils.GenerateFfmSamples(500); | ||
var dataView = ComponentCreation.CreateDataView(mlContext, data.ToList()); | ||
|
||
var ffmArgs = new FieldAwareFactorizationMachineTrainer.Arguments(); | ||
|
||
// Customized the field names. | ||
ffmArgs.FeatureColumn = nameof(DatasetUtils.FfmExample.Field0); // First field. | ||
ffmArgs.ExtraFeatureColumns = new[]{ nameof(DatasetUtils.FfmExample.Field1), nameof(DatasetUtils.FfmExample.Field2) }; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This looks slightly odd, doesn't it? I am curious — why move away from the convention used in iteration #2, where we were re-defining …? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The framework for generating entry points generates the hidden field (the old feature column name), and therefore we would have two fields with the same name and an error. In reply to: 250309618 [](ancestors = 250309618) |
||
|
||
var pipeline = new FieldAwareFactorizationMachineTrainer(mlContext, ffmArgs); | ||
|
||
var model = pipeline.Fit(dataView); | ||
var prediction = model.Transform(dataView); | ||
|
||
var metrics = mlContext.BinaryClassification.Evaluate(prediction); | ||
|
||
// Run a sanity check against a few of the metrics. | ||
Assert.InRange(metrics.Accuracy, 0.9, 1); | ||
Assert.InRange(metrics.Auc, 0.9, 1); | ||
Assert.InRange(metrics.Auprc, 0.9, 1); | ||
} | ||
|
||
[Fact] | ||
public void FieldAwareFactorizationMachine_Estimator() | ||
{ | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
am curious - is this attribute required ? #Closed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is required.
In reply to: 249994977 [](ancestors = 249994977)