From cf65b6295885a5d632c78c2b4297e94b2f04d721 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Sun, 18 Nov 2018 19:44:20 -0800 Subject: [PATCH 01/10] Sync with the latest LIBMF to enable implicit-feedback recommendation --- .../MatrixFactorizationTrainer.cs | 32 +++- .../SafeTrainingAndModelBuffer.cs | 141 ++++++++++++++++-- src/Native/MatrixFactorizationNative/libmf | 2 +- 3 files changed, 158 insertions(+), 17 deletions(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index de68f9be1e..1dca9e2f61 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -91,6 +91,13 @@ public sealed class MatrixFactorizationTrainer : TrainerBase 0, nameof(args.Lambda), posError); Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError); + _fun = args.Fun; _lambda = args.Lambda; _k = args.K; _iter = args.NumIterations; _eta = args.Eta; + _alpha = args.Eta; + _c = args.C; _threads = args.NumThreads ?? Environment.ProcessorCount; _quiet = args.Quiet; _doNmf = args.NonNegative; @@ -224,10 +249,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env, var args = new Arguments(); advancedSettings?.Invoke(args); + _fun = args.Fun; _lambda = args.Lambda; _k = args.K; _iter = args.NumIterations; _eta = args.Eta; + _alpha = args.Alpha; + _c = args.C; _threads = args.NumThreads ?? 
Environment.ProcessorCount; _quiet = args.Quiet; _doNmf = args.NonNegative; @@ -338,8 +366,8 @@ private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data, private SafeTrainingAndModelBuffer PrepareBuffer() { - return new SafeTrainingAndModelBuffer(Host, _k, Math.Max(20, 2 * _threads), - _threads, _iter, _lambda, _eta, _doNmf, _quiet, copyData: false); + return new SafeTrainingAndModelBuffer(Host, _fun, _k, _threads, Math.Max(20, 2 * _threads), + _iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false); } /// diff --git a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs index 615b0875f8..2774e30a2a 100644 --- a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs +++ b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs @@ -44,23 +44,107 @@ private unsafe struct MFProblem [StructLayout(LayoutKind.Explicit)] private struct MFParameter { + /// + /// Enum of loss functions which can be minimized. + /// 0: square loss for regression. + /// 1: absolute loss for regression. + /// 2: KL-divergence for regression. + /// 5: logistic loss for binary classification. + /// 6: squared hinge loss for binary classification. + /// 7: hinge loss for binary classification. + /// 10: row-wise Bayesian personalized ranking. + /// 11: column-wise Bayesian personalized ranking. + /// 12: squared loss for implicit-feedback matrix factorization. + /// Fun 12 is solved by a coordinate descent method while other functions invoke + /// a stochastic gradient method. + /// [FieldOffset(0)] - public int K; + public int Fun; + + /// + /// Rank of factor matrices. + /// [FieldOffset(4)] - public int NrThreads; + public int K; + + /// + /// Number of threads which can be used for training. + /// [FieldOffset(8)] - public int NrBins; + public int NrThreads; + + /// + /// Number of blocks that the training matrix is divided into. 
The parallel stochastic gradient + /// method in LIBMF processes assigns each thread a block at one time. The ratings in one block + /// would be sequentially accessed (not randomaly accessed like standard stochastic gradient methods). + /// [FieldOffset(12)] - public int NrIters; + public int NrBins; + + /// + /// Number of training iteration. At one iteration, all values in the training matrix are roughly accessed once. + /// [FieldOffset(16)] - public float Lambda; + public int NrIters; + + /// + /// L1-norm regularization coefficient of left factor matrix. + /// [FieldOffset(20)] - public float Eta; + public float LambdaP1; + + /// + /// L2-norm regularization coefficient of left factor matrix. + /// [FieldOffset(24)] - public int DoNmf; + public float LambdaP2; + + /// + /// L1-norm regularization coefficient of right factor matrix. + /// [FieldOffset(28)] - public int Quiet; + public float LambdaQ1; + + /// + /// L2-norm regularization coefficient of right factor matrix. + /// [FieldOffset(32)] + public float LambdaQ2; + + /// + /// Learning rate of LIBMF's stochastic gradient method. + /// + [FieldOffset(36)] + public float Eta; + + /// + /// Coefficient of loss function on unobserved entries in the training matrix. It's used only with fun=12. + /// + [FieldOffset(40)] + public float Alpha; + + /// + /// Desired value of unobserved entries in the training matrix. It's used only with fun=12. + /// + [FieldOffset(44)] + public float C; + + /// + /// Specify if the factor matrices should be non-negative. + /// + [FieldOffset(48)] + public int DoNmf; + + /// + /// Set to true so that LIBMF may produce less information to STDOUT. + /// + [FieldOffset(52)] + public int Quiet; + + /// + /// Set to false so that LIBMF may reuse and modifiy the data passed in. 
+ /// + [FieldOffset(56)] public int CopyData; } @@ -68,14 +152,36 @@ private struct MFParameter private unsafe struct MFModel { [FieldOffset(0)] - public int M; + public int Fun; + /// + /// Number of rows in the training matrix. + /// [FieldOffset(4)] - public int N; + public int M; + /// + /// Number of columns in the training matrix. + /// [FieldOffset(8)] + public int N; + /// + /// Rank of factor matrices. + /// + [FieldOffset(12)] public int K; + /// + /// Average value in the training matrix. + /// [FieldOffset(16)] + public float B; + /// + /// Left factor matrix. Its shape is M-by-K stored in row-major format. + /// + [FieldOffset(20)] // pointer is 8-byte on 64-bit machine. public float* P; - [FieldOffset(24)] + /// + /// Right factor matrix. Its shape is N-by-K stored in row-major format. + /// + [FieldOffset(28)] // pointer is 8-byte on 64-bit machine. public float* Q; } @@ -100,16 +206,23 @@ private unsafe struct MFModel private unsafe MFModel* _pMFModel; private readonly IHost _host; - public SafeTrainingAndModelBuffer(IHostEnvironment env, int k, int nrBins, int nrThreads, int nrIters, double lambda, double eta, + public SafeTrainingAndModelBuffer(IHostEnvironment env, int fun, int k, int nrThreads, + int nrBins, int nrIters, double lambda, double eta, double alpha, double c, bool doNmf, bool quiet, bool copyData) { _host = env.Register("SafeTrainingAndModelBuffer"); + _mfParam.Fun = fun; _mfParam.K = k; - _mfParam.NrBins = nrBins; _mfParam.NrThreads = nrThreads; + _mfParam.NrBins = nrBins; _mfParam.NrIters = nrIters; - _mfParam.Lambda = (float)lambda; + _mfParam.LambdaP1 = 0; + _mfParam.LambdaP2 = (float)lambda; + _mfParam.LambdaQ1 = 0; + _mfParam.LambdaQ2 = (float)lambda; _mfParam.Eta = (float)eta; + _mfParam.Alpha = (float)alpha; + _mfParam.C = (float)c; _mfParam.DoNmf = doNmf ? 1 : 0; _mfParam.Quiet = quiet ? 1 : 0; _mfParam.CopyData = copyData ? 
1 : 0; diff --git a/src/Native/MatrixFactorizationNative/libmf b/src/Native/MatrixFactorizationNative/libmf index 1ecc365249..f06dac6e89 160000 --- a/src/Native/MatrixFactorizationNative/libmf +++ b/src/Native/MatrixFactorizationNative/libmf @@ -1 +1 @@ -Subproject commit 1ecc365249e5cac5e72c66317a141298dc52f6e3 +Subproject commit f06dac6e89c52ecff085b9c5baf811f3e27eab09 From 17729d3f165a42b139c0c60260b3fa7c54eb8ba6 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Sun, 18 Nov 2018 21:09:28 -0800 Subject: [PATCH 02/10] Fix pointer alignment --- src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs index 2774e30a2a..33bb90ae0b 100644 --- a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs +++ b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs @@ -176,12 +176,12 @@ private unsafe struct MFModel /// /// Left factor matrix. Its shape is M-by-K stored in row-major format. /// - [FieldOffset(20)] // pointer is 8-byte on 64-bit machine. + [FieldOffset(24)] // pointer is 8-byte on 64-bit machine. public float* P; /// /// Right factor matrix. Its shape is N-by-K stored in row-major format. /// - [FieldOffset(28)] // pointer is 8-byte on 64-bit machine. + [FieldOffset(32)] // pointer is 8-byte on 64-bit machine. 
public float* Q; } From 4e0558e8b732c223cde052a8eb82775702211ea7 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Sun, 18 Nov 2018 22:47:37 -0800 Subject: [PATCH 03/10] Fix Mac build --- src/Native/MatrixFactorizationNative/libmf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Native/MatrixFactorizationNative/libmf b/src/Native/MatrixFactorizationNative/libmf index f06dac6e89..f92a18161b 160000 --- a/src/Native/MatrixFactorizationNative/libmf +++ b/src/Native/MatrixFactorizationNative/libmf @@ -1 +1 @@ -Subproject commit f06dac6e89c52ecff085b9c5baf811f3e27eab09 +Subproject commit f92a18161b6824fda4c4ab698a69d299a836841a From da132c38c85d0d36b0d3ec10d390007529e00a5e Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 19 Nov 2018 21:03:56 -0800 Subject: [PATCH 04/10] Address comments and add a test --- .../MatrixFactorizationTrainer.cs | 18 ++-- .../MatrixFactorizationTests.cs | 94 +++++++++++++++++++ 2 files changed, 104 insertions(+), 8 deletions(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 1dca9e2f61..75ceeedc98 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -89,14 +89,16 @@ namespace Microsoft.ML.Trainers public sealed class MatrixFactorizationTrainer : TrainerBase, IEstimator { + public enum LibMFLossFunctionType { SquareLossRegression=0, SquareLossOneClass=12 }; + public sealed class Arguments { [Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices. " + "Two values are allowed, 0 or 12. The values \"0\" means traditional collaborative filtering problem with squared loss. 
" + "The value \"12\" triggers one-class matrix factorization for implicit-feedback recommendation problem.")] [TGUI(SuggestedSweeps = "0,12")] - [TlcModule.SweepableDiscreteParam("Fun", new object[] { 0, 12 })] - public int Fun = 0; + [TlcModule.SweepableDiscreteParam("Fun", new object[] { LibMFLossFunctionType.SquareLossRegression, LibMFLossFunctionType.SquareLossOneClass })] + public LibMFLossFunctionType Fun = LibMFLossFunctionType.SquareLossRegression; [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " + "It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " + @@ -123,16 +125,16 @@ public sealed class Arguments [TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })] public double Eta = 0.1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Coefficient of negative entries' loss in one-class matrix factorization.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of negative entries' loss in one-class matrix factorization.")] [TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")] - [TlcModule.SweepableDiscreteParam("Eta", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})] + [TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})] public double Alpha = 0.1; [Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization. 
In one-class matrix factorization, " + "all matrix values observed are one (which can be viewed as positive cases in binary classification) while unobserved values " + "(which can be viewed as negative cases in binary classification) need to be specified manually using this option.")] [TGUI(SuggestedSweeps = "0.000001,0,0001,0.01")] - [TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.000001f, 0.0001f, 0.01f })] + [TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })] public double C = 0.000001f; [Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t")] @@ -215,12 +217,12 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError); Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError); - _fun = args.Fun; + _fun = (int)args.Fun; _lambda = args.Lambda; _k = args.K; _iter = args.NumIterations; _eta = args.Eta; - _alpha = args.Eta; + _alpha = args.Alpha; _c = args.C; _threads = args.NumThreads ?? 
Environment.ProcessorCount; _quiet = args.Quiet; @@ -249,7 +251,7 @@ public MatrixFactorizationTrainer(IHostEnvironment env, var args = new Arguments(); advancedSettings?.Invoke(args); - _fun = args.Fun; + _fun = (int)args.Fun; _lambda = args.Lambda; _k = args.K; _iter = args.NumIterations; diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index 3fd11ec6f2..031c68a739 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ -331,5 +331,99 @@ public void MatrixFactorizationInMemoryDataZeroBaseIndex() // The presence of out-of-range indexes may lead to NaN Assert.True(float.IsNaN(pred.Score)); } + + const int _oneClassMatrixColumnCount = 2; + const int _oneClassMatrixRowCount = 3; + + internal class OneClassMatrixElementZeroBased + { + [KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)] + public uint MatrixColumnIndex; + [KeyType(Contiguous = true, Count = _oneClassMatrixRowCount, Min = 0)] + public uint MatrixRowIndex; + public float Value; + } + + internal class OneClassMatrixElementZeroBasedForScore + { + [KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)] + public uint MatrixColumnIndex; + [KeyType(Contiguous = true, Count = _oneClassMatrixRowCount, Min = 0)] + public uint MatrixRowIndex; + public float Value; + public float Score; + } + + [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441. + public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex() + { + // Create an in-memory matrix as a list of tuples (column index, row index, value). + // Iterators i and j are column and row indexes, respectively. For one-class matrix factorization problem, + // unspecified matrix elements are all a constant provided by user. 
If that constant is 0.15, the following + // list means a 3-by-2 training matrix with elements: + // (0, 0, 1), (1, 1, 1), (0, 2, 1), (0, 1, 0.15), (1, 0, 0.15), (1, 2, 0.15). + // because matrix elements at (0, 1), (1, 0), and (1, 2) are not specified. + var dataMatrix = new List(); + dataMatrix.Add(new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 1 }); + dataMatrix.Add(new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 1, MatrixRowIndex = 1, Value = 1 }); + dataMatrix.Add(new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 0, MatrixRowIndex = 2, Value = 1 }); + + // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it. + var dataView = ComponentCreation.CreateDataView(Env, dataMatrix); + + // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the + // matrix's column index, and "MatrixRowIndex" as the matrix's row index. + var mlContext = new MLContext(seed: 1, conc: 1); + var pipeline = new MatrixFactorizationTrainer(mlContext, + nameof(OneClassMatrixElementZeroBased.MatrixColumnIndex), + nameof(OneClassMatrixElementZeroBased.MatrixRowIndex), + nameof(OneClassMatrixElementZeroBased.Value), + advancedSettings: s => + { + s.Fun = MatrixFactorizationTrainer.LibMFLossFunctionType.SquareLossOneClass; + s.NumIterations = 100; + s.NumThreads = 1; // To eliminate randomness, # of threads must be 1. + // Let's test non-default regularization coefficient. + s.Lambda = 0.025; + s.K = 16; + // Importance coefficient of loss function over matrix elements not specified in the input matrix. + s.Alpha = 0.01; + // Desired value for matrix elements not specified in the input matrix. + s.C = 0.15; + }); + + // Train a matrix factorization model. + var model = pipeline.Fit(dataView); + + // Apply the trained model to the training set. 
+ var prediction = model.Transform(dataView); + + // Calculate regression matrices for the prediction result. + var metrics = mlContext.Regression.Evaluate(prediction, label: "Value", score: "Score"); + + // Make sure the prediction error is not too large. + Assert.InRange(metrics.L2, 0, 0.0016); + + // Create data for testing. Note that the 2nd element is not specified in the training data so it should + // be close to the constant specified by s.C = 0.15. Comparing with the data structure used in training phase, + // one extra float is added into OneClassMatrixElementZeroBasedForScore for storing the prediction result. Note + // that the prediction engine may ignore the Value and assign the predicted value to Score. + var testDataMatrix = new List(); + testDataMatrix.Add(new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 0, Score = 0 }); + testDataMatrix.Add(new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 1, MatrixRowIndex = 2, Value = 0, Score = 0 }); + + // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it. + var testDataView = ComponentCreation.CreateDataView(Env, testDataMatrix); + + // Apply the trained model to the test data. + var testPrediction = model.Transform(testDataView); + + double tolerance = Math.Pow(10, -6); + var testResults = new List(testPrediction.AsEnumerable(mlContext, false)); + // Positive example (i.e., examples can be found in dataMatrix) is close to 1. + Assert.InRange(testResults[0].Score, 0.982391 - tolerance, 0.982391 + tolerance); + // Negative example (i.e., examples can not be found in dataMatrix) is close to 0.15 (specified by s.C = 0.15 in the trainer). 
+ Assert.InRange(testResults[1].Score, 0.141411 - tolerance, 0.141411 + tolerance); + } } } \ No newline at end of file From 9e090c935ebf66feeb97440e2d5bea4a10599187 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 19 Nov 2018 22:16:52 -0800 Subject: [PATCH 05/10] Increase tol --- .../TrainerEstimators/MatrixFactorizationTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index 031c68a739..51b4aab3b6 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ -418,7 +418,7 @@ public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex() // Apply the trained model to the test data. var testPrediction = model.Transform(testDataView); - double tolerance = Math.Pow(10, -6); + double tolerance = Math.Pow(10, -5); var testResults = new List(testPrediction.AsEnumerable(mlContext, false)); // Positive example (i.e., examples can be found in dataMatrix) is close to 1. 
Assert.InRange(testResults[0].Score, 0.982391 - tolerance, 0.982391 + tolerance); From 55630c766c5c7fbc11c79bcb68c806d852f7a379 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 19 Nov 2018 22:51:48 -0800 Subject: [PATCH 06/10] Address comments and make a better doc for test --- .../MatrixFactorizationTrainer.cs | 2 +- .../MatrixFactorizationTests.cs | 29 ++++++++++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 75ceeedc98..7bb1e30b8a 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -89,7 +89,7 @@ namespace Microsoft.ML.Trainers public sealed class MatrixFactorizationTrainer : TrainerBase, IEstimator { - public enum LibMFLossFunctionType { SquareLossRegression=0, SquareLossOneClass=12 }; + public enum LibMFLossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 }; public sealed class Arguments { diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index 51b4aab3b6..2b38d62a1c 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ -332,10 +332,24 @@ public void MatrixFactorizationInMemoryDataZeroBaseIndex() Assert.True(float.IsNaN(pred.Score)); } - const int _oneClassMatrixColumnCount = 2; - const int _oneClassMatrixRowCount = 3; - - internal class OneClassMatrixElementZeroBased + // The following ingredients are used to define a 3-by-2 one-class + // matrix used in a test for one-class matrix factorization. One-class + // matrix means that all the available elements in the training matrix + // are 1. Such a matrix is common. Let's use Game store as an example. 
+ // Assume that user IDs are row indexes and game IDs are column + // indexes. By encoding all users' purchase history as a matrix (i.e., + // if the value at u-th row and v-th column is 1, then u-th user owns + // the v-th game), a one-class matrix gets created because, from the + // purchase history, users didn't explicitly tell us which games they + // will not buy. If you train a simple model from those a one-class + // matrix using standard collaborative filtering, all your predictions + // would be 1! One-class matrix factorization assumes unspecified + // matrix entries are all 0 (or a small constant value selected by the + // user) so that the trainined model becomes non-trivial. + private const int _oneClassMatrixColumnCount = 2; + private const int _oneClassMatrixRowCount = 3; + + private class OneClassMatrixElementZeroBased { [KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)] public uint MatrixColumnIndex; @@ -344,7 +358,7 @@ internal class OneClassMatrixElementZeroBased public float Value; } - internal class OneClassMatrixElementZeroBasedForScore + private class OneClassMatrixElementZeroBasedForScore { [KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)] public uint MatrixColumnIndex; @@ -418,12 +432,11 @@ public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex() // Apply the trained model to the test data. var testPrediction = model.Transform(testDataView); - double tolerance = Math.Pow(10, -5); var testResults = new List(testPrediction.AsEnumerable(mlContext, false)); // Positive example (i.e., examples can be found in dataMatrix) is close to 1. - Assert.InRange(testResults[0].Score, 0.982391 - tolerance, 0.982391 + tolerance); + CompareNumbersWithTolerance(0.982391, testResults[0].Score, digitsOfPrecision: 5); // Negative example (i.e., examples can not be found in dataMatrix) is close to 0.15 (specified by s.C = 0.15 in the trainer). 
- Assert.InRange(testResults[1].Score, 0.141411 - tolerance, 0.141411 + tolerance); + CompareNumbersWithTolerance(0.141411, testResults[1].Score, digitsOfPrecision: 5); } } } \ No newline at end of file From 56e8f64d5060359528c27769d954141654a041cb Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 20 Nov 2018 08:16:51 -0800 Subject: [PATCH 07/10] Polish --- .../MatrixFactorizationTrainer.cs | 22 ++++++++---- .../MatrixFactorizationTests.cs | 36 +++++++++---------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 7bb1e30b8a..560e07dcf9 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -93,9 +93,11 @@ public enum LibMFLossFunctionType { SquareLossRegression = 0, SquareLossOneClass public sealed class Arguments { - [Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices. " + - "Two values are allowed, 0 or 12. The values \"0\" means traditional collaborative filtering problem with squared loss. " + - "The value \"12\" triggers one-class matrix factorization for implicit-feedback recommendation problem.")] + /// + /// Loss function minimized for finding factor matrices. Two values are allowed, 0 or 12. The values 0 means traditional collaborative filtering + /// problem with squared loss. The value 12 triggers one-class matrix factorization for implicit-feedback recommendation problem. 
+ /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")] [TGUI(SuggestedSweeps = "0,12")] [TlcModule.SweepableDiscreteParam("Fun", new object[] { LibMFLossFunctionType.SquareLossRegression, LibMFLossFunctionType.SquareLossOneClass })] public LibMFLossFunctionType Fun = LibMFLossFunctionType.SquareLossRegression; @@ -125,14 +127,20 @@ public sealed class Arguments [TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })] public double Eta = 0.1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of negative entries' loss in one-class matrix factorization.")] + /// + /// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")] [TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")] [TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})] public double Alpha = 0.1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization. In one-class matrix factorization, " + - "all matrix values observed are one (which can be viewed as positive cases in binary classification) while unobserved values " + - "(which can be viewed as negative cases in binary classification) need to be specified manually using this option.")] + /// + /// Desired negative entries' value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one + /// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary + /// classification) need to be specified manually using this option. 
+ /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")] [TGUI(SuggestedSweeps = "0.000001,0,0001,0.01")] [TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })] public double C = 0.000001f; diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index 2b38d62a1c..2c0cd86160 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ -333,19 +333,20 @@ public void MatrixFactorizationInMemoryDataZeroBaseIndex() } // The following ingredients are used to define a 3-by-2 one-class - // matrix used in a test for one-class matrix factorization. One-class - // matrix means that all the available elements in the training matrix - // are 1. Such a matrix is common. Let's use Game store as an example. - // Assume that user IDs are row indexes and game IDs are column - // indexes. By encoding all users' purchase history as a matrix (i.e., - // if the value at u-th row and v-th column is 1, then u-th user owns - // the v-th game), a one-class matrix gets created because, from the - // purchase history, users didn't explicitly tell us which games they - // will not buy. If you train a simple model from those a one-class - // matrix using standard collaborative filtering, all your predictions - // would be 1! One-class matrix factorization assumes unspecified - // matrix entries are all 0 (or a small constant value selected by the - // user) so that the trainined model becomes non-trivial. + // matrix used in a test, OneClassMatrixFactorizationInMemoryDataZeroBaseIndex, + // for one-class matrix factorization. One-class matrix means that all + // the available elements in the training matrix are 1. Such a matrix + // is common. Let's use online game store as an example. 
Assume that + // user IDs are row indexes and game IDs are column indexes. By + // encoding all users' purchase history as a matrix (i.e., if the value + // at the u-th row and the v-th column is 1, then the u-th user owns + // the v-th game), a one-class matrix gets created because all matrix + // elements are 1. If you train a prediction model from that matrix + // using standard collaborative filtering, all your predictions would + // be 1! One-class matrix factorization assumes unspecified matrix + // entries are all 0 (or a small constant value selected by the user) + // so that the trainined model can assign purchased itemas higher + // scores than those not purchased. private const int _oneClassMatrixColumnCount = 2; private const int _oneClassMatrixRowCount = 3; @@ -371,10 +372,9 @@ private class OneClassMatrixElementZeroBasedForScore [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441. public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex() { - // Create an in-memory matrix as a list of tuples (column index, row index, value). - // Iterators i and j are column and row indexes, respectively. For one-class matrix factorization problem, - // unspecified matrix elements are all a constant provided by user. If that constant is 0.15, the following - // list means a 3-by-2 training matrix with elements: + // Create an in-memory matrix as a list of tuples (column index, row index, value). For one-class matrix + // factorization problem, unspecified matrix elements are all a constant provided by user. If that constant is 0.15, + // the following list means a 3-by-2 training matrix with elements: // (0, 0, 1), (1, 1, 1), (0, 2, 1), (0, 1, 0.15), (1, 0, 0.15), (1, 2, 0.15). // because matrix elements at (0, 1), (1, 0), and (1, 2) are not specified. 
var dataMatrix = new List(); @@ -421,7 +421,7 @@ public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex() // Create data for testing. Note that the 2nd element is not specified in the training data so it should // be close to the constant specified by s.C = 0.15. Comparing with the data structure used in training phase, // one extra float is added into OneClassMatrixElementZeroBasedForScore for storing the prediction result. Note - // that the prediction engine may ignore the Value and assign the predicted value to Score. + // that the prediction engine may ignore Value and assign the predicted value to Score. var testDataMatrix = new List(); testDataMatrix.Add(new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 0, Score = 0 }); testDataMatrix.Add(new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 1, MatrixRowIndex = 2, Value = 0, Score = 0 }); From 8d718354acbb9c5d33722d3345fc8a7114992e35 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 20 Nov 2018 10:16:28 -0800 Subject: [PATCH 08/10] Address comments --- .../MatrixFactorizationTrainer.cs | 15 ++++++++++----- .../TrainerEstimators/MatrixFactorizationTests.cs | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 560e07dcf9..0cc3e1bceb 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -89,7 +89,7 @@ namespace Microsoft.ML.Trainers public sealed class MatrixFactorizationTrainer : TrainerBase, IEstimator { - public enum LibMFLossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 }; + public enum LossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 }; public sealed class Arguments { @@ -99,8 +99,8 @@ public sealed class Arguments /// [Argument(ArgumentType.AtMostOnce, HelpText = 
"Loss function minimized for finding factor matrices.")] [TGUI(SuggestedSweeps = "0,12")] - [TlcModule.SweepableDiscreteParam("Fun", new object[] { LibMFLossFunctionType.SquareLossRegression, LibMFLossFunctionType.SquareLossOneClass })] - public LibMFLossFunctionType Fun = LibMFLossFunctionType.SquareLossRegression; + [TlcModule.SweepableDiscreteParam("LossFunction", new object[] { LossFunctionType.SquareLossRegression, LossFunctionType.SquareLossOneClass })] + public LossFunctionType LossFunction = LossFunctionType.SquareLossRegression; [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " + "It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " + @@ -129,6 +129,10 @@ public sealed class Arguments /// /// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization. + /// In general, only a few of matrix entries (e.g., less than 1%) in the training are observed (i.e., positive). + /// To balance the contributions from unobserved and obverved in the overall loss function, this parameter is + /// usually a small value so that the solver is able to find a factorization equally good to unobserved and observed + /// entries. If only 10000 observed entries present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000). 
/// [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")] [TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")] @@ -224,8 +228,9 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e Host.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), posError); Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError); Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError); + Host.CheckUserArg(args.Alpha > 0, nameof(args.Alpha), posError); - _fun = (int)args.Fun; + _fun = (int)args.LossFunction; _lambda = args.Lambda; _k = args.K; _iter = args.NumIterations; @@ -259,7 +264,7 @@ public MatrixFactorizationTrainer(IHostEnvironment env, var args = new Arguments(); advancedSettings?.Invoke(args); - _fun = (int)args.Fun; + _fun = (int)args.LossFunction; _lambda = args.Lambda; _k = args.K; _iter = args.NumIterations; diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index 2c0cd86160..4b1e8cf92e 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ -394,7 +394,7 @@ public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex() nameof(OneClassMatrixElementZeroBased.Value), advancedSettings: s => { - s.Fun = MatrixFactorizationTrainer.LibMFLossFunctionType.SquareLossOneClass; + s.LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass; s.NumIterations = 100; s.NumThreads = 1; // To eliminate randomness, # of threads must be 1. // Let's test non-default regularization coefficient. 
From 9ad7a382396ca84c586b306c15c0931630532d10 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 20 Nov 2018 14:39:18 -0800 Subject: [PATCH 09/10] Fix comments --- .../MatrixFactorizationTrainer.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 0cc3e1bceb..80e2ba2d3e 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -132,15 +132,20 @@ public sealed class Arguments /// In general, only a few of matrix entries (e.g., less than 1%) in the training are observed (i.e., positive). /// To balance the contributions from unobserved and obverved in the overall loss function, this parameter is /// usually a small value so that the solver is able to find a factorization equally good to unobserved and observed - /// entries. If only 10000 observed entries present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000). + /// entries. If only 10000 observed entries present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000 - 10000). + /// When most entries in the training matrix are observed, one can use Alpha >> 1; for example, if only 10000 in previous + /// matrix is not observed, one can try Alpha = (200000 * 300000 - 10000) / 10000. Consequently, + /// Alpha = (# of observed entries) / (# of unobserved entries) can make observed and unobserved entries equally important + /// in the minimized loss function. However, the best setting in machine learning is alwasy data-depedent so user still need to + /// try multiple values. 
/// [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")] [TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")] [TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})] - public double Alpha = 0.1; + public double Alpha = 0.0001; /// - /// Desired negative entries' value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one + /// Desired negative entries value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one /// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary /// classification) need to be specified manually using this option. /// From e34abb52a2512982fb7b1e865030b6e7eebeaad5 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 20 Nov 2018 15:18:13 -0800 Subject: [PATCH 10/10] Trigger build --- src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 80e2ba2d3e..a0b0d35ce6 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -136,7 +136,7 @@ public sealed class Arguments /// When most entries in the training matrix are observed, one can use Alpha >> 1; for example, if only 10000 in previous /// matrix is not observed, one can try Alpha = (200000 * 300000 - 10000) / 10000. Consequently, /// Alpha = (# of observed entries) / (# of unobserved entries) can make observed and unobserved entries equally important - /// in the minimized loss function. However, the best setting in machine learning is alwasy data-depedent so user still need to + /// in the minimized loss function. 
However, the best setting in machine learning is always data-dependent so the user still needs to /// try multiple values. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]