|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Linq; |
| 4 | +using Microsoft.ML; |
| 5 | +using Microsoft.ML.Data; |
| 6 | +using Microsoft.ML.Trainers; |
| 7 | + |
namespace Samples.Dynamic.Trainers.Recommendation
{
    /// <summary>
    /// Sample showing how to train and use a one-class matrix factorization model
    /// configured through <c>MatrixFactorizationTrainer.Options</c>.
    /// </summary>
    public static class OneClassMatrixFactorizationWithOptions
    {
        /// <summary>
        /// Trains a one-class matrix factorization model on a small in-memory matrix and prints
        /// a few predictions to the console.
        /// </summary>
        /// <remarks>
        /// ML.NET's one-class matrix factorization module implements a coordinate descent method
        /// described in Algorithm 1 in a
        /// <a href="https://www.csie.ntu.edu.tw/~cjlin/papers/one-class-mf/biased-mf-sdm-with-supp.pdf">paper</a>.
        /// See page 28 of the
        /// <a href="https://www.csie.ntu.edu.tw/~cjlin/talks/facebook.pdf">slides</a> for a brief
        /// introduction to one-class matrix factorization.
        /// Running this example requires the additional NuGet package
        /// <a href="https://www.nuget.org/packages/Microsoft.ML.Recommender/">Microsoft.ML.Recommender</a>.
        /// </remarks>
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext(seed: 0);

            // Get a small in-memory dataset: "data" holds only the observed entries used for training,
            // while "testData" holds every entry of the matrix.
            GetOneClassMatrix(out List<MatrixElement> data, out List<MatrixElement> testData);

            // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
            var dataView = mlContext.Data.LoadFromEnumerable(data);

            // Configure a matrix factorization trainer which takes "Value" as the training label,
            // "MatrixColumnIndex" as the matrix's column index, and "MatrixRowIndex" as the matrix's row index.
            // nameof(...) is used to extract the property names of the MatrixElement class.
            var options = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex),
                MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex),
                LabelColumnName = nameof(MatrixElement.Value),
                NumberOfIterations = 20,
                NumberOfThreads = 8,
                ApproximationRank = 32,
                Alpha = 1,
                // The desired value of matrix elements not specified in the training set.
                // If the training set doesn't tell the value at the u-th row and v-th column,
                // its desired value is set to 0.15. In other words, this parameter determines
                // the value of all missing matrix elements.
                C = 0.15,
                // This argument enables one-class matrix factorization.
                LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass
            };

            var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options);

            // Train a matrix factorization model.
            var model = pipeline.Fit(dataView);

            // Apply the trained model to the test set. Notice that the training set is only a partial
            // matrix (the observed entries), whereas the test set contains the full matrix.
            var prediction = model.Transform(mlContext.Data.LoadFromEnumerable(testData));

            var results = mlContext.Data.CreateEnumerable<MatrixElement>(prediction, false).ToList();

            // Iterate through the first few predictions.
            foreach (var pred in results.Take(15))
            {
                Console.WriteLine(
                    $"Predicted value at row {pred.MatrixRowIndex - 1} and column {pred.MatrixColumnIndex - 1} is " +
                    $"{pred.Score} and its expected value is {pred.Value}.");
            }

            // Expected output similar to:
            // Predicted value at row 0 and column 0 is 0.9873335 and its expected value is 1.
            // Predicted value at row 1 and column 0 is 0.1499522 and its expected value is 0.15.
            // Predicted value at row 2 and column 0 is 0.1499791 and its expected value is 0.15.
            // Predicted value at row 3 and column 0 is 0.1499254 and its expected value is 0.15.
            // Predicted value at row 4 and column 0 is 0.1499074 and its expected value is 0.15.
            // Predicted value at row 5 and column 0 is 0.1499968 and its expected value is 0.15.
            // Predicted value at row 6 and column 0 is 0.1499791 and its expected value is 0.15.
            // Predicted value at row 7 and column 0 is 0.1499805 and its expected value is 0.15.
            // Predicted value at row 8 and column 0 is 0.1500055 and its expected value is 0.15.
            // Predicted value at row 9 and column 0 is 0.1499199 and its expected value is 0.15.
            // Predicted value at row 10 and column 0 is 0.9873335 and its expected value is 1.
            // Predicted value at row 11 and column 0 is 0.1499522 and its expected value is 0.15.
            // Predicted value at row 12 and column 0 is 0.1499791 and its expected value is 0.15.
            // Predicted value at row 13 and column 0 is 0.1499254 and its expected value is 0.15.
            // Predicted value at row 14 and column 0 is 0.1499074 and its expected value is 0.15.
            //
            // Note: set NumberOfThreads to 1 in the options above for deterministic behavior.

            // Assuming that the row index is a user ID and the column index is a game ID, the following list
            // contains the games recommended by the trained model. Note that sometimes you may want to exclude
            // training data from your predicted results because those would represent games that were
            // already purchased.
            // The variable topColumns stores the two matrix elements with the highest predicted scores among
            // the elements whose MatrixRowIndex equals 1 (printed below as row 0).
            var topColumns = results
                .Where(element => element.MatrixRowIndex == 1)
                .OrderByDescending(element => element.Score)
                .Take(2);

            Console.WriteLine("Top 2 predictions on the 1st row:");
            foreach (var top in topColumns)
            {
                Console.WriteLine($"Predicted value at row {top.MatrixRowIndex - 1} and column {top.MatrixColumnIndex - 1} is {top.Score} and its expected value is {top.Value}.");
            }

            // Expected output similar to:
            // Top 2 predictions on the 1st row:
            // Predicted value at row 0 and column 0 is 0.9871138 and its expected value is 1.
            // Predicted value at row 0 and column 10 is 0.9871138 and its expected value is 1.
        }

        // The following constants define the shape of the matrix. Its shape is
        // _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount.
        // Because in ML.NET a key type's minimal value is zero, the first row index is always zero in the C# data
        // structure (e.g., MatrixColumnIndex=0 and MatrixRowIndex=0 in MatrixElement below specify the value at the
        // upper-left corner of the training matrix). If a user's row indices start with 1, their row index 1 would
        // be mapped to the 2nd row in the matrix factorization module and their first row may contain no values.
        // The same is true for column indices.
        private const uint _synthesizedMatrixColumnCount = 60;
        private const uint _synthesizedMatrixRowCount = 100;

        // A data structure used to encode a single value in the matrix.
        private class MatrixElement
        {
            // Matrix column index. Its allowed range is from 0 to _synthesizedMatrixColumnCount - 1.
            [KeyType(_synthesizedMatrixColumnCount)]
            public uint MatrixColumnIndex { get; set; }

            // Matrix row index. Its allowed range is from 0 to _synthesizedMatrixRowCount - 1.
            [KeyType(_synthesizedMatrixRowCount)]
            public uint MatrixRowIndex { get; set; }

            // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row.
            public float Value { get; set; }

            // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row.
            public float Score { get; set; }
        }

        // Creates an in-memory matrix as lists of elements (column index, row index, value).
        // One-class matrix factorization handles scenarios where only positive signals can be observed
        // (e.g., on Facebook, only likes are recorded and no dislikes), so all observed values are set to 1.
        //
        // observedMatrix receives only the observed (value 1) elements and is meant for training;
        // fullMatrix receives every element of the matrix, with unobserved elements set to 0.15.
        private static void GetOneClassMatrix(out List<MatrixElement> observedMatrix, out List<MatrixElement> fullMatrix)
        {
            // The matrix factorization model will be trained using only observedMatrix, but we will see
            // that it can learn all the information carried in fullMatrix.
            observedMatrix = new List<MatrixElement>();
            fullMatrix = new List<MatrixElement>();

            for (uint i = 0; i < _synthesizedMatrixColumnCount; ++i)
            {
                for (uint j = 0; j < _synthesizedMatrixRowCount; ++j)
                {
                    if ((i + j) % 10 == 0)
                    {
                        // Set observed elements' values to 1 (means like).
                        observedMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 });
                        fullMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 });
                    }
                    else
                    {
                        // Set unobserved elements' values to 0.15, a value smaller than observed values (means dislike).
                        fullMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 0.15f, Score = 0 });
                    }
                }
            }
        }
    }
}
0 commit comments