From 39ec7525a17649cc806a88712bf39df21febc8b6 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Fri, 5 Apr 2019 15:55:53 -0700 Subject: [PATCH 1/3] Polish marshalling of MF model and MF problem and enable 32-bit tests Reset pointer after deleting it Consistency of declaring types in bridge classes More explicit-size types --- .../SafeTrainingAndModelBuffer.cs | 72 +++++++------- .../UnmanagedMemory.cpp | 95 ++++++++++++++++--- .../UnmanagedMemory.h | 29 +++++- .../MatrixFactorizationFactAttribute.cs | 2 +- 4 files changed, 145 insertions(+), 53 deletions(-) diff --git a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs index 6111a1adc4..5220140e91 100644 --- a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs +++ b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs @@ -17,31 +17,50 @@ namespace Microsoft.ML.Recommender.Internal /// internal sealed class SafeTrainingAndModelBuffer : IDisposable { - [StructLayout(LayoutKind.Explicit)] + [StructLayout(LayoutKind.Sequential)] private struct MFNode { - [FieldOffset(0)] + /// + /// Row index. + /// public int U; - [FieldOffset(4)] + + /// + /// olumn index; + /// public int V; - [FieldOffset(8)] + + /// + /// Matrix element's value at -th row and -th column. + /// public float R; } - [StructLayout(LayoutKind.Explicit)] + [StructLayout(LayoutKind.Sequential)] private unsafe struct MFProblem { - [FieldOffset(0)] + /// + /// Number of rows. + /// public int M; - [FieldOffset(4)] + + /// + /// Number of columns. + /// public int N; - [FieldOffset(8)] + + /// + /// Number of specified matrix elements in . + /// public long Nnz; - [FieldOffset(16)] + + /// + /// Specified matrix elements. 
+ /// public MFNode* R; } - [StructLayout(LayoutKind.Explicit)] + [StructLayout(LayoutKind.Sequential)] private struct MFParameter { /// @@ -58,19 +77,16 @@ private struct MFParameter /// Fun 12 is solved by a coordinate descent method while other functions invoke /// a stochastic gradient method. /// - [FieldOffset(0)] public int Fun; /// /// Rank of factor matrices. /// - [FieldOffset(4)] public int K; /// /// Number of threads which can be used for training. /// - [FieldOffset(8)] public int NrThreads; /// @@ -78,110 +94,100 @@ private struct MFParameter /// method in LIBMF processes assigns each thread a block at one time. The ratings in one block /// would be sequentially accessed (not randomaly accessed like standard stochastic gradient methods). /// - [FieldOffset(12)] public int NrBins; /// /// Number of training iteration. At one iteration, all values in the training matrix are roughly accessed once. /// - [FieldOffset(16)] public int NrIters; /// /// L1-norm regularization coefficient of left factor matrix. /// - [FieldOffset(20)] public float LambdaP1; /// /// L2-norm regularization coefficient of left factor matrix. /// - [FieldOffset(24)] public float LambdaP2; /// /// L1-norm regularization coefficient of right factor matrix. /// - [FieldOffset(28)] public float LambdaQ1; /// /// L2-norm regularization coefficient of right factor matrix. /// - [FieldOffset(32)] public float LambdaQ2; /// /// Learning rate of LIBMF's stochastic gradient method. /// - [FieldOffset(36)] public float Eta; /// /// Coefficient of loss function on unobserved entries in the training matrix. It's used only with fun=12. /// - [FieldOffset(40)] public float Alpha; /// /// Desired value of unobserved entries in the training matrix. It's used only with fun=12. /// - [FieldOffset(44)] public float C; /// /// Specify if the factor matrices should be non-negative. /// - [FieldOffset(48)] public byte DoNmf; /// /// Set to true so that LIBMF may produce less information to STDOUT. 
/// - [FieldOffset(49)] public byte Quiet; /// /// Set to false so that LIBMF may reuse and modifiy the data passed in. /// - [FieldOffset(50)] public byte CopyData; } - [StructLayout(LayoutKind.Explicit)] + [StructLayout(LayoutKind.Sequential)] private unsafe struct MFModel { - [FieldOffset(0)] + /// + /// See . + /// public int Fun; + /// /// Number of rows in the training matrix. /// - [FieldOffset(4)] public int M; + /// /// Number of columns in the training matrix. /// - [FieldOffset(8)] public int N; + /// /// Rank of factor matrices. /// - [FieldOffset(12)] public int K; + /// /// Average value in the training matrix. /// - [FieldOffset(16)] public float B; + /// /// Left factor matrix. Its shape is M-by-K stored in row-major format. /// - [FieldOffset(24)] // pointer is 8-byte on 64-bit machine. public float* P; + /// /// Right factor matrix. Its shape is N-by-K stored in row-major format. /// - [FieldOffset(32)] // pointer is 8-byte on 64-bit machine. public float* Q; } diff --git a/src/Native/MatrixFactorizationNative/UnmanagedMemory.cpp b/src/Native/MatrixFactorizationNative/UnmanagedMemory.cpp index 75b6ccac93..83ed2f096f 100644 --- a/src/Native/MatrixFactorizationNative/UnmanagedMemory.cpp +++ b/src/Native/MatrixFactorizationNative/UnmanagedMemory.cpp @@ -9,7 +9,7 @@ using namespace mf; -mf_parameter make_param(const mf_parameter_bridge *param_bridge) +inline mf_parameter TranslateToParam(const mf_parameter_bridge *param_bridge) { mf_parameter param; param.fun = param_bridge->fun; @@ -30,30 +30,97 @@ mf_parameter make_param(const mf_parameter_bridge *param_bridge) return param; } -EXPORT_API(void) MFDestroyModel(mf_model *&model) +inline mf_problem TranslateToProblem(const mf_problem_bridge *prob_bridge) { - return mf_destroy_model(&model); + mf_problem prob; + prob.m = prob_bridge->m; + prob.n = prob_bridge->n; + prob.nnz = prob_bridge->nnz; + prob.R = prob_bridge->R; + return prob; } -EXPORT_API(mf_model*) MFTrain(const mf_problem *prob, const 
mf_parameter_bridge *param_bridge) +inline void TranslateToModelBridge(const mf_model *model, mf_model_bridge *model_bridge) { - auto param = make_param(param_bridge); - return mf_train(prob, param); + model_bridge->fun = model->fun; + model_bridge->m = model->m; + model_bridge->n = model->n; + model_bridge->k = model->k; + model_bridge->b = model->b; + model_bridge->P = model->P; + model_bridge->Q = model->Q; } -EXPORT_API(mf_model*) MFTrainWithValidation(const mf_problem *tr, const mf_problem *va, const mf_parameter_bridge *param_bridge) +inline void TranslateToModel(const mf_model_bridge *model_bridge, mf_model *model) { - auto param = make_param(param_bridge); - return mf_train_with_validation(tr, va, param); + model->fun = model_bridge->fun; + model->m = model_bridge->m; + model->n = model_bridge->n; + model->k = model_bridge->k; + model->b = model_bridge->b; + model->P = model_bridge->P; + model->Q = model_bridge->Q; } -EXPORT_API(float) MFCrossValidation(const mf_problem *prob, int nr_folds, const mf_parameter_bridge *param_bridge) +EXPORT_API(void) MFDestroyModel(mf_model_bridge *&model_bridge) { - auto param = make_param(param_bridge); - return mf_cross_validation(prob, nr_folds, param); + // Transfer the ownership of P and Q back to the original LIBMF class, so that + // mf_destroy_model can be called. + auto model = new mf_model; + model->P = model_bridge->P; + model->Q = model_bridge->Q; + mf_destroy_model(&model); // delete model, model->P, and model->Q. + + // Delete bridge class allocated in MFTrain or MFTrainWithValidation. + delete model_bridge; + model_bridge = nullptr; +} + +EXPORT_API(mf_model_bridge*) MFTrain(const mf_problem_bridge *prob_bridge, const mf_parameter_bridge *param_bridge) +{ + // Convert objects created outside LIBMF. Notice that the called LIBMF function doesn't take the ownership of + // allocated memory in those external objects.
+ auto prob = TranslateToProblem(prob_bridge); + auto param = TranslateToParam(param_bridge); + + // The model contains 3 allocated things --- itself, P, and Q. + // We will delete itself and transfer the ownership of P and Q to the associated bridge class. The bridge class + // will then be sent to C#. + auto model = mf_train(&prob, param); + auto model_bridge = new mf_model_bridge; + TranslateToModelBridge(model, model_bridge); + delete model; + return model_bridge; // To clean memory up, we need to delete model_bridge, model_bridge->P, and model_bridge->Q. +} + +EXPORT_API(mf_model_bridge*) MFTrainWithValidation(const mf_problem_bridge *tr_bridge, const mf_problem_bridge *va_bridge, const mf_parameter_bridge *param_bridge) +{ + // Convert objects created outside LIBMF. Notice that the called LIBMF function doesn't take the ownership of + // allocated memory in those external objects. + auto tr = TranslateToProblem(tr_bridge); + auto va = TranslateToProblem(va_bridge); + auto param = TranslateToParam(param_bridge); + + // The model contains 3 allocated things --- itself, P, and Q. + // We will delete itself and transfer the ownership of P and Q to the associated bridge class. The bridge class + // will then be sent to C#. + auto model = mf_train_with_validation(&tr, &va, param); + auto model_bridge = new mf_model_bridge; + TranslateToModelBridge(model, model_bridge); + delete model; + return model_bridge; // To clean memory up, we need to delete model_bridge, model_bridge->P, and model_bridge->Q. 
+} + +EXPORT_API(float) MFCrossValidation(const mf_problem_bridge *prob_bridge, int32_t nr_folds, const mf_parameter_bridge *param_bridge) +{ + auto param = TranslateToParam(param_bridge); + auto prob = TranslateToProblem(prob_bridge); + return mf_cross_validation(&prob, nr_folds, param); } -EXPORT_API(float) MFPredict(const mf_model *model, int p_idx, int q_idx) +EXPORT_API(float) MFPredict(const mf_model_bridge *model_bridge, int32_t p_idx, int32_t q_idx) { - return mf_predict(model, p_idx, q_idx); + mf_model model; + TranslateToModel(model_bridge, &model); + return mf_predict(&model, p_idx, q_idx); } diff --git a/src/Native/MatrixFactorizationNative/UnmanagedMemory.h b/src/Native/MatrixFactorizationNative/UnmanagedMemory.h index 2b07d7843b..c63096bf93 100644 --- a/src/Native/MatrixFactorizationNative/UnmanagedMemory.h +++ b/src/Native/MatrixFactorizationNative/UnmanagedMemory.h @@ -27,12 +27,31 @@ struct mf_parameter_bridge uint8_t copy_data; }; -EXPORT_API(void) MFDestroyModel(mf_model *&model); +struct mf_problem_bridge +{ + int32_t m; + int32_t n; + int64_t nnz; + struct mf_node *R; +}; + +struct mf_model_bridge +{ + int32_t fun; + int32_t m; + int32_t n; + int32_t k; + float b; + float *P; + float *Q; +}; + +EXPORT_API(void) MFDestroyModel(mf_model_bridge *&model); -EXPORT_API(mf_model*) MFTrain(const mf_problem *prob, const mf_parameter_bridge *parameter_bridge); +EXPORT_API(mf_model_bridge*) MFTrain(const mf_problem_bridge *prob_bridge, const mf_parameter_bridge *parameter_bridge); -EXPORT_API(mf_model*) MFTrainWithValidation(const mf_problem *tr, const mf_problem *va, const mf_parameter_bridge *parameter_bridge); +EXPORT_API(mf_model_bridge*) MFTrainWithValidation(const mf_problem_bridge *tr, const mf_problem_bridge *va, const mf_parameter_bridge *parameter_bridge); -EXPORT_API(float) MFCrossValidation(const mf_problem *prob, int nr_folds, const mf_parameter_bridge* parameter_bridge); +EXPORT_API(float) MFCrossValidation(const mf_problem_bridge *prob, 
int32_t nr_folds, const mf_parameter_bridge* parameter_bridge); -EXPORT_API(float) MFPredict(const mf_model *model, int p_idx, int q_idx); +EXPORT_API(float) MFPredict(const mf_model_bridge *model, int32_t p_idx, int32_t q_idx); diff --git a/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs b/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs index 9104008ce2..24c4e13f40 100644 --- a/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs +++ b/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs @@ -17,7 +17,7 @@ public sealed class MatrixFactorizationFactAttribute : EnvironmentSpecificFactAt /// protected override bool IsEnvironmentSupported() { - return Environment.Is64BitProcess; + return true; } } } \ No newline at end of file From 86dccf0b898c95c6eca701fef7e29e9ff529a810 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 8 Apr 2019 08:54:58 -0700 Subject: [PATCH 2/3] Fix typo --- src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs index 5220140e91..6d8f0d67de 100644 --- a/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs +++ b/src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs @@ -26,7 +26,7 @@ private struct MFNode public int U; /// - /// olumn index; + /// Column index; /// public int V; From 2d1afe1bf6e850f4f3bb642fb546e83ff115f084 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 8 Apr 2019 10:10:31 -0700 Subject: [PATCH 3/3] Remove old message --- .../Attributes/MatrixFactorizationFactAttribute.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs b/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs index 
24c4e13f40..4b9969c54f 100644 --- a/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs +++ b/test/Microsoft.ML.TestFramework/Attributes/MatrixFactorizationFactAttribute.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.TestFramework.Attributes /// public sealed class MatrixFactorizationFactAttribute : EnvironmentSpecificFactAttribute { - public MatrixFactorizationFactAttribute() : base("Disabled - this test is being fixed as part of https://github.com/dotnet/machinelearning/issues/1441") + public MatrixFactorizationFactAttribute() : base("") { }