diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs
index 6e08ee38f4..885ab84d9a 100644
--- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs
+++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs
@@ -1110,20 +1110,22 @@ private static VersionInfo GetVersionInfo()
public readonly float Min;
/// The value of probability in each bin.
- public readonly float[] BinProbs;
+ public IReadOnlyList BinProbs => _binProbs;
+
+ private readonly float[] _binProbs;
/// Initializes a new instance of .
/// The to use.
/// The minimum value in the first bin.
/// The values of the probability in each bin.
/// The bin size.
- public NaiveCalibrator(IHostEnvironment env, float min, float binSize, float[] binProbs)
+ internal NaiveCalibrator(IHostEnvironment env, float min, float binSize, float[] binProbs)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register(RegistrationName);
Min = min;
BinSize = binSize;
- BinProbs = binProbs;
+ _binProbs = binProbs;
}
private NaiveCalibrator(IHostEnvironment env, ModelLoadContext ctx)
@@ -1147,9 +1149,9 @@ private NaiveCalibrator(IHostEnvironment env, ModelLoadContext ctx)
Min = ctx.Reader.ReadFloat();
_host.CheckDecode(FloatUtils.IsFinite(Min));
- BinProbs = ctx.Reader.ReadFloatArray();
- _host.CheckDecode(Utils.Size(BinProbs) > 0);
- _host.CheckDecode(BinProbs.All(x => (0 <= x && x <= 1)));
+ _binProbs = ctx.Reader.ReadFloatArray();
+ _host.CheckDecode(Utils.Size(_binProbs) > 0);
+ _host.CheckDecode(_binProbs.All(x => (0 <= x && x <= 1)));
}
private static NaiveCalibrator Create(IHostEnvironment env, ModelLoadContext ctx)
@@ -1180,7 +1182,7 @@ private void SaveCore(ModelSaveContext ctx)
ctx.Writer.Write(sizeof(float));
ctx.Writer.Write(BinSize);
ctx.Writer.Write(Min);
- ctx.Writer.WriteSingleArray(BinProbs);
+ ctx.Writer.WriteSingleArray(_binProbs);
}
///
@@ -1190,8 +1192,8 @@ public float PredictProbability(float output)
{
if (float.IsNaN(output))
return output;
- int binIdx = GetBinIdx(output, Min, BinSize, BinProbs.Length);
- return BinProbs[binIdx];
+ int binIdx = GetBinIdx(output, Min, BinSize, _binProbs.Length);
+ return _binProbs[binIdx];
}
// get the bin for a given output
@@ -1205,11 +1207,6 @@ internal static int GetBinIdx(float output, float min, float binSize, int numBin
return binIdx;
}
- /// Get the summary of current calibrator settings
- public string GetSummary()
- {
- return string.Format("Naive Calibrator has {0} bins, starting at {1}, with bin size of {2}", BinProbs.Length, Min, BinSize);
- }
}
///
@@ -1218,8 +1215,110 @@ public string GetSummary()
[BestFriend]
internal abstract class CalibratorTrainerBase : ICalibratorTrainer
{
+        /// <summary>
+        /// Bounded store of scored training examples. Up to the configured capacity every
+        /// accepted example is kept; beyond that, a new example either overwrites a randomly
+        /// chosen slot or is dropped. Enumerating the store yields the kept examples sorted by score.
+        /// </summary>
+        public sealed class DataStore : IEnumerable<DataStore.DataItem>
+        {
+            /// <summary>One stored example: its label, weight and score.</summary>
+            public readonly struct DataItem
+            {
+                // The actual binary label of this example.
+                public readonly bool Target;
+                // The weight associated with this example.
+                public readonly float Weight;
+                // The output of the example.
+                public readonly float Score;
+
+                public DataItem(bool target, float weight, float score)
+                {
+                    Target = target;
+                    Weight = weight;
+                    Score = score;
+                }
+            }
+
+            // REVIEW: Should probably be a long.
+            private int _itemsSeen;
+            private readonly Random _random;
+
+            private static int _randSeed;
+
+            private readonly int _capacity;
+            private DataItem[] _data;
+            // True while _data is known to be in score order; cleared whenever a slot is written.
+            private bool _dataSorted;
+
+            /// <summary>Creates a store with a capacity of 1000000 items.</summary>
+            public DataStore()
+                : this(1000000)
+            {
+            }
+
+            /// <summary>Creates a store that keeps at most <paramref name="capacity"/> items.</summary>
+            /// <param name="capacity">Maximum number of items kept; must be positive.</param>
+            public DataStore(int capacity)
+            {
+                Contracts.CheckParam(capacity > 0, nameof(capacity), "must be positive");
+
+                _capacity = capacity;
+                _data = new DataItem[Math.Min(4, capacity)];
+                // REVIEW: Horrifying. At a point when we have the IHost stuff plumbed through
+                // calibrator training and also have the appetite to change a bunch of baselines, this
+                // should be seeded using the host random.
+                _random = new System.Random(System.Threading.Interlocked.Increment(ref _randSeed) - 1);
+            }
+
+            /// <summary>
+            /// An enumerator over the entries sorted by score.
+            /// </summary>
+            /// <returns>An enumerator over the stored items in ascending score order.</returns>
+            public IEnumerator<DataItem> GetEnumerator()
+            {
+                if (!_dataSorted)
+                {
+                    var comp = Comparer<DataItem>.Create((x, y) => x.Score.CompareTo(y.Score));
+                    Array.Sort(_data, 0, Math.Min(_itemsSeen, _capacity), comp);
+                    _dataSorted = true;
+                }
+                return _data.Take(_itemsSeen).GetEnumerator();
+            }
+
+            IEnumerator IEnumerable.GetEnumerator()
+            {
+                return GetEnumerator();
+            }
+
+            /// <summary>
+            /// Offers one example to the store. Examples with zero weight or a NaN score are ignored.
+            /// </summary>
+            /// <param name="score">The output of the example.</param>
+            /// <param name="isPositive">The actual binary label of the example.</param>
+            /// <param name="weight">The weight associated with the example.</param>
+            public void AddToStore(float score, bool isPositive, float weight)
+            {
+                // Can't calibrate NaN scores.
+                if (weight == 0 || float.IsNaN(score))
+                    return;
+                int index = _itemsSeen++;
+                if (_itemsSeen <= _capacity)
+                    Utils.EnsureSize(ref _data, _itemsSeen, _capacity);
+                else
+                {
+                    index = _random.Next(_itemsSeen); // 0 to items_seen - 1.
+                    if (index >= _capacity) // Don't keep it.
+                        return;
+                }
+                _data[index] = new DataItem(isPositive, weight, score);
+                // A slot was just written, so any earlier sort is stale. Without this reset, an
+                // enumeration following a later add would hand back items out of score order.
+                _dataSorted = false;
+            }
+        }
protected readonly IHost Host;
- protected CalibrationDataStore Data;
+ protected DataStore Data;
protected const int DefaultMaxNumSamples = 1000000;
protected int MaxNumSamples;
@@ -1239,7 +1319,7 @@ protected CalibratorTrainerBase(IHostEnvironment env, string name)
bool ICalibratorTrainer.ProcessTrainingExample(float output, bool labelIs1, float weight)
{
if (Data == null)
- Data = new CalibrationDataStore(MaxNumSamples);
+ Data = new DataStore(MaxNumSamples);
Data.AddToStore(output, labelIs1, weight);
return true;
}
@@ -1485,7 +1565,13 @@ private static VersionInfo GetVersionInfo()
private readonly IHost _host;
+ ///
+ /// Slope value for this calibrator.
+ ///
public Double Slope { get; }
+ ///
+ /// Offset value for this calibrator.
+ ///
public Double Offset { get; }
bool ICanSavePfa.CanSavePfa => true;
bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;
@@ -1493,7 +1579,7 @@ private static VersionInfo GetVersionInfo()
///
/// Initializes a new instance of .
///
- public PlattCalibrator(IHostEnvironment env, Double slope, Double offset)
+ internal PlattCalibrator(IHostEnvironment env, Double slope, Double offset)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register(RegistrationName);
@@ -1556,6 +1642,7 @@ private void SaveCore(ModelSaveContext ctx)
}
}
+ /// Given a classifier output, produce the probability.
public float PredictProbability(float output)
{
if (float.IsNaN(output))
@@ -1563,7 +1650,7 @@ public float PredictProbability(float output)
return PredictProbability(output, Slope, Offset);
}
- public static float PredictProbability(float output, Double a, Double b)
+ internal static float PredictProbability(float output, Double a, Double b)
{
return (float)(1 / (1 + Math.Exp(a * output + b)));
}
@@ -1597,11 +1684,6 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] scoreProbablityColu
return true;
}
- public string GetSummary()
- {
- return string.Format("Platt calibrator parameters: A={0}, B={1}", Slope, Offset);
- }
-
IParameterMixer IParameterMixer.CombineParameters(IList calibrators)
{
Double a = 0;
@@ -1703,12 +1785,17 @@ public override ICalibrator CreateCalibrator(IChannel ch)
///
/// The pair-adjacent violators calibrator.
- /// The function that is implemented by this calibrator is:
- /// f(x) = v_i, if minX_i <= x <= maxX_i
- /// = linear interpolate between v_i and v_i+1, if maxX_i < x < minX_i+1
- /// = v_0, if x < minX_0
- /// = v_n, if x > maxX_n
///
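+ /// <remarks>
+ /// The function that is implemented by this calibrator is:
+ /// P(x) =
+ /// <list type="bullet">
+ /// <item><description><see cref="Values"/>[i], if <see cref="Mins"/>[i] &lt;= x &lt;= <see cref="Maxes"/>[i]</description></item>
+ /// <item><description>Linear interpolation between <see cref="Values"/>[i] and <see cref="Values"/>[i+1], if <see cref="Maxes"/>[i] &lt; x &lt; <see cref="Mins"/>[i+1]</description></item>
+ /// <item><description><see cref="Values"/>[0], if x &lt; <see cref="Mins"/>[0]</description></item>
+ /// <item><description><see cref="Values"/>[n], if x &gt; <see cref="Maxes"/>[n]</description></item>
+ /// </list>
+ /// </remarks>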
public sealed class PavCalibrator : ICalibrator, ICanSaveInBinaryFormat
{
internal const string LoaderSignature = "PAVCaliExec";
@@ -1731,8 +1818,17 @@ private static VersionInfo GetVersionInfo()
private const float MaxToReturn = 1 - Epsilon; // max predicted is 1 - min;
private readonly IHost _host;
+ ///
+ /// Bottom borders of PAV intervals.
+ ///
public readonly ImmutableArray Mins;
+ ///
+ /// Upper borders of PAV intervals.
+ ///
public readonly ImmutableArray Maxes;
+ ///
+ /// Values of PAV intervals.
+ ///
public readonly ImmutableArray Values;
///
@@ -1742,7 +1838,7 @@ private static VersionInfo GetVersionInfo()
/// The minimum values for each piece.
/// The maximum values for each piece.
/// The actual values for each piece.
- public PavCalibrator(IHostEnvironment env, ImmutableArray mins, ImmutableArray maxes, ImmutableArray values)
+ internal PavCalibrator(IHostEnvironment env, ImmutableArray mins, ImmutableArray maxes, ImmutableArray values)
{
Contracts.AssertValue(env);
_host = env.Register(RegistrationName);
@@ -1851,6 +1947,7 @@ private void SaveCore(ModelSaveContext ctx)
_host.CheckDecode(valuePrev <= 1);
}
+ /// Given a classifier output, produce the probability.
public float PredictProbability(float output)
{
if (float.IsNaN(output))
@@ -1890,95 +1987,6 @@ private float FindValue(float score)
float t = (score - Maxes[pos - 1]) / (Mins[pos] - Maxes[pos - 1]);
return Values[pos - 1] + t * (Values[pos] - Values[pos - 1]);
}
-
- public string GetSummary()
- {
- return string.Format("PAV calibrator with {0} intervals", Mins.Length);
- }
- }
-
- public sealed class CalibrationDataStore : IEnumerable
- {
- public readonly struct DataItem
- {
- // The actual binary label of this example.
- public readonly bool Target;
- // The weight associated with this example.
- public readonly float Weight;
- // The output of the example.
- public readonly float Score;
-
- public DataItem(bool target, float weight, float score)
- {
- Target = target;
- Weight = weight;
- Score = score;
- }
- }
-
- // REVIEW: Should probably be a long.
- private int _itemsSeen;
- private readonly Random _random;
-
- private static int _randSeed;
-
- private readonly int _capacity;
- private DataItem[] _data;
- private bool _dataSorted;
-
- public CalibrationDataStore()
- : this(1000000)
- {
- }
-
- public CalibrationDataStore(int capacity)
- {
- Contracts.CheckParam(capacity > 0, nameof(capacity), "must be positive");
-
- _capacity = capacity;
- _data = new DataItem[Math.Min(4, capacity)];
- // REVIEW: Horrifying. At a point when we have the IHost stuff plumbed through
- // calibrator training and also have the appetite to change a bunch of baselines, this
- // should be seeded using the host random.
- _random = new System.Random(System.Threading.Interlocked.Increment(ref _randSeed) - 1);
- }
-
- ///
- /// An enumerator over the entries sorted by score.
- ///
- ///
- public IEnumerator GetEnumerator()
- {
- if (!_dataSorted)
- {
- var comp = Comparer.Create((x, y) => x.Score.CompareTo(y.Score));
- Array.Sort(_data, 0, Math.Min(_itemsSeen, _capacity), comp);
- _dataSorted = true;
- }
- return _data.Take(_itemsSeen).GetEnumerator();
- }
-
- IEnumerator IEnumerable.GetEnumerator()
- {
- return GetEnumerator();
- }
-
- public void AddToStore(float score, bool isPositive, float weight)
- {
- // Can't calibrate NaN scores.
- if (weight == 0 || float.IsNaN(score))
- return;
- int index = _itemsSeen++;
- if (_itemsSeen <= _capacity)
- Utils.EnsureSize(ref _data, _itemsSeen, _capacity);
- else
- {
- index = _random.Next(_itemsSeen); // 0 to items_seen - 1.
- if (index >= _capacity) // Don't keep it.
- return;
- }
- _data[index] = new DataItem(isPositive, weight, score);
- }
}
internal static class Calibrate
diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs
index f352becdbb..d0e353c3c6 100644
--- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs
+++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs
@@ -556,19 +556,25 @@ private static VersionInfo GetVersionInfo()
/// and all subsequent correspond to each weight in turn. This is null if and
/// only if is false.
///
- public readonly IReadOnlyList StandardErrors;
+ public IReadOnlyList StandardErrors => _standardErrors;
+
+ private readonly double[] _standardErrors;
///
/// t-Statistic values corresponding to each of the model standard errors. This is
/// null if and only if is false.
///
- public readonly IReadOnlyList TValues;
+ public IReadOnlyList TValues => _tValues;
+
+ private readonly double[] _tValues;
///
/// p-values corresponding to each of the model standard errors. This is null
/// if and only if is false.
///
- public readonly IReadOnlyList PValues;
+ public IReadOnlyList PValues => _pValues;
+
+ private readonly double[] _pValues;
///
/// Constructs a new OLS regression model parameters from trained model.
@@ -612,9 +618,9 @@ internal OlsLinearRegressionModelParameters(IHostEnvironment env, in VBuffer= 0);
+ Host.CheckDecode(FloatUtils.IsFinite(_standardErrors[i]) && _standardErrors[i] >= 0);
- TValues = ctx.Reader.ReadDoubleArray(m);
- TValueCheckDecode(Bias, TValues[0]);
+ _tValues = ctx.Reader.ReadDoubleArray(m);
+ TValueCheckDecode(Bias, _tValues[0]);
var weightValues = Weight.GetValues();
for (int i = 1; i < m; ++i)
- TValueCheckDecode(weightValues[i - 1], TValues[i]);
+ TValueCheckDecode(weightValues[i - 1], _tValues[i]);
- PValues = ctx.Reader.ReadDoubleArray(m);
+ _pValues = ctx.Reader.ReadDoubleArray(m);
for (int i = 0; i < m; ++i)
- ProbCheckDecode(PValues[i]);
+ ProbCheckDecode(_pValues[i]);
}
private protected override void SaveCore(ModelSaveContext ctx)
@@ -682,15 +688,15 @@ private protected override void SaveCore(ModelSaveContext ctx)
ctx.Writer.WriteBoolByte(HasStatistics);
if (!HasStatistics)
{
- Contracts.Assert(StandardErrors == null & TValues == null & PValues == null);
+ Contracts.Assert(_standardErrors == null & _tValues == null & _pValues == null);
return;
}
- Contracts.Assert(Weight.Length + 1 == StandardErrors.Count);
- Contracts.Assert(Weight.Length + 1 == TValues.Count);
- Contracts.Assert(Weight.Length + 1 == PValues.Count);
- ctx.Writer.WriteDoublesNoCount(StandardErrors as double[]);
- ctx.Writer.WriteDoublesNoCount(TValues as double[]);
- ctx.Writer.WriteDoublesNoCount(PValues as double[]);
+ Contracts.Assert(Weight.Length + 1 == _standardErrors.Length);
+ Contracts.Assert(Weight.Length + 1 == _tValues.Length);
+ Contracts.Assert(Weight.Length + 1 == _pValues.Length);
+ ctx.Writer.WriteDoublesNoCount(_standardErrors);
+ ctx.Writer.WriteDoublesNoCount(_tValues);
+ ctx.Writer.WriteDoublesNoCount(_pValues);
}
private static void TValueCheckDecode(Double param, Double tvalue)
@@ -725,14 +731,14 @@ private protected override void SaveSummary(TextWriter writer, RoleMappedSchema
writer.WriteLine();
writer.WriteLine("Index\tName\tWeight\tStdErr\tt-Value\tp-Value");
const string format = "{0}\t{1}\t{2}\t{3:g4}\t{4:g4}\t{5:e4}";
- writer.WriteLine(format, "", "Bias", Bias, StandardErrors[0], TValues[0], PValues[0]);
+ writer.WriteLine(format, "", "Bias", Bias, _standardErrors[0], _tValues[0], _pValues[0]);
Contracts.Assert(Weight.IsDense);
var coeffs = Weight.GetValues();
for (int i = 0; i < coeffs.Length; i++)
{
var name = names.GetItemOrDefault(i);
writer.WriteLine(format, i, name.IsEmpty ? $"f{i}" : name.ToString(),
- coeffs[i], StandardErrors[i + 1], TValues[i + 1], PValues[i + 1]);
+ coeffs[i], _standardErrors[i + 1], _tValues[i + 1], _pValues[i + 1]);
}
}
else
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
index d4cb435903..77755ba91e 100644
--- a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
@@ -64,15 +64,19 @@ private static VersionInfo GetVersionInfo()
/// This is two dimensional matrix with size of * flattened into one-dimensional matrix.
/// Row by row.
///
- public readonly IReadOnlyList LeftFactorMatrix;
+ public IReadOnlyList LeftFactorMatrix => _leftFactorMatrix;
+
+ private readonly float[] _leftFactorMatrix;
///
- /// Left approximation matrix
+ /// Right approximation matrix
///
///
/// This is two dimensional matrix with size of * flattened into one-dimensional matrix.
/// Row by row.
///
- public readonly IReadOnlyList RightFactorMatrix;
+ public IReadOnlyList RightFactorMatrix => _rightFactorMatrix;
+
+ private readonly float[] _rightFactorMatrix;
PredictionKind IPredictor.PredictionKind => PredictionKind.Recommendation;
@@ -91,12 +95,12 @@ internal MatrixFactorizationModelParameters(IHostEnvironment env, SafeTrainingAn
_host.CheckValue(matrixColumnIndexType, nameof(matrixColumnIndexType));
_host.CheckValue(matrixRowIndexType, nameof(matrixRowIndexType));
buffer.Get(out NumberOfRows, out NumberOfColumns, out ApproximationRank, out var leftFactorMatrix, out var rightFactorMatrix);
- LeftFactorMatrix = leftFactorMatrix;
- RightFactorMatrix = rightFactorMatrix;
+ _leftFactorMatrix = leftFactorMatrix;
+ _rightFactorMatrix = rightFactorMatrix;
_host.Assert(NumberOfColumns == matrixColumnIndexType.GetCountAsInt32(_host));
_host.Assert(NumberOfRows == matrixRowIndexType.GetCountAsInt32(_host));
- _host.Assert(LeftFactorMatrix.Count == NumberOfRows * ApproximationRank);
- _host.Assert(RightFactorMatrix.Count == ApproximationRank * NumberOfColumns);
+ _host.Assert(_leftFactorMatrix.Length == NumberOfRows * ApproximationRank);
+ _host.Assert(_rightFactorMatrix.Length == ApproximationRank * NumberOfColumns);
MatrixColumnIndexType = matrixColumnIndexType;
MatrixRowIndexType = matrixRowIndexType;
@@ -134,8 +138,8 @@ private MatrixFactorizationModelParameters(IHostEnvironment env, ModelLoadContex
ApproximationRank = ctx.Reader.ReadInt32();
_host.CheckDecode(ApproximationRank > 0);
- LeftFactorMatrix = Utils.ReadSingleArray(ctx.Reader, checked(NumberOfRows * ApproximationRank));
- RightFactorMatrix = Utils.ReadSingleArray(ctx.Reader, checked(NumberOfColumns * ApproximationRank));
+ _leftFactorMatrix = Utils.ReadSingleArray(ctx.Reader, checked(NumberOfRows * ApproximationRank));
+ _rightFactorMatrix = Utils.ReadSingleArray(ctx.Reader, checked(NumberOfColumns * ApproximationRank));
MatrixColumnIndexType = new KeyType(typeof(uint), NumberOfColumns);
MatrixRowIndexType = new KeyType(typeof(uint), NumberOfRows);
@@ -173,10 +177,10 @@ void ICanSaveModel.Save(ModelSaveContext ctx)
ctx.Writer.Write(NumberOfRows);
ctx.Writer.Write(NumberOfColumns);
ctx.Writer.Write(ApproximationRank);
- _host.Check(Utils.Size(LeftFactorMatrix) == NumberOfRows * ApproximationRank, "Unexpected matrix size of a factor matrix (matrix P in LIBMF paper)");
- _host.Check(Utils.Size(RightFactorMatrix) == NumberOfColumns * ApproximationRank, "Unexpected matrix size of a factor matrix (matrix Q in LIBMF paper)");
- Utils.WriteSinglesNoCount(ctx.Writer, LeftFactorMatrix as float[]);
- Utils.WriteSinglesNoCount(ctx.Writer, RightFactorMatrix as float[]);
+ _host.Check(Utils.Size(_leftFactorMatrix) == NumberOfRows * ApproximationRank, "Unexpected matrix size of a factor matrix (matrix P in LIBMF paper)");
+ _host.Check(Utils.Size(_rightFactorMatrix) == NumberOfColumns * ApproximationRank, "Unexpected matrix size of a factor matrix (matrix Q in LIBMF paper)");
+ Utils.WriteSinglesNoCount(ctx.Writer, _leftFactorMatrix);
+ Utils.WriteSinglesNoCount(ctx.Writer, _rightFactorMatrix);
}
///
@@ -186,18 +190,18 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema)
{
writer.WriteLine("# Imputed matrix is P * Q'");
writer.WriteLine("# P in R^({0} x {1}), rows correpond to Y item", NumberOfRows, ApproximationRank);
- for (int i = 0; i < LeftFactorMatrix.Count; ++i)
+ for (int i = 0; i < _leftFactorMatrix.Length; ++i)
{
- writer.Write(LeftFactorMatrix[i].ToString("G"));
+ writer.Write(_leftFactorMatrix[i].ToString("G"));
if (i % ApproximationRank == ApproximationRank - 1)
writer.WriteLine();
else
writer.Write('\t');
}
writer.WriteLine("# Q in R^({0} x {1}), rows correpond to X item", NumberOfColumns, ApproximationRank);
- for (int i = 0; i < RightFactorMatrix.Count; ++i)
+ for (int i = 0; i < _rightFactorMatrix.Length; ++i)
{
- writer.Write(RightFactorMatrix[i].ToString("G"));
+ writer.Write(_rightFactorMatrix[i].ToString("G"));
if (i % ApproximationRank == ApproximationRank - 1)
writer.WriteLine();
else
@@ -272,7 +276,7 @@ private float Score(int columnIndex, int rowIndex)
// Starting position of the columnIndex-th column in the right factor factor matrix
int columnOffset = columnIndex * ApproximationRank;
for (int i = 0; i < ApproximationRank; i++)
- score += LeftFactorMatrix[rowOffset + i] * RightFactorMatrix[columnOffset + i];
+ score += _leftFactorMatrix[rowOffset + i] * _rightFactorMatrix[columnOffset + i];
return score;
}