Skip to content

added in DcgTruncationLevel to AutoML api #5433

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 12, 2020
22 changes: 17 additions & 5 deletions src/Microsoft.ML.AutoML/API/RankingExperiment.cs
Original file line number Diff line number Diff line change
@@ -26,10 +26,20 @@ public sealed class RankingExperimentSettings : ExperimentSettings
/// The default value is a collection auto-populated with all possible trainers (all values of <see cref="RankingTrainer" />).
/// </value>
public ICollection<RankingTrainer> Trainers { get; }

/// <summary>
/// Maximum truncation level for computing (N)DCG
/// </summary>
/// <value>
/// The default value is 10.
/// </value>
public uint OptimizationMetricTruncationLevel { get; set; }

/// <summary>
/// Initializes ranking experiment settings with their defaults: NDCG as the optimizing metric,
/// every <see cref="RankingTrainer" /> value enabled, and a truncation level of 10.
/// </summary>
public RankingExperimentSettings()
{
    // Enumerate every declared trainer once, then materialize it into the mutable collection.
    var allTrainers = Enum.GetValues(typeof(RankingTrainer)).Cast<RankingTrainer>();

    OptimizingMetric = RankingMetric.Ndcg;
    Trainers = allTrainers.ToList();
    OptimizationMetricTruncationLevel = 10;
}
}
public enum RankingMetric
@@ -68,10 +78,11 @@ public static class RankingExperimentResultExtensions
/// </summary>
/// <param name="results">Enumeration of AutoML experiment run results.</param>
/// <param name="metric">Metric to consider when selecting the best run.</param>
/// <param name="optimizationMetricTruncationLevel">Maximum truncation level for computing (N)DCG. Defaults to 10.</param>
/// <returns>The best experiment run.</returns>
public static RunDetail<RankingMetrics> Best(this IEnumerable<RunDetail<RankingMetrics>> results, RankingMetric metric = RankingMetric.Ndcg, uint optimizationMetricTruncationLevel = 10)
{
    // The agent is built without an MLContext (null); presumably GetBestRun only scores
    // metrics that were already computed and never evaluates data — TODO confirm.
    var metricsAgent = new RankingMetricsAgent(null, metric, optimizationMetricTruncationLevel);
    var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing;
    return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing);
}
@@ -81,10 +92,11 @@ public static RunDetail<RankingMetrics> Best(this IEnumerable<RunDetail<RankingM
/// </summary>
/// <param name="results">Enumeration of AutoML experiment cross validation run results.</param>
/// <param name="metric">Metric to consider when selecting the best run.</param>
/// <param name="optimizationMetricTruncationLevel">Maximum truncation level for computing (N)DCG. Defaults to 10.</param>
/// <returns>The best experiment run.</returns>
public static CrossValidationRunDetail<RankingMetrics> Best(this IEnumerable<CrossValidationRunDetail<RankingMetrics>> results, RankingMetric metric = RankingMetric.Ndcg, uint optimizationMetricTruncationLevel = 10)
{
    // The agent is built without an MLContext (null); presumably GetBestRun only scores
    // metrics that were already computed and never evaluates data — TODO confirm.
    var metricsAgent = new RankingMetricsAgent(null, metric, optimizationMetricTruncationLevel);
    var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing;
    return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing);
}
@@ -103,7 +115,7 @@ public sealed class RankingExperiment : ExperimentBase<RankingMetrics, RankingEx
{
internal RankingExperiment(MLContext context, RankingExperimentSettings settings)
: base(context,
new RankingMetricsAgent(context, settings.OptimizingMetric),
new RankingMetricsAgent(context, settings.OptimizingMetric, settings.OptimizationMetricTruncationLevel),
new OptimizingMetricInfo(settings.OptimizingMetric),
settings,
TaskKind.Ranking,
Original file line number Diff line number Diff line change
@@ -2,19 +2,30 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.AutoML
{
internal class RankingMetricsAgent : IMetricsAgent<RankingMetrics>
{
private readonly MLContext _mlContext;
private readonly RankingMetric _optimizingMetric;
private readonly uint _dcgTruncationLevel;

/// <summary>
/// Creates an agent that scores ranking runs by NDCG or DCG at a given truncation level.
/// </summary>
/// <param name="mlContext">Context used to run evaluation and to build argument exceptions.</param>
/// <param name="metric">Ranking metric to optimize.</param>
/// <param name="optimizationMetricTruncationLevel">
/// Truncation level k of the (N)DCG@k value used as the score. Must be at least 1 and no larger
/// than <see cref="int.MaxValue" />.
/// </param>
public RankingMetricsAgent(MLContext mlContext, RankingMetric metric, uint optimizationMetricTruncationLevel)
{
    _mlContext = mlContext;
    _optimizingMetric = metric;

    // The level is unsigned, so zero is the only value that is too small.
    // NOTE(review): callers such as RankingExperimentResultExtensions.Best pass a null context;
    // confirm ExceptUserArg tolerates a null receiver.
    if (optimizationMetricTruncationLevel == 0)
        throw _mlContext.ExceptUserArg(nameof(optimizationMetricTruncationLevel), "DCG Truncation Level must be greater than 0");

    // The level is later cast to int for indexing and for the evaluator options; a larger value
    // would wrap to a negative number there, so reject it up front.
    if (optimizationMetricTruncationLevel > int.MaxValue)
        throw _mlContext.ExceptUserArg(nameof(optimizationMetricTruncationLevel), "DCG Truncation Level must not be greater than " + int.MaxValue);

    // Only the requested level is stored here. EvaluateMetrics asks the evaluator for at least
    // 10 levels (e.g. NDCG@10), or twice this level when that is larger.
    _dcgTruncationLevel = optimizationMetricTruncationLevel;
}

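// Optimizing metric used: NDCG@k or DCG@k, where k is the truncation level passed to the constructor (the last available level is used when fewer than k were computed)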
@@ -28,11 +39,9 @@ public double GetScore(RankingMetrics metrics)
switch (_optimizingMetric)
{
case RankingMetric.Ndcg:
return (metrics.NormalizedDiscountedCumulativeGains.Count >= 10) ? metrics.NormalizedDiscountedCumulativeGains[9] :
metrics.NormalizedDiscountedCumulativeGains[metrics.NormalizedDiscountedCumulativeGains.Count - 1];
return metrics.NormalizedDiscountedCumulativeGains[Math.Min(metrics.NormalizedDiscountedCumulativeGains.Count, (int)_dcgTruncationLevel) - 1];
case RankingMetric.Dcg:
return (metrics.DiscountedCumulativeGains.Count >= 10) ? metrics.DiscountedCumulativeGains[9] :
metrics.DiscountedCumulativeGains[metrics.DiscountedCumulativeGains.Count-1];
return metrics.DiscountedCumulativeGains[Math.Min(metrics.DiscountedCumulativeGains.Count, (int)_dcgTruncationLevel) - 1];
default:
throw MetricsAgentUtil.BuildMetricNotSupportedException(_optimizingMetric);
}
@@ -59,7 +68,12 @@ public bool IsModelPerfect(double score)

/// <summary>
/// Evaluates ranking metrics on <paramref name="data" />, requesting enough (N)DCG truncation
/// levels to cover the level this agent optimizes for.
/// </summary>
/// <param name="data">Data to evaluate.</param>
/// <param name="labelColumn">Name of the label column.</param>
/// <param name="groupIdColumn">Name of the group id column.</param>
/// <returns>The metrics produced by the ranking evaluator.</returns>
public RankingMetrics EvaluateMetrics(IDataView data, string labelColumn, string groupIdColumn)
{
    // Always request at least 10 levels (e.g. NDCG@10), or twice the optimization level when that
    // is larger. The arithmetic is done in long and clamped so doubling a large level cannot
    // overflow int.
    long requestedLevels = Math.Max(10L, 2L * _dcgTruncationLevel);

    var rankingEvalOptions = new RankingEvaluatorOptions
    {
        DcgTruncationLevel = (int)Math.Min(int.MaxValue, requestedLevels)
    };

    return _mlContext.Ranking.Evaluate(data, rankingEvalOptions, labelColumn, groupIdColumn);
}
}
}
4 changes: 2 additions & 2 deletions src/Microsoft.ML.AutoML/Utils/BestResultUtil.cs
Original file line number Diff line number Diff line change
@@ -35,9 +35,9 @@ public static RunDetail<MulticlassClassificationMetrics> GetBestRun(IEnumerable<
}

public static RunDetail<RankingMetrics> GetBestRun(IEnumerable<RunDetail<RankingMetrics>> results,
RankingMetric metric)
RankingMetric metric, uint dcgTruncationLevel)
{
var metricsAgent = new RankingMetricsAgent(null, metric);
var metricsAgent = new RankingMetricsAgent(null, metric, dcgTruncationLevel);

var metricInfo = new OptimizingMetricInfo(metric);
return GetBestRun(results, metricsAgent, metricInfo.IsMaximizing);
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Evaluators/RankingEvaluator.cs
Original file line number Diff line number Diff line change
@@ -35,7 +35,7 @@ public sealed class RankingEvaluatorOptions
/// Maximum truncation level for computing (N)DCG
/// </value>
[Argument(ArgumentType.AtMostOnce, HelpText = "Maximum truncation level for computing (N)DCG", ShortName = "t")]
public int DcgTruncationLevel = 3;
public int DcgTruncationLevel = 10;

/// <value>
/// Label relevance gains
@@ -858,7 +858,7 @@ public sealed class Arguments : ArgumentsBase
public string GroupIdColumn;

[Argument(ArgumentType.AtMostOnce, HelpText = "Maximum truncation level for computing (N)DCG", ShortName = "t")]
public int DcgTruncationLevel = 3;
public int DcgTruncationLevel = 10;

[Argument(ArgumentType.AtMostOnce, HelpText = "Label relevance gains", ShortName = "gains")]
public string LabelGains = "0,3,7,15,31";
Original file line number Diff line number Diff line change
@@ -4,38 +4,80 @@ Making per-feature arrays
Changing data from row-wise to column-wise
Processed 40 instances
Binning and forming Feature objects
Reserved memory for tree learner: 10764 bytes
Reserved memory for tree learner: %Number% bytes
Starting to train ...
Not training a calibrator because it is not needed.
Not adding a normalizer.
Making per-feature arrays
Changing data from row-wise to column-wise
Processed 32 instances
Binning and forming Feature objects
Reserved memory for tree learner: 6396 bytes
Reserved memory for tree learner: %Number% bytes
Starting to train ...
Not training a calibrator because it is not needed.
NDCG@1: 0.000000
NDCG@2: 0.000000
NDCG@3: 0.000000
NDCG@4: 0.000000
NDCG@5: 0.000000
NDCG@6: 0.000000
NDCG@7: 0.000000
NDCG@8: 0.000000
NDCG@9: 0.000000
NDCG@10: 0.000000
DCG@1: 0.000000
DCG@2: 0.000000
DCG@3: 0.000000
DCG@4: 0.000000
DCG@5: 0.000000
DCG@6: 0.000000
DCG@7: 0.000000
DCG@8: 0.000000
DCG@9: 0.000000
DCG@10: 0.000000
NDCG@1: 0.000000
NDCG@2: 0.000000
NDCG@3: 0.000000
NDCG@4: 0.000000
NDCG@5: 0.000000
NDCG@6: 0.000000
NDCG@7: 0.000000
NDCG@8: 0.000000
NDCG@9: 0.000000
NDCG@10: 0.000000
DCG@1: 0.000000
DCG@2: 0.000000
DCG@3: 0.000000
DCG@4: 0.000000
DCG@5: 0.000000
DCG@6: 0.000000
DCG@7: 0.000000
DCG@8: 0.000000
DCG@9: 0.000000
DCG@10: 0.000000

OVERALL RESULTS
---------------------------------------
NDCG@1: 0.000000 (0.0000)
NDCG@2: 0.000000 (0.0000)
NDCG@3: 0.000000 (0.0000)
NDCG@4: 0.000000 (0.0000)
NDCG@5: 0.000000 (0.0000)
NDCG@6: 0.000000 (0.0000)
NDCG@7: 0.000000 (0.0000)
NDCG@8: 0.000000 (0.0000)
NDCG@9: 0.000000 (0.0000)
NDCG@10: 0.000000 (0.0000)
DCG@1: 0.000000 (0.0000)
DCG@2: 0.000000 (0.0000)
DCG@3: 0.000000 (0.0000)
DCG@4: 0.000000 (0.0000)
DCG@5: 0.000000 (0.0000)
DCG@6: 0.000000 (0.0000)
DCG@7: 0.000000 (0.0000)
DCG@8: 0.000000 (0.0000)
DCG@9: 0.000000 (0.0000)
DCG@10: 0.000000 (0.0000)

---------------------------------------
Physical memory usage(MB): %Number%
Loading