Skip to content

Tree estimators #855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Sep 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Api/TypedCursor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ public ICursor GetRootCursor()
/// </summary>
public static class CursoringUtils
{
private const string NeedEnvObsoleteMessage = "This method is obsolete. Please use the overload that takes an additional 'env' argument. An environment can be created via new TlcEnvironment().";
private const string NeedEnvObsoleteMessage = "This method is obsolete. Please use the overload that takes an additional 'env' argument. An environment can be created via new LocalEnvironment().";

/// <summary>
/// Generate a strongly-typed cursorable wrapper of the <see cref="IDataView"/>.
Expand Down
57 changes: 57 additions & 0 deletions src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
[assembly: LoadableClass(typeof(RegressionPredictionTransformer<IPredictorProducing<float>>), typeof(RegressionPredictionTransformer), null, typeof(SignatureLoadModel),
"", RegressionPredictionTransformer.LoaderSignature)]

[assembly: LoadableClass(typeof(RankingPredictionTransformer<IPredictorProducing<float>>), typeof(RankingPredictionTransformer), null, typeof(SignatureLoadModel),
"", RankingPredictionTransformer.LoaderSignature)]

namespace Microsoft.ML.Runtime.Data
{
public abstract class PredictionTransformerBase<TModel> : IPredictionTransformer<TModel>, ICanSaveModel
Expand Down Expand Up @@ -301,6 +304,52 @@ private static VersionInfo GetVersionInfo()
}
}

public sealed class RankingPredictionTransformer<TModel> : PredictionTransformerBase<TModel>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RankingPredictionTransformer [](start = 24, length = 28)

Is the reason why we have two types that are identical in practically everything but name, so we can identify ranking estimators vs. regression estimators in a statically typed way?

Copy link
Contributor

@Zruty0 Zruty0 Sep 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this transformer should also expose the group ID column name, at least that would be my belief


In reply to: 218214277 [](ancestors = 218214277)

Copy link
Contributor

@TomFinley TomFinley Sep 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I thought about this: like labels, group IDs are only needed for training, right? So for prediction I don't think they should be required.


In reply to: 218216192 [](ancestors = 218216192,218214277)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So keep it, or make the Regression one Generic and use it for both?


In reply to: 218216839 [](ancestors = 218216839,218216192,218214277)

where TModel : class, IPredictorProducing<float>
{
private readonly GenericScorer _scorer;

public RankingPredictionTransformer(IHostEnvironment env, TModel model, ISchema inputSchema, string featureColumn)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer<TModel>)), model, inputSchema, featureColumn)
{
    // Map only the feature column; the label role is left unmapped (null) for scoring.
    var roleMappedSchema = new RoleMappedSchema(inputSchema, null, featureColumn);
    var boundMapper = BindableMapper.Bind(Host, roleMappedSchema);
    // Scorer operates over an empty view carrying the input schema; real data arrives via Transform.
    _scorer = new GenericScorer(Host, new GenericScorer.Arguments(), new EmptyDataView(Host, inputSchema), boundMapper, roleMappedSchema);
}

internal RankingPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RankingPredictionTransformer<TModel>)), ctx)
{
    // Deserialization path: rebuild the scorer from the training-time schema restored by the base ctor.
    var roleMappedSchema = new RoleMappedSchema(TrainSchema, null, FeatureColumn);
    var boundMapper = BindableMapper.Bind(Host, roleMappedSchema);
    _scorer = new GenericScorer(Host, new GenericScorer.Arguments(), new EmptyDataView(Host, TrainSchema), boundMapper, roleMappedSchema);
}

/// <summary>
/// Scores <paramref name="input"/> by delegating to the underlying generic scorer.
/// </summary>
public override IDataView Transform(IDataView input)
{
    Host.CheckValue(input, nameof(input));
    var scored = _scorer.ApplyToData(Host, input);
    return scored;
}

/// <summary>
/// Writes this transformer's state to <paramref name="ctx"/>. The version header is stamped
/// first; the remaining serialization is delegated to the base class.
/// </summary>
protected override void SaveCore(ModelSaveContext ctx)
{
    Contracts.AssertValue(ctx);
    // Record signature/version first so the loader can dispatch to the matching reader.
    ctx.SetVersionInfo(GetVersionInfo());

    // *** Binary format ***
    // <base info>
    base.SaveCore(ctx);
}

// Version and signature metadata used when (de)serializing this transformer.
private static VersionInfo GetVersionInfo()
    => new VersionInfo(
        modelSignature: "MC RANK",
        verWrittenCur: 0x00010001, // Initial version.
        verReadableCur: 0x00010001,
        verWeCanReadBack: 0x00010001,
        loaderSignature: RankingPredictionTransformer.LoaderSignature);
}

internal static class BinaryPredictionTransformer
{
public const string LoaderSignature = "BinaryPredXfer";
Expand All @@ -324,4 +373,12 @@ internal static class RegressionPredictionTransformer
public static RegressionPredictionTransformer<IPredictorProducing<float>> Create(IHostEnvironment env, ModelLoadContext ctx)
=> new RegressionPredictionTransformer<IPredictorProducing<float>>(env, ctx);
}

// Non-generic companion holding the loader signature and the factory the model loader invokes.
internal static class RankingPredictionTransformer
{
    // Signature under which saved ranking transformers are registered for loading.
    public const string LoaderSignature = "RankingPredXfer";

    // Factory invoked via SignatureLoadModel to rehydrate a saved ranking transformer.
    public static RankingPredictionTransformer<IPredictorProducing<float>> Create(IHostEnvironment env, ModelLoadContext ctx)
    {
        return new RankingPredictionTransformer<IPredictorProducing<float>>(env, ctx);
    }
}
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public Arguments()
env => new Ova(env, new Ova.Arguments()
{
PredictorType = ComponentFactoryUtils.CreateFromFunction(
e => new AveragedPerceptronTrainer(e, new AveragedPerceptronTrainer.Arguments()))
e => new FastTreeBinaryClassificationTrainer(e, DefaultColumnNames.Label, DefaultColumnNames.Features))
Copy link
Contributor

@TomFinley TomFinley Sep 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FastTreeBinaryClassificationTrainer [](start = 37, length = 35)

I'd really rather we didn't. This seems to fit into the same bucket as the discussion on #682. That ensembling should have a dependency on FastTree merely because we have a default does not make sense to me. If someone wants to use stacking, that's great, but they need to specify the learners. #Pending

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But maybe we can hold off for right now.


In reply to: 218215145 [](ancestors = 218215145)

Copy link
Member Author

@sfilipi sfilipi Sep 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, let's do that separately, when we shape the ensembles to take in the arguments in the constructor.


In reply to: 218215323 [](ancestors = 218215323,218215145)

}));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public sealed class Arguments : ArgumentsBase, ISupportRegressionOutputCombinerF
public Arguments()
{
BasePredictorType = ComponentFactoryUtils.CreateFromFunction(
env => new FastTreeRegressionTrainer(env, new FastTreeRegressionTrainer.Arguments()));
env => new FastTreeRegressionTrainer(env, DefaultColumnNames.Label, DefaultColumnNames.Features));
}

public IRegressionOutputCombiner CreateComponent(IHostEnvironment env) => new RegressionStacking(env, this);
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Ensemble/OutputCombiners/Stacking.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Ensemble.OutputCombiners;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.FastTree;
Expand Down Expand Up @@ -46,7 +47,7 @@ public sealed class Arguments : ArgumentsBase, ISupportBinaryOutputCombinerFacto
public Arguments()
{
BasePredictorType = ComponentFactoryUtils.CreateFromFunction(
env => new FastTreeBinaryClassificationTrainer(env, new FastTreeBinaryClassificationTrainer.Arguments()));
env => new FastTreeBinaryClassificationTrainer(env, DefaultColumnNames.Label, DefaultColumnNames.Features));
}

public IBinaryOutputCombiner CreateComponent(IHostEnvironment env) => new Stacking(env, this);
Expand Down
14 changes: 11 additions & 3 deletions src/Microsoft.ML.FastTree/BoostingFastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,25 @@

using System;
using System.Linq;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.FastTree.Internal;
using Microsoft.ML.Runtime.Internal.Internallearn;

namespace Microsoft.ML.Runtime.FastTree
{
public abstract class BoostingFastTreeTrainerBase<TArgs, TPredictor> : FastTreeTrainerBase<TArgs, TPredictor>
public abstract class BoostingFastTreeTrainerBase<TArgs, TTransformer, TModel> : FastTreeTrainerBase<TArgs, TTransformer, TModel>
where TTransformer : IPredictionTransformer<TModel>
where TArgs : BoostedTreeArgs, new()
where TPredictor : IPredictorProducing<Float>
where TModel : IPredictorProducing<Float>
{
public BoostingFastTreeTrainerBase(IHostEnvironment env, TArgs args) : base(env, args)
protected BoostingFastTreeTrainerBase(IHostEnvironment env, TArgs args, SchemaShape.Column label) : base(env, args, label)
{
}

protected BoostingFastTreeTrainerBase(IHostEnvironment env, SchemaShape.Column label, string featureColumn,
string weightColumn = null, string groupIdColumn = null, Action<TArgs> advancedSettings = null)
: base(env, label, featureColumn, weightColumn, groupIdColumn, advancedSettings)
{
}

Expand Down
98 changes: 76 additions & 22 deletions src/Microsoft.ML.FastTree/FastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using System.IO;
using System.Linq;
using System.Text;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Data.Conversion;
Expand Down Expand Up @@ -43,10 +44,11 @@ internal static class FastTreeShared
public static readonly object TrainLock = new object();
}

public abstract class FastTreeTrainerBase<TArgs, TPredictor> :
TrainerBase<TPredictor>
public abstract class FastTreeTrainerBase<TArgs, TTransformer, TModel> :
TrainerEstimatorBase<TTransformer, TModel>
where TTransformer: IPredictionTransformer<TModel>
where TArgs : TreeArgs, new()
where TPredictor : IPredictorProducing<Float>
where TModel : IPredictorProducing<Float>
{
protected readonly TArgs Args;
protected readonly bool AllowGC;
Expand Down Expand Up @@ -87,34 +89,53 @@ public abstract class FastTreeTrainerBase<TArgs, TPredictor> :

private protected virtual bool NeedCalibration => false;

private protected FastTreeTrainerBase(IHostEnvironment env, TArgs args)
: base(env, RegisterName)
/// <summary>
/// Constructor to use when instantiating the classes deriving from here through the API.
/// </summary>
private protected FastTreeTrainerBase(IHostEnvironment env, SchemaShape.Column label, string featureColumn,
string weightColumn = null, string groupIdColumn = null, Action<TArgs> advancedSettings = null)
: base(Contracts.CheckRef(env, nameof(env)).Register(RegisterName), MakeFeatureColumn(featureColumn), label, MakeWeightColumn(weightColumn))
{
Args = new TArgs();

//apply the advanced args, if the user supplied any
advancedSettings?.Invoke(Args);
Args.LabelColumn = label.Name;

if (weightColumn != null)
Args.WeightColumn = weightColumn;

if (groupIdColumn != null)
Args.GroupIdColumn = groupIdColumn;

// The discretization step renders this trainer non-parametric, and therefore it does not need normalization.
// Also since it builds its own internal discretized columnar structures, it cannot benefit from caching.
// Finally, even the binary classifiers, being logitboost, tend to not benefit from external calibration.
Info = new TrainerInfo(normalization: false, caching: false, calibration: NeedCalibration, supportValid: true);
// REVIEW: CLR 4.6 has a bug that is only exposed in Scope, and if we trigger GC.Collect in scope environment
// with memory consumption of more than 5GB, the GC gets stuck in an infinite loop. So for now let's call GC only if we call things from LocalEnvironment.
AllowGC = (env is HostEnvironmentBase<LocalEnvironment>);

Initialize(env);
}

/// <summary>
/// Legacy constructor that is used when invoking the classes deriving from this through maml.
/// </summary>
private protected FastTreeTrainerBase(IHostEnvironment env, TArgs args, SchemaShape.Column label)
: base(Contracts.CheckRef(env, nameof(env)).Register(RegisterName), MakeFeatureColumn(args.FeatureColumn), label, MakeWeightColumn(args.WeightColumn))
{
Host.CheckValue(args, nameof(args));
Args = args;
// The discretization step renders this trainer non-parametric, and therefore it does not need normalization.
// Also since it builds its own internal discretized columnar structures, it cannot benefit from caching.
// Finally, even the binary classifiers, being logitboost, tend to not benefit from external calibration.
Info = new TrainerInfo(normalization: false, caching: false, calibration: NeedCalibration, supportValid: true);
int numThreads = Args.NumThreads ?? Environment.ProcessorCount;
if (Host.ConcurrencyFactor > 0 && numThreads > Host.ConcurrencyFactor)
{
using (var ch = Host.Start("FastTreeTrainerBase"))
{
numThreads = Host.ConcurrencyFactor;
ch.Warning("The number of threads specified in trainer arguments is larger than the concurrency factor "
+ "setting of the environment. Using {0} training threads instead.", numThreads);
ch.Done();
}
}
ParallelTraining = Args.ParallelTrainer != null ? Args.ParallelTrainer.CreateComponent(env) : new SingleTrainer();
ParallelTraining.InitEnvironment();
// REVIEW: CLR 4.6 has a bug that is only exposed in Scope, and if we trigger GC.Collect in scope environment
// with memory consumption more than 5GB, GC get stuck in infinite loop. So for now let's call GC only if we call things from ConsoleEnvironment.
AllowGC = (env is HostEnvironmentBase<ConsoleEnvironment>);
Tests = new List<Test>();
// with memory consumption more than 5GB, GC get stuck in infinite loop. So for now let's call GC only if we call things from LocalEnvironment.
AllowGC = (env is HostEnvironmentBase<LocalEnvironment>);

InitializeThreads(numThreads);
Initialize(env);
}

protected abstract void PrepareLabels(IChannel ch);
Expand All @@ -133,6 +154,39 @@ protected virtual Float GetMaxLabel()
return Float.PositiveInfinity;
}

/// <summary>
/// Wraps an optional weight column name into a schema-shape column;
/// returns null when no weight column was supplied.
/// </summary>
private static SchemaShape.Column MakeWeightColumn(string weightColumn)
{
    if (weightColumn is null)
        return null;
    return new SchemaShape.Column(weightColumn, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false);
}

// The feature column is always declared as a vector of R4 values.
private static SchemaShape.Column MakeFeatureColumn(string featureColumn)
    => new SchemaShape.Column(featureColumn, SchemaShape.Column.VectorKind.Vector, NumberType.R4, false);

/// <summary>
/// Shared initialization for both constructor paths: resolves the training thread count,
/// sets up parallel training, and prepares the test list and worker threads.
/// </summary>
private void Initialize(IHostEnvironment env)
{
    // Explicit thread argument wins; otherwise use one thread per logical processor,
    // clamped to the environment's concurrency factor (with a warning if clamping occurs).
    int threadCount = Args.NumThreads ?? Environment.ProcessorCount;
    if (Host.ConcurrencyFactor > 0 && threadCount > Host.ConcurrencyFactor)
    {
        using (var ch = Host.Start("FastTreeTrainerBase"))
        {
            threadCount = Host.ConcurrencyFactor;
            ch.Warning("The number of threads specified in trainer arguments is larger than the concurrency factor "
                + "setting of the environment. Using {0} training threads instead.", threadCount);
            ch.Done();
        }
    }

    // Fall back to single-machine training when no parallel trainer was configured.
    ParallelTraining = Args.ParallelTrainer != null ? Args.ParallelTrainer.CreateComponent(env) : new SingleTrainer();
    ParallelTraining.InitEnvironment();

    Tests = new List<Test>();

    InitializeThreads(threadCount);
}

protected void ConvertData(RoleMappedData trainData)
{
trainData.Schema.Schema.TryGetColumnIndex(DefaultColumnNames.Features, out int featureIndex);
Expand Down
Loading