Skip to content

Fix bug in TextLoader #3011

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
7f4e341
Fix bug in TextLoader
yaeldMS Mar 19, 2019
1a89468
Clean FeatureContributionCalculation and PermutationFeatureImportance…
artidoro Mar 19, 2019
aea88dc
Updating LightGBM Arguments (#2948)
singlis Mar 19, 2019
8b1b14f
Hiding of ColumnOptions (#2959)
artidoro Mar 19, 2019
f03c49d
Updating the FunctionalTests to clearly explain why they are not stro…
eerhardt Mar 19, 2019
00a5b35
Added samples for tree regression trainers. (#2999)
Mar 19, 2019
fd1c700
Cleanup the statistics usage API (#2048)
sfilipi Mar 19, 2019
de5d48a
Refactor cancellation mechanism and make it internal, accessible via …
codemzs Mar 19, 2019
c38f81b
Add functional tests for ONNX scenarios (#2984)
rogancarr Mar 19, 2019
3af9a5d
Make Multiclass Linear Trainers Typed Based on Output Model Types. (#…
wschin Mar 20, 2019
807d813
Clean up the SchemaDefinition class (#2995)
yaeldMS Mar 20, 2019
c8a4c7d
Data catalog done (#3021)
sfilipi Mar 20, 2019
ce56462
Activate OnnxTransform unit tests for MacOS (#2695)
jignparm Mar 20, 2019
e00d19d
Added tests for text featurizer options (Part1). (#3006)
zeahmed Mar 20, 2019
a2d7987
Binary FastTree/Forest samples using T4 templates. (#3035)
Mar 20, 2019
77be9d9
Polish standard trainers' catalog (Just rename some variables) (#3029)
wschin Mar 21, 2019
5b22420
Polish train catalog (renaming only) (#3030)
wschin Mar 21, 2019
ce7f0fb
Merge branch 'tryparseschema' of https://github.com/yaeldekel/machine…
yaeldMS Mar 21, 2019
62dda6f
Add more checks for the syntax of the embedded TextLoader options
yaeldMS Mar 21, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.Ensemble", "Mi
pkg\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.symbols.nupkgproj = pkg\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.symbols.nupkgproj
EndProjectSection
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Experimental", "src\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj", "{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -948,6 +950,18 @@ Global
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release|Any CPU.Build.0 = Release|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -1033,6 +1047,7 @@ Global
{31D38B21-102B-41C0-9E0A-2FE0BF68D123} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{5E920CAC-5A28-42FB-936E-49C472130953} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{AD7058C9-5608-49A8-BE23-58C33A74EE91} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
2 changes: 1 addition & 1 deletion build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<PropertyGroup>
<GoogleProtobufPackageVersion>3.5.1</GoogleProtobufPackageVersion>
<LightGBMPackageVersion>2.2.3</LightGBMPackageVersion>
<MicrosoftMLOnnxRuntimePackageVersion>0.2.1</MicrosoftMLOnnxRuntimePackageVersion>
<MicrosoftMLOnnxRuntimePackageVersion>0.3.0</MicrosoftMLOnnxRuntimePackageVersion>
<MlNetMklDepsPackageVersion>0.0.0.9</MlNetMklDepsPackageVersion>
<ParquetDotNetPackageVersion>2.1.3</ParquetDotNetPackageVersion>
<SystemDrawingCommonPackageVersion>4.5.0</SystemDrawingCommonPackageVersion>
Expand Down
10 changes: 5 additions & 5 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ We tried to make `Preview` debugger-friendly: our expectation is that, if you en
Here is the code sample:
```csharp
var estimator = mlContext.Transforms.Categorical.MapValueToKey("Label")
.Append(mlContext.MulticlassClassification.Trainers.Sdca())
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated())
.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

var data = mlContext.Data.LoadFromTextFile(new TextLoader.Column[] {
Expand Down Expand Up @@ -423,7 +423,7 @@ var pipeline =
// Cache data in memory for steps after the cache check point stage.
.AppendCacheCheckpoint(mlContext)
// Use the multi-class SDCA model to predict the label using features.
.Append(mlContext.MulticlassClassification.Trainers.Sdca())
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated())
// Apply the inverse conversion from 'PredictedLabel' column back to string value.
.Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Data")));

Expand Down Expand Up @@ -547,13 +547,13 @@ var pipeline =
// Cache data in memory for steps after the cache check point stage.
.AppendCacheCheckpoint(mlContext)
// Use the multi-class SDCA model to predict the label using features.
.Append(mlContext.MulticlassClassification.Trainers.Sdca());
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated());

// Train the model.
var trainedModel = pipeline.Fit(trainData);

// Inspect the model parameters.
var modelParameters = trainedModel.LastTransformer.Model as MulticlassLogisticRegressionModelParameters;
var modelParameters = trainedModel.LastTransformer.Model as MaximumEntropyModelParameters;

// Now we can use 'modelParameters' to look at the weights.
// 'weights' will be an array of weight vectors, one vector per class.
Expand Down Expand Up @@ -822,7 +822,7 @@ var pipeline =
// Notice that unused part in the data may not be cached.
.AppendCacheCheckpoint(mlContext)
// Use the multi-class SDCA model to predict the label using features.
.Append(mlContext.MulticlassClassification.Trainers.Sdca());
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated());

// Split the data 90:10 into train and test sets, train and evaluate.
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace Microsoft.ML.Samples.Dynamic
{
public static class Bootstrap
public static class BootstrapSample
{
public static void Example()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;

namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample class showing how to use <see cref="DataOperationsCatalog.TrainTestSplit"/>,
/// both with and without a sampling-key column.
/// </summary>
public static class TrainTestSplit
{
    public static void Example()
    {
        // Creating the ML.Net IHostEnvironment object, needed for the pipeline.
        var mlContext = new MLContext();

        // Generate some data points.
        var examples = GenerateRandomDataPoints(10);

        // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
        var dataview = mlContext.Data.LoadFromEnumerable(examples);

        // Leave out 10% of the dataset for testing. For some types of problems, for example for ranking or anomaly detection,
        // we must ensure that the split leaves the rows with the same value in a particular column, in one of the splits.
        // So below, we specify Group column as the column containing the sampling keys.
        // Notice how keeping the rows with the same value in the Group column overrides the testFraction definition.
        TrainTestData split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumnName: "Group");

        PrintPreviewRows(split);

        // The data in the Train split.
        // [Group, 1], [Features, 0.8173254]
        // [Group, 1], [Features, 0.5581612]
        // [Group, 1], [Features, 0.5588848]
        // [Group, 1], [Features, 0.4421779]
        // [Group, 1], [Features, 0.2737045]

        // The data in the Test split.
        // [Group, 0], [Features, 0.7262433]
        // [Group, 0], [Features, 0.7680227]
        // [Group, 0], [Features, 0.2060332]
        // [Group, 0], [Features, 0.9060271]
        // [Group, 0], [Features, 0.9775497]

        // Example of a split without specifying a sampling key column.
        split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.2);
        PrintPreviewRows(split);

        // The data in the Train split.
        // [Group, 0], [Features, 0.7262433]
        // [Group, 1], [Features, 0.8173254]
        // [Group, 0], [Features, 0.7680227]
        // [Group, 1], [Features, 0.5581612]
        // [Group, 0], [Features, 0.2060332]
        // [Group, 1], [Features, 0.4421779]
        // [Group, 0], [Features, 0.9775497]
        // [Group, 1], [Features, 0.2737045]

        // The data in the Test split.
        // [Group, 1], [Features, 0.5588848]
        // [Group, 0], [Features, 0.9060271]
    }

    // Generates <count> data points whose Group alternates 0/1 and whose single
    // feature is a pseudo-random float seeded by <seed> (deterministic output).
    private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
    {
        var random = new Random(seed);
        for (int i = 0; i < count; i++)
        {
            yield return new DataPoint
            {
                Group = i % 2,

                // Create random features that are correlated with label.
                Features = (float)random.NextDouble()
            };
        }
    }

    // Example with group and feature columns. A data set is a collection of such examples.
    private class DataPoint
    {
        public float Group { get; set; }

        public float Features { get; set; }
    }

    // Print helper: writes the previewed rows of the train and test splits to the console.
    private static void PrintPreviewRows(TrainTestData split)
    {
        var trainDataPreview = split.TrainSet.Preview();
        var testDataPreview = split.TestSet.Preview();

        Console.WriteLine("The data in the Train split.");
        foreach (var row in trainDataPreview.RowView)
            Console.WriteLine($"{row.Values[0]}, {row.Values[1]}");

        Console.WriteLine("\nThe data in the Test split.");
        foreach (var row in testDataPreview.RowView)
            Console.WriteLine($"{row.Values[0]}, {row.Values[1]}");
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ public static void Example()
// Create a Feature Contribution Calculator
// Calculate the feature contributions for all features given trained model parameters
// And don't normalize the contribution scores
var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumnName, numPositiveContributions: 11, normalize: false);
var featureContributionCalculator = mlContext.Transforms.CalculateFeatureContribution(model, numberOfPositiveContributions: 11, normalize: false);
var outputData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

// FeatureContributionCalculatingEstimator can be use as an intermediary step in a pipeline.
// The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumnName, numPositiveContributions: 11)
var pipeline = mlContext.Transforms.CalculateFeatureContribution(model, numberOfPositiveContributions: 11)
.Append(mlContext.Regression.Trainers.Ols(featureColumnName: "FeatureContributions"));
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

Expand Down
3 changes: 2 additions & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ public static void Example()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using log scale as the normalization mode.
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizationMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance)
.Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance));
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public static void Example()
// Compute the permutation metrics using the properly normalized data.
var transformedData = model.Transform(data);
var permutationMetrics = mlContext.Regression.PermutationFeatureImportance(
linearPredictor, transformedData, label: labelName, features: "Features", permutationCount: 3);
linearPredictor, transformedData, labelColumnName: labelName, permutationCount: 3);

// Now let's look at which features are most important to the model overall
// Get the feature indices sorted by their impact on R-Squared
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public static void Example()
// Compute the permutation metrics using the properly normalized data.
var transformedData = model.Transform(data);
var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance(
linearPredictor, transformedData, label: labelName, features: "Features", permutationCount: 3);
linearPredictor, transformedData, labelColumnName: labelName, permutationCount: 3);

// Now let's look at which features are most important to the model overall.
// Get the feature indices sorted by their impact on AreaUnderRocCurve.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,11 @@ public static void Example()
};

var model = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text")
.Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") }))
.Append(mlContext.Transforms.Conversion.MapValue("VariableLenghtFeatures", lookupMap,
lookupMap.Schema["Words"], lookupMap.Schema["Ids"], "TokenizedWords"))
.Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize"))
.Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" }))
.Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))
.Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax"))
.Fit(dataView);
var engine = mlContext.Model.CreatePredictionEngine<IMDBSentiment, OutputScores>(model);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class FastForest
{
    // This example requires installation of additional NuGet package
    // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
    public static void Example()
    {
        // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
        // as a catalog of available operations and as the source of randomness.
        // Setting the seed to a fixed number in this example to make outputs deterministic.
        var mlContext = new MLContext(seed: 0);

        // Generate 1000 training points and wrap them in an IDataView, the
        // format consumable by the ML.NET API.
        var trainingData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000));

        // Define the trainer and fit it to the training data.
        var pipeline = mlContext.BinaryClassification.Trainers.FastForest();
        var model = pipeline.Fit(trainingData);

        // Create testing data with a different random seed so it differs from
        // the training data, then score it with the trained model.
        var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
        var transformedTestData = model.Transform(testData);

        // Materialize the scored rows into a list of Prediction objects.
        var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();

        // Print the first 5 predictions.
        foreach (var prediction in predictions.Take(5))
            Console.WriteLine($"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");

        // Expected output:
        // Label: True, Prediction: True
        // Label: False, Prediction: False
        // Label: True, Prediction: True
        // Label: True, Prediction: True
        // Label: False, Prediction: False

        // Evaluate the overall metrics.
        var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
        SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

        // Expected output:
        // Accuracy: 0.74
        // AUC: 0.83
        // F1 Score: 0.74
        // Negative Precision: 0.78
        // Negative Recall: 0.71
        // Positive Precision: 0.71
        // Positive Recall: 0.78
    }

    // Generates <count> data points with a random boolean label and 50 features
    // correlated with that label; deterministic for a given <seed>.
    private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
    {
        var random = new Random(seed);
        for (int i = 0; i < count; i++)
        {
            bool label = (float)random.NextDouble() > 0.5f;

            // For data points with false label, the feature values are slightly
            // increased by adding a constant, which correlates them with the label.
            var features = new float[50];
            for (int j = 0; j < features.Length; j++)
            {
                var value = (float)random.NextDouble();
                features[j] = label ? value : value + 0.03f;
            }

            yield return new DataPoint
            {
                Label = label,
                Features = features
            };
        }
    }

    // Example with label and 50 feature values. A data set is a collection of such examples.
    private class DataPoint
    {
        public bool Label { get; set; }
        [VectorType(50)]
        public float[] Features { get; set; }
    }

    // Class used to capture predictions.
    private class Prediction
    {
        // Original label.
        public bool Label { get; set; }
        // Predicted label from the trainer.
        public bool PredictedLabel { get; set; }
    }
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<#@ include file="TreeSamplesTemplate.ttinclude"#>

<#+
// Template parameters — presumably consumed by the included
// TreeSamplesTemplate.ttinclude to generate the FastForest binary
// classification sample; confirm against that template.

// Name of the generated sample class.
string ClassName="FastForest";
// Name of the trainer catalog method the generated sample calls.
string Trainer = "FastForest";
// Extra options passed to the trainer; null here — presumably selects the
// parameterless trainer overload (TODO confirm in the include file).
string TrainerOptions = null;
// Whether the generated sample treats the trainer output as calibrated
// probabilities; false here, matching the non-calibrated evaluation used
// by the corresponding FastForest sample in this PR.
bool IsCalibrated = false;

// Per-instance prediction lines embedded verbatim as comments in the
// generated sample.
string ExpectedOutputPerInstance= @"// Expected output:
//   Label: True, Prediction: True
//   Label: False, Prediction: False
//   Label: True, Prediction: True
//   Label: True, Prediction: True
//   Label: False, Prediction: False";

// Overall-metrics lines embedded verbatim as comments in the generated sample.
string ExpectedOutput = @"// Expected output:
//   Accuracy: 0.74
//   AUC: 0.83
//   F1 Score: 0.74
//   Negative Precision: 0.78
//   Negative Recall: 0.71
//   Positive Precision: 0.71
//   Positive Recall: 0.78";
#>
Loading