dotnet · artidoro · Apr 10, 2019
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs
@@ -0,0 +1,70 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+
+namespace Samples.Dynamic
+{
+    public static class CustomMapping
+    {
+        public static void Example()
+        {
+            // Create a new ML context for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Get a small dataset as an IEnumerable and convert it to an IDataView.
+            var samples = new List<InputData>
+            {
+                new InputData { Age = 26 },
+                new InputData { Age = 35 },
+                new InputData { Age = 34 },
+                new InputData { Age = 28 },
+            };
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Define the custom mapping applied to each row: it sets IsUnderThirty to whether Age is below 30.
+            Action<InputData, CustomMappingOutput > mapping =
+                (input, output) => output.IsUnderThirty = input.Age < 30;
+
+            // Custom transformations can be used to transform data directly, or as part of a pipeline of estimators.
+            // Note: If contractName is null, any pipeline of estimators containing this CustomMapping estimator
+            // cannot be saved and loaded back.
+            var pipeline = mlContext.Transforms.CustomMapping(mapping, contractName: null);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate data until we read the data below.
+            var transformer = pipeline.Fit(data);
+            var transformedData = transformer.Transform(data);
+
+            var dataEnumerable = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);
+            Console.WriteLine("Age\t IsUnderThirty");
+            foreach (var row in dataEnumerable)
+                Console.WriteLine($"{row.Age}\t {row.IsUnderThirty}");
+
+            // Expected output:
+            // Age      IsUnderThirty
+            // 26       True
+            // 35       False
+            // 34       False
+            // 28       True
+        }
+
+        // Defines only the column generated by the custom mapping; it is added to the columns already present.
+        private class CustomMappingOutput
+        {
+            public bool IsUnderThirty { get; set; }
+        }
+
+        // Defines the schema of the input data.
+        private class InputData
+        {
+            public float Age { get; set; }
+        }
+
+        // Defines the schema of the transformed data: the input column Age plus the new column IsUnderThirty.
+        private class TransformedData : InputData
+        {
+            public bool IsUnderThirty { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs
@@ -0,0 +1,87 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Transforms;
+
+namespace Samples.Dynamic
+{
+    public static class CustomMappingSaveAndLoad
+    {
+        public static void Example()
+        {
+            // Create a new ML context for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Get a small dataset as an IEnumerable and convert it to an IDataView.
+            var samples = new List<InputData>
+            {
+                new InputData { Age = 26 },
+                new InputData { Age = 35 },
+                new InputData { Age = 34 },
+                new InputData { Age = 28 },
+            };
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // Custom transformations can be used to transform data directly, or as part of a pipeline of estimators.
+            var pipeline = mlContext.Transforms.CustomMapping(new IsUnderThirtyCustomAction().GetMapping(), contractName: "IsUnderThirty");
+            var transformer = pipeline.Fit(data);
+
+            // To save and load the CustomMapping estimator, the assembly in which the custom action is defined needs to be
+            // registered in the environment. The following registers the assembly where IsUnderThirtyCustomAction is defined.
+            mlContext.ComponentCatalog.RegisterAssembly(typeof(IsUnderThirtyCustomAction).Assembly);
+
+            // Now the transform pipeline can be saved and loaded through the usual MLContext methods.
+            mlContext.Model.Save(transformer, data.Schema, "customTransform.zip");
+            var loadedTransform = mlContext.Model.Load("customTransform.zip", out var inputSchema);
+
+            // Now we can transform the data with the loaded transform and look at the output to confirm its behavior.
+            // This operation doesn't actually evaluate data until we read the data below.
+            var transformedData = loadedTransform.Transform(data);
+
+            var dataEnumerable = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);
+            Console.WriteLine("Age\tIsUnderThirty");
+            foreach (var row in dataEnumerable)
+                Console.WriteLine($"{row.Age}\t {row.IsUnderThirty}");
+
+            // Expected output:
+            // Age      IsUnderThirty
+            // 26       True
+            // 35       False
+            // 34       False
+            // 28       True
+        }
+
+        // The custom action needs to derive from the abstract class CustomMappingFactory and be decorated with
+        // CustomMappingFactoryAttribute, whose argument must equal the contractName used to define the CustomMapping
+        // estimator that uses the action.
+        [CustomMappingFactoryAttribute("IsUnderThirty")]
+        private class IsUnderThirtyCustomAction : CustomMappingFactory<InputData, CustomMappingOutput>
+        {
+            // Define the custom mapping applied to each row: it sets IsUnderThirty to whether Age is below 30.
+            public static void CustomAction(InputData input, CustomMappingOutput output)
+                => output.IsUnderThirty = input.Age < 30;
+
+            public override Action<InputData, CustomMappingOutput> GetMapping()
+                => CustomAction;
+        }
+
+        // Defines only the column generated by the custom mapping; it is added to the columns already present.
+        private class CustomMappingOutput
+        {
+            public bool IsUnderThirty { get; set; }
+        }
+
+        // Defines the schema of the input data.
+        private class InputData
+        {
+            public float Age { get; set; }
+        }
+
+        // Defines the schema of the transformed data: the input column Age plus the new column IsUnderThirty.
+        private class TransformedData : InputData
+        {
+            public bool IsUnderThirty { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs
@@ -7,26 +7,24 @@ namespace Microsoft.ML.Samples.Dynamic
 {
     public static class IndicateMissingValues
     {
-
         public static void Example()
         {
             // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
+            // Get a small dataset as an IEnumerable and convert it to an IDataView.
             var samples = new List<DataPoint>()
             {
-                new DataPoint(){ Label = 3, Features = new float[3] {1, 1, 0} },
-                new DataPoint(){ Label = 32, Features = new float[3] {0, float.NaN, 1} },
-                new DataPoint(){ Label = float.NaN, Features = new float[3] {-1, float.NaN, -3} },
+                new DataPoint(){ Features = new float[3] {1, 1, 0} },
+                new DataPoint(){ Features = new float[3] {0, float.NaN, 1} },
+                new DataPoint(){ Features = new float[3] {-1, float.NaN, -3} },
             };
-            // Convert training data to IDataView, the general data type used in ML.NET.
             var data = mlContext.Data.LoadFromEnumerable(samples);
 
-            // IndicateMissingValues is used to create a boolean containing
-            // 'true' where the value in the input column is NaN. This value can be used
-            // to replace missing values with other values.
-            IEstimator<ITransformer> pipeline = mlContext.Transforms.IndicateMissingValues("MissingIndicator", "Features");
+            // IndicateMissingValues is used to create a boolean containing 'true' where the value in the 
+            // input column is missing. For floats and doubles, missing values are represented as NaN.
+            var pipeline = mlContext.Transforms.IndicateMissingValues("MissingIndicator", "Features");
 
             // Now we can transform the data and look at the output to confirm the behavior of the estimator.
             // This operation doesn't actually evaluate data until we read the data below.
@@ -36,32 +34,18 @@ public static void Example()
             // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below.
             var rowEnumerable = mlContext.Data.CreateEnumerable<SampleDataTransformed>(transformedData, reuseRowObject: false);
 
-            // a small printing utility
-            Func<object[], string> vectorPrinter = (object[] vector) =>
-            {
-                string preview = "[";
-                foreach (var slot in vector)
-                    preview += $"{slot} ";
-               return preview += "]";
-
-            };
-
             // And finally, we can write out the rows of the dataset, looking at the columns of interest.
             foreach (var row in rowEnumerable)
-            {
-                Console.WriteLine($"Label: {row.Label} Features: {vectorPrinter(row.Features.Cast<object>().ToArray())} MissingIndicator: {vectorPrinter(row.MissingIndicator.Cast<object>().ToArray())}");
-            }
+                Console.WriteLine($"Features: [{string.Join(", ", row.Features)}]\t MissingIndicator: [{string.Join(", ", row.MissingIndicator)}]");
 
             // Expected output:
-            // 
-            // Label: 3 Features: [1 1 0] MissingIndicator: [False False False]
-            // Label: 32 Features: [0 NaN 1] MissingIndicator: [False True False]
-            // Label: NaN Features: [-1 NaN -3 ] MissingIndicator: [False True False]
+            // Features: [1, 1, 0]      MissingIndicator: [False, False, False]
+            // Features: [0, NaN, 1]    MissingIndicator: [False, True, False]
+            // Features: [-1, NaN, -3]  MissingIndicator: [False, True, False]
         }
 
         private class DataPoint
         {
-            public float Label { get; set; }
             [VectorType(3)]
             public float[] Features { get; set; }
         }

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs
@@ -0,0 +1,68 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+namespace Samples.Dynamic
+{
+    public static class IndicateMissingValuesMultiColumn
+    {
+        public static void Example()
+        {
+            // Create a new ML context for ML.NET operations: exception tracking, logging, and the source of randomness.
+            var mlContext = new MLContext();
+
+            // Get a small dataset as an IEnumerable and convert it to an IDataView.
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features1 = new float[3] {1, 1, 0}, Features2 = new float[2] {1, 1} },
+                new DataPoint(){ Features1 = new float[3] {0, float.NaN, 1}, Features2 = new float[2] {float.NaN, 1} },
+                new DataPoint(){ Features1 = new float[3] {-1, float.NaN, -3}, Features2 = new float[2] {1, float.PositiveInfinity} },
+            };
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+
+            // IndicateMissingValues is used to create a boolean containing 'true' where the value in the
+            // input column is missing. For floats and doubles, missing values are NaN (infinity is not missing).
+            // We can use an array of InputOutputColumnPair to apply the MissingValueIndicatorEstimator
+            // to multiple columns in one pass over the data.
+            var pipeline = mlContext.Transforms.IndicateMissingValues(new[] {
+                new InputOutputColumnPair("MissingIndicator1", "Features1"),
+                new InputOutputColumnPair("MissingIndicator2", "Features2")
+            });
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate data until we read the data below.
+            var transformer = pipeline.Fit(data);
+            var transformedData = transformer.Transform(data);
+
+            // We can extract the newly created columns as an IEnumerable of SampleDataTransformed, the class we define below.
+            var rowEnumerable = mlContext.Data.CreateEnumerable<SampleDataTransformed>(transformedData, reuseRowObject: false);
+
+            // And finally, we can write out the rows of the dataset, looking at the columns of interest.
+            foreach (var row in rowEnumerable)
+                Console.WriteLine($"Features1: [{string.Join(", ", row.Features1)}]\t MissingIndicator1: [{string.Join(", ", row.MissingIndicator1)}]\t " +
+                    $"Features2: [{string.Join(", ", row.Features2)}]\t MissingIndicator2: [{string.Join(", ", row.MissingIndicator2)}]");
+
+            // Expected output:
+            // Features1: [1, 1, 0]     MissingIndicator1: [False, False, False]        Features2: [1, 1]       MissingIndicator2: [False, False]
+            // Features1: [0, NaN, 1]   MissingIndicator1: [False, True, False]         Features2: [NaN, 1]     MissingIndicator2: [True, False]
+            // Features1: [-1, NaN, -3]         MissingIndicator1: [False, True, False]         Features2: [1, ∞]       MissingIndicator2: [False, False]
+        }
+
+        // Defines the schema of the input data: two float vector columns of sizes 3 and 2.
+        private class DataPoint
+        {
+            [VectorType(3)]
+            public float[] Features1 { get; set; }
+            [VectorType(2)]
+            public float[] Features2 { get; set; }
+        }
+
+        // Defines the schema of the transformed data: the input columns plus the two new indicator columns.
+        private sealed class SampleDataTransformed : DataPoint
+        {
+            public bool[] MissingIndicator1 { get; set; }
+            public bool[] MissingIndicator2 { get; set; }
+        }
+    }
+}