Normalize documentation #3244


Merged 12 commits, Apr 15, 2019
91 changes: 0 additions & 91 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs

This file was deleted.

@@ -0,0 +1,91 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.Transforms.NormalizingTransformer;
@shmoradims shmoradims Apr 10, 2019
do we need this? #ByDesign

Contributor Author
Yes, otherwise I need to write Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters which takes too much space

In reply to: 274057369

namespace Samples.Dynamic
{
public class NormalizeBinning
Member
NormalizeBinning
one line comment about what this does.

Contributor Author
I don't see that pattern in our documentation

In reply to: 273786125
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var mlContext = new MLContext();
var samples = new List<DataPoint>()
{
new DataPoint(){ Features = new float[4] { 8, 1, 3, 0} },
new DataPoint(){ Features = new float[4] { 6, 2, 2, 0} },
new DataPoint(){ Features = new float[4] { 4, 0, 1, 0} },
new DataPoint(){ Features = new float[4] { 2,-1,-1, 1} }
};
// Convert training data to IDataView, the general data type used in ML.NET.
var data = mlContext.Data.LoadFromEnumerable(samples);
// NormalizeBinning normalizes the data by constructing equidensity bins and producing output based
// on which bin the original value falls into.
var normalize = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: false);

// NormalizeBinning normalizes the data by constructing equidensity bins and producing output based
// on which bin the original value falls into, while ensuring that zero values remain zero after normalization.
Contributor
@zeahmed zeahmed Apr 9, 2019
duplicate comment...see line 26-27 #Closed

Contributor
ohh I see you are making another estimator here...

In reply to: 273736107

please move the details to similar to your other PR
// Helps preserve sparsity.
Contributor
Maybe I would not repeat this explanation. Here and also in other samples.

var normalizeFixZero = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: true);
Member
@sfilipi sfilipi Apr 10, 2019
fixZero: true
love the variation on the parameter! #WontFix

// Now we can transform the data and look at the output to confirm the behavior of the estimator.
// This operation doesn't actually evaluate data until we read the data below.
var normalizeTransform = normalize.Fit(data);
var transformedData = normalizeTransform.Transform(data);
var normalizeFixZeroTransform = normalizeFixZero.Fit(data);
var fixZeroData = normalizeFixZeroTransform.Transform(data);
var column = transformedData.GetColumn<float[]>("Features").ToArray();
foreach (var row in column)
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
// Expected output:
// 1.0000, 0.6667, 1.0000, 0.0000
// 0.6667, 1.0000, 0.6667, 0.0000
// 0.3333, 0.3333, 0.3333, 0.0000
Member
0.3333
I'd emphasize this value.

Contributor Author
I don't get your comment.

In reply to: 273786583
// 0.0000, 0.0000, 0.0000, 1.0000

var columnFixZero = fixZeroData.GetColumn<float[]>("Features").ToArray();
foreach (var row in columnFixZero)
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
// Expected output:
// 1.0000, 0.3333, 1.0000, 0.0000
// 0.6667, 0.6667, 0.6667, 0.0000
// 0.3333, 0.0000, 0.3333, 0.0000
// 0.0000, -0.3333, 0.0000, 1.0000

// Let's get the transformation parameters. Since we work with only one column, we need to pass 0 as the parameter to GetNormalizerModelParameters.
Member
we need to pass 0, the index of this column in the dataview, as parameter for..

Contributor Author
No

In reply to: 273786759
// If we apply transformations to multiple columns, we need to pass the index of the InputOutputColumnPair.
var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters<ImmutableArray<float>>;
var density = transformParams.Density[0];
var offset = (transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0]);
Console.WriteLine($"The 0-index value in the resulting array is produced by: y = (Index(x) / {density}) - {offset}");
Console.WriteLine("Where Index(x) is the index of the bin to which x belongs");

@shmoradims shmoradims Apr 10, 2019
please break super-long lines into two lines #Resolved

Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}");
// Expected output:
// The 0-index value in the resulting array is produced by: y = (Index(x) / 3) - 0
// Where Index(x) is the index of the bin to which x belongs
// Bins upper bounds are: 3 5 7 ∞

var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters<ImmutableArray<float>>);
density = fixZeroParams.Density[1];
offset = (fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1]);
Console.WriteLine($"The 1-index value in the resulting array is produced by: y = (Index(x) / {density}) - {offset}");
Console.WriteLine("Where Index(x) is the index of the bin to which x belongs");
Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}");
// Expected output:
// The 1-index value in the resulting array is produced by: y = (Index(x) / 3) - 0.3333333
// Where Index(x) is the index of the bin to which x belongs
// Bins upper bounds are: -0.5 0.5 1.5 ∞
}

private class DataPoint
{
[VectorType(4)]
public float[] Features { get; set; }
}
}
}
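As a quick cross-check of the formula the sample prints, y = (Index(x) / density) - offset, here is a minimal Python sketch. The bounds and offsets are the values printed by the sample; computing Index(x) with bisect over the upper bounds is an assumption consistent with the sample's output, not the ML.NET implementation.

```python
import bisect

def bin_normalize(x, upper_bounds, offset=0.0):
    # y = Index(x) / density - offset, where Index(x) is the bin to which x belongs
    density = len(upper_bounds) - 1            # the last bound is +infinity
    index = bisect.bisect_left(upper_bounds[:-1], x)
    return index / density - offset

inf = float("inf")
# Slot 0 with fixZero: false -- bounds 3 5 7 inf, offset 0:
print([round(bin_normalize(x, [3, 5, 7, inf]), 4) for x in (8, 6, 4, 2)])
# [1.0, 0.6667, 0.3333, 0.0]
# Slot 1 with fixZero: true -- bounds -0.5 0.5 1.5 inf, offset 0.3333333:
print([round(bin_normalize(x, [-0.5, 0.5, 1.5, inf], 1 / 3), 4) for x in (1, 2, 0, -1)])
# [0.3333, 0.6667, 0.0, -0.3333]
```

The second list matches the fixZero column in the sample's expected output: the value 0 stays 0 because the offset equals the normalized value of the bin containing zero.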
@@ -0,0 +1,82 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.Transforms.NormalizingTransformer;
@shmoradims shmoradims Apr 10, 2019
using static Microsoft.ML.Transforms.NormalizingTransformer;
ditto #Resolved


namespace Samples.Dynamic
{
public class NormalizeLogMeanVariance
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var mlContext = new MLContext();
var samples = new List<DataPoint>()
{
new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} },
new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} },
new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} },
new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} }
};
// Convert training data to IDataView, the general data type used in ML.NET.
var data = mlContext.Data.LoadFromEnumerable(samples);
// NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data.
// It uses the cumulative distribution function (CDF) as the output.
var normalize = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: true);

// NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data.
var normalizeNoCdf = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: false);

// Now we can transform the data and look at the output to confirm the behavior of the estimator.
// This operation doesn't actually evaluate data until we read the data below.
var normalizeTransform = normalize.Fit(data);
var transformedData = normalizeTransform.Transform(data);
var normalizeNoCdfTransform = normalizeNoCdf.Fit(data);
var noCdfData = normalizeNoCdfTransform.Transform(data);
var column = transformedData.GetColumn<float[]>("Features").ToArray();
foreach (var row in column)
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
// Expected output:
// 0.1587, 0.1587, 0.8654, 0.0000
// 0.8413, 0.8413, 0.5837, 0.0000
// 0.0000, 0.0000, 0.0940, 0.0000
// 0.0000, 0.0000, 0.0000, 0.0000

var columnFixZero = noCdfData.GetColumn<float[]>("Features").ToArray();
foreach (var row in columnFixZero)
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
// Expected output:
// 1.8854, 1.8854, 5.2970, 0.0000
// 4.7708, 4.7708, 3.0925, 0.0000
// -1.0000,-1.0000, 0.8879, 0.0000
// -3.8854,-3.8854,-3.5213, 0.0000

// Let's get the transformation parameters. Since we work with only one column, we need to pass 0 as the parameter to GetNormalizerModelParameters.
// If we apply transformations to multiple columns, we need to pass the index of the InputOutputColumnPair.
var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters<ImmutableArray<float>>;
Console.WriteLine("The 1-index value in the resulting array is produced by:");
Console.WriteLine($"y = 0.5 * (1 + ERF((Math.Log(x) - {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2))))");

// ERF is https://en.wikipedia.org/wiki/Error_function.
// Expected output:
// The 1-index value in the resulting array is produced by:
// y = 0.5 * (1 + ERF((Math.Log(x) - 0.3465736) / (0.3465736 * sqrt(2))))
var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters<ImmutableArray<float>>;
var offset = noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1];
var scale = noCdfParams.Scale[1];
Console.WriteLine($"The 1-index value in the resulting array is produced by: y = (x - ({offset})) * {scale}");
// Expected output:
// The 1-index value in the resulting array is produced by: y = (x - (2.88539)) * 0.3465736
}

private class DataPoint
{
[VectorType(4)]
public float[] Features { get; set; }
}
}
}
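The CDF formula this sample prints can be sanity-checked with a few lines of Python using math.erf. The mean and standard deviation (both 0.3465736 for slot 1) are the values the sample reports; returning 0 for non-positive inputs is an assumption made only to match the printed rows, since log(x) is undefined there.

```python
from math import erf, log, sqrt

def log_cdf_normalize(x, mean, std):
    # y = 0.5 * (1 + ERF((log(x) - mean) / (std * sqrt(2))))
    if x <= 0:
        return 0.0  # assumption: the sample prints 0.0000 for non-positive inputs
    return 0.5 * (1 + erf((log(x) - mean) / (std * sqrt(2))))

# Slot 1 of the sample data is (1, 2, 0, -1); mean = std = 0.3465736:
print([round(log_cdf_normalize(x, 0.3465736, 0.3465736), 4) for x in (1, 2, 0, -1)])
# [0.1587, 0.8413, 0.0, 0.0]
```

These values match the second column of the sample's expected CDF output.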
@@ -0,0 +1,83 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.Transforms.NormalizingTransformer;

namespace Samples.Dynamic
{
public class NormalizeMeanVariance
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var mlContext = new MLContext();
var samples = new List<DataPoint>()
{
new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} },
new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} },
new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} },
new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} }
};
// Convert training data to IDataView, the general data type used in ML.NET.
var data = mlContext.Data.LoadFromEnumerable(samples);
// NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data.
// It uses the cumulative distribution function (CDF) as the output.
var normalize = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: true);

// NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data.
var normalizeNoCdf = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: false);
Contributor
I would not repeat what NormalizeMeanVariance does.

// Now we can transform the data and look at the output to confirm the behavior of the estimator.
// This operation doesn't actually evaluate data until we read the data below.
var normalizeTransform = normalize.Fit(data);
var transformedData = normalizeTransform.Transform(data);
var normalizeNoCdfTransform = normalizeNoCdf.Fit(data);
var noCdfData = normalizeNoCdfTransform.Transform(data);
var column = transformedData.GetColumn<float[]>("Features").ToArray();
foreach (var row in column)
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
// Expected output:
// 0.6726, 0.6726, 0.8816, 0.2819
// 0.9101, 0.9101, 0.6939, 0.2819
// 0.3274, 0.3274, 0.4329, 0.2819
// 0.0899, 0.0899, 0.0641, 0.9584


var columnFixZero = noCdfData.GetColumn<float[]>("Features").ToArray();
foreach (var row in columnFixZero)
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
// Expected output:
// 0.8165, 0.8165, 1.5492, 0.0000
// 1.6330, 1.6330, 1.0328, 0.0000
// 0.0000, 0.0000, 0.5164, 0.0000
// -0.8165,-0.8165,-0.5164, 2.0000

// Let's get the transformation parameters. Since we work with only one column, we need to pass 0 as the parameter to GetNormalizerModelParameters.
// If we apply transformations to multiple columns, we need to pass the index of the InputOutputColumnPair.
var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters<ImmutableArray<float>>;
Console.WriteLine("The 1-index value in the resulting array is produced by:");
Console.WriteLine($" y = 0.5 * (1 + ERF((x - {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2))))");
// ERF is https://en.wikipedia.org/wiki/Error_function.
// Expected output:
// The 1-index value in the resulting array is produced by:
// y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))))

var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters<ImmutableArray<float>>;
var offset = noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1];
var scale = noCdfParams.Scale[1];
Console.WriteLine($"Values for slot 1 would be transformed by applying y = (x - ({offset})) * {scale}");
// Expected output:
// Values for slot 1 would be transformed by applying y = (x - (0)) * 0.8164966
}

private class DataPoint
{
[VectorType(4)]
public float[] Features { get; set; }
}
}
}
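Both formulas this sample prints, the CDF form and the affine form y = (x - offset) * scale, can be reproduced in Python. The parameters below (mean 0.5, standard deviation 1.118034, offset 0, scale 0.8164966) are the values the sample reports for slot 1; this is a sketch of the arithmetic, not ML.NET code.

```python
from math import erf, sqrt

def cdf_normalize(x, mean, std):
    # y = 0.5 * (1 + ERF((x - mean) / (std * sqrt(2))))
    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2))))

def affine_normalize(x, offset, scale):
    # y = (x - offset) * scale
    return (x - offset) * scale

# Slot 1 of the sample data is (1, 2, 0, -1).
# CDF parameters reported by the sample: mean 0.5, standard deviation 1.118034:
print([round(cdf_normalize(x, 0.5, 1.118034), 4) for x in (1, 2, 0, -1)])
# [0.6726, 0.9101, 0.3274, 0.0899]
# Affine parameters reported by the sample: offset 0, scale 0.8164966:
print([round(affine_normalize(x, 0, 0.8164966), 4) for x in (1, 2, 0, -1)])
# [0.8165, 1.633, 0.0, -0.8165]
```

Both lists match the second column of the sample's expected outputs, confirming the printed parameters describe the transformation applied to the data.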