diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
deleted file mode 100644
index dc92f50e40..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
+++ /dev/null
@@ -1,91 +0,0 @@
-using System;
-using System.Collections.Generic;
-using Microsoft.ML;
-using Microsoft.ML.Data;
-
-namespace Samples.Dynamic
-{
-    public static class NormalizerTransform
-    {
-        public static void Example()
-        {
-            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
-            // as well as the source of randomness.
-            var ml = new MLContext();
-
-            // Get a small dataset as an IEnumerable and convert it to an IDataView.
-            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData();
-            var trainData = ml.Data.LoadFromEnumerable(data);
-
-            // Preview of the data.
-            //
-            // Age  Case  Education  Induced  Parity  PooledStratum  RowNum  ...
-            // 26   1     0-5yrs     1        6       3              1       ...
-            // 42   1     0-5yrs     1        1       1              2       ...
-            // 39   1     0-5yrs     2        6       4              3       ...
-            // 34   1     0-5yrs     2        4       2              4       ...
-            // 35   1     6-11yrs    1        3       32             5       ...
-
-            // A pipeline for normalizing the Induced column.
-            var pipeline = ml.Transforms.NormalizeMinMax("Induced");
-            // The transformed (normalized according to Normalizer.NormalizerMode.MinMax) data.
-            var transformer = pipeline.Fit(trainData);
-
-            // Normalize the data.
-            var transformedData = transformer.Transform(trainData);
-
-            // Getting the data of the newly created column, so we can preview it.
-            var normalizedColumn = transformedData.GetColumn<float>(transformedData.Schema["Induced"]);
-
-            // A small printing utility.
-            Action<string, IEnumerable<float>> printHelper = (colName, column) =>
-            {
-                Console.WriteLine($"{colName} column obtained post-transformation.");
-                foreach (var row in column)
-                    Console.WriteLine($"{row} ");
-            };
-
-            printHelper("Induced", normalizedColumn);
-
-            // Induced column obtained post-transformation.
-            //
-            // 0.5
-            // 0.5
-            // 1
-            // 1
-            // 0.5
-
-            // Composing a different pipeline if we wanted to normalize more than one column at a time.
-            // Using log scale as the normalization mode.
-            var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance(new[] { new InputOutputColumnPair("LogInduced", "Induced"), new InputOutputColumnPair("LogSpontaneous", "Spontaneous") });
-
-            // The transformed data.
-            var multiColtransformer = multiColPipeline.Fit(trainData);
-            var multiColtransformedData = multiColtransformer.Transform(trainData);
-
-            // Getting the newly created columns.
-            var normalizedInduced = multiColtransformedData.GetColumn<float>(multiColtransformedData.Schema["LogInduced"]);
-            var normalizedSpont = multiColtransformedData.GetColumn<float>(multiColtransformedData.Schema["LogSpontaneous"]);
-
-            printHelper("LogInduced", normalizedInduced);
-
-            // LogInduced column obtained post-transformation.
-            //
-            // 0.2071445
-            // 0.2071445
-            // 0.889631
-            // 0.889631
-            // 0.2071445
-
-            printHelper("LogSpontaneous", normalizedSpont);
-
-            // LogSpontaneous column obtained post-transformation.
-            //
-            // 0.8413026
-            // 0
-            // 0
-            // 0
-            // 0.1586974
-        }
-    }
-}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs
new file mode 100644
index 0000000000..f6a3270430
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs
@@ -0,0 +1,91 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using static Microsoft.ML.Transforms.NormalizingTransformer;
+
+namespace Samples.Dynamic
+{
+    public class NormalizeBinning
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 8, 1, 3, 0} },
+                new DataPoint(){ Features = new float[4] { 6, 2, 2, 0} },
+                new DataPoint(){ Features = new float[4] { 4, 0, 1, 0} },
+                new DataPoint(){ Features = new float[4] { 2,-1,-1, 1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            // NormalizeBinning normalizes the data by constructing equidensity bins and produces
+            // output based on which bin the original value belongs to.
+            var normalize = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: false);
+
+            // NormalizeBinning normalizes the data by constructing equidensity bins and produces
+            // output based on which bin the original value belongs to, but makes sure zero values
+            // remain zero after normalization. Helps preserve sparsity.
+            var normalizeFixZero = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: true);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate the data until we read it below.
+            var normalizeTransform = normalize.Fit(data);
+            var transformedData = normalizeTransform.Transform(data);
+            var normalizeFixZeroTransform = normalizeFixZero.Fit(data);
+            var fixZeroData = normalizeFixZeroTransform.Transform(data);
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  1.0000, 0.6667, 1.0000, 0.0000
+            //  0.6667, 1.0000, 0.6667, 0.0000
+            //  0.3333, 0.3333, 0.3333, 0.0000
+            //  0.0000, 0.0000, 0.0000, 1.0000
+
+            var columnFixZero = fixZeroData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in columnFixZero)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  1.0000, 0.3333, 1.0000, 0.0000
+            //  0.6667, 0.6667, 0.6667, 0.0000
+            //  0.3333, 0.0000, 0.3333, 0.0000
+            //  0.0000,-0.3333, 0.0000, 1.0000
+
+            // Let's get the transformation parameters. Since we are working with only one column, we pass 0 to GetNormalizerModelParameters.
+            // With multiple column transformations we would pass the index of the InputOutputColumnPair.
+            var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters<ImmutableArray<float>>;
+            var density = transformParams.Density[0];
+            var offset = (transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0]);
+            Console.WriteLine($"The 0-index value in the resulting array would be produced by: y = (Index(x) / {density}) - {offset}");
+            Console.WriteLine("Where Index(x) is the index of the bin to which x belongs");
+            Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}");
+            // Expected output:
+            //  The 0-index value in the resulting array would be produced by: y = (Index(x) / 3) - 0
+            //  Where Index(x) is the index of the bin to which x belongs
+            //  Bins upper bounds are: 3 5 7 ∞
+
+            var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters<ImmutableArray<float>>);
+            density = fixZeroParams.Density[1];
+            offset = (fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1]);
+            Console.WriteLine($"The 1-index value in the resulting array would be produced by: y = (Index(x) / {density}) - {offset}");
+            Console.WriteLine("Where Index(x) is the index of the bin to which x belongs");
+            Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}");
+            // Expected output:
+            //  The 1-index value in the resulting array would be produced by: y = (Index(x) / 3) - 0.3333333
+            //  Where Index(x) is the index of the bin to which x belongs
+            //  Bins upper bounds are: -0.5 0.5 1.5 ∞
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+        }
+    }
+}
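The bin formula printed above is easy to sanity-check outside ML.NET. Below is a small standalone sketch, not part of the diff: the `Index` helper and its strict-inequality boundary handling are our assumptions, chosen to match the printed slot-0 parameters (upper bounds {3, 5, 7, ∞}, density 3, offset 0).

```csharp
using System;
using System.Linq;

static class BinFormulaCheck
{
    // Our assumed Index(x): the number of bin upper bounds strictly below x,
    // i.e. the zero-based index of the bin that x falls into.
    static int Index(float x, float[] upperBounds) => upperBounds.Count(b => x > b);

    static void Main()
    {
        var upperBounds = new[] { 3f, 5f, 7f, float.PositiveInfinity };
        const float density = 3f, offset = 0f;
        foreach (var x in new[] { 8f, 6f, 4f, 2f }) // slot 0 across the four rows
            Console.WriteLine($"{x} -> {Index(x, upperBounds) / density - offset:f4}");
        // 8 -> 1.0000, 6 -> 0.6667, 4 -> 0.3333, 2 -> 0.0000,
        // matching the first column of the first output block above.
    }
}
```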
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs
new file mode 100644
index 0000000000..b577270622
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs
@@ -0,0 +1,82 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using static Microsoft.ML.Transforms.NormalizingTransformer;
+
+namespace Samples.Dynamic
+{
+    public class NormalizeLogMeanVariance
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} },
+                new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} },
+                new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} },
+                new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            // NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data.
+            // Uses the cumulative distribution function (CDF) as the output.
+            var normalize = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: true);
+
+            // NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data.
+            var normalizeNoCdf = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: false);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate the data until we read it below.
+            var normalizeTransform = normalize.Fit(data);
+            var transformedData = normalizeTransform.Transform(data);
+            var normalizeNoCdfTransform = normalizeNoCdf.Fit(data);
+            var noCdfData = normalizeNoCdfTransform.Transform(data);
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  0.1587, 0.1587, 0.8654, 0.0000
+            //  0.8413, 0.8413, 0.5837, 0.0000
+            //  0.0000, 0.0000, 0.0940, 0.0000
+            //  0.0000, 0.0000, 0.0000, 0.0000
+
+            var columnNoCdf = noCdfData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in columnNoCdf)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  1.8854, 1.8854, 5.2970, 0.0000
+            //  4.7708, 4.7708, 3.0925, 0.0000
+            // -1.0000,-1.0000, 0.8879, 0.0000
+            // -3.8854,-3.8854,-3.5213, 0.0000
+
+            // Let's get the transformation parameters. Since we are working with only one column, we pass 0 to GetNormalizerModelParameters.
+            // With multiple column transformations we would pass the index of the InputOutputColumnPair.
+            var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters<ImmutableArray<float>>;
+            Console.WriteLine("The 1-index value in the resulting array would be produced by:");
+            Console.WriteLine($"y = 0.5 * (1 + ERF((Math.Log(x) - {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2))))");
+
+            // ERF is https://en.wikipedia.org/wiki/Error_function.
+            // Expected output:
+            //  The 1-index value in the resulting array would be produced by:
+            //  y = 0.5 * (1 + ERF((Math.Log(x) - 0.3465736) / (0.3465736 * sqrt(2))))
+            var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters<ImmutableArray<float>>;
+            var offset = noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1];
+            var scale = noCdfParams.Scale[1];
+            Console.WriteLine($"The 1-index value in the resulting array would be produced by: y = (x - ({offset})) * {scale}");
+            // Expected output:
+            //  The 1-index value in the resulting array would be produced by: y = (x - (2.88539)) * 0.3465736
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+        }
+    }
+}
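The CDF output can be checked by hand. .NET has no built-in ERF, so this standalone sketch (our own, using the Abramowitz and Stegun 7.1.26 approximation, accurate to about 1.5e-7) evaluates the printed formula for slot 1 of the first row, with x = 1 and mean = stddev = 0.3465736:

```csharp
using System;

static class CdfCheck
{
    // Abramowitz & Stegun 7.1.26 approximation of the error function.
    static double Erf(double x)
    {
        double sign = x < 0 ? -1.0 : 1.0;
        x = Math.Abs(x);
        double t = 1.0 / (1.0 + 0.3275911 * x);
        double poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t
            - 0.284496736) * t + 0.254829592) * t;
        return sign * (1.0 - poly * Math.Exp(-x * x));
    }

    static void Main()
    {
        const double mean = 0.3465736, stddev = 0.3465736;
        const double x = 1.0; // slot 1 of the first row above
        double y = 0.5 * (1 + Erf((Math.Log(x) - mean) / (stddev * Math.Sqrt(2))));
        Console.WriteLine(y.ToString("f4")); // 0.1587, the slot-1 value of the first output row
    }
}
```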
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs
new file mode 100644
index 0000000000..ad35d43e6f
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs
@@ -0,0 +1,83 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using static Microsoft.ML.Transforms.NormalizingTransformer;
+
+namespace Samples.Dynamic
+{
+    public class NormalizeMeanVariance
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} },
+                new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} },
+                new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} },
+                new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            // NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data.
+            // Uses the cumulative distribution function (CDF) as the output.
+            var normalize = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: true);
+
+            // NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data.
+            var normalizeNoCdf = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: false);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate the data until we read it below.
+            var normalizeTransform = normalize.Fit(data);
+            var transformedData = normalizeTransform.Transform(data);
+            var normalizeNoCdfTransform = normalizeNoCdf.Fit(data);
+            var noCdfData = normalizeNoCdfTransform.Transform(data);
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  0.6726, 0.6726, 0.8816, 0.2819
+            //  0.9101, 0.9101, 0.6939, 0.2819
+            //  0.3274, 0.3274, 0.4329, 0.2819
+            //  0.0899, 0.0899, 0.0641, 0.9584
+
+            var columnNoCdf = noCdfData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in columnNoCdf)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  0.8165, 0.8165, 1.5492, 0.0000
+            //  1.6330, 1.6330, 1.0328, 0.0000
+            //  0.0000, 0.0000, 0.5164, 0.0000
+            // -0.8165,-0.8165,-0.5164, 2.0000
+
+            // Let's get the transformation parameters. Since we are working with only one column, we pass 0 to GetNormalizerModelParameters.
+            // With multiple column transformations we would pass the index of the InputOutputColumnPair.
+            var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters<ImmutableArray<float>>;
+            Console.WriteLine("The 1-index value in the resulting array would be produced by:");
+            Console.WriteLine($" y = 0.5 * (1 + ERF((x - {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2))))");
+            // ERF is https://en.wikipedia.org/wiki/Error_function.
+            // Expected output:
+            //  The 1-index value in the resulting array would be produced by:
+            //  y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))))
+
+            var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters<ImmutableArray<float>>;
+            var offset = noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1];
+            var scale = noCdfParams.Scale[1];
+            Console.WriteLine($"Values for slot 1 would be transformed by applying y = (x - ({offset})) * {scale}");
+            // Expected output:
+            //  Values for slot 1 would be transformed by applying y = (x - (0)) * 0.8164966
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+        }
+    }
+}
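GetNormalizerModelParameters returns a different parameter class depending on the normalization mode, and all three classes used in these samples can be handled generically. A hedged sketch follows; it assumes the samples' usings (including `using static Microsoft.ML.Transforms.NormalizingTransformer;` and `System.Collections.Immutable`), that the method's return type is the `NormalizerModelParametersBase` base class as the casts above suggest, and a C# 8 compiler for the switch expression:

```csharp
// Mode-agnostic inspection of fitted normalizer parameters (sketch, names as assumed above).
static string Describe(NormalizerModelParametersBase parameters) => parameters switch
{
    AffineNormalizerModelParameters<ImmutableArray<float>> p =>
        $"affine: y = (x - offset) * scale, scale[0] = {p.Scale[0]}",
    CdfNormalizerModelParameters<ImmutableArray<float>> p =>
        $"CDF: mean[0] = {p.Mean[0]}, stddev[0] = {p.StandardDeviation[0]}",
    BinNormalizerModelParameters<ImmutableArray<float>> p =>
        $"binning: {p.UpperBounds[0].Length} bins in slot 0",
    _ => "unrecognized normalizer mode"
};
```

For instance, `Describe(normalizeTransform.GetNormalizerModelParameters(0))` would report the CDF mean and standard deviation printed above.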
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs
new file mode 100644
index 0000000000..7b7a60d74e
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs
@@ -0,0 +1,76 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using static Microsoft.ML.Transforms.NormalizingTransformer;
+
+namespace Samples.Dynamic
+{
+    public class NormalizeMinMax
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} },
+                new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} },
+                new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} },
+                new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            // NormalizeMinMax normalizes the data by finding the min and max values in each slot,
+            // mapping the min value to 0 and the max to 1, and everything else to values in between.
+            var normalize = mlContext.Transforms.NormalizeMinMax("Features", fixZero: false);
+
+            // Normalize using the min and max values in each slot as above, but make sure
+            // zero values remain zero after normalization. Helps preserve sparsity.
+            var normalizeFixZero = mlContext.Transforms.NormalizeMinMax("Features", fixZero: true);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate the data until we read it below.
+            var normalizeTransform = normalize.Fit(data);
+            var transformedData = normalizeTransform.Transform(data);
+            var normalizeFixZeroTransform = normalizeFixZero.Fit(data);
+            var fixZeroData = normalizeFixZeroTransform.Transform(data);
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  0.6667, 0.6667, 1.0000, 0.0000
+            //  1.0000, 1.0000, 0.7500, 0.0000
+            //  0.3333, 0.3333, 0.5000, 0.0000
+            //  0.0000, 0.0000, 0.0000, 1.0000
+
+            var columnFixZero = fixZeroData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in columnFixZero)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  0.5000, 0.5000, 1.0000, 0.0000
+            //  1.0000, 1.0000, 0.6667, 0.0000
+            //  0.0000, 0.0000, 0.3333, 0.0000
+            // -0.5000,-0.5000,-0.3333, 1.0000
+
+            // Let's get the transformation parameters. Since we are working with only one column, we pass 0 to GetNormalizerModelParameters.
+            // With multiple column transformations we would pass the index of the InputOutputColumnPair.
+            var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters<ImmutableArray<float>>;
+            Console.WriteLine("The 1-index value in the resulting array would be produced by:");
+            Console.WriteLine($" y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}");
+            // Expected output:
+            //  The 1-index value in the resulting array would be produced by:
+            //  y = (x - (-1)) * 0.3333333
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+        }
+    }
+}
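The printed min-max parameters are easy to verify by hand: with offset -1 and scale 0.3333333 for slot 1 (the non-fixZero normalizer), y = (x - (-1)) * 0.3333333 maps the slot-1 inputs {1, 2, 0, -1} to {0.6667, 1.0000, 0.3333, 0.0000}, exactly the second column of the first output block. A one-file sketch of that arithmetic (helper names ours):

```csharp
using System;
using System.Linq;

static class MinMaxCheck
{
    static void Main()
    {
        const float offset = -1f, scale = 0.3333333f;
        // Slot 1 of each row, pushed through the printed affine formula.
        var outputs = new[] { 1f, 2f, 0f, -1f }.Select(x => (x - offset) * scale);
        Console.WriteLine(string.Join(", ", outputs.Select(y => y.ToString("f4"))));
        // 0.6667, 1.0000, 0.3333, 0.0000
    }
}
```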
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs
new file mode 100644
index 0000000000..63fde50a9e
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs
@@ -0,0 +1,98 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using static Microsoft.ML.Transforms.NormalizingTransformer;
+
+namespace Samples.Dynamic
+{
+    public class NormalizeSupervisedBinning
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, Bin="Bin1" },
+                new DataPoint(){ Features = new float[4] { 6, 2, 2, 1}, Bin="Bin2" },
+                new DataPoint(){ Features = new float[4] { 5, 3, 0, 2}, Bin="Bin2" },
+                new DataPoint(){ Features = new float[4] { 4,-8, 1, 3}, Bin="Bin3" },
+                new DataPoint(){ Features = new float[4] { 2,-5,-1, 4}, Bin="Bin3" }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            // Let's transform the "Bin" column from a string to a key.
+            data = mlContext.Transforms.Conversion.MapValueToKey("Bin").Fit(data).Transform(data);
+            // NormalizeSupervisedBinning normalizes the data by constructing bins based on the correlation with the
+            // label column and produces output based on which bin the original value belongs to.
+            var normalize = mlContext.Transforms.NormalizeSupervisedBinning("Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, fixZero: false);
+
+            // NormalizeSupervisedBinning normalizes the data by constructing bins based on the correlation with the
+            // label column and produces output based on which bin the original value belongs to, but makes sure
+            // zero values remain zero after normalization. Helps preserve sparsity.
+            var normalizeFixZero = mlContext.Transforms.NormalizeSupervisedBinning("Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, fixZero: true);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate the data until we read it below.
+            var normalizeTransform = normalize.Fit(data);
+            var transformedData = normalizeTransform.Transform(data);
+            var normalizeFixZeroTransform = normalizeFixZero.Fit(data);
+            var fixZeroData = normalizeFixZeroTransform.Transform(data);
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  1.0000, 0.5000, 1.0000, 0.0000
+            //  0.5000, 1.0000, 0.0000, 0.5000
+            //  0.5000, 1.0000, 0.0000, 0.5000
+            //  0.0000, 0.0000, 0.0000, 1.0000
+            //  0.0000, 0.0000, 0.0000, 1.0000
+
+            var columnFixZero = fixZeroData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in columnFixZero)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  1.0000, 0.0000, 1.0000, 0.0000
+            //  0.5000, 0.5000, 0.0000, 0.5000
+            //  0.5000, 0.5000, 0.0000, 0.5000
+            //  0.0000,-0.5000, 0.0000, 1.0000
+            //  0.0000,-0.5000, 0.0000, 1.0000
+
+            // Let's get the transformation parameters. Since we are working with only one column, we pass 0 to GetNormalizerModelParameters.
+            // With multiple column transformations we would pass the index of the InputOutputColumnPair.
+            var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters<ImmutableArray<float>>;
+            Console.WriteLine("The 0-index value in the resulting array would be produced by:");
+            Console.WriteLine($"y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}");
+            Console.WriteLine("Where Index(x) is the index of the bin to which x belongs");
+            Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}");
+            // Expected output:
+            //  The 0-index value in the resulting array would be produced by:
+            //  y = (Index(x) / 2) - 0
+            //  Where Index(x) is the index of the bin to which x belongs
+            //  Bins upper bounds are: 4.5 7 ∞
+
+            var fixZeroParams = normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters<ImmutableArray<float>>;
+            Console.WriteLine("The 1-index value in the resulting array would be produced by:");
+            Console.WriteLine($" y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}");
+            Console.WriteLine("Where Index(x) is the index of the bin to which x belongs");
+            Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}");
+            // Expected output:
+            //  The 1-index value in the resulting array would be produced by:
+            //  y = (Index(x) / 2) - 0.5
+            //  Where Index(x) is the index of the bin to which x belongs
+            //  Bins upper bounds are: -2 1.5 ∞
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+
+            public string Bin { get; set; }
+        }
+    }
+}
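The same Index(x) check works for supervised bins; this variant (again our own sketch, with the same assumed strict-inequality Index helper) exercises the nonzero-offset path using the slot-1 fixZero parameters printed above: density 2, offset 0.5, upper bounds {-2, 1.5, ∞}.

```csharp
using System;
using System.Linq;

static class SupervisedBinCheck
{
    // Same assumed Index(x) as in the earlier binning sketch.
    static int Index(float x, float[] upperBounds) => upperBounds.Count(b => x > b);

    static void Main()
    {
        var upperBounds = new[] { -2f, 1.5f, float.PositiveInfinity };
        const float density = 2f, offset = 0.5f;
        foreach (var x in new[] { 1f, 2f, 3f, -8f, -5f }) // slot 1 across the five rows
            Console.WriteLine((Index(x, upperBounds) / density - offset).ToString("f4"));
        // 0.0000, 0.5000, 0.5000, -0.5000, -0.5000,
        // the second column of the fixZero output block above.
    }
}
```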
diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
index cfec3f878a..a0ad4b2407 100644
--- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
+++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
@@ -20,13 +20,6 @@ public static class NormalizationCatalog
         /// <param name="catalog">The transform catalog</param>
         /// <param name="mode">The <see cref="NormalizingEstimator.NormalizationMode"/> used to map the old values to the new ones.</param>
         /// <param name="columns">The pairs of input and output columns.</param>
-        /// <example>
-        /// <format type="text/markdown">
-        /// <![CDATA[
-        ///  [!code-csharp[Normalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
-        /// ]]>
-        /// </format>
-        /// </example>
         [BestFriend]
         internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
            NormalizingEstimator.NormalizationMode mode,
@@ -48,7 +41,7 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        ///  [!code-csharp[NormalizeMinMax](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
+        ///  [!code-csharp[NormalizeMinMax](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -84,6 +77,13 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog,
         /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
         /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
         /// <param name="useCdf">Whether to use CDF as the output.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        ///  [!code-csharp[NormalizeMeanVariance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog,
             string outputColumnName, string inputColumnName = null,
             long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
@@ -118,6 +118,13 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog,
         /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
         /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
         /// <param name="useCdf">Whether to use CDF as the output.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        ///  [!code-csharp[NormalizeLogMeanVariance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog,
             string outputColumnName, string inputColumnName = null,
             long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
@@ -134,13 +141,6 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog,
         /// <param name="columns">List of Output and Input column pairs.</param>
         /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
         /// <param name="useCdf">Whether to use CDF as the output.</param>
-        /// <example>
-        /// <format type="text/markdown">
-        /// <![CDATA[
-        ///  [!code-csharp[NormalizeLogMeanVariance](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
-        /// ]]>
-        /// </format>
-        /// </example>
         public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns,
             long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
             bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) =>
@@ -157,6 +157,13 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog,
         /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
         /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
         /// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        ///  [!code-csharp[NormalizeBinning](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
             string outputColumnName, string inputColumnName = null,
             long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
@@ -194,6 +201,13 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
         /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
         /// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
         /// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        ///  [!code-csharp[NormalizeSupervisedBinning](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog,
             string outputColumnName, string inputColumnName = null,
             string labelColumnName = DefaultColumnNames.Label,