From b936e8e57680e348cc5cbaa66354f3a6d16c218c Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 09:54:35 -0700 Subject: [PATCH 1/8] Multi-column mapping for Normalizer estimators. --- .../Dynamic/Normalizer.cs | 4 +- docs/samples/Microsoft.ML.Samples/Program.cs | 2 +- .../NormalizerCatalog.cs | 84 ++++++++++++++++++- 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 2c0fcce6bb..cf94245aba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -56,8 +56,8 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced") - .Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous")); + var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance(new[] { new InputOutputColumnPair("LogInduced", "Induced"), new InputOutputColumnPair("LogSpontaneous", "Spontaneous") }); + // The transformed data. var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index ef67739045..205201e9bd 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -6,7 +6,7 @@ internal static class Program { static void Main(string[] args) { - ReplaceMissingValues.Example(); + NormalizerTransform.Example(); } } } diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 22696851c8..9e71edb624 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -1,4 +1,5 @@ -using Microsoft.ML.Data; +using System.Linq; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; @@ -56,6 +57,20 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// It normalizes the data based on the observed minimum and maximum values of the data. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.MinMaxColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero)).ToArray()); + /// /// It normalizes the data based on the computed mean and variance of the data. /// @@ -75,6 +90,22 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// It normalizes the data based on the computed mean and variance of the data. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.MeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, useCdf)).ToArray()); + /// /// It normalizes the data based on the computed mean and variance of the logarithm of the data. /// @@ -92,6 +123,20 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// It normalizes the data based on the computed mean and variance of the logarithm of the data. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.LogMeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, useCdf)).ToArray()); + /// /// The values are assigned into bins with equal density. /// @@ -111,6 +156,22 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// The values are assigned into bins with equal density. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.BinningColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, maximumBinCount)).ToArray()); + /// /// The values are assigned into bins based on correlation with the column. /// @@ -134,6 +195,27 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// The values are assigned into bins based on correlation with the column. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Name of the label column for supervised binning. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + /// Minimum number of examples per bin. + public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + string labelColumnName = DefaultColumnNames.Label, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, + int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.SupervisedBinningColumOptions( + column.OutputColumnName, column.InputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin)).ToArray()); + /// /// Normalize (rescale) columns according to specified custom parameters. /// From 5ef0bee1ff14b23af429be54a80861a1370f9891 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 09:55:56 -0700 Subject: [PATCH 2/8] XML comment. --- src/Microsoft.ML.Transforms/NormalizerCatalog.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 9e71edb624..983294ff80 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -130,6 +130,13 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// List of Output and Input column pairs. /// Maximum number of examples used to train the normalizer. /// Whether to use CDF as the output. + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) => From 34e693e41aa38ef3f5a27ca0dd93305222ef7404 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 10:01:09 -0700 Subject: [PATCH 3/8] revert Program.cs --- docs/samples/Microsoft.ML.Samples/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 205201e9bd..ef67739045 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -6,7 +6,7 @@ internal static class Program { static void Main(string[] args) { - NormalizerTransform.Example(); + ReplaceMissingValues.Example(); } } } From 4fa564973346c7aeee41444c334a9af62c7015d7 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 10:54:13 -0700 Subject: [PATCH 4/8] Add copyright header. --- src/Microsoft.ML.Transforms/NormalizerCatalog.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 983294ff80..cfec3f878a 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -1,4 +1,8 @@ -using System.Linq; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; From 248c1a42e8102063e6e88726138540348039240e Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 13:01:09 -0700 Subject: [PATCH 5/8] Add tests. --- .../Transformers/NormalizerTests.cs | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 17d41c418e..2685b3ef50 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Immutable; using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Data.IO; using Microsoft.ML.Experimental; @@ -207,6 +208,118 @@ public void NormalizerParameters() Done(); } + [Fact] + public void NormalizerParametersMultiColumnApi() + { + string dataPath = GetDataPath("iris.txt"); + + var loader = new TextLoader(Env, new TextLoader.Options + { + Columns = new[] { + new TextLoader.Column("float1", DataKind.Single, 1), + new TextLoader.Column("float4", DataKind.Single, new[]{new TextLoader.Range(1, 4) }), + new TextLoader.Column("double1", DataKind.Double, 1), + new TextLoader.Column("double4", DataKind.Double, new[]{new TextLoader.Range(1, 4) }), + new TextLoader.Column("int1", DataKind.Int32, 0), + new TextLoader.Column("float0", DataKind.Single, new[]{ new TextLoader.Range { Min = 1, VariableEnd = true } }) + }, + HasHeader = true + }, new MultiFileSource(dataPath)); + var context = new MLContext(seed: 0); + var est = context.Transforms.NormalizeMinMax( + new[] { new InputOutputColumnPair("float1"), new InputOutputColumnPair("float4"), + new InputOutputColumnPair("double1"), new InputOutputColumnPair("double4"), }).Append( + context.Transforms.NormalizeBinning( + new[] {new InputOutputColumnPair("float1bin", "float1"), new InputOutputColumnPair("float4bin", "float4"), + new InputOutputColumnPair("double1bin", "double1"), new InputOutputColumnPair("double4bin", "double4")})).Append( + context.Transforms.NormalizeMeanVariance( + new[] {new InputOutputColumnPair("float1mv", "float1"), new InputOutputColumnPair("float4mv", "float4"), + new InputOutputColumnPair("double1mv", "double1"), new InputOutputColumnPair("double4mv", "double4")})).Append( + context.Transforms.NormalizeLogMeanVariance( + new[] {new InputOutputColumnPair("float1lmv", "float1"), new InputOutputColumnPair("float4lmv", "float4"), + new InputOutputColumnPair("double1lmv", "double1"), new InputOutputColumnPair("double4lmv", "double4")})); + + var data = loader.Load(dataPath); + + var transformer = est.Fit(data); + var transformers = transformer.ToImmutableArray(); + var floatAffineData = (transformers[0] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(0.12658228f, floatAffineData.Scale); + Assert.Equal(0, floatAffineData.Offset); + + var floatAffineDataVec = (transformers[0] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, floatAffineDataVec.Scale.Length); + Assert.Empty(floatAffineDataVec.Offset); + + var doubleAffineData = (transformers[0] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(0.12658227848101264, doubleAffineData.Scale); + Assert.Equal(0, doubleAffineData.Offset); + + var doubleAffineDataVec = (transformers[0] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, doubleAffineDataVec.Scale.Length); + Assert.Empty(doubleAffineDataVec.Offset); + + var floatBinData = (transformers[1] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.True(35 == floatBinData.UpperBounds.Length); + Assert.True(34 == floatBinData.Density); + Assert.True(0 == floatBinData.Offset); + + var floatBinDataVec = (transformers[1] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.True(4 == floatBinDataVec.UpperBounds.Length); + Assert.True(35 == floatBinDataVec.UpperBounds[0].Length); + Assert.True(4 == floatBinDataVec.Density.Length); + Assert.True(0 == floatBinDataVec.Offset.Length); + + var doubleBinData = (transformers[1] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.Equal(35, doubleBinData.UpperBounds.Length); + Assert.Equal(34, doubleBinData.Density); + Assert.Equal(0, doubleBinData.Offset); + + var doubleBinDataVec = (transformers[1] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.Equal(35, doubleBinDataVec.UpperBounds[0].Length); + Assert.Equal(4, doubleBinDataVec.Density.Length); + Assert.Empty(doubleBinDataVec.Offset); + + var floatCdfMeanData = (transformers[2] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(1.33754611f, floatCdfMeanData.Scale); + Assert.Equal(0, floatCdfMeanData.Offset); + + var floatCdfMeanDataVec = (transformers[2] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(1.33754611f, floatCdfMeanDataVec.Scale[0]); + Assert.Equal(4, floatCdfMeanDataVec.Scale.Length); + Assert.Empty(floatCdfMeanDataVec.Offset); + + var doubleCdfMeanData = (transformers[2] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(1.3375461389666252, doubleCdfMeanData.Scale); + Assert.Equal(0, doubleCdfMeanData.Offset); + + var doubleCdfMeanDataVec = (transformers[2] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, doubleCdfMeanDataVec.Scale.Length); + Assert.Empty(doubleCdfMeanDataVec.Offset); + + var floatCdfLogMeanData = (transformers[3] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + Assert.Equal(-0.310623198747635f, floatCdfLogMeanData.Mean); + Assert.True(true == floatCdfLogMeanData.UseLog); + Assert.Equal(0.140807763f, floatCdfLogMeanData.StandardDeviation); + + var floatCdfLogMeanDataVec = (transformers[3] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + Assert.Equal(4, floatCdfLogMeanDataVec.Mean.Length); + Assert.True(true == floatCdfLogMeanDataVec.UseLog); + Assert.Equal(4, floatCdfLogMeanDataVec.StandardDeviation.Length); + + var doubleCdfLogMeanData = (transformers[3] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + Assert.Equal(-0.31062321927759518, doubleCdfLogMeanData.Mean); + Assert.True(doubleCdfLogMeanData.UseLog); + Assert.Equal(0.14080776721611871, doubleCdfLogMeanData.StandardDeviation); + + var doubleCdfLogMeanDataVec = (transformers[3] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + Assert.Equal(4, doubleCdfLogMeanDataVec.Mean.Length); + Assert.True(doubleCdfLogMeanDataVec.UseLog); + Assert.Equal(4, doubleCdfLogMeanDataVec.StandardDeviation.Length); + + Done(); + } + [Fact] public void SimpleConstructorsAndExtensions() { From a746735d422c94490bad5a2e745ffb6e8ca0c911 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 14:42:31 -0700 Subject: [PATCH 6/8] PR feedback. --- .../Transformers/NormalizerTests.cs | 77 ++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 2685b3ef50..a514c4648c 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -216,6 +216,7 @@ public void NormalizerParametersMultiColumnApi() var loader = new TextLoader(Env, new TextLoader.Options { Columns = new[] { + new TextLoader.Column("Label", DataKind.Single, 0), new TextLoader.Column("float1", DataKind.Single, 1), new TextLoader.Column("float4", DataKind.Single, new[]{new TextLoader.Range(1, 4) }), new TextLoader.Column("double1", DataKind.Double, 1), @@ -228,95 +229,119 @@ public void NormalizerParametersMultiColumnApi() var context = new MLContext(seed: 0); var est = context.Transforms.NormalizeMinMax( new[] { new InputOutputColumnPair("float1"), new InputOutputColumnPair("float4"), - new InputOutputColumnPair("double1"), new InputOutputColumnPair("double4"), }).Append( - context.Transforms.NormalizeBinning( - new[] {new InputOutputColumnPair("float1bin", "float1"), new InputOutputColumnPair("float4bin", "float4"), - new InputOutputColumnPair("double1bin", "double1"), new InputOutputColumnPair("double4bin", "double4")})).Append( - context.Transforms.NormalizeMeanVariance( - new[] {new InputOutputColumnPair("float1mv", "float1"), new InputOutputColumnPair("float4mv", "float4"), - new InputOutputColumnPair("double1mv", "double1"), new InputOutputColumnPair("double4mv", "double4")})).Append( - context.Transforms.NormalizeLogMeanVariance( - new[] {new InputOutputColumnPair("float1lmv", "float1"), new InputOutputColumnPair("float4lmv", "float4"), - new InputOutputColumnPair("double1lmv", "double1"), new InputOutputColumnPair("double4lmv", "double4")})); + new InputOutputColumnPair("double1"), new InputOutputColumnPair("double4"), }) + .Append(context.Transforms.NormalizeBinning( + new[] {new InputOutputColumnPair("float1bin", "float1"), new InputOutputColumnPair("float4bin", "float4"), + new InputOutputColumnPair("double1bin", "double1"), new InputOutputColumnPair("double4bin", "double4")})) + .Append(context.Transforms.NormalizeMeanVariance( + new[] {new InputOutputColumnPair("float1mv", "float1"), new InputOutputColumnPair("float4mv", "float4"), + new InputOutputColumnPair("double1mv", "double1"), new InputOutputColumnPair("double4mv", "double4")})) + .Append(context.Transforms.NormalizeLogMeanVariance( + new[] {new InputOutputColumnPair("float1lmv", "float1"), new InputOutputColumnPair("float4lmv", "float4"), + new InputOutputColumnPair("double1lmv", "double1"), new InputOutputColumnPair("double4lmv", "double4")})) + .Append(context.Transforms.NormalizeSupervisedBinning( + new[] {new InputOutputColumnPair("float1nsb", "float1"), new InputOutputColumnPair("float4nsb", "float4"), + new InputOutputColumnPair("double1nsp", "double1"), new InputOutputColumnPair("double4nsb", "double4")})); var data = loader.Load(dataPath); var transformer = est.Fit(data); var transformers = transformer.ToImmutableArray(); - var floatAffineData = (transformers[0] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + var floatAffineData = ((NormalizingTransformer)transformers[0]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; Assert.Equal(0.12658228f, floatAffineData.Scale); Assert.Equal(0, floatAffineData.Offset); - var floatAffineDataVec = (transformers[0] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + var floatAffineDataVec = ((NormalizingTransformer)transformers[0]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; Assert.Equal(4, floatAffineDataVec.Scale.Length); Assert.Empty(floatAffineDataVec.Offset); - var doubleAffineData = (transformers[0] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + var doubleAffineData = ((NormalizingTransformer)transformers[0]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; Assert.Equal(0.12658227848101264, doubleAffineData.Scale); Assert.Equal(0, doubleAffineData.Offset); - var doubleAffineDataVec = (transformers[0] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + var doubleAffineDataVec = ((NormalizingTransformer)transformers[0]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; Assert.Equal(4, doubleAffineDataVec.Scale.Length); Assert.Empty(doubleAffineDataVec.Offset); - var floatBinData = (transformers[1] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + var floatBinData = ((NormalizingTransformer)transformers[1]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; Assert.True(35 == floatBinData.UpperBounds.Length); Assert.True(34 == floatBinData.Density); Assert.True(0 == floatBinData.Offset); - var floatBinDataVec = (transformers[1] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + var floatBinDataVec = ((NormalizingTransformer)transformers[1]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; Assert.True(4 == floatBinDataVec.UpperBounds.Length); Assert.True(35 == floatBinDataVec.UpperBounds[0].Length); Assert.True(4 == floatBinDataVec.Density.Length); Assert.True(0 == floatBinDataVec.Offset.Length); - var doubleBinData = (transformers[1] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + var doubleBinData = ((NormalizingTransformer)transformers[1]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; Assert.Equal(35, doubleBinData.UpperBounds.Length); Assert.Equal(34, doubleBinData.Density); Assert.Equal(0, doubleBinData.Offset); - var doubleBinDataVec = (transformers[1] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + var doubleBinDataVec = ((NormalizingTransformer)transformers[1]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; Assert.Equal(35, doubleBinDataVec.UpperBounds[0].Length); Assert.Equal(4, doubleBinDataVec.Density.Length); Assert.Empty(doubleBinDataVec.Offset); - var floatCdfMeanData = (transformers[2] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + var floatCdfMeanData = ((NormalizingTransformer)transformers[2]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; Assert.Equal(1.33754611f, floatCdfMeanData.Scale); Assert.Equal(0, floatCdfMeanData.Offset); - var floatCdfMeanDataVec = (transformers[2] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + var floatCdfMeanDataVec = ((NormalizingTransformer)transformers[2]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; Assert.Equal(1.33754611f, floatCdfMeanDataVec.Scale[0]); Assert.Equal(4, floatCdfMeanDataVec.Scale.Length); Assert.Empty(floatCdfMeanDataVec.Offset); - var doubleCdfMeanData = (transformers[2] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + var doubleCdfMeanData = ((NormalizingTransformer)transformers[2]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; Assert.Equal(1.3375461389666252, doubleCdfMeanData.Scale); Assert.Equal(0, doubleCdfMeanData.Offset); - var doubleCdfMeanDataVec = (transformers[2] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + var doubleCdfMeanDataVec = ((NormalizingTransformer)transformers[2]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; Assert.Equal(4, doubleCdfMeanDataVec.Scale.Length); Assert.Empty(doubleCdfMeanDataVec.Offset); - var floatCdfLogMeanData = (transformers[3] as NormalizingTransformer).Columns[0].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + var floatCdfLogMeanData = ((NormalizingTransformer)transformers[3]).Columns[0].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; Assert.Equal(-0.310623198747635f, floatCdfLogMeanData.Mean); Assert.True(true == floatCdfLogMeanData.UseLog); Assert.Equal(0.140807763f, floatCdfLogMeanData.StandardDeviation); - var floatCdfLogMeanDataVec = (transformers[3] as NormalizingTransformer).Columns[1].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + var floatCdfLogMeanDataVec = ((NormalizingTransformer)transformers[3]).Columns[1].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; Assert.Equal(4, floatCdfLogMeanDataVec.Mean.Length); Assert.True(true == floatCdfLogMeanDataVec.UseLog); Assert.Equal(4, floatCdfLogMeanDataVec.StandardDeviation.Length); - var doubleCdfLogMeanData = (transformers[3] as NormalizingTransformer).Columns[2].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + var doubleCdfLogMeanData = ((NormalizingTransformer)transformers[3]).Columns[2].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; Assert.Equal(-0.31062321927759518, doubleCdfLogMeanData.Mean); Assert.True(doubleCdfLogMeanData.UseLog); Assert.Equal(0.14080776721611871, doubleCdfLogMeanData.StandardDeviation); - var doubleCdfLogMeanDataVec = (transformers[3] as NormalizingTransformer).Columns[3].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + var doubleCdfLogMeanDataVec = ((NormalizingTransformer)transformers[3]).Columns[3].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; Assert.Equal(4, doubleCdfLogMeanDataVec.Mean.Length); Assert.True(doubleCdfLogMeanDataVec.UseLog); Assert.Equal(4, doubleCdfLogMeanDataVec.StandardDeviation.Length); + floatBinData = ((NormalizingTransformer)transformers[4]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.True(4 == floatBinData.UpperBounds.Length); + Assert.True(3 == floatBinData.Density); + Assert.True(0 == floatBinData.Offset); + + floatBinDataVec = ((NormalizingTransformer)transformers[4]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.True(4 == floatBinDataVec.UpperBounds.Length); + Assert.True(4 == floatBinDataVec.UpperBounds[0].Length); + Assert.True(4 == floatBinDataVec.Density.Length); + Assert.True(0 == floatBinDataVec.Offset.Length); + + doubleBinData = ((NormalizingTransformer)transformers[4]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.Equal(4, doubleBinData.UpperBounds.Length); + Assert.Equal(3, doubleBinData.Density); + Assert.Equal(0, doubleBinData.Offset); + + doubleBinDataVec = ((NormalizingTransformer)transformers[4]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.Equal(4, doubleBinDataVec.UpperBounds[0].Length); + Assert.Equal(4, doubleBinDataVec.Density.Length); + Assert.Empty(doubleBinDataVec.Offset); + Done(); } From 685cb963566af7b4743c46bd6801f4f9591b8dce Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 16:52:28 -0700 Subject: [PATCH 7/8] cleanup. --- test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index a514c4648c..2b2245c7e9 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -212,8 +212,9 @@ public void NormalizerParameters() public void NormalizerParametersMultiColumnApi() { string dataPath = GetDataPath("iris.txt"); + var context = new MLContext(seed: 0); - var loader = new TextLoader(Env, new TextLoader.Options + var loader = new TextLoader(context, new TextLoader.Options { Columns = new[] { new TextLoader.Column("Label", DataKind.Single, 0), @@ -226,7 +227,7 @@ public void NormalizerParametersMultiColumnApi() }, HasHeader = true }, new MultiFileSource(dataPath)); - var context = new MLContext(seed: 0); + var est = context.Transforms.NormalizeMinMax( new[] { new InputOutputColumnPair("float1"), new InputOutputColumnPair("float4"), new InputOutputColumnPair("double1"), new InputOutputColumnPair("double4"), }) @@ -241,7 +242,7 @@ public void NormalizerParametersMultiColumnApi() new InputOutputColumnPair("double1lmv", "double1"), new InputOutputColumnPair("double4lmv", "double4")})) .Append(context.Transforms.NormalizeSupervisedBinning( new[] {new InputOutputColumnPair("float1nsb", "float1"), new InputOutputColumnPair("float4nsb", "float4"), - new InputOutputColumnPair("double1nsp", "double1"), new InputOutputColumnPair("double4nsb", "double4")})); + new InputOutputColumnPair("double1nsb", "double1"), new InputOutputColumnPair("double4nsb", "double4")})); var data = loader.Load(dataPath); From 4f33539c7e252b1e7aea567e6e005d4b5310602c Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 2 Apr 2019 17:30:17 -0700 Subject: [PATCH 8/8] PR feedback. --- .../Transformers/NormalizerTests.cs | 225 ++++++++++-------- 1 file changed, 131 insertions(+), 94 deletions(-) diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 2b2245c7e9..ed26cc0b06 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -248,100 +248,137 @@ public void NormalizerParametersMultiColumnApi() var transformer = est.Fit(data); var transformers = transformer.ToImmutableArray(); - var floatAffineData = ((NormalizingTransformer)transformers[0]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; - Assert.Equal(0.12658228f, floatAffineData.Scale); - Assert.Equal(0, floatAffineData.Offset); - - var floatAffineDataVec = ((NormalizingTransformer)transformers[0]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; - Assert.Equal(4, floatAffineDataVec.Scale.Length); - Assert.Empty(floatAffineDataVec.Offset); - - var doubleAffineData = ((NormalizingTransformer)transformers[0]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; - Assert.Equal(0.12658227848101264, doubleAffineData.Scale); - Assert.Equal(0, doubleAffineData.Offset); - - var doubleAffineDataVec = ((NormalizingTransformer)transformers[0]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; - Assert.Equal(4, doubleAffineDataVec.Scale.Length); - Assert.Empty(doubleAffineDataVec.Offset); - - var floatBinData = ((NormalizingTransformer)transformers[1]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; - Assert.True(35 == floatBinData.UpperBounds.Length); - Assert.True(34 == floatBinData.Density); - Assert.True(0 == floatBinData.Offset); - - var floatBinDataVec = ((NormalizingTransformer)transformers[1]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; - Assert.True(4 == floatBinDataVec.UpperBounds.Length); - Assert.True(35 == floatBinDataVec.UpperBounds[0].Length); - Assert.True(4 == floatBinDataVec.Density.Length); - Assert.True(0 == floatBinDataVec.Offset.Length); - - var doubleBinData = ((NormalizingTransformer)transformers[1]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; - Assert.Equal(35, doubleBinData.UpperBounds.Length); - Assert.Equal(34, doubleBinData.Density); - Assert.Equal(0, doubleBinData.Offset); - - var doubleBinDataVec = ((NormalizingTransformer)transformers[1]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; - Assert.Equal(35, doubleBinDataVec.UpperBounds[0].Length); - Assert.Equal(4, doubleBinDataVec.Density.Length); - Assert.Empty(doubleBinDataVec.Offset); - - var floatCdfMeanData = ((NormalizingTransformer)transformers[2]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; - Assert.Equal(1.33754611f, floatCdfMeanData.Scale); - Assert.Equal(0, floatCdfMeanData.Offset); - - var floatCdfMeanDataVec = ((NormalizingTransformer)transformers[2]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; - Assert.Equal(1.33754611f, floatCdfMeanDataVec.Scale[0]); - Assert.Equal(4, floatCdfMeanDataVec.Scale.Length); - Assert.Empty(floatCdfMeanDataVec.Offset); - - var doubleCdfMeanData = ((NormalizingTransformer)transformers[2]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; - Assert.Equal(1.3375461389666252, doubleCdfMeanData.Scale); - Assert.Equal(0, doubleCdfMeanData.Offset); - - var doubleCdfMeanDataVec = ((NormalizingTransformer)transformers[2]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; - Assert.Equal(4, doubleCdfMeanDataVec.Scale.Length); - Assert.Empty(doubleCdfMeanDataVec.Offset); - - var floatCdfLogMeanData = ((NormalizingTransformer)transformers[3]).Columns[0].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; - Assert.Equal(-0.310623198747635f, floatCdfLogMeanData.Mean); - Assert.True(true == floatCdfLogMeanData.UseLog); - Assert.Equal(0.140807763f, floatCdfLogMeanData.StandardDeviation); - - var floatCdfLogMeanDataVec = ((NormalizingTransformer)transformers[3]).Columns[1].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; - Assert.Equal(4, floatCdfLogMeanDataVec.Mean.Length); - Assert.True(true == floatCdfLogMeanDataVec.UseLog); - Assert.Equal(4, floatCdfLogMeanDataVec.StandardDeviation.Length); - - var doubleCdfLogMeanData = ((NormalizingTransformer)transformers[3]).Columns[2].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; - Assert.Equal(-0.31062321927759518, doubleCdfLogMeanData.Mean); - Assert.True(doubleCdfLogMeanData.UseLog); - Assert.Equal(0.14080776721611871, doubleCdfLogMeanData.StandardDeviation); - - var doubleCdfLogMeanDataVec = ((NormalizingTransformer)transformers[3]).Columns[3].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; - Assert.Equal(4, doubleCdfLogMeanDataVec.Mean.Length); - Assert.True(doubleCdfLogMeanDataVec.UseLog); - Assert.Equal(4, doubleCdfLogMeanDataVec.StandardDeviation.Length); - - floatBinData = ((NormalizingTransformer)transformers[4]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; - Assert.True(4 == floatBinData.UpperBounds.Length); - Assert.True(3 == floatBinData.Density); - Assert.True(0 == floatBinData.Offset); - - floatBinDataVec = ((NormalizingTransformer)transformers[4]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; - Assert.True(4 == floatBinDataVec.UpperBounds.Length); - Assert.True(4 == floatBinDataVec.UpperBounds[0].Length); - Assert.True(4 == floatBinDataVec.Density.Length); - Assert.True(0 == floatBinDataVec.Offset.Length); - - doubleBinData = ((NormalizingTransformer)transformers[4]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; - Assert.Equal(4, doubleBinData.UpperBounds.Length); - Assert.Equal(3, doubleBinData.Density); - Assert.Equal(0, doubleBinData.Offset); - - doubleBinDataVec = ((NormalizingTransformer)transformers[4]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; - Assert.Equal(4, doubleBinDataVec.UpperBounds[0].Length); - Assert.Equal(4, doubleBinDataVec.Density.Length); - Assert.Empty(doubleBinDataVec.Offset); + var floatAffineModel = ((NormalizingTransformer)transformers[0]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(0.12658228f, floatAffineModel.Scale); + Assert.Equal(0, floatAffineModel.Offset); + + var floatAffineModelVec = ((NormalizingTransformer)transformers[0]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, floatAffineModelVec.Scale.Length); + Assert.Empty(floatAffineModelVec.Offset); + + var doubleAffineModel = ((NormalizingTransformer)transformers[0]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(0.12658227848101264, doubleAffineModel.Scale); + Assert.Equal(0, doubleAffineModel.Offset); + + var doubleAffineModelVector = ((NormalizingTransformer)transformers[0]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, doubleAffineModelVector.Scale.Length); + Assert.Equal(0.12658227848101264, doubleAffineModelVector.Scale[0]); + Assert.Equal(0.4, doubleAffineModelVector.Scale[3]); + Assert.Empty(doubleAffineModelVector.Offset); + + var floatBinModel = ((NormalizingTransformer)transformers[1]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.True(35 == floatBinModel.UpperBounds.Length); + Assert.True(0.550632954f == floatBinModel.UpperBounds[0]); + Assert.True(float.PositiveInfinity == floatBinModel.UpperBounds[34]); + Assert.True(34 == floatBinModel.Density); + Assert.True(0 == floatBinModel.Offset); + + var floatBinModelVector = ((NormalizingTransformer)transformers[1]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.True(4 == floatBinModelVector.UpperBounds.Length); + Assert.True(35 == floatBinModelVector.UpperBounds[0].Length); + Assert.True(0.550632954f == floatBinModelVector.UpperBounds[0][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[0][floatBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.0600000024f == floatBinModelVector.UpperBounds[3][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[3][floatBinModelVector.UpperBounds[3].Length - 1]); + Assert.True(4 == floatBinModelVector.Density.Length); + Assert.True(0 == floatBinModelVector.Offset.Length); + + var doubleBinModel = ((NormalizingTransformer)transformers[1]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.Equal(35, doubleBinModel.UpperBounds.Length); + Assert.True(0.550632911392405 == doubleBinModel.UpperBounds[0]); + Assert.True(double.PositiveInfinity == doubleBinModel.UpperBounds[34]); + Assert.Equal(34, doubleBinModel.Density); + Assert.Equal(0, doubleBinModel.Offset); + + var doubleBinModelVector = ((NormalizingTransformer)transformers[1]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.Equal(35, doubleBinModelVector.UpperBounds[0].Length); + Assert.True(0.550632911392405 == doubleBinModelVector.UpperBounds[0][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[0][doubleBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.060000000000000012 == doubleBinModelVector.UpperBounds[3][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[3][doubleBinModelVector.UpperBounds[3].Length - 1]); + Assert.Equal(4, doubleBinModelVector.Density.Length); + Assert.Empty(doubleBinModelVector.Offset); + + var floatCdfMeanModel = ((NormalizingTransformer)transformers[2]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(1.33754611f, floatCdfMeanModel.Scale); + Assert.Equal(0, floatCdfMeanModel.Offset); + + var floatCdfMeanModelVector = ((NormalizingTransformer)transformers[2]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(1.33754611f, floatCdfMeanModelVector.Scale[0]); + Assert.Equal(1.75526536f, floatCdfMeanModelVector.Scale[3]); + Assert.Equal(4, floatCdfMeanModelVector.Scale.Length); + Assert.Empty(floatCdfMeanModelVector.Offset); + + var doubleCdfMeanModel = ((NormalizingTransformer)transformers[2]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(1.3375461389666252, doubleCdfMeanModel.Scale); + Assert.Equal(0, doubleCdfMeanModel.Offset); + + var doubleCdfMeanModelVector = ((NormalizingTransformer)transformers[2]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, doubleCdfMeanModelVector.Scale.Length); + Assert.True(1.3375461389666252 == doubleCdfMeanModelVector.Scale[0]); + Assert.True(1.7552654477786787 == doubleCdfMeanModelVector.Scale[3]); + Assert.Empty(doubleCdfMeanModelVector.Offset); + + var floatCdfLogMeanModel = ((NormalizingTransformer)transformers[3]).Columns[0].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + Assert.Equal(-0.310623198747635f, floatCdfLogMeanModel.Mean); + Assert.True(true == floatCdfLogMeanModel.UseLog); + Assert.Equal(0.140807763f, floatCdfLogMeanModel.StandardDeviation); + + var floatCdfLogMeanModelVector = ((NormalizingTransformer)transformers[3]).Columns[1].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + Assert.Equal(4, floatCdfLogMeanModelVector.Mean.Length); + Assert.True(-0.3106232f == floatCdfLogMeanModelVector.Mean[0]); + Assert.True(-1.08362031f == floatCdfLogMeanModelVector.Mean[3]); + Assert.True(true == floatCdfLogMeanModelVector.UseLog); + Assert.Equal(4, floatCdfLogMeanModelVector.StandardDeviation.Length); + Assert.True(0.140807763f == floatCdfLogMeanModelVector.StandardDeviation[0]); + Assert.True(0.9843767f == floatCdfLogMeanModelVector.StandardDeviation[3]); + + var doubleCdfLogMeanModel = ((NormalizingTransformer)transformers[3]).Columns[2].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + Assert.Equal(-0.31062321927759518, doubleCdfLogMeanModel.Mean); + Assert.True(doubleCdfLogMeanModel.UseLog); + Assert.Equal(0.14080776721611871, doubleCdfLogMeanModel.StandardDeviation); + + var doubleCdfLogMeanModelVector = ((NormalizingTransformer)transformers[3]).Columns[3].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + Assert.Equal(4, doubleCdfLogMeanModelVector.Mean.Length); + Assert.True(-0.31062321927759518 == doubleCdfLogMeanModelVector.Mean[0]); + Assert.True(-1.0836203140680853 == doubleCdfLogMeanModelVector.Mean[3]); + Assert.True(doubleCdfLogMeanModelVector.UseLog); + Assert.Equal(4, doubleCdfLogMeanModelVector.StandardDeviation.Length); + Assert.True(0.14080776721611871 == doubleCdfLogMeanModelVector.StandardDeviation[0]); + Assert.True(0.98437679839698122 == doubleCdfLogMeanModelVector.StandardDeviation[3]); + + floatBinModel = ((NormalizingTransformer)transformers[4]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.True(4 == floatBinModel.UpperBounds.Length); + Assert.True(0.6139241f == floatBinModel.UpperBounds[0]); + Assert.True(float.PositiveInfinity == floatBinModel.UpperBounds[3]); + Assert.True(3 == floatBinModel.Density); + Assert.True(0 == floatBinModel.Offset); + + floatBinModelVector = ((NormalizingTransformer)transformers[4]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.True(4 == floatBinModelVector.UpperBounds.Length); + Assert.True(4 == floatBinModelVector.UpperBounds[0].Length); + Assert.True(0.6139241f == floatBinModelVector.UpperBounds[0][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[0][floatBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.32f == floatBinModelVector.UpperBounds[3][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[3][floatBinModelVector.UpperBounds[3].Length - 1]); + Assert.True(4 == floatBinModelVector.Density.Length); + Assert.True(0 == floatBinModelVector.Offset.Length); + + doubleBinModel = ((NormalizingTransformer)transformers[4]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.Equal(4, doubleBinModel.UpperBounds.Length); + Assert.True(0.61392405063291133 == doubleBinModel.UpperBounds[0]); + Assert.True(float.PositiveInfinity == doubleBinModel.UpperBounds[3]); + Assert.Equal(3, doubleBinModel.Density); + Assert.Equal(0, doubleBinModel.Offset); + + doubleBinModelVector = ((NormalizingTransformer)transformers[4]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.Equal(4, doubleBinModelVector.UpperBounds[0].Length); + Assert.True(0.6139240506329113335 == doubleBinModelVector.UpperBounds[0][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[0][doubleBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.32 == doubleBinModelVector.UpperBounds[3][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[3][doubleBinModelVector.UpperBounds[3].Length - 1]); + Assert.Equal(4, doubleBinModelVector.Density.Length); + Assert.Empty(doubleBinModelVector.Offset); Done(); }