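From e5b9fc16ccdaa1b7e764d0bb555b6d0f35fe370e Mon Sep 17 00:00:00 2001 From: Scott Inglis <msinglft@gmail.com> Date: Thu, 18 Apr 2019 18:19:00 -0700 Subject: [PATCH 1/6] XML documentation for Normalizer Tracked in #3204 --- .../Transforms/Normalizer.cs | 33 ++++++++++++++ .../NormalizerCatalog.cs | 45 ++++++++++--------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index e0f5d5f019..2d9406c2df 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -26,6 +26,36 @@ namespace Microsoft.ML.Transforms { + /// <summary> + /// <see cref="IEstimator{TTransformer}"/> for the <see cref="NormalizingTransformer"/>. + /// </summary> + /// <remarks> + /// <format type="text/markdown"><![CDATA[ + /// + /// ### Estimator Characteristics + /// | | | + /// | -- | -- | + /// | Does this estimator need to look at the data to train its parameters? | No | + /// | Input column data type | <xref:System.Single> or <xref:System.Double> | + /// | Output column data type | The same as the data type in the input column | + /// + /// The resulting NormalizingEstimator will normalize the data in one of the following ways based upon how it was created: + /// * Min Max - Linear rescale such that minimum and maximum values are mapped between -1 and 1. + /// * Mean Variance - Rescale to unit variance and, optionally, zero mean. + /// * Log Mean Variance - Rescale to unit variance on the log scale. + /// * Binning - Bucketize and then rescale to between -1 and 1. + /// * Supervised Binning - Bucketize and then rescale to between -1 and 1. Calculates bins based on correlation with the Label column. + /// + /// To create this estimator use one of the following: + /// * [NormalizeMinMax](xref:Microsoft.ML.NormalizationCatalog.NormalizeMinMax(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean)) + /// * [NormalizeMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Boolean)) + /// * [NormalizeLogMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeLogMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean)) + /// * [NormalizeBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Int32)) + /// * [NormalizeSupervisedBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeSupervisedBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.String,System.Int64,System.Boolean,System.Int32,System.Int32)) + /// + /// ]]> + /// </format> + /// </remarks> public sealed class NormalizingEstimator : IEstimator<NormalizingTransformer> { [BestFriend] @@ -284,6 +314,9 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) } } + /// <summary> + /// <see cref="ITransformer"/> resulting from fitting a <see cref="NormalizingEstimator"/>.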
+ /// </summary> public sealed partial class NormalizingTransformer : OneToOneTransformerBase { internal const string LoaderSignature = "Normalizer"; diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 7453efbaac..155d7f9a0d 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -31,11 +31,12 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, } /// <summary> - /// It normalizes the data based on the observed minimum and maximum values of the data. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the observed minimum and maximum values of the data. /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> - /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> + /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. + /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <example> @@ -55,10 +56,10 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo } /// <summary> - /// It normalizes the data based on the observed minimum and maximum values of the data. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the observed minimum and maximum values of the data. 
/// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">List of Output and Input column pairs.</param> + /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns, @@ -69,11 +70,12 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo new NormalizingEstimator.MinMaxColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero)).ToArray()); /// <summary> - /// It normalizes the data based on the computed mean and variance of the data. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the data. /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> - /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> + /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
+ /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> @@ -95,10 +97,10 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog } /// <summary> - /// It normalizes the data based on the computed mean and variance of the data. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">List of Output and Input column pairs.</param> + /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> @@ -111,11 +113,12 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog new NormalizingEstimator.MeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, useCdf)).ToArray()); /// <summary> - /// It normalizes the data based on the computed mean and variance of the logarithm of the data. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the logarithm of the data. 
/// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> - /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> + /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. + /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> /// <example> @@ -135,10 +138,10 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal } /// <summary> - /// It normalizes the data based on the computed mean and variance of the logarithm of the data. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the logarithm of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">List of Output and Input column pairs.</param> + /// <param name="columns">The pairs of input and output columns. 
The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, @@ -149,11 +152,12 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal new NormalizingEstimator.LogMeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, useCdf)).ToArray()); /// <summary> - /// The values are assigned into bins with equal density. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins with equal density. /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> - /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> + /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. + /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param> @@ -175,10 +179,10 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal } /// <summary> - /// The values are assigned into bins with equal density. 
+ /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins with equal density. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">List of Output and Input column pairs.</param> + /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param> @@ -191,11 +195,12 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal new NormalizingEstimator.BinningColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, maximumBinCount)).ToArray()); /// <summary> - /// The values are assigned into bins based on correlation with the <paramref name="labelColumnName"/> column. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins based on correlation with the <paramref name="labelColumnName"/> column. /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> - /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> + /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
+ /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="labelColumnName">Name of the label column for supervised binning.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> @@ -221,10 +226,10 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat } /// <summary> - /// The values are assigned into bins based on correlation with the <paramref name="labelColumnName"/> column. + /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins based on correlation with the <paramref name="labelColumnName"/> column. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">List of Output and Input column pairs.</param> + /// <param name="columns">The pairs of input and output columns. 
The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="labelColumnName">Name of the label column for supervised binning.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> From f988be2da8ec0aaa587e175939233e94b29e0c63 Mon Sep 17 00:00:00 2001 From: Scott Inglis <msinglft@gmail.com> Date: Thu, 18 Apr 2019 18:24:14 -0700 Subject: [PATCH 2/6] - Fixing space --- src/Microsoft.ML.Data/Transforms/Normalizer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 2d9406c2df..b97627a516 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -52,7 +52,7 @@ namespace Microsoft.ML.Transforms /// * [NormalizeLogMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeLogMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean)) /// * [NormalizeBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Int32)) /// * [NormalizeSupervisedBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeSupervisedBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.String,System.Int64,System.Boolean,System.Int32,System.Int32)) - /// + /// /// ]]> /// </format> /// </remarks> From 9a90f5b497fff8fac66a86a897eb8fd4c4d32ce2 Mon Sep 17 00:00:00 2001 From: Scott Inglis <msinglft@gmail.com> Date: Fri, 19 Apr 2019 11:16:19 -0700 Subject: [PATCH 3/6] - Updating from feedback. 
--- .../Transforms/Normalizer.cs | 2 +- .../NormalizerCatalog.cs | 41 +++++++++++++------ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index b97627a516..0e77b8e171 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -35,7 +35,7 @@ namespace Microsoft.ML.Transforms /// ### Estimator Characteristics /// | | | /// | -- | -- | - /// | Does this estimator need to look at the data to train its parameters? | No | + /// | Does this estimator need to look at the data to train its parameters? | Yes | /// | Input column data type | <xref:System.Single> or <xref:System.Double> | /// | Output column data type | The same as the data type in the input column | /// diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 155d7f9a0d..3a67e81cf2 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -34,7 +34,8 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the observed minimum and maximum values of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. + /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
/// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> @@ -59,7 +60,9 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the observed minimum and maximum values of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// <param name="columns">The pairs of input and output columns. + /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns, @@ -73,7 +76,8 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. + /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. 
If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> @@ -100,7 +104,9 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// <param name="columns">The pairs of input and output columns. + /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> @@ -116,7 +122,8 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the logarithm of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. 
+ /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> @@ -141,7 +148,9 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// Create a <see cref="NormalizingEstimator"/>, which normalizes based on the computed mean and variance of the logarithm of the data. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// <param name="columns">The pairs of input and output columns. + /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, @@ -155,7 +164,8 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins with equal density. 
/// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. + /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> @@ -182,7 +192,9 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins with equal density. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">The pairs of input and output columns. The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// <param name="columns">The pairs of input and output columns. + /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. 
+ /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param> @@ -198,7 +210,8 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins based on correlation with the <paramref name="labelColumnName"/> column. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. + /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> /// <param name="labelColumnName">Name of the label column for supervised binning.</param> @@ -229,7 +242,9 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat /// Create a <see cref="NormalizingEstimator"/>, which normalizes by assigning the data into bins based on correlation with the <paramref name="labelColumnName"/> column. /// </summary> /// <param name="catalog">The transform catalog</param> - /// <param name="columns">The pairs of input and output columns. 
The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// <param name="columns">The pairs of input and output columns. + /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The data type for the output column will be the same as the associated input column.</param> /// <param name="labelColumnName">Name of the label column for supervised binning.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> @@ -261,7 +276,8 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// a pre-processing step would be applied to make the specified column's mean be a zero vector. /// </summary> /// <param name="catalog">The transform's catalog.</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. + /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> /// <param name="norm">Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one.</param> /// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing and use the raw input otherwise.</param> @@ -295,7 +311,8 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo /// a pre-processing step would be applied to make the specified column's mean be a zero vector. 
/// </summary> /// <param name="catalog">The transform's catalog.</param> - /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> + /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. + /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> /// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing and use the raw input otherwise.</param> /// <param name="ensureUnitStandardDeviation">If <see langword="true"/>, resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one.</param> From e331364664a7029cbbe2bb2a9b1248174d4ac009 Mon Sep 17 00:00:00 2001 From: Scott Inglis <msinglft@gmail.com> Date: Fri, 19 Apr 2019 11:53:43 -0700 Subject: [PATCH 4/6] - Updating description, intervals are now in latex. --- src/Microsoft.ML.Data/Transforms/Normalizer.cs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 0e77b8e171..9118a00cb3 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -36,23 +36,22 @@ namespace Microsoft.ML.Transforms /// | | | /// | -- | -- | /// | Does this estimator need to look at the data to train its parameters? 
| Yes | - /// | Input column data type | <xref:System.Single> or <xref:System.Double> | - /// | Output column data type | The same as the data type in the input column | + /// | Input column data type | <xref:System.Single> or<xref:System.Double> | + /// | Output column data type | The same data type as the input column | /// /// The resulting NormalizingEstimator will normalize the data in one of the following ways based upon how it was created: - /// * Min Max - Linear rescale such that minimum and maximum values are mapped between -1 and 1. + /// * Min Max - Linear rescale such that the values are mapped to the $[-1,1]$ with the minimum and maximum values being mapped to -1 and 1 respectively. /// * Mean Variance - Rescale to unit variance and, optionally, zero mean. /// * Log Mean Variance - Rescale to unit variance on the log scale. - /// * Binning - Bucketize and then rescale to between -1 and 1. - /// * Supervised Binning - Bucketize and then rescale to between -1 and 1. Calculates bins based on correlation with the Label column. + /// * Binning - Bucketize and then rescale to the values to the $[-1,1]$ interval. + /// * Supervised Binning - Bucketize and then rescale the values to $[-1,1]$ interval. Calculates bins based on correlation with the Label column. 
/// /// To create this estimator use one of the following: - /// * [NormalizeMinMax](xref:Microsoft.ML.NormalizationCatalog.NormalizeMinMax(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean)) + /// * [NormalizeMinMax](xref:Microsoft.ML.NormalizationCatalog.NormalizeMinMax(Microsoft.ML.TransformsCatalog, System.String, System.String, System.Int64, System.Boolean)) /// * [NormalizeMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Boolean)) /// * [NormalizeLogMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeLogMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean)) /// * [NormalizeBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Int32)) /// * [NormalizeSupervisedBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeSupervisedBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.String,System.Int64,System.Boolean,System.Int32,System.Int32)) - /// /// ]]> /// </format> /// </remarks> From 5eee846085252c3fb3c9ccd14300f0925544e837 Mon Sep 17 00:00:00 2001 From: Scott Inglis <msinglft@gmail.com> Date: Fri, 19 Apr 2019 15:14:58 -0700 Subject: [PATCH 5/6] - Addressing further feedback --- .../Transforms/Normalizer.cs | 20 +++++++++++++------ .../NormalizerCatalog.cs | 20 +++++++++---------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 9118a00cb3..165b0dd8bf 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -36,15 +36,23 @@ namespace Microsoft.ML.Transforms /// | | | /// | -- | -- | /// | Does this estimator need to look at the data to train its 
parameters? | Yes | - /// | Input column data type | <xref:System.Single> or<xref:System.Double> | + /// | Input column data type | <xref:System.Single> or<xref:System.Double> or a known-sized vector of those types. | /// | Output column data type | The same data type as the input column | /// /// The resulting NormalizingEstimator will normalize the data in one of the following ways based upon how it was created: - /// * Min Max - Linear rescale such that the values are mapped to the $[-1,1]$ with the minimum and maximum values being mapped to -1 and 1 respectively. - /// * Mean Variance - Rescale to unit variance and, optionally, zero mean. - /// * Log Mean Variance - Rescale to unit variance on the log scale. - /// * Binning - Bucketize and then rescale to the values to the $[-1,1]$ interval. - /// * Supervised Binning - Bucketize and then rescale the values to $[-1,1]$ interval. Calculates bins based on correlation with the Label column. + /// * Min Max - A linear rescale that is based upon the minimum and maximum values for each row. + /// * Mean Variance - Rescale each row to unit variance and, optionally, zero mean. + /// * Log Mean Variance - Rescale each row to unit variance based on a log scale. + /// * Binning - Bucketizes the data in each row and performs a linear rescale based on the calculated bins. + /// * Supervised Binning - Bucketizes the data in each row and performs a linear rescale based on the calculated bins. The bin calculation is based on correlation of the Label column. + /// + /// ### Estimator Details + /// The interval of the normalized data depends on whether fixZero is specified or not. fixZero defaults to true. + /// When fixZero is false, the normalized interval is $[0,1]$ and the distribution of the normalized values depends on the normalization mode. For example, with Min Max, the minimum + /// and maximum values are mapped to 0 and 1 respectively and remaining values fall in between.
+ /// When fixZero is set, the normalized interval is $[-1,1]$ with the distribution of the normalized values depending on the normalization mode, but the behavior is different. + /// With Min Max, the distribution depends on how far away the number is from 0, resulting in the number with the largest distance being mapped to 1 if it's a positive number + /// or -1 if it's a negative number. The distance from 0 will affect the distribution with a majority of numbers that are closer together normalizing towards 0. /// /// To create this estimator use one of the following: /// * [NormalizeMinMax](xref:Microsoft.ML.NormalizationCatalog.NormalizeMinMax(Microsoft.ML.TransformsCatalog, System.String, System.String, System.Int64, System.Boolean)) diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 3a67e81cf2..ccf0f35bb7 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -37,7 +37,7 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
- /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// The data type on this column should be <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <example> @@ -61,7 +61,7 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="columns">The pairs of input and output columns. - /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The input columns must be of data type <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types. /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> @@ -79,7 +79,7 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
- /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// The data type on this column should be <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> @@ -105,7 +105,7 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="columns">The pairs of input and output columns. - /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The input columns must be of data type <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types. /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> @@ -125,7 +125,7 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
- /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// The data type on this column should be <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> /// <example> @@ -149,7 +149,7 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="columns">The pairs of input and output columns. - /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The input columns must be of data type <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types. /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="useCdf">Whether to use CDF as the output.</param> @@ -167,7 +167,7 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
- /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// The data type on this column should be <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> /// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param> @@ -193,7 +193,7 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="columns">The pairs of input and output columns. - /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The input columns must be of data type <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types. /// The data type for the output column will be the same as the associated input column.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> @@ -213,7 +213,7 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. /// The data type on this column is the same as the input column.</param> /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source. 
- /// The data type on this column should be <see cref="System.Single"/> or <see cref="System.Double"/></param> + /// The data type on this column should be <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types.</param> /// <param name="labelColumnName">Name of the label column for supervised binning.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> /// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param> @@ -243,7 +243,7 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat /// </summary> /// <param name="catalog">The transform catalog</param> /// <param name="columns">The pairs of input and output columns. - /// The input columns must be of data type <see cref="System.Single"/> or <see cref="System.Double"/>. + /// The input columns must be of data type <see cref="System.Single"/>, <see cref="System.Double"/> or a known-sized vector of those types. 
/// The data type for the output column will be the same as the associated input column.</param> /// <param name="labelColumnName">Name of the label column for supervised binning.</param> /// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param> From c3adf94370df840da80fcc19eb7a7ee923da80d0 Mon Sep 17 00:00:00 2001 From: Scott Inglis <msinglft@gmail.com> Date: Fri, 19 Apr 2019 16:41:31 -0700 Subject: [PATCH 6/6] - update from feedback --- src/Microsoft.ML.Data/Transforms/Normalizer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 165b0dd8bf..bf2c84c4c4 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -36,7 +36,7 @@ namespace Microsoft.ML.Transforms /// | | | /// | -- | -- | /// | Does this estimator need to look at the data to train its parameters? | Yes | - /// | Input column data type | <xref:System.Single> or<xref:System.Double> or a known-sized vector of those types. | + /// | Input column data type | <xref:System.Single> or <xref:System.Double> or a known-sized vector of those types. | /// | Output column data type | The same data type as the input column | /// /// The resulting NormalizingEstimator will normalize the data in one of the following ways based upon how it was created: