diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs
index e0f5d5f019..bf2c84c4c4 100644
--- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs
+++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs
@@ -26,6 +26,43 @@
namespace Microsoft.ML.Transforms
{
+ ///
+ /// for the .
+ ///
+ ///
+ /// or or a known-sized vector of those types. |
+ /// | Output column data type | The same data type as the input column |
+ ///
+ /// The resulting NormalizingEstimator will normalize the data in one of the following ways based upon how it was created:
+ /// * Min Max - A linear rescale that is based upon the minimum and maximum values for each row.
+ /// * Mean Variance - Rescale each row to unit variance and, optionally, zero mean.
+ /// * Log Mean Variance - Rescale each row to unit variance based on a log scale.
+ /// * Binning - Bucketizes the data in each row and performs a linear rescale based on the calculated bins.
+ /// * Supervised Binning - Bucketizes the data in each row and performs a linear rescale based on the calculated bins. The bin calculation is based on correlation of the Label column.
+ ///
+ /// ### Estimator Details
+ /// The interval of the normalized data depends on whether fixZero is specified or not. fixZero defaults to true.
+ /// When fixZero is false, the normalized interval is $[0,1]$ and the distribution of the normalized values depends on the normalization mode. For example, with Min Max, the minimum
+ /// and maximum values are mapped to 0 and 1 respectively and remaining values fall in between.
+ /// When fixZero is set, the normalized interval is $[-1,1]$ and zero is mapped to zero; the distribution of the normalized values still depends on the normalization mode, but it differs from the case where fixZero is false.
+ /// With Min Max, the distribution depends on how far away the number is from 0, resulting in the number with the largest distance being mapped to 1 if it's a positive number
+ /// or -1 if it's a negative number. The distance from 0 will affect the distribution with a majority of numbers that are closer together normalizing towards 0.
+ ///
+ /// To create this estimator use one of the following:
+ /// * [NormalizeMinMax](xref:Microsoft.ML.NormalizationCatalog.NormalizeMinMax(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean))
+ /// * [NormalizeMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Boolean))
+ /// * [NormalizeLogMeanVariance](xref:Microsoft.ML.NormalizationCatalog.NormalizeLogMeanVariance(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean))
+ /// * [NormalizeBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int64,System.Boolean,System.Int32))
+ /// * [NormalizeSupervisedBinning](xref:Microsoft.ML.NormalizationCatalog.NormalizeSupervisedBinning(Microsoft.ML.TransformsCatalog,System.String,System.String,System.String,System.Int64,System.Boolean,System.Int32,System.Int32))
+ /// ]]>
+ ///
+ ///
public sealed class NormalizingEstimator : IEstimator
{
[BestFriend]
@@ -284,6 +321,9 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
}
}
+ ///
+ /// resulting from fitting an .
+ ///
public sealed partial class NormalizingTransformer : OneToOneTransformerBase
{
internal const string LoaderSignature = "Normalizer";
diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
index 7453efbaac..ccf0f35bb7 100644
--- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
+++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
@@ -31,11 +31,13 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
}
///
- /// It normalizes the data based on the observed minimum and maximum values of the data.
+ /// Create a , which normalizes based on the observed minimum and maximum values of the data.
///
/// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
+ /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// The data type on this column should be , or a known-sized vector of those types.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
///
@@ -55,10 +57,12 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo
}
///
- /// It normalizes the data based on the observed minimum and maximum values of the data.
+ /// Create a , which normalizes based on the observed minimum and maximum values of the data.
///
/// The transform catalog
- /// List of Output and Input column pairs.
+ /// The pairs of input and output columns.
+ /// The input columns must be of data type , or a known-sized vector of those types.
+ /// The data type for the output column will be the same as the associated input column.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns,
@@ -69,11 +73,13 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo
new NormalizingEstimator.MinMaxColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero)).ToArray());
///
- /// It normalizes the data based on the computed mean and variance of the data.
+ /// Create a , which normalizes based on the computed mean and variance of the data.
///
/// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
+ /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// The data type on this column should be , or a known-sized vector of those types.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
/// Whether to use CDF as the output.
@@ -95,10 +101,12 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog
}
///
- /// It normalizes the data based on the computed mean and variance of the data.
+ /// Create a , which normalizes based on the computed mean and variance of the data.
///
/// The transform catalog
- /// List of Output and Input column pairs.
+ /// The pairs of input and output columns.
+ /// The input columns must be of data type , or a known-sized vector of those types.
+ /// The data type for the output column will be the same as the associated input column.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
/// Whether to use CDF as the output.
@@ -111,11 +119,13 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog
new NormalizingEstimator.MeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, useCdf)).ToArray());
///
- /// It normalizes the data based on the computed mean and variance of the logarithm of the data.
+ /// Create a , which normalizes based on the computed mean and variance of the logarithm of the data.
///
/// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
+ /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// The data type on this column should be , or a known-sized vector of those types.
/// Maximum number of examples used to train the normalizer.
/// Whether to use CDF as the output.
///
@@ -135,10 +145,12 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal
}
///
- /// It normalizes the data based on the computed mean and variance of the logarithm of the data.
+ /// Create a , which normalizes based on the computed mean and variance of the logarithm of the data.
///
/// The transform catalog
- /// List of Output and Input column pairs.
+ /// The pairs of input and output columns.
+ /// The input columns must be of data type , or a known-sized vector of those types.
+ /// The data type for the output column will be the same as the associated input column.
/// Maximum number of examples used to train the normalizer.
/// Whether to use CDF as the output.
public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns,
@@ -149,11 +161,13 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal
new NormalizingEstimator.LogMeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, useCdf)).ToArray());
///
- /// The values are assigned into bins with equal density.
+ /// Create a , which normalizes by assigning the data into bins with equal density.
///
/// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
+ /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// The data type on this column should be , or a known-sized vector of those types.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
/// Maximum number of bins (power of 2 recommended).
@@ -175,10 +189,12 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal
}
///
- /// The values are assigned into bins with equal density.
+ /// Create a , which normalizes by assigning the data into bins with equal density.
///
/// The transform catalog
- /// List of Output and Input column pairs.
+ /// The pairs of input and output columns.
+ /// The input columns must be of data type , or a known-sized vector of those types.
+ /// The data type for the output column will be the same as the associated input column.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
/// Maximum number of bins (power of 2 recommended).
@@ -191,11 +207,13 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal
new NormalizingEstimator.BinningColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, maximumBinCount)).ToArray());
///
- /// The values are assigned into bins based on correlation with the column.
+ /// Create a , which normalizes by assigning the data into bins based on correlation with the column.
///
/// The transform catalog
- /// Name of the column resulting from the transformation of .
- /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
+ /// Name of the column to transform. If set to , the value of the will be used as source.
+ /// The data type on this column should be , or a known-sized vector of those types.
/// Name of the label column for supervised binning.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
@@ -221,10 +239,12 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat
}
///
- /// The values are assigned into bins based on correlation with the column.
+ /// Create a , which normalizes by assigning the data into bins based on correlation with the column.
///
/// The transform catalog
- /// List of Output and Input column pairs.
+ /// The pairs of input and output columns.
+ /// The input columns must be of data type , or a known-sized vector of those types.
+ /// The data type for the output column will be the same as the associated input column.
/// Name of the label column for supervised binning.
/// Maximum number of examples used to train the normalizer.
/// Whether to map zero to zero, preserving sparsity.
@@ -256,7 +276,8 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
/// a pre-processing step would be applied to make the specified column's mean be a zero vector.
///
/// The transform's catalog.
- /// Name of the column resulting from the transformation of .
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
/// Name of column to transform. If set to , the value of the will be used as source.
/// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one.
/// If , subtract mean from each value before normalizing and use the raw input otherwise.
@@ -290,7 +311,8 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo
/// a pre-processing step would be applied to make the specified column's mean be a zero vector.
///
/// The transform's catalog.
- /// Name of the column resulting from the transformation of .
+ /// Name of the column resulting from the transformation of .
+ /// The data type on this column is the same as the input column.
/// Name of column to transform. If set to , the value of the will be used as source.
/// If , subtract mean from each value before normalizing and use the raw input otherwise.
/// If , resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one.