From ada4969574792f70a4630b1f8cbf09c82516fe46 Mon Sep 17 00:00:00 2001 From: Artidoro Pagnoni Date: Sun, 21 Apr 2019 15:21:39 -0700 Subject: [PATCH 1/2] lp norm gc norm --- src/Microsoft.ML.Transforms/GcnTransform.cs | 87 ++++++++++++++++--- .../NormalizerCatalog.cs | 34 +++----- 2 files changed, 87 insertions(+), 34 deletions(-) diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index d2134bd7ed..49ffb068f6 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -35,17 +35,7 @@ namespace Microsoft.ML.Transforms { /// - /// Lp-Norm (vector/row-wise) normalization transform. Has the following two set of arguments: - /// 1- Lp-Norm normalizer arguments: - /// Normalize rows individually by rescaling them to unit norm (L2, L1 or LInf). - /// Performs the following operation on a vector X: - /// Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. - /// Scaling inputs to unit norms is a common operation for text classification or clustering. - /// 2- Global contrast normalization (GCN) arguments: - /// Performs the following operation on a vector X: - /// Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. - /// Usage examples and Matlab code: - /// https://www.cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf. + /// resulting from fitting a or . /// public sealed class LpNormNormalizingTransformer : OneToOneTransformerBase { @@ -641,7 +631,7 @@ public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, Lp } /// - /// Base estimator class for LpNorm and Gcn normalizers. + /// Base estimator class for and normalizers. /// public abstract class LpNormNormalizingEstimatorBase : TrivialEstimator { @@ -805,8 +795,53 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) } /// - /// Lp Normalizing estimator takes columns and normalizes them individually by rescaling them to unit norm. + /// Normalizes (scales) vectors in the input column to the unit norm. The type of norm that is used can be specified by the user. /// + /// + /// | + /// | Output column data type | Vector of | + /// + /// + /// The resulting normalizes vectors in the input column individually + /// by rescaling them to the unit norm. + /// + /// Let $x$ be the input vector, $n$ the size of the vector, $L(x)$ the norm function selected by the user. + /// Let $\mu(x) = \sum_i x_i / n$ be the mean of the values of vector $x$. The + /// performs the following operation on each input vector $x$: + /// + /// $y = \frac{x - \mu(x)}{L(x)}$ + /// + /// if the user specifies that the mean should be zero, or otherwise: + /// + /// $y = \frac{x}{L(x)}$ + /// + /// There are four types of norm that can be selected by the user to be applied on input vector $x$. They are defined as follows: + /// - + /// + /// $L_1(x) = \sum_i |x_i|$ + /// + /// - + /// + /// $L_2(x) = \sqrt{\sum_i x_i^2}$ + /// + /// - + /// + /// $L_{\infty}(x) = \max_i\{|x_i|\}$ + /// + /// - + /// + /// $L_\sigma(x)$ is defined as the standard deviation of the elements of the input vector $x$ + /// + /// ]]> + /// + /// + /// public sealed class LpNormNormalizingEstimator : LpNormNormalizingEstimatorBase { /// @@ -861,8 +896,32 @@ internal LpNormNormalizingEstimator(IHostEnvironment env, params ColumnOptions[] } /// - /// Global contrast normalizing estimator takes columns and performs global constrast normalization. + /// Normalizes (scales) vectors in the input column applying the global contrast normalization. /// + /// + /// | + /// | Output column data type | Vector of | + /// + /// + /// The resulting normalizes vectors in the input column individually + /// by rescaling them applying the global contrast normalization. The + /// performs the following operation on each input vector $x$: + /// + /// $y = \frac{s * x - \mu(x)}{L(x)}$ + /// + /// Where $s$ is a user provided scaling factor, $\mu(x)$ is the mean of the elements of vector $x$, and $L(x)$ is the $L_2$ norm or the + /// standard deviation of the elements of vector $x$. These settings can be specified by the user when the + /// is initialized. + /// ]]> + /// + /// + /// public sealed class GlobalContrastNormalizingEstimator : LpNormNormalizingEstimatorBase { /// diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 94a93fad6c..1b457bd399 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -280,20 +280,17 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// - /// Takes column filled with a vector of floats and normalize its to one. By setting to , - /// a pre-processing step would be applied to make the specified column's mean be a zero vector. + /// Normalizes (scales) vectors in the input column to the unit norm. The type of norm that is used is defined by . + /// Setting to , will apply a pre-processing step to make the specified column's mean be a zero vector. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . - /// The data type on this column is the same as the input column. - /// Name of column to transform. If set to , the value of the will be used as source. + /// This column's data type will be the same as the input column's data type. + /// Name of column to transform. If set to , the value of the + /// will be used as source. + /// This estimator operates over known-sized vectors of . /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. - /// - /// This transform performs the following operation on a each row X: Y = (X - M(X)) / D(X) - /// where M(X) is scalar value of mean for all elements in the current row if set to or 0 othewise - /// and D(X) is scalar value of selected . - /// /// /// /// new LpNormNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// - /// Takes column filled with a vector of floats and computes global contrast normalization of it. By setting to , - /// a pre-processing step would be applied to make the specified column's mean be a zero vector. + /// Normalizes columns individually applying global contrast normalization. Setting to , + /// will apply a pre-processing step to make the specified column's mean be the zero vector. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . - /// The data type on this column is the same as the input column. - /// Name of column to transform. If set to , the value of the will be used as source. + /// This column's data type will be the same as the input column's data type. + /// Name of column to transform. If set to , the value of the + /// will be used as source. + /// This estimator operates over known-sized vectors of . /// If , subtract mean from each value before normalizing and use the raw input otherwise. - /// If , resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one. + /// If , resulted vector's standard deviation would be one. + /// Otherwise, resulted vector's L2-norm would be one. /// Scale features by this value. - /// - /// This transform performs the following operation on a row X: Y = scale * (X - M(X)) / D(X) - /// where M(X) is scalar value of mean for all elements in the current row if set to or 0 othewise - /// D(X) is scalar value of standard deviation for row if set to or - /// L2 norm of this row vector if set to and scale is . - /// /// /// /// Date: Sun, 21 Apr 2019 17:34:58 -0700 Subject: [PATCH 2/2] review comments --- src/Microsoft.ML.Transforms/GcnTransform.cs | 13 ++++++------- src/Microsoft.ML.Transforms/NormalizerCatalog.cs | 10 +++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 49ffb068f6..1178246386 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -822,19 +822,19 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) /// $y = \frac{x}{L(x)}$ /// /// There are four types of norm that can be selected by the user to be applied on input vector $x$. They are defined as follows: - /// - + /// - /// /// $L_1(x) = \sum_i |x_i|$ /// - /// - + /// - /// /// $L_2(x) = \sqrt{\sum_i x_i^2}$ /// - /// - + /// - /// /// $L_{\infty}(x) = \max_i\{|x_i|\}$ /// - /// - + /// - /// /// $L_\sigma(x)$ is defined as the standard deviation of the elements of the input vector $x$ /// @@ -909,9 +909,8 @@ internal LpNormNormalizingEstimator(IHostEnvironment env, params ColumnOptions[] /// | Output column data type | Vector of | /// /// - /// The resulting normalizes vectors in the input column individually - /// by rescaling them applying the global contrast normalization. The - /// performs the following operation on each input vector $x$: + /// The resulting normalizes vectors in the input column individually, + /// rescaling them by applying global contrast normalization. The transform performs the following operation on each input vector $x$: /// /// $y = \frac{s * x - \mu(x)}{L(x)}$ /// diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 1b457bd399..2a699afcf3 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -286,10 +286,10 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// The transform's catalog. /// Name of the column resulting from the transformation of . /// This column's data type will be the same as the input column's data type. - /// Name of column to transform. If set to , the value of the + /// Name of the column to normalize. If set to , the value of the /// will be used as source. /// This estimator operates over known-sized vectors of . - /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. + /// Type of norm to use to normalize each sample. The indicated norm of the resulting vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. /// /// @@ -318,12 +318,12 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo /// The transform's catalog. /// Name of the column resulting from the transformation of . /// This column's data type will be the same as the input column's data type. - /// Name of column to transform. If set to , the value of the + /// Name of the column to normalize. If set to , the value of the /// will be used as source. /// This estimator operates over known-sized vectors of . /// If , subtract mean from each value before normalizing and use the raw input otherwise. - /// If , resulted vector's standard deviation would be one. - /// Otherwise, resulted vector's L2-norm would be one. + /// If , the resulting vector's standard deviation would be one. + /// Otherwise, the resulting vector's L2-norm would be one. /// Scale features by this value. /// ///