dotnet · sfilipi · Apr 21, 2019 · Apr 19, 2019 · Apr 19, 2019 · Apr 21, 2019
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -17,11 +17,14 @@ namespace Microsoft.ML
     public static class TextCatalog
     {
         /// <summary>
-        /// Transform a text column into featurized float array that represents counts of ngrams and char-grams.
+        /// Create a <see cref="TextFeaturizingEstimator"/>, which transforms a text column into a featurized vector of <see cref="System.Single"/> that represents normalized counts of ngrams and char-grams.
         /// </summary>
         /// <param name="catalog">The text-related transform's catalog.</param>
-        /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
-        /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
+        /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
+        /// This column's data type will be a vector of <see cref="System.Single"/>. </param>
+        /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
+        /// This estimator operates over text data.
+        /// </param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -36,12 +39,17 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text
                 outputColumnName, inputColumnName);
 
         /// <summary>
-        /// Transform several text columns into featurized float array that represents counts of ngrams and char-grams.
+        /// Create a <see cref="TextFeaturizingEstimator"/>, which transforms text columns into a featurized vector of <see cref="System.Single"/> that represents normalized counts of ngrams and char-grams.
         /// </summary>
+        /// <remarks>This transform can operate over several columns.</remarks>
         /// <param name="catalog">The text-related transform's catalog.</param>
-        /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.</param>
+        /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.
+        /// This column's data type will be a vector of <see cref="System.Single"/>.
+        /// </param>
         /// <param name="options">Advanced options to the algorithm.</param>
-        /// <param name="inputColumnNames">Name of the columns to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
+        /// <param name="inputColumnNames">Name of the columns to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
+        /// This estimator operates over text data, and it can transform several columns at once, yielding one vector of <see cref="System.Single"/>
+        /// as the resulting features for all columns.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[

diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
@@ -31,10 +31,36 @@ namespace Microsoft.ML.Transforms.Text
     /// </summary>
     public interface IStopWordsRemoverOptions { }
 
-    // A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are counts
-    // of (word or character) ngrams in a given text. It offers ngram hashing (finding the ngram token string name to feature
-    // integer index mapping through hashing) as an option.
-    /// <include file='doc.xml' path='doc/members/member[@name="TextFeaturizingEstimator "]/*' />
+    /// <summary>
+    ///  An estimator that turns a collection of text documents into numerical feature vectors.
+    ///  The feature vectors are normalized counts of word and/or character ngrams (based on the options supplied).
+    /// </summary>
+    /// <remarks>
+    /// <format type="text/markdown"><![CDATA[
+    ///
+    /// ###  Estimator Characteristics
+    /// |  |  |
+    /// | -- | -- |
+    /// | Does this estimator need to look at the data to train its parameters? | Yes. |
+    /// | Input column data type | [text](xref:Microsoft.ML.Data.TextDataViewType) |
+    /// | Output column data type | Vector of <xref:System.Single> |
+    ///
+    /// This estimator gives the user a one-stop solution for:
+    /// * Language Detection
+    /// * [Tokenization](https://en.wikipedia.org/wiki/Lexical_analysis#Tokenization)
+    /// * [Text normalization](https://en.wikipedia.org/wiki/Text_normalization)
+    /// * [Predefined and custom stopwords removal](https://en.wikipedia.org/wiki/Stop_words)
+    /// * [Word-based or character-based Ngram extraction and SkipGram extraction](https://en.wikipedia.org/wiki/N-gram) (through the advanced [options](xref:Microsoft.ML.Transforms.Text.TextFeaturizingEstimator.Options.WordFeatureExtractor))
+    /// * [TF, IDF or TF-IDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf)
+    /// * [L-p vector normalization](xref:Microsoft.ML.Transforms.LpNormNormalizingTransformer)
+    ///
+    /// By default the features are made of (word/character) n-grams/skip-grams and the number of features is equal to the vocabulary size found by analyzing the data.
+    /// To output an additional column with the tokens generated, use [OutputTokensColumnName](xref:Microsoft.ML.Transforms.Text.TextFeaturizingEstimator.Options.OutputTokensColumnName).
+    /// The number of features can also be specified by selecting the maximum number of n-grams to keep in the <xref:Microsoft.ML.Transforms.Text.TextFeaturizingEstimator.Options>, where the estimator can be further tuned.
+    /// ]]></format>
+    /// </remarks>
+    /// <seealso cref="TextCatalog.FeaturizeText(TransformsCatalog.TextTransforms, string, Options, string[])"/>
+    /// <seealso cref="TextCatalog.FeaturizeText(TransformsCatalog.TextTransforms, string, string)"/>
     public sealed class TextFeaturizingEstimator : IEstimator<ITransformer>
     {
         /// <summary>

diff --git a/src/Microsoft.ML.Transforms/Text/doc.xml b/src/Microsoft.ML.Transforms/Text/doc.xml
@@ -2,45 +2,6 @@
 <doc>
   <members>
 
-    <member name="TextTransform">
-      <summary>
-        A transform that turns a collection of text documents into numerical feature vectors.
-        The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text.
-      </summary>
-      <remarks>
-        The TextFeaturizer transform gives user one-stop solution for doing:
-        <list type="bullet">
-          <item><description>Language Detection</description></item>
-          <item><description>Tokenzation</description></item>
-          <item><description>Text normalization</description></item>
-          <item><description>Predefined and custom stopwords removal.</description></item>
-          <item><description>Word-based or character-based Ngram and SkipGram extraction.</description></item>
-          <item><description>TF, IDF or TF-IDF.</description></item>
-          <item><description>L-p vector normalization.</description></item>
-        </list>
-        The TextFeaturizer will show the transformed text, after being applied.
-        It converts a collection of text columns to a matrix of token  ngrams/skip-grams counts.
-        Features are made of (word/character) n-grams/skip-grams and the number of features are equal to the vocabulary size found by analyzing the data.
-      </remarks>
-    </member>
-    <example name="TextTransform">
-      <example>
-        <code language="csharp">
-          pipeline.Add(new TextFeaturizer(&quot;Features&quot;, &quot;SentimentText&quot;)
-          {
-            KeepDiacritics = false,
-            KeepPunctuations = false,
-            TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
-            OutputTokens = true,
-            StopWordsRemover = new PredefinedStopWordsRemover(),
-            VectorNormalizer = TextTransformTextNormKind.L2,
-            CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
-            WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true }
-          });
-        </code>
-      </example>
-    </example>
-
     <member name="WordTokenizer">
       <summary>
         This transform splits the text into words using the separator character(s).