From c5601360e84e2aab68a42db7eceaf384d3aaf378 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 16:00:42 -0700 Subject: [PATCH 1/7] Polish char-level tokenizers --- .../Microsoft.ML.Samples/Dynamic/NgramExtraction.cs | 2 +- src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs | 2 +- src/Microsoft.ML.Transforms/Text/TextCatalog.cs | 10 ++++++---- .../Text/TokenizingByCharacters.cs | 12 ++++++++---- .../StaticPipeTests.cs | 2 +- .../Scenarios/Api/CookbookSamples/CookbookSamples.cs | 2 +- .../Api/CookbookSamples/CookbookSamplesDynamicApi.cs | 2 +- 7 files changed, 19 insertions(+), 13 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs index d1f36d3731..883c034d64 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs @@ -26,7 +26,7 @@ public static void NgramTransform() // A pipeline to tokenize text as characters and then combine them together into ngrams // The pipeline uses the default settings to featurize. - var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters: false); + var charsPipeline = ml.Transforms.Text.ProduceCharacterTokens("Chars", "SentimentText", useMarkerCharacters: false); var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1); var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars"); var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline); diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index c4ef323c97..668952e6d9 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -109,7 +109,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// The column to apply to. 
/// Whether to use marker characters to separate words. - public static VarVector> TokenizeIntoCharacters(this Scalar input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters); + public static VarVector> ProduceCharacterTokens(this Scalar input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters); } /// diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 3aa10978ac..c360c9228a 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -55,8 +55,9 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text /// The text-related transform's catalog. /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. - /// Whether to use marker characters to separate words. - public static TokenizingByCharactersEstimator TokenizeCharacters(this TransformsCatalog.TextTransforms catalog, + /// Whether to prepend a marker character, , to the beginning, + /// and append another marker character, , to the end of the output vector of characters. + public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters) @@ -67,10 +68,11 @@ public static TokenizingByCharactersEstimator TokenizeCharacters(this Transforms /// Tokenize incoming text in input columns and output the tokens as output columns. /// /// The text-related transform's catalog. - /// Whether to use marker characters to separate words. + /// Whether to prepend a marker character, , to the beginning, + /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. 
- public static TokenizingByCharactersEstimator TokenizeCharacters(this TransformsCatalog.TextTransforms catalog, + public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, params ColumnOptions[] columns) => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns)); diff --git a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs index 731f13990e..ac30aaf0aa 100644 --- a/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs +++ b/src/Microsoft.ML.Transforms/Text/TokenizingByCharacters.cs @@ -102,7 +102,8 @@ private static VersionInfo GetVersionInfo() /// Tokenize incoming text in input columns and output the tokens as output columns. /// /// The environment. - /// Whether to use marker characters to separate words. + /// Whether to prepend a marker character, , to the beginning, + /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. internal TokenizingByCharactersTransformer(IHostEnvironment env, bool useMarkerCharacters = TokenizingByCharactersEstimator.Defaults.UseMarkerCharacters, params (string outputColumnName, string inputColumnName)[] columns) : @@ -114,7 +115,7 @@ internal TokenizingByCharactersTransformer(IHostEnvironment env, bool useMarkerC /// /// The names of the output and input column pairs on which the transformation is applied. 
/// - public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); + internal IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { @@ -555,6 +556,7 @@ internal static class Defaults { public const bool UseMarkerCharacters = true; } + internal static bool IsColumnTypeValid(DataViewType type) => type.GetItemType() is TextDataViewType; internal const string ExpectedColumnType = "Text"; @@ -565,7 +567,8 @@ internal static class Defaults /// The environment. /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. - /// Whether to use marker characters to separate words. + /// Whether to prepend a marker character, , to the beginning, + /// and append another marker character, , to the end of the output vector of characters. internal TokenizingByCharactersEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, bool useMarkerCharacters = Defaults.UseMarkerCharacters) : this(env, useMarkerCharacters, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }) @@ -576,7 +579,8 @@ internal TokenizingByCharactersEstimator(IHostEnvironment env, string outputColu /// Tokenize incoming text in input columns and output the tokens as output columns. /// /// The environment. - /// Whether to use marker characters to separate words. + /// Whether to prepend a marker character, , to the beginning, + /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. 
internal TokenizingByCharactersEstimator(IHostEnvironment env, bool useMarkerCharacters = Defaults.UseMarkerCharacters, diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 926e770187..296eabf5fa 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -520,7 +520,7 @@ public void Tokenize() .Append(r => ( r.label, tokens: r.text.TokenizeText(), - chars: r.text.TokenizeIntoCharacters())); + chars: r.text.ProduceCharacterTokens())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index e2bacb6309..8a5920c781 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -467,7 +467,7 @@ private void TextFeaturizationOn(string dataPath) BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, useAllLengths: false), // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. - BagOfTrichar: r.Message.TokenizeIntoCharacters().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), + BagOfTrichar: r.Message.ProduceCharacterTokens().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. 
In a real diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 50c0439112..91405a7585 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -305,7 +305,7 @@ private void TextFeaturizationOn(string dataPath) ngramLength: 2, useAllLengths: false)) // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. - .Append(mlContext.Transforms.Text.TokenizeCharacters("MessageChars", "Message")) + .Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message")) .Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars", ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf)) From ef85fa8f93ac1a98f9dff8231d3812ffa70e0a74 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 16:09:29 -0700 Subject: [PATCH 2/7] Polish word-level tokenizers --- .../Dynamic/KeyToValueValueToKey.cs | 4 ++-- .../Dynamic/StopWordRemoverTransform.cs | 2 +- .../Dynamic/TensorFlow/TextClassification.cs | 2 +- .../Dynamic/WordEmbeddingTransform.cs | 2 +- .../TextStaticExtensions.cs | 2 +- src/Microsoft.ML.Transforms/Text/TextCatalog.cs | 15 ++------------- .../Text/WordTokenizing.cs | 12 ++++++++++++ .../StaticPipeTests.cs | 8 ++++---- .../Api/CookbookSamples/CookbookSamples.cs | 2 +- .../CookbookSamples/CookbookSamplesDynamicApi.cs | 2 +- .../TensorflowTests.cs | 2 +- .../Transformers/CategoricalHashTests.cs | 2 +- .../Transformers/TextFeaturizerTests.cs | 2 +- .../Transformers/ValueMappingTests.cs | 2 +- .../Transformers/WordEmbeddingsTests.cs | 4 ++-- 15 files changed, 32 insertions(+), 31 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs 
index 47d3de37bc..951918ac04 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs @@ -30,7 +30,7 @@ public static void Example() // making use of default settings. string defaultColumnName = "DefaultKeys"; // REVIEW create through the catalog extension - var default_pipeline = ml.Transforms.Text.TokenizeWords("Review") + var default_pipeline = ml.Transforms.Text.ProduceWordTokens("Review") .Append(ml.Transforms.Conversion.MapValueToKey(defaultColumnName, "Review")); // Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator. @@ -38,7 +38,7 @@ public static void Example() // and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurence (order in which they get encountered) // to value/alphabetically. string customizedColumnName = "CustomizedKeys"; - var customized_pipeline = ml.Transforms.Text.TokenizeWords("Review") + var customized_pipeline = ml.Transforms.Text.ProduceWordTokens("Review") .Append(ml.Transforms.Conversion.MapValueToKey(customizedColumnName, "Review", maximumNumberOfKeys: 10, keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)); // The transformed data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs index 33a7fc5fd4..cdf53e09fb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs @@ -25,7 +25,7 @@ public static void Example() // Let's take SentimentText column and break it into vector of words. 
string originalTextColumnName = "Words"; - var words = ml.Transforms.Text.TokenizeWords("SentimentText", originalTextColumnName); + var words = ml.Transforms.Text.ProduceWordTokens("SentimentText", originalTextColumnName); // Default pipeline will apply default stop word remover which is based on predifined set of words for certain languages. var defaultPipeline = words.Append(ml.Transforms.Text.RemoveDefaultStopWords(originalTextColumnName, "DefaultRemover")); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index ef78a04e9a..9b61bbcb10 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -68,7 +68,7 @@ public static void Example() j.Features = features; }; - var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text") + var engine = mlContext.Transforms.Text.ProduceWordTokens("TokenizedWords", "Sentiment_Text") .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") })) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) .Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs index 63428fcdf9..c14166e8ff 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs @@ -26,7 +26,7 @@ public static void Example() // Pipeline which goes through SentimentText and normalizes it, tokenize it by words, and removes default stopwords. 
var wordsPipeline = ml.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false) - .Append(ml.Transforms.Text.TokenizeWords("Words", "NormalizedText")) + .Append(ml.Transforms.Text.ProduceWordTokens("Words", "NormalizedText")) .Append(ml.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words")); var wordsDataview = wordsPipeline.Fit(trainData).Transform(trainData); diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 668952e6d9..5bdb7b6c76 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -55,7 +55,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// The column to apply to. /// The separators to use (uses space character by default). - public static VarVector TokenizeText(this Scalar input, char[] separators = null) => new OutPipelineColumn(input, separators); + public static VarVector ProduceWordTokens(this Scalar input, char[] separators = null) => new OutPipelineColumn(input, separators); } /// diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index c360c9228a..9e10fa180d 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -159,29 +159,18 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// The separators to use (uses space character by default). 
- public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextTransforms catalog, + public static WordTokenizingEstimator ProduceWordTokens(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, char[] separators = null) => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, separators); - /// - /// Tokenizes incoming text in input columns and outputs the tokens using as separators. - /// - /// The text-related transform's catalog. - /// Pairs of columns to run the tokenization on. - /// The separators to use (uses space character by default). - public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string inputColumnName)[] columns, - char[] separators = null) - => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, separators); - /// /// Tokenizes incoming text in input columns, using per-column configurations, and outputs the tokens. /// /// The text-related transform's catalog. /// Pairs of columns to run the tokenization on. 
- public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextTransforms catalog, + public static WordTokenizingEstimator ProduceWordTokens(this TransformsCatalog.TextTransforms catalog, params WordTokenizingEstimator.ColumnOptions[] columns) => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index 6cc72d22c6..e08fd51273 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -441,9 +441,21 @@ internal WordTokenizingEstimator(IHostEnvironment env, params ColumnOptions[] co } public sealed class ColumnOptions { + /// + /// Output column name that will be used to store the tokenization result of column. + /// public readonly string Name; + /// + /// Input column name that will be tokenized into words. + /// public readonly string InputColumnName; + /// + /// Separator list used to tokenize input string. If not specified, space will be used. + /// public IReadOnlyList Separators => SeparatorsArray; + /// + /// State of . Since [] is mutable, it's not safe to directly expose this field to users.
+ /// internal readonly char[] SeparatorsArray; /// diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 296eabf5fa..c366b83b60 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -519,7 +519,7 @@ public void Tokenize() var est = data.MakeNewEstimator() .Append(r => ( r.label, - tokens: r.text.TokenizeText(), + tokens: r.text.ProduceWordTokens(), chars: r.text.ProduceCharacterTokens())); var tdata = est.Fit(data).Transform(data); @@ -547,7 +547,7 @@ public void NormalizeTextAndRemoveStopWords() .Append(r => ( r.label, normalized_text: r.text.NormalizeText(), - words_without_stopwords: r.text.TokenizeText().RemoveStopwords())); + words_without_stopwords: r.text.ProduceWordTokens().RemoveStopwords())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; @@ -604,8 +604,8 @@ public void Ngrams() var est = data.MakeNewEstimator() .Append(r => ( r.label, - ngrams: r.text.TokenizeText().ToKey().ProduceNgrams(), - ngramshash: r.text.TokenizeText().ToKey().ProduceHashedNgrams())); + ngrams: r.text.ProduceWordTokens().ToKey().ProduceNgrams(), + ngramshash: r.text.ProduceWordTokens().ToKey().ProduceHashedNgrams())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index 8a5920c781..6ad76d4153 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -472,7 +472,7 @@ private void TextFeaturizationOn(string dataPath) // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. 
In a real // scenario, it is best to use a different model for more accuracy. - Embeddings: r.Message.NormalizeText().TokenizeText().WordEmbeddings(WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) + Embeddings: r.Message.NormalizeText().ProduceWordTokens().WordEmbeddings(WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) )); // Let's train our pipeline, and then apply it to the same data. diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 91405a7585..edfb40d5fd 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -312,7 +312,7 @@ private void TextFeaturizationOn(string dataPath) // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. In a real // scenario, it is best to use a different model for more accuracy. 
- .Append(mlContext.Transforms.Text.TokenizeWords("TokenizedMessage", "NormalizedMessage")) + .Append(mlContext.Transforms.Text.ProduceWordTokens("TokenizedMessage", "NormalizedMessage")) .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Embeddings", "TokenizedMessage", WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding)); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index e8043a0051..a42f1cea61 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -999,7 +999,7 @@ public void TensorFlowSentimentClassificationTest() // The first pipeline 'dataPipe' tokenzies the string into words and maps each word to an integer which is an index in the dictionary. // Then this integer vector is retrieved from the pipeline and resized to fixed length. // The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensoFlow and gets the classification scores. 
- var estimator = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text") + var estimator = mlContext.Transforms.Text.ProduceWordTokens("TokenizedWords", "Sentiment_Text") .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("Features", "TokenizedWords") })); var dataPipe = estimator.Fit(dataView) .CreatePredictionEngine(mlContext); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs index e8cb49fe0a..f5dc6c59bc 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs @@ -76,7 +76,7 @@ public void CategoricalHashStatic() row.ScalarString, row.VectorString, // Create a VarVector column - VarVectorString: row.ScalarString.TokenizeText())). + VarVectorString: row.ScalarString.ProduceWordTokens())). Append(row => ( A: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Ind), B: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Ind), diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs index e2575557ec..8ecccfe350 100644 --- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs @@ -143,7 +143,7 @@ public void TextNormalizationAndStopwordRemoverWorkout() text: ctx.LoadFloat(1)), hasHeader: true) .Load(sentimentDataPath); var est = ML.Transforms.Text.NormalizeText("text") - .Append(ML.Transforms.Text.TokenizeWords("words", "text")) + .Append(ML.Transforms.Text.ProduceWordTokens("words", "text")) .Append(ML.Transforms.Text.RemoveDefaultStopWords("NoDefaultStopwords", "words")) .Append(ML.Transforms.Text.RemoveStopWords("NoStopWords", "words", "xbox", "this", "is", "a", "the", 
"THAT", "bY")); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 66f5ed0db2..0bb6e7c0f9 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -546,7 +546,7 @@ public void ValueMappingInputIsVectorWorkout() var keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; var values = new List() { 1, 2, 3, 4 }; - var est = ML.Transforms.Text.TokenizeWords("TokenizeB", "B") + var est = ML.Transforms.Text.ProduceWordTokens("TokenizeB", "B") .Append(ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("VecB", "TokenizeB") })); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } diff --git a/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs b/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs index 95843dc330..3e42c515c2 100644 --- a/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs @@ -35,7 +35,7 @@ public void TestWordEmbeddings() }).Load(GetDataPath(dataPath)); var est = ML.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false) - .Append(ML.Transforms.Text.TokenizeWords("Words", "NormalizedText")) + .Append(ML.Transforms.Text.ProduceWordTokens("Words", "NormalizedText")) .Append(ML.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words")); var words = est.Fit(data).Transform(data); @@ -70,7 +70,7 @@ public void TestCustomWordEmbeddings() }).Load(GetDataPath(dataPath)); var est = ML.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false) - .Append(ML.Transforms.Text.TokenizeWords("Words", "NormalizedText")) + .Append(ML.Transforms.Text.ProduceWordTokens("Words", "NormalizedText")) 
.Append(ML.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words")); var words = est.Fit(data).Transform(data); var pathToCustomModel = DeleteOutputPath("custommodel.txt"); From 13e55d280a841a78c84c7818d3622536958da90f Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 17:10:50 -0700 Subject: [PATCH 3/7] Scrub stopword removers --- .../TextStaticExtensions.cs | 4 +-- .../Text/StopWordsRemovingTransformer.cs | 4 +-- .../Text/TextCatalog.cs | 36 ------------------- .../StaticPipeTests.cs | 2 +- 4 files changed, 5 insertions(+), 41 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 5bdb7b6c76..de5865cfcd 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -162,8 +162,8 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Remove stop words from incoming text. /// /// The column to apply to. - /// Langauge of the input text. + /// Language of the input text. It will be used to retrieve a built-in stopword list. - public static VarVector RemoveStopwords(this VarVector input, + public static VarVector RemoveDefaultStopWords(this VarVector input, StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English) => new OutPipelineColumn(input, language); } diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index 2a36dbae24..2244ff58bc 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -133,7 +133,7 @@ private static VersionInfo GetVersionInfo() /// /// Defines the behavior of the transformer.
/// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + internal IReadOnlyCollection Columns => _columns.AsReadOnly(); private readonly StopWordsRemovingEstimator.ColumnOptions[] _columns; private static volatile NormStr.Pool[] _stopWords; @@ -828,7 +828,7 @@ private void LoadStopWords(IChannel ch, ReadOnlyMemory stopwords, string d /// /// The names of the input output column pairs on which this transformation is applied. /// - public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); + internal IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); /// /// Custom stopword remover removes specified list of stop words. diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 9e10fa180d..c78e83430a 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -234,24 +234,6 @@ public static StopWordsRemovingEstimator RemoveDefaultStopWords(this TransformsC StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English) => new StopWordsRemovingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, language); - /// - /// Removes stop words from incoming token streams in input columns - /// and outputs the token streams without stop words as output columns. - /// - /// The text-related transform's catalog. - /// Pairs of columns to remove stop words on. - /// Langauge of the input text columns . 
- /// - /// - /// - /// - public static StopWordsRemovingEstimator RemoveDefaultStopWords(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string inputColumnName)[] columns, - StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English) - => new StopWordsRemovingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, language); - /// /// Removes stop words from incoming token streams in /// and outputs the token streams without stopwords as . @@ -272,24 +254,6 @@ public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCa params string[] stopwords) => new CustomStopWordsRemovingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, stopwords); - /// - /// Removes stop words from incoming token streams in input columns - /// and outputs the token streams without stop words as output columns. - /// - /// The text-related transform's catalog. - /// Pairs of columns to remove stop words on. - /// Array of words to remove. 
- /// - /// - /// - /// - public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string inputColumnName)[] columns, - params string[] stopwords) - => new CustomStopWordsRemovingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, stopwords); - /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in /// and outputs bag of word vector as diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index c366b83b60..a6f9fae895 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -547,7 +547,7 @@ public void NormalizeTextAndRemoveStopWords() .Append(r => ( r.label, normalized_text: r.text.NormalizeText(), - words_without_stopwords: r.text.ProduceWordTokens().RemoveStopwords())); + words_without_stopwords: r.text.ProduceWordTokens().RemoveDefaultStopWords())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; From f8096c0c6a10fc088a63278744dd6e2ed3b5b9f6 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 17:15:18 -0700 Subject: [PATCH 4/7] Update cookbook --- docs/code/MlNetCookBook.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md index 7a73a1178a..25edf3c1b2 100644 --- a/docs/code/MlNetCookBook.md +++ b/docs/code/MlNetCookBook.md @@ -775,12 +775,12 @@ var pipeline = ngramLength: 2, useAllLengths: false)) // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. 
- .Append(mlContext.Transforms.Text.TokenizeCharacters("MessageChars", "Message")) + .Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message")) .Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars", ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf)) // NLP pipeline 4: word embeddings. - .Append(mlContext.Transforms.Text.TokenizeWords("TokenizedMessage", "NormalizedMessage")) + .Append(mlContext.Transforms.Text.ProduceWordTokens("TokenizedMessage", "NormalizedMessage")) .Append(mlContext.Transforms.Text.ExtractWordEmbeddings("Embeddings", "TokenizedMessage", WordEmbeddingsExtractingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding)); From 8e5c515aaae3bcbd9c485a22e3adb5e932e75c6c Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 17:19:06 -0700 Subject: [PATCH 5/7] Address a comment --- src/Microsoft.ML.Transforms/Text/WordTokenizing.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index e08fd51273..b6278e824a 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -105,7 +105,7 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "DelimitedTokenize"; - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + internal IReadOnlyCollection Columns => _columns.AsReadOnly(); private readonly WordTokenizingEstimator.ColumnOptions[] _columns; private static (string name, string inputColumnName)[] GetColumnPairs(WordTokenizingEstimator.ColumnOptions[] columns) From 883784a1935d50f39418f733208ea0279f942e4a Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 12 Mar 2019 09:25:56 -0700 Subject: [PATCH 6/7] Rename ProduceCharacterTokens to ProduceCharactersAsKeys --- docs/code/MlNetCookBook.md | 2 +- 
docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs | 2 +- src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs | 2 +- src/Microsoft.ML.Transforms/Text/TextCatalog.cs | 4 ++-- test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs | 2 +- .../Scenarios/Api/CookbookSamples/CookbookSamples.cs | 2 +- .../Api/CookbookSamples/CookbookSamplesDynamicApi.cs | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md index 25edf3c1b2..9a7c1edad3 100644 --- a/docs/code/MlNetCookBook.md +++ b/docs/code/MlNetCookBook.md @@ -775,7 +775,7 @@ var pipeline = ngramLength: 2, useAllLengths: false)) // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. - .Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message")) + .Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message")) .Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars", ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf)) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs index 883c034d64..8bd0a463c1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs @@ -26,7 +26,7 @@ public static void NgramTransform() // A pipeline to tokenize text as characters and then combine them together into ngrams // The pipeline uses the default settings to featurize. 
- var charsPipeline = ml.Transforms.Text.ProduceCharacterTokens("Chars", "SentimentText", useMarkerCharacters: false); + var charsPipeline = ml.Transforms.Text.ProduceCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false); var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1); var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars"); var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline); diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index de5865cfcd..b71c8a2d71 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -109,7 +109,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// The column to apply to. /// Whether to use marker characters to separate words. - public static VarVector> ProduceCharacterTokens(this Scalar input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters); + public static VarVector> ProduceCharactersAsKeys(this Scalar input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters); } /// diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index c78e83430a..2e46acf916 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -57,7 +57,7 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text /// Name of the column to transform. If set to , the value of the will be used as source. /// Whether to prepend a marker character, , to the beginning, /// and append another marker character, , to the end of the output vector of characters. 
- public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog, + public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters) @@ -72,7 +72,7 @@ public static TokenizingByCharactersEstimator ProduceCharacterTokens(this Transf /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. - public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog, + public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, params ColumnOptions[] columns) => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns)); diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index a6f9fae895..f0fb5844d1 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -520,7 +520,7 @@ public void Tokenize() .Append(r => ( r.label, tokens: r.text.ProduceWordTokens(), - chars: r.text.ProduceCharacterTokens())); + chars: r.text.ProduceCharactersAsKeys())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index 6ad76d4153..96a2bb6267 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ 
b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -467,7 +467,7 @@ private void TextFeaturizationOn(string dataPath) BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, useAllLengths: false), // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. - BagOfTrichar: r.Message.ProduceCharacterTokens().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), + BagOfTrichar: r.Message.ProduceCharactersAsKeys().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. In a real diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index edfb40d5fd..0e9c962539 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -305,7 +305,7 @@ private void TextFeaturizationOn(string dataPath) ngramLength: 2, useAllLengths: false)) // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. 
- .Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message")) + .Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message")) .Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars", ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf)) From d99e19298c413b706fff72b49a1d252855dbb028 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Wed, 13 Mar 2019 11:36:56 -0700 Subject: [PATCH 7/7] Address comments --- .../Dynamic/KeyToValueValueToKey.cs | 4 ++-- .../Microsoft.ML.Samples/Dynamic/NgramExtraction.cs | 2 +- .../Dynamic/StopWordRemoverTransform.cs | 2 +- .../Dynamic/TensorFlow/TextClassification.cs | 2 +- .../Dynamic/WordEmbeddingTransform.cs | 2 +- src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs | 4 ++-- src/Microsoft.ML.Transforms/Text/TextCatalog.cs | 8 ++++---- .../StaticPipeTests.cs | 10 +++++----- .../Scenarios/Api/CookbookSamples/CookbookSamples.cs | 4 ++-- .../Api/CookbookSamples/CookbookSamplesDynamicApi.cs | 4 ++-- .../TensorflowTests.cs | 2 +- .../Transformers/CategoricalHashTests.cs | 2 +- .../Transformers/TextFeaturizerTests.cs | 2 +- .../Transformers/ValueMappingTests.cs | 2 +- .../Transformers/WordEmbeddingsTests.cs | 4 ++-- 15 files changed, 27 insertions(+), 27 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs index 951918ac04..c7f5636d47 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs @@ -30,7 +30,7 @@ public static void Example() // making use of default settings. 
string defaultColumnName = "DefaultKeys"; // REVIEW create through the catalog extension - var default_pipeline = ml.Transforms.Text.ProduceWordTokens("Review") + var default_pipeline = ml.Transforms.Text.TokenizeIntoWords("Review") .Append(ml.Transforms.Conversion.MapValueToKey(defaultColumnName, "Review")); // Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator. @@ -38,7 +38,7 @@ public static void Example() // and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurence (order in which they get encountered) // to value/alphabetically. string customizedColumnName = "CustomizedKeys"; - var customized_pipeline = ml.Transforms.Text.ProduceWordTokens("Review") + var customized_pipeline = ml.Transforms.Text.TokenizeIntoWords("Review") .Append(ml.Transforms.Conversion.MapValueToKey(customizedColumnName, "Review", maximumNumberOfKeys: 10, keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)); // The transformed data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs index 8bd0a463c1..45f89fd80b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs @@ -26,7 +26,7 @@ public static void NgramTransform() // A pipeline to tokenize text as characters and then combine them together into ngrams // The pipeline uses the default settings to featurize. 
- var charsPipeline = ml.Transforms.Text.ProduceCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false); + var charsPipeline = ml.Transforms.Text.TokenizeIntoCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false); var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1); var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars"); var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs index cdf53e09fb..134a3ca3a0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs @@ -25,7 +25,7 @@ public static void Example() // Let's take SentimentText column and break it into vector of words. string originalTextColumnName = "Words"; - var words = ml.Transforms.Text.ProduceWordTokens("SentimentText", originalTextColumnName); + var words = ml.Transforms.Text.TokenizeIntoWords("SentimentText", originalTextColumnName); // Default pipeline will apply default stop word remover which is based on predifined set of words for certain languages. 
var defaultPipeline = words.Append(ml.Transforms.Text.RemoveDefaultStopWords(originalTextColumnName, "DefaultRemover")); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index 9b61bbcb10..b2b5363a8d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -68,7 +68,7 @@ public static void Example() j.Features = features; }; - var engine = mlContext.Transforms.Text.ProduceWordTokens("TokenizedWords", "Sentiment_Text") + var engine = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") })) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) .Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs index c14166e8ff..1830b3e171 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs @@ -26,7 +26,7 @@ public static void Example() // Pipeline which goes through SentimentText and normalizes it, tokenize it by words, and removes default stopwords. 
var wordsPipeline = ml.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false) - .Append(ml.Transforms.Text.ProduceWordTokens("Words", "NormalizedText")) + .Append(ml.Transforms.Text.TokenizeIntoWords("Words", "NormalizedText")) .Append(ml.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words")); var wordsDataview = wordsPipeline.Fit(trainData).Transform(trainData); diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index b71c8a2d71..1424c849a9 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -55,7 +55,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// The column to apply to. /// The separators to use (uses space character by default). - public static VarVector ProduceWordTokens(this Scalar input, char[] separators = null) => new OutPipelineColumn(input, separators); + public static VarVector TokenizeIntoWords(this Scalar input, char[] separators = null) => new OutPipelineColumn(input, separators); } /// @@ -109,7 +109,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// The column to apply to. /// Whether to use marker characters to separate words. 
- public static VarVector> ProduceCharactersAsKeys(this Scalar input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters); + public static VarVector> TokenizeIntoCharactersAsKeys(this Scalar input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters); } /// diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 2e46acf916..ef56dbf065 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -57,7 +57,7 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text /// Name of the column to transform. If set to , the value of the will be used as source. /// Whether to prepend a marker character, , to the beginning, /// and append another marker character, , to the end of the output vector of characters. - public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, + public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters) @@ -72,7 +72,7 @@ public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this Trans /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. 
- public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, + public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, params ColumnOptions[] columns) => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns)); @@ -159,7 +159,7 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// The separators to use (uses space character by default). - public static WordTokenizingEstimator ProduceWordTokens(this TransformsCatalog.TextTransforms catalog, + public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, char[] separators = null) @@ -170,7 +170,7 @@ public static WordTokenizingEstimator ProduceWordTokens(this TransformsCatalog.T /// /// The text-related transform's catalog. /// Pairs of columns to run the tokenization on. 
- public static WordTokenizingEstimator ProduceWordTokens(this TransformsCatalog.TextTransforms catalog, + public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog, params WordTokenizingEstimator.ColumnOptions[] columns) => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index f0fb5844d1..660c83209b 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -519,8 +519,8 @@ public void Tokenize() var est = data.MakeNewEstimator() .Append(r => ( r.label, - tokens: r.text.ProduceWordTokens(), - chars: r.text.ProduceCharactersAsKeys())); + tokens: r.text.TokenizeIntoWords(), + chars: r.text.TokenizeIntoCharactersAsKeys())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; @@ -547,7 +547,7 @@ public void NormalizeTextAndRemoveStopWords() .Append(r => ( r.label, normalized_text: r.text.NormalizeText(), - words_without_stopwords: r.text.ProduceWordTokens().RemoveDefaultStopWords())); + words_without_stopwords: r.text.TokenizeIntoWords().RemoveDefaultStopWords())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; @@ -604,8 +604,8 @@ public void Ngrams() var est = data.MakeNewEstimator() .Append(r => ( r.label, - ngrams: r.text.ProduceWordTokens().ToKey().ProduceNgrams(), - ngramshash: r.text.ProduceWordTokens().ToKey().ProduceHashedNgrams())); + ngrams: r.text.TokenizeIntoWords().ToKey().ProduceNgrams(), + ngramshash: r.text.TokenizeIntoWords().ToKey().ProduceHashedNgrams())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs 
b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index 96a2bb6267..cbcbbcf231 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -467,12 +467,12 @@ private void TextFeaturizationOn(string dataPath) BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, useAllLengths: false), // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. - BagOfTrichar: r.Message.ProduceCharactersAsKeys().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), + BagOfTrichar: r.Message.TokenizeIntoCharactersAsKeys().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. In a real // scenario, it is best to use a different model for more accuracy. - Embeddings: r.Message.NormalizeText().ProduceWordTokens().WordEmbeddings(WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) + Embeddings: r.Message.NormalizeText().TokenizeIntoWords().WordEmbeddings(WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) )); // Let's train our pipeline, and then apply it to the same data. diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 0e9c962539..6fb2202692 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -305,14 +305,14 @@ private void TextFeaturizationOn(string dataPath) ngramLength: 2, useAllLengths: false)) // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. 
- .Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message")) + .Append(mlContext.Transforms.Text.TokenizeIntoCharactersAsKeys("MessageChars", "Message")) .Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars", ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf)) // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. In a real // scenario, it is best to use a different model for more accuracy. - .Append(mlContext.Transforms.Text.ProduceWordTokens("TokenizedMessage", "NormalizedMessage")) + .Append(mlContext.Transforms.Text.TokenizeIntoWords("TokenizedMessage", "NormalizedMessage")) .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Embeddings", "TokenizedMessage", WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding)); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index a42f1cea61..b6fecd7a8d 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -999,7 +999,7 @@ public void TensorFlowSentimentClassificationTest() // The first pipeline 'dataPipe' tokenzies the string into words and maps each word to an integer which is an index in the dictionary. // Then this integer vector is retrieved from the pipeline and resized to fixed length. // The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensoFlow and gets the classification scores. 
- var estimator = mlContext.Transforms.Text.ProduceWordTokens("TokenizedWords", "Sentiment_Text") + var estimator = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("Features", "TokenizedWords") })); var dataPipe = estimator.Fit(dataView) .CreatePredictionEngine(mlContext); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs index f5dc6c59bc..58940f19c2 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs @@ -76,7 +76,7 @@ public void CategoricalHashStatic() row.ScalarString, row.VectorString, // Create a VarVector column - VarVectorString: row.ScalarString.ProduceWordTokens())). + VarVectorString: row.ScalarString.TokenizeIntoWords())). Append(row => ( A: row.ScalarString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashScalarOutputKind.Ind), B: row.VectorString.OneHotHashEncoding(outputKind: CategoricalHashStaticExtensions.OneHotHashVectorOutputKind.Ind), diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs index 8ecccfe350..4445cbae7f 100644 --- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs @@ -143,7 +143,7 @@ public void TextNormalizationAndStopwordRemoverWorkout() text: ctx.LoadFloat(1)), hasHeader: true) .Load(sentimentDataPath); var est = ML.Transforms.Text.NormalizeText("text") - .Append(ML.Transforms.Text.ProduceWordTokens("words", "text")) + .Append(ML.Transforms.Text.TokenizeIntoWords("words", "text")) .Append(ML.Transforms.Text.RemoveDefaultStopWords("NoDefaultStopwords", "words")) .Append(ML.Transforms.Text.RemoveStopWords("NoStopWords", "words", "xbox", "this", "is", 
"a", "the", "THAT", "bY")); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 0bb6e7c0f9..c98617c8a8 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -546,7 +546,7 @@ public void ValueMappingInputIsVectorWorkout() var keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; var values = new List() { 1, 2, 3, 4 }; - var est = ML.Transforms.Text.ProduceWordTokens("TokenizeB", "B") + var est = ML.Transforms.Text.TokenizeIntoWords("TokenizeB", "B") .Append(ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("VecB", "TokenizeB") })); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } diff --git a/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs b/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs index 3e42c515c2..dc105520e9 100644 --- a/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/WordEmbeddingsTests.cs @@ -35,7 +35,7 @@ public void TestWordEmbeddings() }).Load(GetDataPath(dataPath)); var est = ML.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false) - .Append(ML.Transforms.Text.ProduceWordTokens("Words", "NormalizedText")) + .Append(ML.Transforms.Text.TokenizeIntoWords("Words", "NormalizedText")) .Append(ML.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words")); var words = est.Fit(data).Transform(data); @@ -70,7 +70,7 @@ public void TestCustomWordEmbeddings() }).Load(GetDataPath(dataPath)); var est = ML.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false) - .Append(ML.Transforms.Text.ProduceWordTokens("Words", "NormalizedText")) + .Append(ML.Transforms.Text.TokenizeIntoWords("Words", "NormalizedText")) 
.Append(ML.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words")); var words = est.Fit(data).Transform(data); var pathToCustomModel = DeleteOutputPath("custommodel.txt");