From 15be23ce2cd48f8639a7b7acd75f2b25a1da04e7 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Fri, 29 Mar 2019 12:38:32 -0700
Subject: [PATCH 1/6] Created sample for 'ApplyWordEmbedding' API.

---
 .../Text/ApplyCustomWordEmbedding.cs          | 79 +++++++++++++++++++
 .../Transforms/Text/ApplyWordEmbedding.cs     | 68 ++++++++++++++++
 .../Text/TextCatalog.cs                       |  4 +-
 3 files changed, 149 insertions(+), 2 deletions(-)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
new file mode 100644
index 0000000000..297df0dffb
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
@@ -0,0 +1,79 @@
+﻿using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class ApplyCustomWordEmbedding
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create an empty data sample list. The 'ApplyWordEmbedding' does not require training data as
+            // the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator.
+            // The empty list is only needed to pass input schema to the pipeline.
+            var emptySamples = new List<TextData>();
+
+            // Convert sample list to an empty IDataView.
+            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
+
+            var pathToCustomModel = @".\custommodel.txt";
+            using (StreamWriter file = new StreamWriter(pathToCustomModel, false))
+            {
+
+                file.WriteLine("This is custom file for 4 words with 3 dimensional word embedding vector. This first line in this file does not confirm to the '<word> <float> <float> <float>' pattern, and is therefore ignored");
+                file.WriteLine("greate" + " " + string.Join(" ", 1.0f, 2.0f, 3.0f));
+                file.WriteLine("product" + " " + string.Join(" ", -1.0f, -2.0f, -3.0f));
+                file.WriteLine("like" + " " + string.Join(" ", -1f, 100.0f, -100f));
+                file.WriteLine("buy" + " " + string.Join(" ", 0f, 0f, 20f));
+            }
+
+            // A pipeline for converting text into a 9-dimension word embedding vector using the custom word embedding model.
+            // The 'ApplyWordEmbedding' computes the minimum, average and maximum values for each token's embedding vector.
+            // Tokens in 'custommodel.txt' model are represented as 3-dimension vectors.
+            // Therefore, the output is of 9-dimension [min, avg, max].
+            //
+            // The 'ApplyWordEmbedding' API requires vector of text as input.
+            // The pipeline first normalizes and tokenizes text then applies word embedding transformation.
+            var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
+                .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
+                .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", pathToCustomModel, "Tokens"));
+
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(emptyDataView);
+
+            // Create the prediction engine to get the embedding vector from the input text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Call the prediction API to convert the text into embedding vector.
+            var data = new TextData() { Text = "This is a greate product. I would like to buy it again."  };
+            var prediction = predictionEngine.Predict(data);
+
+            // Print the length of the embedding vector.
+            Console.WriteLine($"Number of Features: {prediction.Features.Length}");
+
+            // Print the embedding vector.
+            Console.Write("Features: ");
+            foreach (var f in prediction.Features)
+                Console.Write($"{f:F4} ");
+
+            //  Expected output:
+            //   Number of Features: 9
+            //   Features: -1.0000 0.0000 -100.0000 0.0000 34.0000 -25.6667 1.0000 100.0000 20.0000
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public float[] Features { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
new file mode 100644
index 0000000000..b7530be587
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
@@ -0,0 +1,68 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class ApplyWordEmbedding
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create an empty data sample list. The 'ApplyWordEmbedding' does not require training data as
+            // the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator.
+            // The empty list is only needed to pass input schema to the pipeline.
+            var emptySamples = new List<TextData>();
+
+            // Convert sample list to an empty IDataView.
+            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
+
+            // A pipeline for converting text into a 150-dimension embedding vector using pretrained 'SentimentSpecificWordEmbedding' model.
+            // The 'ApplyWordEmbedding' computes the minimum, average and maximum values for each token's embedding vector.
+            // Tokens in 'SentimentSpecificWordEmbedding' model are represented as 50-dimension vectors.
+            // Therefore, the output is of 150-dimension [min, avg, max].
+            //
+            // The 'ApplyWordEmbedding' API requires vector of text as input.
+            // The pipeline first normalizes and tokenizes text then applies word embedding transformation.
+            var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
+                .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
+                .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", "Tokens", 
+                    Transforms.Text.WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding));
+
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(emptyDataView);
+
+            // Create the prediction engine to get the embedding vector from the input text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Call the prediction API to convert the text into embedding vector.
+            var data = new TextData() { Text = "This is a greate product. I would like to buy it again."  };
+            var prediction = predictionEngine.Predict(data);
+
+            // Print the length of the embedding vector.
+            Console.WriteLine($"Number of Features: {prediction.Features.Length}");
+
+            // Print the embedding vector.
+            Console.Write("Features: ");
+            foreach (var f in prediction.Features)
+                Console.Write($"{f:F4} ");
+
+            //  Expected output:
+            //   Number of Features: 150
+            //   Features: -1.2489 0.2384 -1.3034 -0.9135 -3.4978 -0.1784 -1.3823 -0.3863 -2.5262 -0.8950 ...
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public float[] Features { get; set; }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 2be9e4dd7d..1d0ef69a33 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -125,7 +125,7 @@ public static TextNormalizingEstimator NormalizeText(this TransformsCatalog.Text
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[FeaturizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs)]
+        /// [!code-csharp[ApplyWordEmbedding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -143,7 +143,7 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[FeaturizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs)]
+        /// [!code-csharp[ApplyWordEmbedding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs)]
         /// ]]>
         /// </format>
         /// </example>

From 58e2d4be07734fcebb4b01034e31a6fda1109da9 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Fri, 29 Mar 2019 18:24:20 -0700
Subject: [PATCH 2/6] Addressed reviewers' comments.

---
 .../Transforms/Text/ApplyCustomWordEmbedding.cs   | 15 ++++++++-------
 .../Dynamic/Transforms/Text/ApplyWordEmbedding.cs |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
index 297df0dffb..4ac4ab18da 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
@@ -21,15 +21,16 @@ public static void Example()
             // Convert sample list to an empty IDataView.
             var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
 
+            // Write a custom 3-dimensional word embedding model with 4 words.
+            // Each line follows '<word> <float> <float> <float>' pattern.
+            // Lines that do not conform to the pattern are ignored.
             var pathToCustomModel = @".\custommodel.txt";
             using (StreamWriter file = new StreamWriter(pathToCustomModel, false))
             {
-
-                file.WriteLine("This is custom file for 4 words with 3 dimensional word embedding vector. This first line in this file does not confirm to the '<word> <float> <float> <float>' pattern, and is therefore ignored");
-                file.WriteLine("greate" + " " + string.Join(" ", 1.0f, 2.0f, 3.0f));
-                file.WriteLine("product" + " " + string.Join(" ", -1.0f, -2.0f, -3.0f));
-                file.WriteLine("like" + " " + string.Join(" ", -1f, 100.0f, -100f));
-                file.WriteLine("buy" + " " + string.Join(" ", 0f, 0f, 20f));
+                file.WriteLine("great 1.0 2.0 3.0");
+                file.WriteLine("product -1.0 -2.0 -3.0");
+                file.WriteLine("like -1 100.0 -100");
+                file.WriteLine("buy 0 0 20");
             }
 
             // A pipeline for converting text into a 9-dimension word embedding vector using the custom word embedding model.
@@ -50,7 +51,7 @@ public static void Example()
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
 
             // Call the prediction API to convert the text into embedding vector.
-            var data = new TextData() { Text = "This is a greate product. I would like to buy it again."  };
+            var data = new TextData() { Text = "This is a great product. I would like to buy it again."  };
             var prediction = predictionEngine.Predict(data);
 
             // Print the length of the embedding vector.
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
index b7530be587..0a58a2da07 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
@@ -39,7 +39,7 @@ public static void Example()
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
 
             // Call the prediction API to convert the text into embedding vector.
-            var data = new TextData() { Text = "This is a greate product. I would like to buy it again."  };
+            var data = new TextData() { Text = "This is a great product. I would like to buy it again."  };
             var prediction = predictionEngine.Predict(data);
 
             // Print the length of the embedding vector.

From a3ec5d3870a39c08206309dd50286bd717e2f2f2 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Mon, 1 Apr 2019 10:39:12 -0700
Subject: [PATCH 3/6] Deleted old embedding sample.

---
 .../Dynamic/WordEmbeddingTransform.cs         | 109 ------------------
 1 file changed, 109 deletions(-)
 delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs
deleted file mode 100644
index 1830b3e171..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs
+++ /dev/null
@@ -1,109 +0,0 @@
-﻿using System;
-using System.Collections.Generic;
-using System.IO;
-using Microsoft.ML.Data;
-using Microsoft.ML.Transforms.Text;
-namespace Microsoft.ML.Samples.Dynamic
-{
-    public static class WordEmbeddingTransform
-    {
-        public static void Example()
-        {
-            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
-            // as well as the source of randomness.
-            var ml = new MLContext();
-
-            // Get a small dataset as an IEnumerable and convert to IDataView.
-            var data = SamplesUtils.DatasetUtils.GetSentimentData();
-            var trainData = ml.Data.LoadFromEnumerable(data);
-
-            // Preview of the data.
-            //
-            // Sentiment    SentimentText
-            // true         Best game I've ever played.
-            // false        ==RUDE== Dude, 2.
-            // true          Until the next game, this is the best Xbox game!
-
-            // Pipeline which goes through SentimentText and normalizes it, tokenize it by words, and removes default stopwords.
-            var wordsPipeline = ml.Transforms.Text.NormalizeText("NormalizedText", "SentimentText", keepDiacritics: false, keepPunctuations: false)
-                .Append(ml.Transforms.Text.TokenizeIntoWords("Words", "NormalizedText"))
-                .Append(ml.Transforms.Text.RemoveDefaultStopWords("CleanWords", "Words"));
-
-            var wordsDataview = wordsPipeline.Fit(trainData).Transform(trainData);
-            // Preview of the CleanWords column obtained after processing SentimentText.
-            var cleanWords = wordsDataview.GetColumn<ReadOnlyMemory<char>[]>(wordsDataview.Schema["CleanWords"]);
-            Console.WriteLine($" CleanWords column obtained post-transformation.");
-            foreach (var featureRow in cleanWords)
-            {
-                foreach (var value in featureRow)
-                    Console.Write($"{value} ");
-                Console.WriteLine("");
-            }
-
-            Console.WriteLine("===================================================");
-            // best game ive played
-            // == rude == dude 2
-            // game best xbox game
-
-            // Small helper to print wordembeddings in the console. 
-            Action<string, IEnumerable<float[]>> printEmbeddings = (columnName, column) =>
-            {
-                Console.WriteLine($"{columnName} column obtained post-transformation.");
-                foreach (var featureRow in column)
-                {
-                    foreach (var value in featureRow)
-                        Console.Write($"{value} ");
-                    Console.WriteLine("");
-                }
-
-                Console.WriteLine("===================================================");
-            };
-
-            // Let's apply pretrained word embedding model GloVeTwitter25D.
-            // 25D means each word mapped into 25 dimensional space, basically each word represented by 25 float values.
-            var gloveWordEmbedding = ml.Transforms.Text.ApplyWordEmbedding("GloveEmbeddings", "CleanWords",
-                WordEmbeddingEstimator.PretrainedModelKind.GloVeTwitter25D);
-
-            // We also have option to apply custom word embedding models.
-            // Let's first create one.
-            // Format is following:
-            // First line is ignored if it is a header for your file.
-            // Each next line contains a single word followed by either a tab or space, and a list of floats also separated by a tab or space.
-            // Size of array of floats should be same for whole file.
-            var pathToCustomModel = @".\custommodel.txt";
-            using (StreamWriter file = new StreamWriter(pathToCustomModel, false))
-            {
-
-                file.WriteLine("This is custom file for 4 words with 3 dimensional word embedding vector. This first line in this file does not conform to the '<word> <float> <float> <float>' pattern, and is therefore ignored");
-                file.WriteLine("xbox" + " " + string.Join(" ", 1.0f, 2.0f, 3.0f));
-                file.WriteLine("game" + " " + string.Join(" ", -1.0f, -2.0f, -3.0f));
-                file.WriteLine("dude" + " " + string.Join(" ", -1f, 100.0f, -100f));
-                file.WriteLine("best" + " " + string.Join(" ", 0f, 0f, 20f));
-            }
-            // Now let's add custom embedding on top of same words.
-            var pipeline = gloveWordEmbedding.Append(ml.Transforms.Text.ApplyWordEmbedding("CustomEmbeddings", @".\custommodel.txt", "CleanWords"));
-
-            // And do all required transformations.
-            var embeddingDataview = pipeline.Fit(wordsDataview).Transform(wordsDataview);
-
-            var customEmbeddings = embeddingDataview.GetColumn<float[]>(embeddingDataview.Schema["CustomEmbeddings"]);
-            printEmbeddings("GloveEmbeddings", customEmbeddings);
-
-            // -1  -2   -3  -0.5   -1  8.5  0   0   20
-            // -1 100 -100    -1  100 -100 -1 100 -100
-            //  1  -2   -3 -0.25 -0.5 4.25  1   2   20
-            // As you can see above we output 9 values for each line
-            // We go through each word present in row and extract 3 floats for it (if we can find that word in model).
-            // First 3 floats in output values represent minimum values (for each dimension) for extracted values. 
-            // Second set of 3 floats in output represent average (for each dimension) for extracted values.
-            // Third set of 3 floats in output represent maximum values (for each dimension) for extracted values.
-            // Preview of GloveEmbeddings.
-            var gloveEmbeddings = embeddingDataview.GetColumn<float[]>(embeddingDataview.Schema["GloveEmbeddings"]);
-            printEmbeddings("GloveEmbeddings", gloveEmbeddings);
-            // 0.23166 0.048825 0.26878 -1.3945 -0.86072 -0.026778 0.84075 -0.81987 -1.6681 -1.0658 -0.30596 0.50974 ...
-            //-0.094905 0.61109 0.52546 - 0.2516 0.054786 0.022661 1.1801 0.33329 - 0.85388 0.15471 - 0.5984 0.4364  ...
-            // 0.23166 0.048825 0.26878 - 1.3945 - 0.30044 - 0.16523 0.47251 0.10276 - 0.20978 - 0.68094 - 0.30596  ...
-
-        }
-    }
-}

From 64ff94669b26a914d7e8feebe7f0663928d5660c Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Mon, 1 Apr 2019 11:29:04 -0700
Subject: [PATCH 4/6] Created samples for TokenizeIntoWords and RemoveStopWords
 APIs.

---
 .../Dynamic/StopWordRemoverTransform.cs       | 82 -------------------
 .../Transforms/Text/RemoveDefaultStopWords.cs | 59 +++++++++++++
 .../Transforms/Text/RemoveStopWords.cs        | 59 +++++++++++++
 .../Transforms/Text/TokenizeIntoWords.cs      | 57 +++++++++++++
 .../Text/TextCatalog.cs                       | 17 +++-
 5 files changed, 188 insertions(+), 86 deletions(-)
 delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs
deleted file mode 100644
index 134a3ca3a0..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs
+++ /dev/null
@@ -1,82 +0,0 @@
-﻿using System;
-using System.Collections.Generic;
-using Microsoft.ML.Data;
-
-namespace Microsoft.ML.Samples.Dynamic
-{
-    public static class StopWordRemoverTransform
-    {
-        public static void Example()
-        {
-            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
-            // as well as the source of randomness.
-            var ml = new MLContext();
-
-            // Get a small dataset as an IEnumerable and convert to IDataView.
-            var data = SamplesUtils.DatasetUtils.GetSentimentData();
-            var trainData = ml.Data.LoadFromEnumerable(data);
-
-            // Preview of the data.
-            //
-            // Sentiment    SentimentText
-            // true         Best game I've ever played.
-            // false        ==RUDE== Dude, 2.
-            // true          Until the next game, this is the best Xbox game!
-
-            // Let's take SentimentText column and break it into vector of words.
-            string originalTextColumnName = "Words";
-            var words = ml.Transforms.Text.TokenizeIntoWords("SentimentText", originalTextColumnName);
-
-            // Default pipeline will apply default stop word remover which is based on predifined set of words for certain languages.
-            var defaultPipeline = words.Append(ml.Transforms.Text.RemoveDefaultStopWords(originalTextColumnName, "DefaultRemover"));
-
-            // Another pipeline, that removes words specified by user. We do case insensitive comparison for the stop words.
-            var customizedPipeline = words.Append(ml.Transforms.Text.RemoveStopWords(originalTextColumnName, "RemovedWords",
-                new[] { "XBOX" }));
-
-            // The transformed data for both pipelines.
-            var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(trainData);
-            var transformedDataCustomized = customizedPipeline.Fit(trainData).Transform(trainData);
-
-            // Small helper to print the text inside the columns, in the console. 
-            Action<string, IEnumerable<VBuffer<ReadOnlyMemory<char>>>> printHelper = (columnName, column) =>
-            {
-                Console.WriteLine($"{columnName} column obtained post-transformation.");
-                foreach (var featureRow in column)
-                {
-                    foreach (var value in featureRow.GetValues())
-                        Console.Write($"{value}|");
-                    Console.WriteLine("");
-                }
-
-                Console.WriteLine("===================================================");
-            };
-
-            // Preview the result of breaking string into array of words.
-            var originalText = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema[originalTextColumnName]);
-            printHelper(originalTextColumnName, originalText);
-            // Best|game|I've|ever|played.|
-            // == RUDE ==| Dude,| 2 |
-            // Until | the | next | game,| this |is| the | best | Xbox | game!|
-
-            // Preview the result of cleaning with default stop word remover.
-            var defaultRemoverData = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema["DefaultRemover"]);
-            printHelper("DefaultRemover", defaultRemoverData);
-            // Best|game|I've|played.|
-            // == RUDE ==| Dude,| 2 |
-            // game,| best | Xbox | game!|
-            // As you can see "Until, the, next, this, is" was removed.
-
-
-            // Preview the result of cleaning with default customized stop word remover.
-            var customizeRemoverData = transformedDataCustomized.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataCustomized.Schema["RemovedWords"]);
-            printHelper("RemovedWords", customizeRemoverData);
-
-            // Best|game|I've|ever|played.|
-            // == RUDE ==| Dude,| 2 |
-            // Until | the | next | game,| this |is| the | best | game!|
-            //As you can see Xbox was removed.
-
-        }
-    }
-}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
new file mode 100644
index 0000000000..6e5af5fb67
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
@@ -0,0 +1,59 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML.Transforms.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class RemoveDefaultStopWords
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create an empty data sample list. The 'RemoveDefaultStopWords' does not require training data as
+            // the estimator ('StopWordsRemovingEstimator') created by 'RemoveDefaultStopWords' API is not a trainable estimator.
+            // The empty list is only needed to pass input schema to the pipeline.
+            var emptySamples = new List<TextData>();
+
+            // Convert sample list to an empty IDataView.
+            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
+
+            // A pipeline for removing stop words from input text/string.
+            // The pipeline first tokenizes text into words then removes stop words.
+            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text")
+                .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("WordsWithoutStopWords", "Words", language: StopWordsRemovingEstimator.Language.English));
+
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(emptyDataView);
+
+            // Create the prediction engine to remove the stop words from the input text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Call the prediction API to remove stop words.
+            var data = new TextData() { Text = "ML.NET's RemoveDefaultStopWords API removes stop words from the text/string. It requires the text/string to be tokenized beforehand." };
+            var prediction = predictionEngine.Predict(data);
+
+            // Print the length of the word vector after the stop words are removed.
+            Console.WriteLine($"Number of words: {prediction.WordsWithoutStopWords.Length}");
+
+            // Print the word vector without stop words.
+            Console.WriteLine($"\nWords without stop words: {string.Join(",", prediction.WordsWithoutStopWords)}");
+
+            //  Expected output:
+            //   Number of words: 11
+            //   Words without stop words: ML.NET's,RemoveDefaultStopWords,API,removes,stop,words,text/string.,requires,text/string,tokenized,beforehand.
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public string[] WordsWithoutStopWords { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
new file mode 100644
index 0000000000..6b9a6a6a07
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
@@ -0,0 +1,59 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML.Transforms.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class RemoveStopWords
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create an empty data sample list. The 'RemoveStopWords' does not require training data as
+            // the estimator ('CustomStopWordsRemovingEstimator') created by 'RemoveStopWords' API is not a trainable estimator.
+            // The empty list is only needed to pass input schema to the pipeline.
+            var emptySamples = new List<TextData>();
+
+            // Convert sample list to an empty IDataView.
+            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
+
+            // A pipeline for removing stop words from input text/string.
+            // The pipeline first tokenizes text into words then removes stop words.
+            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text")
+                .Append(mlContext.Transforms.Text.RemoveStopWords("WordsWithoutStopWords", "Words", stopwords: new[] { "a", "the", "from", "by" }));
+
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(emptyDataView);
+
+            // Create the prediction engine to remove the stop words from the input text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Call the prediction API to remove stop words.
+            var data = new TextData() { Text = "ML.NET's RemoveStopWords API removes stop words from the text/string using a list of stop words provided by the user." };
+            var prediction = predictionEngine.Predict(data);
+
+            // Print the length of the word vector after the stop words are removed.
+            Console.WriteLine($"Number of words: {prediction.WordsWithoutStopWords.Length}");
+
+            // Print the word vector without stop words.
+            Console.WriteLine($"\nWords without stop words: {string.Join(",", prediction.WordsWithoutStopWords)}");
+
+            //  Expected output:
+            //   Number of words: 14
+            //   Words without stop words: ML.NET's,RemoveStopWords,API,removes,stop,words,text/string,using,list,of,stop,words,provided,user.
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public string[] WordsWithoutStopWords { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
new file mode 100644
index 0000000000..d3275e6482
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
@@ -0,0 +1,57 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class TokenizeIntoWords
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create an empty data sample list. The 'TokenizeIntoWords' does not require training data as
+            // the estimator ('WordTokenizingEstimator') created by 'TokenizeIntoWords' API is not a trainable estimator.
+            // The empty list is only needed to pass input schema to the pipeline.
+            var emptySamples = new List<TextData>();
+
+            // Convert sample list to an empty IDataView.
+            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
+
+            // A pipeline for converting text into vector of words.
+            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text", separators: new[] { ' ' });
+
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(emptyDataView);
+
+            // Create the prediction engine to get the word vector from the input text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Call the prediction API to convert the text into words.
+            var data = new TextData() { Text = "ML.NET's TokenizeIntoWords API splits text/string into words using the list of characters provided as separators." };
+            var prediction = predictionEngine.Predict(data);
+
+            // Print the length of the word vector.
+            Console.WriteLine($"Number of words: {prediction.Words.Length}");
+
+            // Print the word vector.
+            Console.WriteLine($"\nWords: {string.Join(",", prediction.Words)}");
+
+            //  Expected output:
+            //   Number of words: 15
+            //   Words: ML.NET's,TokenizeIntoWords,API,splits,text/string,into,words,using,the,list,of,characters,provided,as,separators.
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public string[] Words { get; set; }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 1d0ef69a33..f0294730e4 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -179,6 +179,13 @@ internal static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog
         /// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
         /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
         /// <param name="separators">The separators to use (uses space character by default).</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[TokenizeIntoWords](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
             string inputColumnName = null,
@@ -247,8 +254,9 @@ internal static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Te
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        ///  [!code-csharp[FastTree](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs)]
-        /// ]]></format>
+        /// [!code-csharp[RemoveStopWords](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs)]
+        /// ]]>
+        /// </format>
         /// </example>
         public static StopWordsRemovingEstimator RemoveDefaultStopWords(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
@@ -267,8 +275,9 @@ public static StopWordsRemovingEstimator RemoveDefaultStopWords(this TransformsC
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        ///  [!code-csharp[FastTree](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/StopWordRemoverTransform.cs)]
-        /// ]]></format>
+        /// [!code-csharp[RemoveStopWords](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs)]
+        /// ]]>
+        /// </format>
         /// </example>
         public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,

From 1bc241d37910e6e8c13cc760827b056a7bee5057 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Mon, 1 Apr 2019 17:29:32 -0700
Subject: [PATCH 5/6] Addressed reviewers' comments.

---
 .../Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs       | 2 +-
 .../Dynamic/Transforms/Text/ApplyWordEmbedding.cs             | 2 +-
 .../Dynamic/Transforms/Text/NormalizeText.cs                  | 2 +-
 .../Dynamic/Transforms/Text/RemoveDefaultStopWords.cs         | 2 +-
 .../Dynamic/Transforms/Text/RemoveStopWords.cs                | 2 +-
 .../Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs   | 2 +-
 .../Dynamic/Transforms/Text/TokenizeIntoWords.cs              | 4 +++-
 7 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
index 4ac4ab18da..c3ee04dbbe 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs
@@ -13,7 +13,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'ApplyWordEmbedding' does not require training data as
+            // Create an empty list as the dataset. The 'ApplyWordEmbedding' API does not require training data as
             // the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
index 0a58a2da07..c1a62e21f5 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs
@@ -12,7 +12,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'ApplyWordEmbedding' does not require training data as
+            // Create an empty list as the dataset. The 'ApplyWordEmbedding' API does not require training data as
             // the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
index 920ea4353c..3fa83cf3ca 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
@@ -12,7 +12,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'NormalizeText' API does not require training data as
+            // Create an empty list as the dataset. The 'NormalizeText' API does not require training data as
             // the estimator ('TextNormalizingEstimator') created by 'NormalizeText' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
index 6e5af5fb67..a6bec688a3 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
@@ -12,7 +12,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'RemoveDefaultStopWords' does not require training data as
+            // Create an empty list as the dataset. The 'RemoveDefaultStopWords' API does not require training data as
             // the estimator ('StopWordsRemovingEstimator') created by 'RemoveDefaultStopWords' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
index 6b9a6a6a07..501ab8ae68 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
@@ -12,7 +12,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'RemoveStopWords' does not require training data as
+            // Create an empty list as the dataset. The 'RemoveStopWords' API does not require training data as
             // the estimator ('CustomStopWordsRemovingEstimator') created by 'RemoveStopWords' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs
index 9c443b459a..922269d222 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs
@@ -12,7 +12,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'TokenizeIntoCharactersAsKeys' does not require training data as
+            // Create an empty list as the dataset. The 'TokenizeIntoCharactersAsKeys' API does not require training data as
             // the estimator ('TokenizingByCharactersEstimator') created by 'TokenizeIntoCharactersAsKeys' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
index d3275e6482..1f98bd5a21 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
@@ -12,7 +12,7 @@ public static void Example()
             // as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty data sample list. The 'TokenizeIntoWords' does not require training data as
+            // Create an empty list as the dataset. The 'TokenizeIntoWords' API does not require training data as
             // the estimator ('WordTokenizingEstimator') created by 'TokenizeIntoWords' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
             var emptySamples = new List<TextData>();
@@ -21,6 +21,8 @@ public static void Example()
             var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
 
             // A pipeline for converting text into vector of words.
+            // The following call to 'TokenizeIntoWords' tokenizes text/string into words using space as a separator.
+            // Space is also the default value of the 'separators' argument when it is not specified.
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text", separators: new[] { ' ' });
 
             // Fit to data.

From 672ade68572a9127cace1e825bb1fb35cf72e898 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Tue, 2 Apr 2019 13:26:22 -0700
Subject: [PATCH 6/6] Addressed reviewers' comments.

---
 .../Dynamic/Transforms/Text/RemoveDefaultStopWords.cs          | 3 ++-
 .../Dynamic/Transforms/Text/RemoveStopWords.cs                 | 3 ++-
 src/Microsoft.ML.Transforms/Text/TextCatalog.cs                | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
index a6bec688a3..ddd5a56750 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs
@@ -22,6 +22,7 @@ public static void Example()
 
             // A pipeline for removing stop words from input text/string.
             // The pipeline first tokenizes text into words then removes stop words.
+            // The 'RemoveDefaultStopWords' API ignores the casing of the text/string, e.g. 'tHe' and 'the' are treated as the same stop word.
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text")
                 .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("WordsWithoutStopWords", "Words", language: StopWordsRemovingEstimator.Language.English));
 
@@ -32,7 +33,7 @@ public static void Example()
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
 
             // Call the prediction API to remove stop words.
-            var data = new TextData() { Text = "ML.NET's RemoveDefaultStopWords API removes stop words from the text/string. It requires the text/string to be tokenized beforehand." };
+            var data = new TextData() { Text = "ML.NET's RemoveDefaultStopWords API removes stop words from tHe text/string. It requires the text/string to be tokenized beforehand." };
             var prediction = predictionEngine.Predict(data);
 
             // Print the length of the word vector after the stop words removed.
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
index 501ab8ae68..a412920496 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs
@@ -22,6 +22,7 @@ public static void Example()
 
             // A pipeline for removing stop words from input text/string.
             // The pipeline first tokenizes text into words then removes stop words.
+            // The 'RemoveStopWords' API ignores the casing of the text/string, e.g. 'tHe' and 'the' are treated as the same stop word.
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text")
                 .Append(mlContext.Transforms.Text.RemoveStopWords("WordsWithoutStopWords", "Words", stopwords: new[] { "a", "the", "from", "by" }));
 
@@ -32,7 +33,7 @@ public static void Example()
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
 
             // Call the prediction API to remove stop words.
-            var data = new TextData() { Text = "ML.NET's RemoveStopWords API removes stop words from the text/string using a list of stop words provided by the user." };
+            var data = new TextData() { Text = "ML.NET's RemoveStopWords API removes stop words from tHe text/string using a list of stop words provided by the user." };
             var prediction = predictionEngine.Predict(data);
 
             // Print the length of the word vector after the stop words removed.
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 115ece0934..db412be77c 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -261,7 +261,7 @@ internal static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Te
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[RemoveStopWords](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs)]
+        /// [!code-csharp[RemoveDefaultStopWords](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs)]
         /// ]]>
         /// </format>
         /// </example>