From 4f581077340c57e34e09e7ed33cde7d959e0fc06 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Fri, 8 Mar 2019 17:00:56 -0800 Subject: [PATCH 01/12] Scrub n-gram hashing --- .../Text/NgramHashingTransformer.cs | 125 ++++-------------- .../Text/TextCatalog.cs | 85 +++--------- 2 files changed, 46 insertions(+), 164 deletions(-) diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index eb2e63d85d..86bfdc3e87 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -52,8 +52,8 @@ internal sealed class Column : ManyToOneColumn [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 30, inclusive.", - ShortName = "bits")] - public int? HashBits; + Name = "HashBits", ShortName = "bits")] + public int? NumberOfBits; [Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")] public uint? Seed; @@ -91,7 +91,7 @@ private protected override bool TryParse(string str) if (!int.TryParse(extra, out int bits)) return false; - HashBits = bits; + NumberOfBits = bits; return true; } @@ -103,10 +103,10 @@ internal bool TryUnparse(StringBuilder sb) { return false; } - if (HashBits == null) + if (NumberOfBits == null) return TryUnparseCore(sb); - string extra = HashBits.Value.ToString(); + string extra = NumberOfBits.Value.ToString(); return TryUnparseCore(sb, extra); } } @@ -133,8 +133,8 @@ internal sealed class Options [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 30, inclusive.", - ShortName = "bits", SortOrder = 2)] - public int HashBits = NgramHashingEstimator.Defaults.HashBits; + Name = "HashBits", ShortName = "bits", SortOrder = 2)] + public int NumberOfBits = NgramHashingEstimator.Defaults.NumberOfBits; [Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")] public uint Seed = NgramHashingEstimator.Defaults.Seed; @@ -353,7 +353,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat item.NgramLength ?? options.NgramLength, item.SkipLength ?? options.SkipLength, item.AllLengths ?? options.AllLengths, - item.HashBits ?? options.HashBits, + item.NumberOfBits ?? options.NumberOfBits, item.Seed ?? options.Seed, item.Ordered ?? options.Ordered, item.InvertHash ?? 
options.InvertHash, @@ -408,13 +408,13 @@ public Mapper(NgramHashingTransformer parent, DataViewSchema inputSchema, Finder _srcTypes[i][j] = srcType; } - _types[i] = new VectorType(NumberDataViewType.Single, 1 << _parent._columns[i].HashBits); + _types[i] = new VectorType(NumberDataViewType.Single, 1 << _parent._columns[i].NumberOfBits); } } private NgramIdFinder GetNgramIdFinder(int iinfo) { - uint mask = (1U << _parent._columns[iinfo].HashBits) - 1; + uint mask = (1U << _parent._columns[iinfo].NumberOfBits) - 1; int ngramLength = _parent._columns[iinfo].NgramLength; bool rehash = _parent._columns[iinfo].RehashUnigrams; bool ordered = _parent._columns[iinfo].Ordered; @@ -819,7 +819,7 @@ public NgramIdFinder Decorate(int iinfo, NgramIdFinder finder) } var collector = _iinfoToCollector[iinfo] = new InvertHashCollector( - 1 << _parent._columns[iinfo].HashBits, _invertHashMaxCounts[iinfo], + 1 << _parent._columns[iinfo].NumberOfBits, _invertHashMaxCounts[iinfo], stringMapper, EqualityComparer.Default, (in NGram src, ref NGram dst) => dst = src.Clone()); return @@ -852,7 +852,7 @@ public VBuffer>[] SlotNamesMetadata(out VectorType[] types) if (_iinfoToCollector[iinfo] != null) { var vec = values[iinfo] = _iinfoToCollector[iinfo].GetMetadata(); - Contracts.Assert(vec.Length == 1 << _parent._columns[iinfo].HashBits); + Contracts.Assert(vec.Length == 1 << _parent._columns[iinfo].NumberOfBits); types[iinfo] = new VectorType(TextDataViewType.Instance, vec.Length); } } @@ -887,7 +887,7 @@ public sealed class ColumnOptions /// Whether to store all ngram lengths up to , or only . public readonly bool AllLengths; /// Number of bits to hash into. Must be between 1 and 31, inclusive. - public readonly int HashBits; + public readonly int NumberOfBits; /// Hashing seed. public readonly uint Seed; /// Whether the position of each term should be included in the hash. @@ -907,14 +907,14 @@ public sealed class ColumnOptions internal string[] FriendlyNames; /// - /// Describes how the transformer handles one column pair. + /// Describes how the transformer maps several input columns, , to a output column, . /// /// Name of the column resulting from the transformation of . /// Names of the columns to transform. /// Maximum ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to store all ngram lengths up to , or only . - /// Number of bits to hash into. Must be between 1 and 31, inclusive. + /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Hashing seed. /// Whether the position of each term should be included in the hash. /// During hashing we constuct mappings between original values and the produced hash values. @@ -928,7 +928,7 @@ public ColumnOptions(string name, int ngramLength = NgramHashingEstimator.Defaults.NgramLength, int skipLength = NgramHashingEstimator.Defaults.SkipLength, bool allLengths = NgramHashingEstimator.Defaults.AllLengths, - int hashBits = NgramHashingEstimator.Defaults.HashBits, + int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, uint seed = NgramHashingEstimator.Defaults.Seed, bool ordered = NgramHashingEstimator.Defaults.Ordered, int invertHash = NgramHashingEstimator.Defaults.InvertHash, @@ -942,8 +942,8 @@ public ColumnOptions(string name, throw Contracts.ExceptParam(nameof(invertHash), "Value too small, must be -1 or larger"); // If the bits is 31 or higher, we can't declare a KeyValues of the appropriate length, // this requiring a VBuffer of length 1u << 31 which exceeds int.MaxValue. 
- if (invertHash != 0 && hashBits >= 31) - throw Contracts.ExceptParam(nameof(hashBits), $"Cannot support invertHash for a {0} bit hash. 30 is the maximum possible.", hashBits); + if (invertHash != 0 && numberOfBits >= 31) + throw Contracts.ExceptParam(nameof(numberOfBits), $"Cannot support invertHash for a {0} bit hash. 30 is the maximum possible.", numberOfBits); if (NgramLength + SkipLength > NgramBufferBuilder.MaxSkipNgramLength) { @@ -956,7 +956,7 @@ public ColumnOptions(string name, NgramLength = ngramLength; SkipLength = skipLength; AllLengths = allLengths; - HashBits = hashBits; + NumberOfBits = numberOfBits; Seed = seed; Ordered = ordered; InvertHash = invertHash; @@ -988,8 +988,8 @@ internal ColumnOptions(ModelLoadContext ctx) SkipLength = ctx.Reader.ReadInt32(); Contracts.CheckDecode(0 <= SkipLength && SkipLength <= NgramBufferBuilder.MaxSkipNgramLength); Contracts.CheckDecode(SkipLength <= NgramBufferBuilder.MaxSkipNgramLength - NgramLength); - HashBits = ctx.Reader.ReadInt32(); - Contracts.CheckDecode(1 <= HashBits && HashBits <= 30); + NumberOfBits = ctx.Reader.ReadInt32(); + Contracts.CheckDecode(1 <= NumberOfBits && NumberOfBits <= 30); Seed = ctx.Reader.ReadUInt32(); RehashUnigrams = ctx.Reader.ReadBoolByte(); Ordered = ctx.Reader.ReadBoolByte(); @@ -1018,8 +1018,8 @@ internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNa SkipLength = ctx.Reader.ReadInt32(); Contracts.CheckDecode(0 <= SkipLength && SkipLength <= NgramBufferBuilder.MaxSkipNgramLength); Contracts.CheckDecode(SkipLength <= NgramBufferBuilder.MaxSkipNgramLength - NgramLength); - HashBits = ctx.Reader.ReadInt32(); - Contracts.CheckDecode(1 <= HashBits && HashBits <= 30); + NumberOfBits = ctx.Reader.ReadInt32(); + Contracts.CheckDecode(1 <= NumberOfBits && NumberOfBits <= 30); Seed = ctx.Reader.ReadUInt32(); RehashUnigrams = ctx.Reader.ReadBoolByte(); Ordered = ctx.Reader.ReadBoolByte(); @@ -1052,8 +1052,8 @@ internal void Save(ModelSaveContext ctx) Contracts.Assert(0 <= SkipLength && SkipLength <= NgramBufferBuilder.MaxSkipNgramLength); Contracts.Assert(NgramLength + SkipLength <= NgramBufferBuilder.MaxSkipNgramLength); ctx.Writer.Write(SkipLength); - Contracts.Assert(1 <= HashBits && HashBits <= 30); - ctx.Writer.Write(HashBits); + Contracts.Assert(1 <= NumberOfBits && NumberOfBits <= 30); + ctx.Writer.Write(NumberOfBits); ctx.Writer.Write(Seed); ctx.Writer.WriteBoolByte(RehashUnigrams); ctx.Writer.WriteBoolByte(Ordered); @@ -1066,7 +1066,7 @@ internal static class Defaults internal const int NgramLength = 2; internal const bool AllLengths = true; internal const int SkipLength = 0; - internal const int HashBits = 16; + internal const int NumberOfBits = 16; internal const uint Seed = 314489979; internal const bool RehashUnigrams = false; internal const bool Ordered = true; @@ -1086,7 +1086,7 @@ internal static class Defaults /// The environment. /// Name of output column, will contain the ngram vector. Null means is replaced. /// Name of input column containing tokenized text. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . 
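For orientation, a minimal usage sketch of the ColumnOptions surface changed above; the column names, the MLContext instance, and the assumption that the input is a key-typed token vector are illustrative and not taken from the patch:

// "Tokens" is assumed to be a vector of keys, e.g. produced by word tokenization
// followed by a value-to-key mapping, which is what the n-gram hashing transform expects.
var ngramHashColumn = new NgramHashingEstimator.ColumnOptions(
    "NgramFeatures",            // output column
    new[] { "Tokens" },         // input column(s)
    ngramLength: 2,
    skipLength: 0,
    allLengths: true,
    numberOfBits: 16,           // output vector has 1 << numberOfBits slots
    invertHash: 0);
var hashedNgrams = mlContext.Transforms.Text.ProduceHashedNgrams(new[] { ngramHashColumn });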
@@ -1099,84 +1099,17 @@ internal static class Defaults internal NgramHashingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, - int hashBits = 16, + int numberOfBits = 16, int ngramLength = 2, int skipLength = 0, bool allLengths = true, uint seed = 314489979, bool ordered = true, int invertHash = 0) - : this(env, new[] { (outputColumnName, new[] { inputColumnName ?? outputColumnName }) }, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash) + : this(env, new ColumnOptions(outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, numberOfBits, seed, ordered, invertHash)) { } - /// - /// Produces a bag of counts of hashed ngrams in - /// and outputs ngram vector as - /// - /// is different from in a way that - /// takes tokenized text as input while tokenizes text internally. - /// - /// The environment. - /// Name of output column, will contain the ngram vector. - /// Name of input columns containing tokenized text. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). - /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. - /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. - /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - internal NgramHashingEstimator(IHostEnvironment env, - string outputColumnName, - string[] inputColumnNames, - int hashBits = 16, - int ngramLength = 2, - int skipLength = 0, - bool allLengths = true, - uint seed = 314489979, - bool ordered = true, - int invertHash = 0) - : this(env, new[] { (outputColumnName, inputColumnNames) }, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash) - { - } - - /// - /// Produces a bag of counts of hashed ngrams in - /// and outputs ngram vector for each output in - /// - /// is different from in a way that - /// takes tokenized text as input while tokenizes text internally. - /// - /// The environment. - /// Pairs of input columns to output column mappings on which to compute ngram vector. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). - /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. - /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. - /// 0 does not retain any input values. -1 retains all input values mapping to each hash. 
- internal NgramHashingEstimator(IHostEnvironment env, - (string outputColumnName, string[] inputColumnName)[] columns, - int hashBits = 16, - int ngramLength = 2, - int skipLength = 0, - bool allLengths = true, - uint seed = 314489979, - bool ordered = true, - int invertHash = 0) - : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, hashBits, seed, ordered, invertHash)).ToArray()) - { - - } - /// /// Produces a bag of counts of hashed ngrams in /// and outputs ngram vector for each output in diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index bcfa9801e3..79f9c3c1ca 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -394,7 +394,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf /// The text-related transform's catalog. /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . @@ -407,7 +407,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, - int hashBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, + int numberOfBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, int ngramLength = NgramHashExtractingTransformer.DefaultArguments.NgramLength, int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, @@ -415,7 +415,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. bool ordered = NgramHashExtractingTransformer.DefaultArguments.Ordered, int invertHash = NgramHashExtractingTransformer.DefaultArguments.InvertHash) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + outputColumnName, inputColumnName, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); /// /// Produces a bag of counts of hashed ngrams in @@ -424,7 +424,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// The text-related transform's catalog. /// Name of the column resulting from the transformation of . /// Name of the columns to transform. If set to , the value of the will be used as source. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . @@ -437,7 +437,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. 
public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string[] inputColumnNames, - int hashBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, + int numberOfBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, int ngramLength = NgramHashExtractingTransformer.DefaultArguments.NgramLength, int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, @@ -445,7 +445,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. bool ordered = NgramHashExtractingTransformer.DefaultArguments.Ordered, int invertHash = NgramHashExtractingTransformer.DefaultArguments.InvertHash) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnNames, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + outputColumnName, inputColumnNames, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); /// /// Produces a bag of counts of hashed ngrams in @@ -453,7 +453,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// /// The text-related transform's catalog. /// Pairs of columns to compute bag of word vector. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . @@ -465,7 +465,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog.TextTransforms catalog, (string outputColumnName, string[] inputColumnNames)[] columns, - int hashBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, + int numberOfBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, int ngramLength = NgramHashExtractingTransformer.DefaultArguments.NgramLength, int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, @@ -473,7 +473,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. bool ordered = NgramHashExtractingTransformer.DefaultArguments.Ordered, int invertHash = NgramHashExtractingTransformer.DefaultArguments.InvertHash) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - columns, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + columns, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); /// /// Produces a bag of counts of hashed ngrams in @@ -485,7 +485,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// The text-related transform's catalog. /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . 
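As a usage sketch for the ProduceHashedWordBags extension documented above; the column names and the MLContext instance are assumed for illustration:

var mlContext = new MLContext();
// "Text" is a free-text input column; ProduceHashedWordBags tokenizes it internally
// and emits a bag-of-hashed-ngrams vector of dimension 1 << numberOfBits in "BagFeatures".
var wordBags = mlContext.Transforms.Text.ProduceHashedWordBags(
    "BagFeatures", "Text",
    numberOfBits: 16,
    ngramLength: 2,
    skipLength: 0,
    allLengths: true);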
@@ -498,7 +498,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, - int hashBits = NgramHashingEstimator.Defaults.HashBits, + int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, int ngramLength = NgramHashingEstimator.Defaults.NgramLength, int skipLength = NgramHashingEstimator.Defaults.SkipLength, bool allLengths = NgramHashingEstimator.Defaults.AllLengths, @@ -506,71 +506,20 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T bool ordered = NgramHashingEstimator.Defaults.Ordered, int invertHash = NgramHashingEstimator.Defaults.InvertHash) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + outputColumnName, inputColumnName, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); /// - /// Produces a bag of counts of hashed ngrams in - /// and outputs ngram vector as + /// Produces a bag of counts of hashed ngrams for each . For each column, + /// are the input columns of the output column named as . /// /// is different from in a way that /// takes tokenized text as input while tokenizes text internally. /// /// The text-related transform's catalog. - /// Name of the column resulting from the transformation of . - /// Name of the columns to transform. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). - /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. - /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. - /// 0 does not retain any input values. -1 retains all input values mapping to each hash. + /// Pairs of columns to compute n-grams. Note that gram indices are generated by hashing. public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, - string outputColumnName, - string[] inputColumnNames, - int hashBits = NgramHashingEstimator.Defaults.HashBits, - int ngramLength = NgramHashingEstimator.Defaults.NgramLength, - int skipLength = NgramHashingEstimator.Defaults.SkipLength, - bool allLengths = NgramHashingEstimator.Defaults.AllLengths, - uint seed = NgramHashingEstimator.Defaults.Seed, - bool ordered = NgramHashingEstimator.Defaults.Ordered, - int invertHash = NgramHashingEstimator.Defaults.InvertHash) - => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnNames, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); - - /// - /// Produces a bag of counts of hashed ngrams in - /// and outputs ngram vector for each output in - /// - /// is different from in a way that - /// takes tokenized text as input while tokenizes text internally. 
- /// - /// The text-related transform's catalog. - /// Pairs of columns to compute bag of word vector. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). - /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. - /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. - /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string[] inputColumnNames)[] columns, - int hashBits = NgramHashingEstimator.Defaults.HashBits, - int ngramLength = NgramHashingEstimator.Defaults.NgramLength, - int skipLength = NgramHashingEstimator.Defaults.SkipLength, - bool allLengths = NgramHashingEstimator.Defaults.AllLengths, - uint seed = NgramHashingEstimator.Defaults.Seed, - bool ordered = NgramHashingEstimator.Defaults.Ordered, - int invertHash = NgramHashingEstimator.Defaults.InvertHash) - => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - columns, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + NgramHashingEstimator.ColumnOptions[] columns) + => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); /// /// Uses LightLDA to transform a document (represented as a vector of floats) From 6cfe2fc6765882d34b4e2fd1e239dd367ae5725a Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Fri, 8 Mar 2019 17:07:38 -0800 Subject: [PATCH 02/12] Handle static part --- .../TextStaticExtensions.cs | 26 +++++++++---------- .../Text/TextCatalog.cs | 4 +-- .../StaticPipeTests.cs | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 3bf6647d70..39944d6924 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -512,8 +512,8 @@ private sealed class OutPipelineColumn : Vector { public readonly VarVector> Input; - public OutPipelineColumn(VarVector> input, int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) - : base(new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input) + public OutPipelineColumn(VarVector> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) + : base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input) { Input = input; } @@ -521,7 +521,7 @@ public OutPipelineColumn(VarVector> input, int hashBits, int n private sealed class Reconciler : EstimatorReconciler, IEquatable { - private readonly int _hashBits; + private readonly int _numberOfBits; private readonly int _ngramLength; private readonly int _skipLength; private readonly bool _allLengths; @@ -529,9 +529,9 @@ private sealed class 
Reconciler : EstimatorReconciler, IEquatable private readonly bool _ordered; private readonly int _invertHash; - public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) + public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) { - _hashBits = hashBits; + _numberOfBits = numberOfBits; _ngramLength = ngramLength; _skipLength = skipLength; _allLengths = allLengths; @@ -542,7 +542,7 @@ public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths public bool Equals(Reconciler other) { - return _hashBits == other._hashBits && + return _numberOfBits == other._numberOfBits && _ngramLength == other._ngramLength && _skipLength == other._skipLength && _allLengths == other._allLengths && @@ -561,7 +561,7 @@ public override IEstimator Reconcile(IHostEnvironment env, var columns = new List(); foreach (var outCol in toOutput) columns.Add(new NgramHashingEstimator.ColumnOptions(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] }, - _ngramLength, _skipLength, _allLengths, _hashBits, _seed, _ordered, _invertHash)); + _ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _ordered, _invertHash)); return new NgramHashingEstimator(env, columns.ToArray()); } @@ -571,11 +571,11 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given tokenized text. /// It does so by hashing each ngram and using the hash value as the index in the bag. /// - /// is different from - /// in a way that takes tokenized text as input while tokenizes text internally. + /// is different from + /// in a way that takes tokenized text as input while tokenizes text internally. /// /// The column to apply to. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . @@ -585,13 +585,13 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static Vector ToNgramsHash(this VarVector> input, - int hashBits = 16, + public static Vector ApplyNgramHashing(this VarVector> input, + int numberOfBits = 16, int ngramLength = 2, int skipLength = 0, bool allLengths = true, uint seed = 314489979, bool ordered = true, - int invertHash = 0) => new OutPipelineColumn(input, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 79f9c3c1ca..44fb6c45b9 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -495,7 +495,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. 
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, + public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, @@ -517,7 +517,7 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T /// /// The text-related transform's catalog. /// Pairs of columns to compute n-grams. Note that gram indices are generated by hashing. - public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, + public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.TextTransforms catalog, NgramHashingEstimator.ColumnOptions[] columns) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index f18a707999..8a1f392533 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -605,7 +605,7 @@ public void Ngrams() .Append(r => ( r.label, ngrams: r.text.TokenizeText().ToKey().ToNgrams(), - ngramshash: r.text.TokenizeText().ToKey().ToNgramsHash())); + ngramshash: r.text.TokenizeText().ToKey().ApplyNgramHashing())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; From 698b941cf601bb8d4b46b4a95fc0bb31cb02dafe Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Fri, 8 Mar 2019 17:23:41 -0800 Subject: [PATCH 03/12] Handle Ngram Shift from Array to ReadOnlyList --- .../TextStaticExtensions.cs | 6 +- .../Text/NgramTransform.cs | 66 ++++++++++--------- .../Text/TextCatalog.cs | 18 ++--- .../Text/WordBagTransform.cs | 4 +- 4 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 39944d6924..f2d5678f79 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -492,15 +492,15 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
public static Vector ToNgrams(this VarVector> input, int ngramLength = 1, int skipLength = 0, bool allLengths = true, - int maxNumTerms = 10000000, + int maximumTermCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maxNumTerms, weighting); + => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumTermCount, weighting); } /// diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index ce5fb31119..c1df26fdac 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -93,7 +93,7 @@ internal sealed class Options : TransformInputBase public int SkipLength = NgramExtractingEstimator.Defaults.SkipLength; [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] - public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaxNumTerms }; + public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumTermCount }; [Argument(ArgumentType.AtMostOnce, HelpText = "The weighting criteria")] public NgramExtractingEstimator.WeightingCriteria Weighting = NgramExtractingEstimator.Defaults.Weighting; @@ -253,7 +253,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat // Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will // be added (using lims[iinfo]), therefore we set slotLim to the maximum helpers[iinfo] = new NgramBufferBuilder(ngramLength, skipLength, Utils.ArrayMaxSize, - GetNgramIdFinderAdd(env, counts[iinfo], columns[iinfo].Limits, ngramMaps[iinfo], transformInfos[iinfo].RequireIdf)); + GetNgramIdFinderAdd(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo], transformInfos[iinfo].RequireIdf)); } int cInfoFull = 0; @@ -293,7 +293,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat } } } - AssertValid(env, counts[iinfo], columns[iinfo].Limits, ngramMaps[iinfo]); + AssertValid(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo]); } } @@ -307,7 +307,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat for (int iinfo = 0; iinfo < columns.Length; iinfo++) { - AssertValid(env, counts[iinfo], columns[iinfo].Limits, ngramMaps[iinfo]); + AssertValid(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo]); int ngramLength = transformInfos[iinfo].NgramLength; for (int i = 0; i < ngramLength; i++) @@ -319,11 +319,11 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat } [Conditional("DEBUG")] - private static void AssertValid(IHostEnvironment env, int[] counts, ImmutableArray lims, SequencePool pool) + private static void AssertValid(IHostEnvironment env, int[] counts, IReadOnlyList lims, SequencePool pool) { int count = 0; int countFull = 0; - for (int i = 0; i < lims.Length; i++) + for (int i = 0; i < lims.Count; i++) { env.Assert(counts[i] >= 0); env.Assert(counts[i] <= lims[i]); @@ -334,20 +334,20 @@ private static void AssertValid(IHostEnvironment env, int[] counts, ImmutableArr env.Assert(count == pool.Count); } - private static NgramIdFinder GetNgramIdFinderAdd(IHostEnvironment env, int[] counts, ImmutableArray lims, SequencePool pool, bool requireIdf) + private static NgramIdFinder GetNgramIdFinderAdd(IHostEnvironment 
env, int[] counts, IReadOnlyList lims, SequencePool pool, bool requireIdf) { Contracts.AssertValue(env); - env.Assert(lims.Length > 0); - env.Assert(lims.Length == Utils.Size(counts)); + env.Assert(lims.Count > 0); + env.Assert(lims.Count == Utils.Size(counts)); int numFull = lims.Count(l => l <= 0); - int ngramLength = lims.Length; + int ngramLength = lims.Count; return (uint[] ngram, int lim, int icol, ref bool more) => { env.Assert(0 < lim && lim <= Utils.Size(ngram)); env.Assert(lim <= Utils.Size(counts)); - env.Assert(lim <= lims.Length); + env.Assert(lim <= lims.Count); env.Assert(icol == 0); var max = lim - 1; @@ -695,7 +695,7 @@ internal static class Defaults public const int NgramLength = 2; public const bool AllLengths = true; public const int SkipLength = 0; - public const int MaxNumTerms = 10000000; + public const int MaximumTermCount = 10000000; public const WeightingCriteria Weighting = WeightingCriteria.Tf; } @@ -712,16 +712,16 @@ internal static class Defaults /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal NgramExtractingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, - int maxNumTerms = Defaults.MaxNumTerms, + int maximumTermCount = Defaults.MaximumTermCount, WeightingCriteria weighting = Defaults.Weighting) - : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maxNumTerms, weighting) + : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maximumTermCount, weighting) { } @@ -734,16 +734,16 @@ internal NgramExtractingEstimator(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal NgramExtractingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, - int maxNumTerms = Defaults.MaxNumTerms, + int maximumTermCount = Defaults.MaximumTermCount, WeightingCriteria weighting = Defaults.Weighting) - : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maxNumTerms)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maximumTermCount)).ToArray()) { } @@ -809,10 +809,14 @@ public sealed class ColumnOptions /// The weighting criteria. public readonly WeightingCriteria Weighting; /// + /// Underlying state of . 
+ /// + private readonly ImmutableArray _maximumTermCounts; + /// /// Contains the maximum number of grams to store in the dictionary, for each level of ngrams, /// from 1 (in position 0) up to ngramLength (in position ngramLength-1) /// - public readonly ImmutableArray Limits; + public IReadOnlyList MaximumTermCounts => _maximumTermCounts; /// /// Describes how the transformer handles one Gcn column pair. @@ -823,14 +827,14 @@ public sealed class ColumnOptions /// Maximum number of tokens to skip when constructing an ngram. /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. /// The weighting criteria. - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. public ColumnOptions(string name, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, WeightingCriteria weighting = Defaults.Weighting, - int maxNumTerms = Defaults.MaxNumTerms) - : this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maxNumTerms }, inputColumnName ?? name) + int maximumTermCount = Defaults.MaximumTermCount) + : this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maximumTermCount }, inputColumnName ?? name) { } @@ -839,7 +843,7 @@ internal ColumnOptions(string name, int skipLength, bool allLengths, WeightingCriteria weighting, - int[] maxNumTerms, + int[] maximumTermCounts, string inputColumnName = null) { Name = name; @@ -857,18 +861,18 @@ internal ColumnOptions(string name, var limits = new int[ngramLength]; if (!AllLengths) { - Contracts.CheckUserArg(Utils.Size(maxNumTerms) == 0 || - Utils.Size(maxNumTerms) == 1 && maxNumTerms[0] > 0, nameof(maxNumTerms)); - limits[ngramLength - 1] = Utils.Size(maxNumTerms) == 0 ? Defaults.MaxNumTerms : maxNumTerms[0]; + Contracts.CheckUserArg(Utils.Size(maximumTermCounts) == 0 || + Utils.Size(maximumTermCounts) == 1 && maximumTermCounts[0] > 0, nameof(maximumTermCounts)); + limits[ngramLength - 1] = Utils.Size(maximumTermCounts) == 0 ? Defaults.MaximumTermCount : maximumTermCounts[0]; } else { - Contracts.CheckUserArg(Utils.Size(maxNumTerms) <= ngramLength, nameof(maxNumTerms)); - Contracts.CheckUserArg(Utils.Size(maxNumTerms) == 0 || maxNumTerms.All(i => i >= 0) && maxNumTerms[maxNumTerms.Length - 1] > 0, nameof(maxNumTerms)); - var extend = Utils.Size(maxNumTerms) == 0 ? Defaults.MaxNumTerms : maxNumTerms[maxNumTerms.Length - 1]; - limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maxNumTerms) ? maxNumTerms[i] : extend); + Contracts.CheckUserArg(Utils.Size(maximumTermCounts) <= ngramLength, nameof(maximumTermCounts)); + Contracts.CheckUserArg(Utils.Size(maximumTermCounts) == 0 || maximumTermCounts.All(i => i >= 0) && maximumTermCounts[maximumTermCounts.Length - 1] > 0, nameof(maximumTermCounts)); + var extend = Utils.Size(maximumTermCounts) == 0 ? Defaults.MaximumTermCount : maximumTermCounts[maximumTermCounts.Length - 1]; + limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maximumTermCounts) ? 
maximumTermCounts[i] : extend); } - Limits = ImmutableArray.Create(limits); + _maximumTermCounts = ImmutableArray.Create(limits); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 44fb6c45b9..8d3856aa78 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -194,7 +194,7 @@ public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextT /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. /// /// @@ -209,10 +209,10 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaxNumTerms, + int maximumTermCount = NgramExtractingEstimator.Defaults.MaximumTermCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, - ngramLength, skipLength, allLengths, maxNumTerms, weighting); + ngramLength, skipLength, allLengths, maximumTermCount, weighting); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -223,17 +223,17 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
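A similar sketch for the dictionary-based ProduceNgrams path described below; the tokenization and key-mapping steps, the column names, and the MLContext instance are assumptions for illustration, and parameter names follow this patch:

// NgramExtractingEstimator expects a vector of keys, so the text is first tokenized
// and mapped to keys before n-grams are counted against a learned dictionary.
var pipeline = mlContext.Transforms.Text.TokenizeWords("Tokens", "Text")
    .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
    .Append(mlContext.Transforms.Text.ProduceNgrams(
        "NgramFeatures", "Tokens",
        ngramLength: 2,
        maximumTermCount: 10_000_000,
        weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf));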
public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, (string outputColumnName, string inputColumnName)[] columns, int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaxNumTerms, + int maximumTermCount = NgramExtractingEstimator.Defaults.MaximumTermCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, - ngramLength, skipLength, allLengths, maxNumTerms, weighting); + ngramLength, skipLength, allLengths, maximumTermCount, weighting); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -339,7 +339,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaxNumTerms, + int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumTermCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, ngramLength, skipLength, allLengths, maxNumTerms); @@ -362,7 +362,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaxNumTerms, + int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumTermCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnNames, ngramLength, skipLength, allLengths, maxNumTerms, weighting); @@ -383,7 +383,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaxNumTerms, + int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumTermCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, ngramLength, skipLength, allLengths, maxNumTerms, weighting); diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index a66b4512be..1ccd1d38fe 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -229,7 +229,7 @@ internal abstract class ArgumentsBase public bool AllLengths = NgramExtractingEstimator.Defaults.AllLengths; [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] 
- public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaxNumTerms }; + public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumTermCount }; [Argument(ArgumentType.AtMostOnce, HelpText = "The weighting criteria")] public NgramExtractingEstimator.WeightingCriteria Weighting = NgramExtractingEstimator.Defaults.Weighting; @@ -315,7 +315,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa termArgs = new ValueToKeyMappingTransformer.Options() { - MaxNumTerms = Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] : NgramExtractingEstimator.Defaults.MaxNumTerms, + MaxNumTerms = Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] : NgramExtractingEstimator.Defaults.MaximumTermCount, Columns = new ValueToKeyMappingTransformer.Column[termCols.Count] }; } From 03f6025d2784d63f284a819ae07a069d828f7e84 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 10:02:28 -0700 Subject: [PATCH 04/12] Address comments --- .../TextStaticExtensions.cs | 14 ++--- .../Text/NgramTransform.cs | 56 +++++++++---------- .../Text/TextCatalog.cs | 33 ++--------- .../Text/WordBagTransform.cs | 4 +- 4 files changed, 43 insertions(+), 64 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index f2d5678f79..bc542fb485 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -443,7 +443,7 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable private readonly int _ngramLength; private readonly int _skipLength; private readonly bool _allLengths; - private readonly int _maxNumTerms; + private readonly int _maxNgramsCount; private readonly NgramExtractingEstimator.WeightingCriteria _weighting; public Reconciler(int ngramLength, int skipLength, bool allLengths, int maxNumTerms, NgramExtractingEstimator.WeightingCriteria weighting) @@ -451,7 +451,7 @@ public Reconciler(int ngramLength, int skipLength, bool allLengths, int maxNumTe _ngramLength = ngramLength; _skipLength = skipLength; _allLengths = allLengths; - _maxNumTerms = maxNumTerms; + _maxNgramsCount = maxNumTerms; _weighting = weighting; } @@ -461,7 +461,7 @@ public bool Equals(Reconciler other) return _ngramLength == other._ngramLength && _skipLength == other._skipLength && _allLengths == other._allLengths && - _maxNumTerms == other._maxNumTerms && + _maxNgramsCount == other._maxNgramsCount && _weighting == other._weighting; } @@ -477,7 +477,7 @@ public override IEstimator Reconcile(IHostEnvironment env, foreach (var outCol in toOutput) pairs.Add((outputNames[outCol], inputNames[((OutPipelineColumn)outCol).Input])); - return new NgramExtractingEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _allLengths, _maxNumTerms, _weighting); + return new NgramExtractingEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _allLengths, _maxNgramsCount, _weighting); } } @@ -492,15 +492,15 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
public static Vector ToNgrams(this VarVector> input, int ngramLength = 1, int skipLength = 0, bool allLengths = true, - int maximumTermCount = 10000000, + int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumTermCount, weighting); + => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting); } /// diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index c1df26fdac..8e360e7123 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -93,7 +93,7 @@ internal sealed class Options : TransformInputBase public int SkipLength = NgramExtractingEstimator.Defaults.SkipLength; [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] - public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumTermCount }; + public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumNgramsCount }; [Argument(ArgumentType.AtMostOnce, HelpText = "The weighting criteria")] public NgramExtractingEstimator.WeightingCriteria Weighting = NgramExtractingEstimator.Defaults.Weighting; @@ -253,7 +253,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat // Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will // be added (using lims[iinfo]), therefore we set slotLim to the maximum helpers[iinfo] = new NgramBufferBuilder(ngramLength, skipLength, Utils.ArrayMaxSize, - GetNgramIdFinderAdd(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo], transformInfos[iinfo].RequireIdf)); + GetNgramIdFinderAdd(env, counts[iinfo], columns[iinfo].MaximumNgramsCounts, ngramMaps[iinfo], transformInfos[iinfo].RequireIdf)); } int cInfoFull = 0; @@ -293,7 +293,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat } } } - AssertValid(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo]); + AssertValid(env, counts[iinfo], columns[iinfo].MaximumNgramsCounts, ngramMaps[iinfo]); } } @@ -307,7 +307,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat for (int iinfo = 0; iinfo < columns.Length; iinfo++) { - AssertValid(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo]); + AssertValid(env, counts[iinfo], columns[iinfo].MaximumNgramsCounts, ngramMaps[iinfo]); int ngramLength = transformInfos[iinfo].NgramLength; for (int i = 0; i < ngramLength; i++) @@ -695,7 +695,7 @@ internal static class Defaults public const int NgramLength = 2; public const bool AllLengths = true; public const int SkipLength = 0; - public const int MaximumTermCount = 10000000; + public const int MaximumNgramsCount = 10000000; public const WeightingCriteria Weighting = WeightingCriteria.Tf; } @@ -712,16 +712,16 @@ internal static class Defaults /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
internal NgramExtractingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, - int maximumTermCount = Defaults.MaximumTermCount, + int maximumNgramsCount = Defaults.MaximumNgramsCount, WeightingCriteria weighting = Defaults.Weighting) - : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maximumTermCount, weighting) + : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting) { } @@ -734,16 +734,16 @@ internal NgramExtractingEstimator(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal NgramExtractingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, - int maximumTermCount = Defaults.MaximumTermCount, + int maximumNgramsCount = Defaults.MaximumNgramsCount, WeightingCriteria weighting = Defaults.Weighting) - : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maximumTermCount)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maximumNgramsCount)).ToArray()) { } @@ -809,14 +809,14 @@ public sealed class ColumnOptions /// The weighting criteria. public readonly WeightingCriteria Weighting; /// - /// Underlying state of . + /// Underlying state of . /// - private readonly ImmutableArray _maximumTermCounts; + private readonly ImmutableArray _maximumNgramsCounts; /// - /// Contains the maximum number of grams to store in the dictionary, for each level of ngrams, - /// from 1 (in position 0) up to ngramLength (in position ngramLength-1) + /// Contains the maximum number of terms (that is, n-grams) to store in the dictionary, for each level of n-grams, + /// from n=1 (in position 0) up to n= (in position -1) /// - public IReadOnlyList MaximumTermCounts => _maximumTermCounts; + public IReadOnlyList MaximumNgramsCounts => _maximumNgramsCounts; /// /// Describes how the transformer handles one Gcn column pair. @@ -827,14 +827,14 @@ public sealed class ColumnOptions /// Maximum number of tokens to skip when constructing an ngram. /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. /// The weighting criteria. - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of n-grams to store in the dictionary. public ColumnOptions(string name, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, bool allLengths = Defaults.AllLengths, WeightingCriteria weighting = Defaults.Weighting, - int maximumTermCount = Defaults.MaximumTermCount) - : this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maximumTermCount }, inputColumnName ?? 
name) + int maximumNgramsCount = Defaults.MaximumNgramsCount) + : this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maximumNgramsCount }, inputColumnName ?? name) { } @@ -843,7 +843,7 @@ internal ColumnOptions(string name, int skipLength, bool allLengths, WeightingCriteria weighting, - int[] maximumTermCounts, + int[] maximumNgramsCounts, string inputColumnName = null) { Name = name; @@ -861,18 +861,18 @@ internal ColumnOptions(string name, var limits = new int[ngramLength]; if (!AllLengths) { - Contracts.CheckUserArg(Utils.Size(maximumTermCounts) == 0 || - Utils.Size(maximumTermCounts) == 1 && maximumTermCounts[0] > 0, nameof(maximumTermCounts)); - limits[ngramLength - 1] = Utils.Size(maximumTermCounts) == 0 ? Defaults.MaximumTermCount : maximumTermCounts[0]; + Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) == 0 || + Utils.Size(maximumNgramsCounts) == 1 && maximumNgramsCounts[0] > 0, nameof(maximumNgramsCounts)); + limits[ngramLength - 1] = Utils.Size(maximumNgramsCounts) == 0 ? Defaults.MaximumNgramsCount : maximumNgramsCounts[0]; } else { - Contracts.CheckUserArg(Utils.Size(maximumTermCounts) <= ngramLength, nameof(maximumTermCounts)); - Contracts.CheckUserArg(Utils.Size(maximumTermCounts) == 0 || maximumTermCounts.All(i => i >= 0) && maximumTermCounts[maximumTermCounts.Length - 1] > 0, nameof(maximumTermCounts)); - var extend = Utils.Size(maximumTermCounts) == 0 ? Defaults.MaximumTermCount : maximumTermCounts[maximumTermCounts.Length - 1]; - limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maximumTermCounts) ? maximumTermCounts[i] : extend); + Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) <= ngramLength, nameof(maximumNgramsCounts)); + Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) == 0 || maximumNgramsCounts.All(i => i >= 0) && maximumNgramsCounts[maximumNgramsCounts.Length - 1] > 0, nameof(maximumNgramsCounts)); + var extend = Utils.Size(maximumNgramsCounts) == 0 ? Defaults.MaximumNgramsCount : maximumNgramsCounts[maximumNgramsCounts.Length - 1]; + limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maximumNgramsCounts) ? maximumNgramsCounts[i] : extend); } - _maximumTermCounts = ImmutableArray.Create(limits); + _maximumNgramsCounts = ImmutableArray.Create(limits); } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 8d3856aa78..3a370c8739 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -194,7 +194,7 @@ public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextT /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
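A short illustrative sketch of the renamed column-option surface above; the variable and column names are assumptions and the snippet is not taken from the patch:

    // requires: using Microsoft.ML.Transforms.Text;
    // With allLengths: true and a single maximumNgramsCount, the per-length limits exposed by
    // MaximumNgramsCounts all share that budget, i.e. { 50000, 50000, 50000 } for ngramLength: 3.
    var ngramColumn = new NgramExtractingEstimator.ColumnOptions(
        name: "Ngrams",
        inputColumnName: "Tokens",   // assumed to be a key-typed token column
        ngramLength: 3,
        skipLength: 0,
        allLengths: true,
        weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf,
        maximumNgramsCount: 50000);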
/// /// @@ -209,31 +209,10 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maximumTermCount = NgramExtractingEstimator.Defaults.MaximumTermCount, + int maximumNgramsCounts = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, - ngramLength, skipLength, allLengths, maximumTermCount, weighting); - - /// - /// Produces a bag of counts of ngrams (sequences of consecutive words) in - /// and outputs bag of word vector for each output in - /// - /// The text-related transform's catalog. - /// Pairs of columns to compute bag of word vector. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. - /// Statistical measure used to evaluate how important a word is to a document in a corpus. - public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string inputColumnName)[] columns, - int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, - int skipLength = NgramExtractingEstimator.Defaults.SkipLength, - bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maximumTermCount = NgramExtractingEstimator.Defaults.MaximumTermCount, - NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting) - => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, - ngramLength, skipLength, allLengths, maximumTermCount, weighting); + ngramLength, skipLength, allLengths, maximumNgramsCounts, weighting); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -339,7 +318,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumTermCount, + int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, ngramLength, skipLength, allLengths, maxNumTerms); @@ -362,7 +341,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumTermCount, + int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnNames, 
ngramLength, skipLength, allLengths, maxNumTerms, weighting); @@ -383,7 +362,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumTermCount, + int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, ngramLength, skipLength, allLengths, maxNumTerms, weighting); diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index 1ccd1d38fe..30a1a2fae1 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -229,7 +229,7 @@ internal abstract class ArgumentsBase public bool AllLengths = NgramExtractingEstimator.Defaults.AllLengths; [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] - public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumTermCount }; + public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumNgramsCount }; [Argument(ArgumentType.AtMostOnce, HelpText = "The weighting criteria")] public NgramExtractingEstimator.WeightingCriteria Weighting = NgramExtractingEstimator.Defaults.Weighting; @@ -315,7 +315,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa termArgs = new ValueToKeyMappingTransformer.Options() { - MaxNumTerms = Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] : NgramExtractingEstimator.Defaults.MaximumTermCount, + MaxNumTerms = Utils.Size(options.MaxNumTerms) > 0 ? options.MaxNumTerms[0] : NgramExtractingEstimator.Defaults.MaximumNgramsCount, Columns = new ValueToKeyMappingTransformer.Column[termCols.Count] }; } From 86048700849c235537cd38bcbca10f17431210d1 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 10:04:21 -0700 Subject: [PATCH 05/12] Address one more comment --- src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs | 6 +++--- test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs | 2 +- .../Scenarios/Api/CookbookSamples/CookbookSamples.cs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index bc542fb485..5aea9f53f9 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -485,8 +485,8 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Produces a bag of counts of ngrams (sequences of consecutive words ) in a given tokenized text. /// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. /// - /// /// is different from - /// in a way that takes tokenized text as input while tokenizes text internally. + /// /// is different from + /// in a way that takes tokenized text as input while tokenizes text internally. /// /// The column to apply to. /// Ngram length. @@ -494,7 +494,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Whether to include all ngram lengths up to or only . 
/// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. - public static Vector ToNgrams(this VarVector> input, + public static Vector ProduceNgrams(this VarVector> input, int ngramLength = 1, int skipLength = 0, bool allLengths = true, diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 8a1f392533..9f18956af4 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -604,7 +604,7 @@ public void Ngrams() var est = data.MakeNewEstimator() .Append(r => ( r.label, - ngrams: r.text.TokenizeText().ToKey().ToNgrams(), + ngrams: r.text.TokenizeText().ToKey().ProduceNgrams(), ngramshash: r.text.TokenizeText().ToKey().ApplyNgramHashing())); var tdata = est.Fit(data).Transform(data); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index b60afc07f2..64d81361d4 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -467,7 +467,7 @@ private void TextFeaturizationOn(string dataPath) BagOfBigrams: r.Message.NormalizeText().ToBagofHashedWords(ngramLength: 2, allLengths: false), // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. - BagOfTrichar: r.Message.TokenizeIntoCharacters().ToNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), + BagOfTrichar: r.Message.TokenizeIntoCharacters().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), // NLP pipeline 4: word embeddings. // PretrainedModelKind.Sswe is used here for performance of the test. In a real From 47b3aacc5c653c231d2f0eb46aeff7bd29b911eb Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 10:47:56 -0700 Subject: [PATCH 06/12] Rename NgramHashing's static API again --- src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs | 6 +++--- test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 5aea9f53f9..e722c4f7d6 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -571,8 +571,8 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given tokenized text. /// It does so by hashing each ngram and using the hash value as the index in the bag. /// - /// is different from - /// in a way that takes tokenized text as input while tokenizes text internally. + /// is different from + /// in a way that takes tokenized text as input while tokenizes text internally. /// /// The column to apply to. /// Number of bits to hash into. Must be between 1 and 30, inclusive. @@ -585,7 +585,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. 
/// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static Vector ApplyNgramHashing(this VarVector> input, + public static Vector ProduceHashedNgrams(this VarVector> input, int numberOfBits = 16, int ngramLength = 2, int skipLength = 0, diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 9f18956af4..5cd6dbfb85 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -605,7 +605,7 @@ public void Ngrams() .Append(r => ( r.label, ngrams: r.text.TokenizeText().ToKey().ProduceNgrams(), - ngramshash: r.text.TokenizeText().ToKey().ApplyNgramHashing())); + ngramshash: r.text.TokenizeText().ToKey().ProduceHashedNgrams())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; From 390aa147f04797eab8c9fbd2a52f8938a2f47b7e Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 15:18:04 -0700 Subject: [PATCH 07/12] Address comments and handle WordBags and HashedWordBags --- .../Dynamic/NgramExtraction.cs | 2 +- .../TextStaticExtensions.cs | 24 +++---- .../Text/NgramHashingTransformer.cs | 5 +- .../Text/NgramTransform.cs | 24 +++---- .../Text/TextCatalog.cs | 64 +++---------------- .../Text/WrappedTextTransformers.cs | 18 +++--- .../StaticPipeTests.cs | 10 +-- .../Api/CookbookSamples/CookbookSamples.cs | 4 +- 8 files changed, 54 insertions(+), 97 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs index fa3c6317bf..d1f36d3731 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs @@ -61,7 +61,7 @@ public static void NgramTransform() // 'e' - 1 '' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1 // 'B' - 0 'e' - 6 's' - 3 't' - 6 '' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ... // Preview of the CharsTwoGrams column obtained after processing the input. - var charsTwoGramColumn = transformedData_twochars.GetColumn>(transformedData_onechars.Schema["CharsUnigrams"]); + var charsTwoGramColumn = transformedData_twochars.GetColumn>(transformedData_twochars.Schema["CharsTwograms"]); transformedData_twochars.Schema["CharsTwograms"].GetSlotNames(ref slotNames); printHelper("CharsTwograms", charsTwoGramColumn, slotNames); diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index e722c4f7d6..4d8644afc9 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -310,15 +310,15 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
- public static Vector ToBagofWords(this Scalar input, + public static Vector ProduceWordBags(this Scalar input, int ngramLength = 1, int skipLength = 0, bool allLengths = true, - int maxNumTerms = 10000000, + int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maxNumTerms, weighting); + => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting); } /// @@ -397,7 +397,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// It does so by hashing each ngram and using the hash value as the index in the bag. /// /// The column to apply to. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . @@ -407,14 +407,14 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static Vector ToBagofHashedWords(this Scalar input, - int hashBits = 16, + public static Vector ProduceHashedWordBags(this Scalar input, + int numberOfBits = 16, int ngramLength = 1, int skipLength = 0, bool allLengths = true, uint seed = 314489979, bool ordered = true, - int invertHash = 0) => new OutPipelineColumn(input, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); } /// @@ -485,8 +485,8 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Produces a bag of counts of ngrams (sequences of consecutive words ) in a given tokenized text. /// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. /// - /// /// is different from - /// in a way that takes tokenized text as input while tokenizes text internally. + /// /// is different from + /// in a way that takes tokenized text as input while tokenizes text internally. /// /// The column to apply to. /// Ngram length. @@ -571,8 +571,8 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given tokenized text. /// It does so by hashing each ngram and using the hash value as the index in the bag. /// - /// is different from - /// in a way that takes tokenized text as input while tokenizes text internally. + /// is different from + /// in a way that takes tokenized text as input while tokenizes text internally. /// /// The column to apply to. /// Number of bits to hash into. Must be between 1 and 30, inclusive. 
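A minimal usage sketch of the two renamed static-pipe extensions above, written in the same style as the test pipelines in this patch; `data` is assumed to be a statically-typed data view with `label` and `text` columns:

    // bagOfWords takes the dictionary-based path (maximumNgramsCount caps the dictionary size),
    // bagOfHashedWords takes the hashing path (numberOfBits sets the output dimensionality).
    var est = data.MakeNewEstimator()
        .Append(r => (
            r.label,
            bagOfWords: r.text.ProduceWordBags(ngramLength: 2, maximumNgramsCount: 100000),
            bagOfHashedWords: r.text.ProduceHashedWordBags(numberOfBits: 18, ngramLength: 2)));
    var transformed = est.Fit(data).Transform(data);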
diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 86bfdc3e87..2c7eae634d 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -945,11 +945,14 @@ public ColumnOptions(string name, if (invertHash != 0 && numberOfBits >= 31) throw Contracts.ExceptParam(nameof(numberOfBits), $"Cannot support invertHash for a {0} bit hash. 30 is the maximum possible.", numberOfBits); - if (NgramLength + SkipLength > NgramBufferBuilder.MaxSkipNgramLength) + if (ngramLength == 1 && skipLength != 0) + throw Contracts.ExceptUserArg(nameof(skipLength), $"Number of skips can only be zero when the maximum n-gram's length is one."); + if (ngramLength + skipLength > NgramBufferBuilder.MaxSkipNgramLength) { throw Contracts.ExceptUserArg(nameof(skipLength), $"The sum of skipLength and ngramLength must be less than or equal to {NgramBufferBuilder.MaxSkipNgramLength}"); } + FriendlyNames = null; Name = name; InputColumnNamesArray = inputColumnNames; diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 8e360e7123..7731a2dd62 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -846,20 +846,15 @@ internal ColumnOptions(string name, int[] maximumNgramsCounts, string inputColumnName = null) { - Name = name; - InputColumnName = inputColumnName ?? name; - NgramLength = ngramLength; - Contracts.CheckUserArg(0 < NgramLength && NgramLength <= NgramBufferBuilder.MaxSkipNgramLength, nameof(ngramLength)); - SkipLength = skipLength; - if (NgramLength + SkipLength > NgramBufferBuilder.MaxSkipNgramLength) - { + if (ngramLength == 1 && skipLength != 0) + throw Contracts.ExceptUserArg(nameof(skipLength), $"Number of skips can only be zero when the maximum n-gram's length is one."); + if (ngramLength + skipLength > NgramBufferBuilder.MaxSkipNgramLength) throw Contracts.ExceptUserArg(nameof(skipLength), $"The sum of skipLength and ngramLength must be less than or equal to {NgramBufferBuilder.MaxSkipNgramLength}"); - } - AllLengths = allLengths; - Weighting = weighting; + Contracts.CheckUserArg(0 < ngramLength && ngramLength <= NgramBufferBuilder.MaxSkipNgramLength, nameof(ngramLength)); + var limits = new int[ngramLength]; - if (!AllLengths) + if (!allLengths) { Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) == 0 || Utils.Size(maximumNgramsCounts) == 1 && maximumNgramsCounts[0] > 0, nameof(maximumNgramsCounts)); @@ -873,6 +868,13 @@ internal ColumnOptions(string name, limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maximumNgramsCounts) ? maximumNgramsCounts[i] : extend); } _maximumNgramsCounts = ImmutableArray.Create(limits); + + Name = name; + InputColumnName = inputColumnName ?? name; + NgramLength = ngramLength; + SkipLength = skipLength; + AllLengths = allLengths; + Weighting = weighting; } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 3a370c8739..486835c4a7 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -310,7 +310,7 @@ public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCa /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . 
- /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransforms catalog, string outputColumnName, @@ -318,10 +318,10 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumNgramsCount, + int maximumNgramsCount = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, ngramLength, skipLength, allLengths, maxNumTerms); + outputColumnName, inputColumnName, ngramLength, skipLength, allLengths, maximumNgramsCount); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -333,7 +333,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransforms catalog, string outputColumnName, @@ -341,30 +341,10 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumNgramsCount, + int maximumNgramsCount = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnNames, ngramLength, skipLength, allLengths, maxNumTerms, weighting); - - /// - /// Produces a bag of counts of ngrams (sequences of consecutive words) in - /// and outputs bag of word vector for each output in - /// - /// The text-related transform's catalog. - /// Pairs of columns to compute bag of word vector. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. - /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
- public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string[] inputColumnNames)[] columns, - int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, - int skipLength = NgramExtractingEstimator.Defaults.SkipLength, - bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maxNumTerms = NgramExtractingEstimator.Defaults.MaximumNgramsCount, - NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns, ngramLength, skipLength, allLengths, maxNumTerms, weighting); + outputColumnName, inputColumnNames, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting); /// /// Produces a bag of counts of hashed ngrams in @@ -426,34 +406,6 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnNames, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); - /// - /// Produces a bag of counts of hashed ngrams in - /// and outputs bag of word vector for each output in - /// - /// The text-related transform's catalog. - /// Pairs of columns to compute bag of word vector. - /// Number of bits to hash into. Must be between 1 and 30, inclusive. - /// Ngram length. - /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . - /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). - /// During hashing we constuct mappings between original values and the produced hash values. - /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. - /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. - /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog.TextTransforms catalog, - (string outputColumnName, string[] inputColumnNames)[] columns, - int numberOfBits = NgramHashExtractingTransformer.DefaultArguments.HashBits, - int ngramLength = NgramHashExtractingTransformer.DefaultArguments.NgramLength, - int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, - bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, - uint seed = NgramHashExtractingTransformer.DefaultArguments.Seed, - bool ordered = NgramHashExtractingTransformer.DefaultArguments.Ordered, - int invertHash = NgramHashExtractingTransformer.DefaultArguments.InvertHash) - => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - columns, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); - /// /// Produces a bag of counts of hashed ngrams in /// and outputs ngram vector as @@ -474,7 +426,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. 
/// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.TextTransforms catalog, + public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, @@ -496,7 +448,7 @@ public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.Tex /// /// The text-related transform's catalog. /// Pairs of columns to compute n-grams. Note that gram indices are generated by hashing. - public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.TextTransforms catalog, + public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, NgramHashingEstimator.ColumnOptions[] columns) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); diff --git a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs index cf8722495a..19f87ea3fc 100644 --- a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs +++ b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs @@ -36,7 +36,7 @@ public sealed class WordBagEstimator : IEstimator /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal WordBagEstimator(IHostEnvironment env, string outputColumnName, @@ -44,9 +44,9 @@ internal WordBagEstimator(IHostEnvironment env, int ngramLength = 1, int skipLength = 0, bool allLengths = true, - int maxNumTerms = 10000000, + int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, maxNumTerms, weighting) + : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting) { } @@ -60,7 +60,7 @@ internal WordBagEstimator(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
internal WordBagEstimator(IHostEnvironment env, string outputColumnName, @@ -68,9 +68,9 @@ internal WordBagEstimator(IHostEnvironment env, int ngramLength = 1, int skipLength = 0, bool allLengths = true, - int maxNumTerms = 10000000, + int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - : this(env, new[] { (outputColumnName, inputColumnNames) }, ngramLength, skipLength, allLengths, maxNumTerms, weighting) + : this(env, new[] { (outputColumnName, inputColumnNames) }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting) { } @@ -83,14 +83,14 @@ internal WordBagEstimator(IHostEnvironment env, /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of ngrams to store in the dictionary. + /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal WordBagEstimator(IHostEnvironment env, (string outputColumnName, string[] inputColumnNames)[] columns, int ngramLength = 1, int skipLength = 0, bool allLengths = true, - int maxNumTerms = 10000000, + int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) { Contracts.CheckValue(env, nameof(env)); @@ -106,7 +106,7 @@ internal WordBagEstimator(IHostEnvironment env, _ngramLength = ngramLength; _skipLength = skipLength; _allLengths = allLengths; - _maxNumTerms = maxNumTerms; + _maxNumTerms = maximumNgramsCount; _weighting = weighting; } diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 5cd6dbfb85..926e770187 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -575,8 +575,8 @@ public void ConvertToWordBag() var est = data.MakeNewEstimator() .Append(r => ( r.label, - bagofword: r.text.ToBagofWords(), - bagofhashedword: r.text.ToBagofHashedWords())); + bagofword: r.text.ProduceWordBags(), + bagofhashedword: r.text.ProduceHashedWordBags())); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; @@ -675,7 +675,7 @@ public void LdaTopicModel() var est = data.MakeNewEstimator() .Append(r => ( r.label, - topics: r.text.ToBagofWords().LatentDirichletAllocation(numberOfTopics: 3, numberOfSummaryTermsPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary))); + topics: r.text.ProduceWordBags().LatentDirichletAllocation(numberOfTopics: 3, numberOfSummaryTermsPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary))); var transformer = est.Fit(data); var tdata = transformer.Transform(data); @@ -700,8 +700,8 @@ public void FeatureSelection() var est = data.MakeNewEstimator() .Append(r => ( r.label, - bag_of_words_count: r.text.ToBagofWords().SelectFeaturesBasedOnCount(10), - bag_of_words_mi: r.text.ToBagofWords().SelectFeaturesBasedOnMutualInformation(r.label))); + bag_of_words_count: r.text.ProduceWordBags().SelectFeaturesBasedOnCount(10), + bag_of_words_mi: r.text.ProduceWordBags().SelectFeaturesBasedOnMutualInformation(r.label))); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs 
b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index 64d81361d4..0261377a49 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -461,10 +461,10 @@ private void TextFeaturizationOn(string dataPath) TextFeatures: r.Message.FeaturizeText(), // NLP pipeline 1: bag of words. - BagOfWords: r.Message.NormalizeText().ToBagofWords(), + BagOfWords: r.Message.NormalizeText().ProduceWordBags(), // NLP pipeline 2: bag of bigrams, using hashes instead of dictionary indices. - BagOfBigrams: r.Message.NormalizeText().ToBagofHashedWords(ngramLength: 2, allLengths: false), + BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, allLengths: false), // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. BagOfTrichar: r.Message.TokenizeIntoCharacters().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf), From b60d6a496dccf8db05c8530bddadaa8a7a1d76c6 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Mon, 11 Mar 2019 16:37:18 -0700 Subject: [PATCH 08/12] Address comment --- src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs | 3 ++- src/Microsoft.ML.Transforms/Text/NgramTransform.cs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 2c7eae634d..d0fb5e5a1f 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -946,7 +946,8 @@ public ColumnOptions(string name, throw Contracts.ExceptParam(nameof(numberOfBits), $"Cannot support invertHash for a {0} bit hash. 
30 is the maximum possible.", numberOfBits); if (ngramLength == 1 && skipLength != 0) - throw Contracts.ExceptUserArg(nameof(skipLength), $"Number of skips can only be zero when the maximum n-gram's length is one."); + throw Contracts.ExceptUserArg(nameof(skipLength), string.Format( + "{0} (actual value: {1}) can only be zero when {2} set to one.", nameof(skipLength), skipLength, nameof(ngramLength))); if (ngramLength + skipLength > NgramBufferBuilder.MaxSkipNgramLength) { throw Contracts.ExceptUserArg(nameof(skipLength), diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 7731a2dd62..8bfad84278 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -847,7 +847,8 @@ internal ColumnOptions(string name, string inputColumnName = null) { if (ngramLength == 1 && skipLength != 0) - throw Contracts.ExceptUserArg(nameof(skipLength), $"Number of skips can only be zero when the maximum n-gram's length is one."); + throw Contracts.ExceptUserArg(nameof(skipLength), string.Format( + "{0} (actual value: {1}) can only be zero when {2} set to one.", nameof(skipLength), skipLength, nameof(ngramLength))); if (ngramLength + skipLength > NgramBufferBuilder.MaxSkipNgramLength) throw Contracts.ExceptUserArg(nameof(skipLength), $"The sum of skipLength and ngramLength must be less than or equal to {NgramBufferBuilder.MaxSkipNgramLength}"); From 97469bc54bd2a9112f51951f3feee3c3241bf375 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 12 Mar 2019 16:42:20 -0700 Subject: [PATCH 09/12] ordered ---> useOrderedHashing --- .../TextStaticExtensions.cs | 40 +++++++++---------- .../Text/NgramHashingTransformer.cs | 26 ++++++------ .../Text/TextCatalog.cs | 19 +++++---- .../Text/WrappedTextTransformers.cs | 18 ++++----- 4 files changed, 51 insertions(+), 52 deletions(-) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index 4d8644afc9..babd909ae4 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -336,9 +336,9 @@ public OutPipelineColumn(Scalar input, int skipLength, bool allLengths, uint seed, - bool ordered, + bool useOrderedHashing, int invertHash) - : base(new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input) + : base(new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash), input) { Input = input; } @@ -351,17 +351,17 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable private readonly int _skipLength; private readonly bool _allLengths; private readonly uint _seed; - private readonly bool _ordered; + private readonly bool _useOrderedHashing; private readonly int _invertHash; - public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) + public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int invertHash) { _hashBits = hashBits; _ngramLength = ngramLength; _skipLength = skipLength; _allLengths = allLengths; _seed = seed; - _ordered = ordered; + _useOrderedHashing = useOrderedHashing; _invertHash = invertHash; } @@ -372,7 +372,7 @@ public bool Equals(Reconciler other) _skipLength == other._skipLength && _allLengths == other._allLengths && _seed == other._seed && - _ordered == other._ordered && + 
_useOrderedHashing == other._useOrderedHashing && _invertHash == other._invertHash; } @@ -388,7 +388,7 @@ public override IEstimator Reconcile(IHostEnvironment env, foreach (var outCol in toOutput) pairs.Add((outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] })); - return new WordHashBagEstimator(env, pairs.ToArray(), _hashBits, _ngramLength, _skipLength, _allLengths, _seed, _ordered, _invertHash); + return new WordHashBagEstimator(env, pairs.ToArray(), _hashBits, _ngramLength, _skipLength, _allLengths, _seed, _useOrderedHashing, _invertHash); } } @@ -402,7 +402,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -413,8 +413,8 @@ public static Vector ProduceHashedWordBags(this Scalar input, int skipLength = 0, bool allLengths = true, uint seed = 314489979, - bool ordered = true, - int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + bool useOrderedHashing = true, + int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash); } /// @@ -512,8 +512,8 @@ private sealed class OutPipelineColumn : Vector { public readonly VarVector> Input; - public OutPipelineColumn(VarVector> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) - : base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input) + public OutPipelineColumn(VarVector> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int invertHash) + : base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash), input) { Input = input; } @@ -526,17 +526,17 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable private readonly int _skipLength; private readonly bool _allLengths; private readonly uint _seed; - private readonly bool _ordered; + private readonly bool _useOrderedHashing; private readonly int _invertHash; - public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash) + public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int invertHash) { _numberOfBits = numberOfBits; _ngramLength = ngramLength; _skipLength = skipLength; _allLengths = allLengths; _seed = seed; - _ordered = ordered; + _useOrderedHashing = useOrderedHashing; _invertHash = invertHash; } @@ -547,7 +547,7 @@ public bool Equals(Reconciler other) _skipLength == other._skipLength && _allLengths == other._allLengths && _seed == other._seed && - _ordered == other._ordered && + 
_useOrderedHashing == other._useOrderedHashing && _invertHash == other._invertHash; } @@ -561,7 +561,7 @@ public override IEstimator Reconcile(IHostEnvironment env, var columns = new List(); foreach (var outCol in toOutput) columns.Add(new NgramHashingEstimator.ColumnOptions(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] }, - _ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _ordered, _invertHash)); + _ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _useOrderedHashing, _invertHash)); return new NgramHashingEstimator(env, columns.ToArray()); } @@ -580,7 +580,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -591,7 +591,7 @@ public static Vector ProduceHashedNgrams(this VarVector int skipLength = 0, bool allLengths = true, uint seed = 314489979, - bool ordered = true, - int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + bool useOrderedHashing = true, + int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash); } } diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index d0fb5e5a1f..9acd8ccc4c 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -145,7 +145,7 @@ internal sealed class Options [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each source column should be included in the hash (when there are multiple source columns).", ShortName = "ord", SortOrder = 6)] - public bool Ordered = NgramHashingEstimator.Defaults.Ordered; + public bool Ordered = NgramHashingEstimator.Defaults.UseOrderedHashing; [Argument(ArgumentType.AtMostOnce, HelpText = "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.", ShortName = "ih")] @@ -417,7 +417,7 @@ private NgramIdFinder GetNgramIdFinder(int iinfo) uint mask = (1U << _parent._columns[iinfo].NumberOfBits) - 1; int ngramLength = _parent._columns[iinfo].NgramLength; bool rehash = _parent._columns[iinfo].RehashUnigrams; - bool ordered = _parent._columns[iinfo].Ordered; + bool ordered = _parent._columns[iinfo].UseOrderedHashing; bool all = _parent._columns[iinfo].AllLengths; uint seed = _parent._columns[iinfo].Seed; @@ -891,7 +891,7 @@ public sealed class ColumnOptions /// Hashing seed. public readonly uint Seed; /// Whether the position of each term should be included in the hash. 
- public readonly bool Ordered; + public readonly bool UseOrderedHashing; /// /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column. @@ -916,7 +916,7 @@ public sealed class ColumnOptions /// Whether to store all ngram lengths up to , or only . /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Hashing seed. - /// Whether the position of each term should be included in the hash. + /// Whether the position of each term should be included in the hash. /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column. /// Hashing, as such, can map many initial values to one. @@ -930,7 +930,7 @@ public ColumnOptions(string name, bool allLengths = NgramHashingEstimator.Defaults.AllLengths, int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, uint seed = NgramHashingEstimator.Defaults.Seed, - bool ordered = NgramHashingEstimator.Defaults.Ordered, + bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, int invertHash = NgramHashingEstimator.Defaults.InvertHash, bool rehashUnigrams = NgramHashingEstimator.Defaults.RehashUnigrams) { @@ -962,7 +962,7 @@ public ColumnOptions(string name, AllLengths = allLengths; NumberOfBits = numberOfBits; Seed = seed; - Ordered = ordered; + UseOrderedHashing = useOrderedHashing; InvertHash = invertHash; RehashUnigrams = rehashUnigrams; } @@ -996,7 +996,7 @@ internal ColumnOptions(ModelLoadContext ctx) Contracts.CheckDecode(1 <= NumberOfBits && NumberOfBits <= 30); Seed = ctx.Reader.ReadUInt32(); RehashUnigrams = ctx.Reader.ReadBoolByte(); - Ordered = ctx.Reader.ReadBoolByte(); + UseOrderedHashing = ctx.Reader.ReadBoolByte(); AllLengths = ctx.Reader.ReadBoolByte(); } @@ -1026,7 +1026,7 @@ internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNa Contracts.CheckDecode(1 <= NumberOfBits && NumberOfBits <= 30); Seed = ctx.Reader.ReadUInt32(); RehashUnigrams = ctx.Reader.ReadBoolByte(); - Ordered = ctx.Reader.ReadBoolByte(); + UseOrderedHashing = ctx.Reader.ReadBoolByte(); AllLengths = ctx.Reader.ReadBoolByte(); } @@ -1060,7 +1060,7 @@ internal void Save(ModelSaveContext ctx) ctx.Writer.Write(NumberOfBits); ctx.Writer.Write(Seed); ctx.Writer.WriteBoolByte(RehashUnigrams); - ctx.Writer.WriteBoolByte(Ordered); + ctx.Writer.WriteBoolByte(UseOrderedHashing); ctx.Writer.WriteBoolByte(AllLengths); } } @@ -1073,7 +1073,7 @@ internal static class Defaults internal const int NumberOfBits = 16; internal const uint Seed = 314489979; internal const bool RehashUnigrams = false; - internal const bool Ordered = true; + internal const bool UseOrderedHashing = true; internal const int InvertHash = 0; } @@ -1095,7 +1095,7 @@ internal static class Defaults /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. 
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -1108,9 +1108,9 @@ internal NgramHashingEstimator(IHostEnvironment env, int skipLength = 0, bool allLengths = true, uint seed = 314489979, - bool ordered = true, + bool useOrderedHashing = true, int invertHash = 0) - : this(env, new ColumnOptions(outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, numberOfBits, seed, ordered, invertHash)) + : this(env, new ColumnOptions(outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, numberOfBits, seed, useOrderedHashing, invertHash)) { } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 486835c4a7..2f3c3b0603 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System.Collections.Generic; using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms.Text; @@ -358,7 +357,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -371,10 +370,10 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, uint seed = NgramHashExtractingTransformer.DefaultArguments.Seed, - bool ordered = NgramHashExtractingTransformer.DefaultArguments.Ordered, + bool useOrderedHashing = NgramHashExtractingTransformer.DefaultArguments.Ordered, int invertHash = NgramHashExtractingTransformer.DefaultArguments.InvertHash) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + outputColumnName, inputColumnName, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash); /// /// Produces a bag of counts of hashed ngrams in @@ -388,7 +387,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). 
+ /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -401,10 +400,10 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, uint seed = NgramHashExtractingTransformer.DefaultArguments.Seed, - bool ordered = NgramHashExtractingTransformer.DefaultArguments.Ordered, + bool useOrderedHashing = NgramHashExtractingTransformer.DefaultArguments.Ordered, int invertHash = NgramHashExtractingTransformer.DefaultArguments.InvertHash) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnNames, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + outputColumnName, inputColumnNames, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash); /// /// Produces a bag of counts of hashed ngrams in @@ -421,7 +420,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -434,10 +433,10 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T int skipLength = NgramHashingEstimator.Defaults.SkipLength, bool allLengths = NgramHashingEstimator.Defaults.AllLengths, uint seed = NgramHashingEstimator.Defaults.Seed, - bool ordered = NgramHashingEstimator.Defaults.Ordered, + bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, int invertHash = NgramHashingEstimator.Defaults.InvertHash) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash); + outputColumnName, inputColumnName, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash); /// /// Produces a bag of counts of hashed ngrams for each . 
For each column, diff --git a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs index 19f87ea3fc..bd7df9abc0 100644 --- a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs +++ b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs @@ -169,7 +169,7 @@ public sealed class WordHashBagEstimator : IEstimator /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -182,9 +182,9 @@ internal WordHashBagEstimator(IHostEnvironment env, int skipLength = 0, bool allLengths = true, uint seed = 314489979, - bool ordered = true, + bool useOrderedHashing = true, int invertHash = 0) - : this(env, new[] { (outputColumnName, new[] { inputColumnName ?? outputColumnName }) }, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash) + : this(env, new[] { (outputColumnName, new[] { inputColumnName ?? outputColumnName }) }, hashBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash) { } @@ -200,7 +200,7 @@ internal WordHashBagEstimator(IHostEnvironment env, /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -213,9 +213,9 @@ internal WordHashBagEstimator(IHostEnvironment env, int skipLength = 0, bool allLengths = true, uint seed = 314489979, - bool ordered = true, + bool useOrderedHashing = true, int invertHash = 0) - : this(env, new[] { (outputColumnName, inputColumnNames) }, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash) + : this(env, new[] { (outputColumnName, inputColumnNames) }, hashBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash) { } @@ -230,7 +230,7 @@ internal WordHashBagEstimator(IHostEnvironment env, /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . /// Hashing seed. - /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). 
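ProduceHashedNgrams, renamed in the same way in the hunk above, operates on a vector of keys rather than raw text, so a sketch of its use needs an upstream tokenization and key-mapping step. The TokenizeWords plus MapValueToKey prefix below is an assumption about how such a pipeline is typically assembled, and the column names are hypothetical.

    var mlContext = new MLContext();
    var pipeline = mlContext.Transforms.Text.TokenizeWords("Tokens", "ReviewText")
        .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
        .Append(mlContext.Transforms.Text.ProduceHashedNgrams(
            "NgramHash", "Tokens",
            numberOfBits: 16,
            ngramLength: 2,
            useAllLengths: false,            // keep only bigrams
            useOrderedHashing: true,
            maximumNumberOfInverts: 1));     // retain one original value per hash slot for slot names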
/// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. @@ -242,7 +242,7 @@ internal WordHashBagEstimator(IHostEnvironment env, int skipLength = 0, bool allLengths = true, uint seed = 314489979, - bool ordered = true, + bool useOrderedHashing = true, int invertHash = 0) { Contracts.CheckValue(env, nameof(env)); @@ -260,7 +260,7 @@ internal WordHashBagEstimator(IHostEnvironment env, _skipLength = skipLength; _allLengths = allLengths; _seed = seed; - _ordered = ordered; + _ordered = useOrderedHashing; _invertHash = invertHash; } From d1d2e66020ecd035b6c0b2ecb2996fdafe561ac9 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 12 Mar 2019 22:53:45 -0700 Subject: [PATCH 10/12] Fix a name --- src/Microsoft.ML.Transforms/Text/TextCatalog.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 1f5b129af4..8eec4ef097 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -193,7 +193,7 @@ public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextT /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. /// Whether to include all ngram lengths up to or only . - /// Maximum number of n-grams to store in the dictionary. + /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
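The useOrderedHashing remark about multiple source columns is easiest to see on the overload that takes several input columns. A hedged sketch, with hypothetical Title and Body columns:

    var mlContext = new MLContext();
    var titleBodyBag = mlContext.Transforms.Text.ProduceHashedWordBags(
        "TitleBodyBag",
        new[] { "Title", "Body" },
        numberOfBits: 18,
        ngramLength: 1,
        useOrderedHashing: true);   // folds the source-column position into the hash, so the same
                                    // word coming from Title and from Body lands in different slots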
/// /// @@ -208,10 +208,10 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, - int maximumNgramsCounts = NgramExtractingEstimator.Defaults.MaximumNgramsCount, + int maximumNgramsCount = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, - ngramLength, skipLength, allLengths, maximumNgramsCounts, weighting); + ngramLength, skipLength, allLengths, maximumNgramsCount, weighting); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in From fa57beab542b7c7edcaee0a2fd58b273b187218e Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Tue, 12 Mar 2019 23:28:21 -0700 Subject: [PATCH 11/12] Deal with most allLengths --- docs/code/MlNetCookBook.md | 2 +- .../TextStaticExtensions.cs | 32 ++++++------- .../Text/NgramHashingTransformer.cs | 42 ++++++++-------- .../Text/NgramTransform.cs | 40 ++++++++-------- .../Text/TextCatalog.cs | 36 +++++++------- .../Text/TextFeaturizingEstimator.cs | 6 +-- .../Text/WordBagTransform.cs | 25 +++++----- .../Text/WordHashBagProducingTransform.cs | 18 +++---- .../Text/WrappedTextTransformers.cs | 48 +++++++++---------- .../DataTransformation.cs | 2 +- .../CookbookSamplesDynamicApi.cs | 2 +- 11 files changed, 127 insertions(+), 126 deletions(-) diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md index 8edb9a626a..7a73a1178a 100644 --- a/docs/code/MlNetCookBook.md +++ b/docs/code/MlNetCookBook.md @@ -772,7 +772,7 @@ var pipeline = // NLP pipeline 2: bag of bigrams, using hashes instead of dictionary indices. .Append(new WordHashBagEstimator(mlContext, "BagOfBigrams","NormalizedMessage", - ngramLength: 2, allLengths: false)) + ngramLength: 2, useAllLengths: false)) // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting. 
.Append(mlContext.Transforms.Text.TokenizeCharacters("MessageChars", "Message")) diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs index ed18a6a53e..4d597837f8 100644 --- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs @@ -263,7 +263,7 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable { private readonly int _ngramLength; private readonly int _skipLength; - private readonly bool _allLengths; + private readonly bool _useAllLengths; private readonly int _maxNumTerms; private readonly NgramExtractingEstimator.WeightingCriteria _weighting; @@ -271,7 +271,7 @@ public Reconciler(int ngramLength, int skipLength, bool allLengths, int maxNumTe { _ngramLength = ngramLength; _skipLength = skipLength; - _allLengths = allLengths; + _useAllLengths = allLengths; _maxNumTerms = maxNumTerms; _weighting = weighting; @@ -281,7 +281,7 @@ public bool Equals(Reconciler other) { return _ngramLength == other._ngramLength && _skipLength == other._skipLength && - _allLengths == other._allLengths && + _useAllLengths == other._useAllLengths && _maxNumTerms == other._maxNumTerms && _weighting == other._weighting; } @@ -298,7 +298,7 @@ public override IEstimator Reconcile(IHostEnvironment env, foreach (var outCol in toOutput) pairs.Add((outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] })); - return new WordBagEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _allLengths, _maxNumTerms, _weighting); + return new WordBagEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _useAllLengths, _maxNumTerms, _weighting); } } @@ -349,7 +349,7 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable private readonly int _numberOfBits; private readonly int _ngramLength; private readonly int _skipLength; - private readonly bool _allLengths; + private readonly bool _useAllLengths; private readonly uint _seed; private readonly bool _useOrderedHashing; private readonly int _maximumNumberOfInverts; @@ -359,7 +359,7 @@ public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLen _numberOfBits = numberOfBits; _ngramLength = ngramLength; _skipLength = skipLength; - _allLengths = allLengths; + _useAllLengths = allLengths; _seed = seed; _useOrderedHashing = useOrderedHashing; _maximumNumberOfInverts = maximumNumberOfInverts; @@ -370,7 +370,7 @@ public bool Equals(Reconciler other) return _numberOfBits == other._numberOfBits && _ngramLength == other._ngramLength && _skipLength == other._skipLength && - _allLengths == other._allLengths && + _useAllLengths == other._useAllLengths && _seed == other._seed && _useOrderedHashing == other._useOrderedHashing && _maximumNumberOfInverts == other._maximumNumberOfInverts; @@ -388,7 +388,7 @@ public override IEstimator Reconcile(IHostEnvironment env, foreach (var outCol in toOutput) pairs.Add((outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] })); - return new WordHashBagEstimator(env, pairs.ToArray(), _numberOfBits, _ngramLength, _skipLength, _allLengths, _seed, _useOrderedHashing, _maximumNumberOfInverts); + return new WordHashBagEstimator(env, pairs.ToArray(), _numberOfBits, _ngramLength, _skipLength, _useAllLengths, _seed, _useOrderedHashing, _maximumNumberOfInverts); } } @@ -442,7 +442,7 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable { private readonly int _ngramLength; private readonly int _skipLength; - private readonly bool 
_allLengths; + private readonly bool _useAllLengths; private readonly int _maxNgramsCount; private readonly NgramExtractingEstimator.WeightingCriteria _weighting; @@ -450,7 +450,7 @@ public Reconciler(int ngramLength, int skipLength, bool allLengths, int maxNumTe { _ngramLength = ngramLength; _skipLength = skipLength; - _allLengths = allLengths; + _useAllLengths = allLengths; _maxNgramsCount = maxNumTerms; _weighting = weighting; @@ -460,7 +460,7 @@ public bool Equals(Reconciler other) { return _ngramLength == other._ngramLength && _skipLength == other._skipLength && - _allLengths == other._allLengths && + _useAllLengths == other._useAllLengths && _maxNgramsCount == other._maxNgramsCount && _weighting == other._weighting; } @@ -477,7 +477,7 @@ public override IEstimator Reconcile(IHostEnvironment env, foreach (var outCol in toOutput) pairs.Add((outputNames[outCol], inputNames[((OutPipelineColumn)outCol).Input])); - return new NgramExtractingEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _allLengths, _maxNgramsCount, _weighting); + return new NgramExtractingEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _useAllLengths, _maxNgramsCount, _weighting); } } @@ -524,7 +524,7 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable private readonly int _numberOfBits; private readonly int _ngramLength; private readonly int _skipLength; - private readonly bool _allLengths; + private readonly bool _useAllLengths; private readonly uint _seed; private readonly bool _useOrderedHashing; private readonly int _maximumNumberOfInverts; @@ -534,7 +534,7 @@ public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLen _numberOfBits = numberOfBits; _ngramLength = ngramLength; _skipLength = skipLength; - _allLengths = allLengths; + _useAllLengths = allLengths; _seed = seed; _useOrderedHashing = useOrderedHashing; _maximumNumberOfInverts = maximumNumberOfInverts; @@ -545,7 +545,7 @@ public bool Equals(Reconciler other) return _numberOfBits == other._numberOfBits && _ngramLength == other._ngramLength && _skipLength == other._skipLength && - _allLengths == other._allLengths && + _useAllLengths == other._useAllLengths && _seed == other._seed && _useOrderedHashing == other._useOrderedHashing && _maximumNumberOfInverts == other._maximumNumberOfInverts; @@ -561,7 +561,7 @@ public override IEstimator Reconcile(IHostEnvironment env, var columns = new List(); foreach (var outCol in toOutput) columns.Add(new NgramHashingEstimator.ColumnOptions(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] }, - _ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _useOrderedHashing, _maximumNumberOfInverts)); + _ngramLength, _skipLength, _useAllLengths, _numberOfBits, _seed, _useOrderedHashing, _maximumNumberOfInverts)); return new NgramHashingEstimator(env, columns.ToArray()); } diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 8de00d8106..21337758ca 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -42,8 +42,8 @@ internal sealed class Column : ManyToOneColumn public int? NgramLength; [Argument(ArgumentType.AtMostOnce, HelpText = - "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), ShortName = "all")] - public bool? 
AllLengths; + "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), Name = "AllLengths", ShortName = "all")] + public bool? UseAllLengths; [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of tokens to skip when constructing an ngram", @@ -98,7 +98,7 @@ private protected override bool TryParse(string str) internal bool TryUnparse(StringBuilder sb) { Contracts.AssertValue(sb); - if (NgramLength != null || AllLengths != null || SkipLength != null || Seed != null || + if (NgramLength != null || UseAllLengths != null || SkipLength != null || Seed != null || RehashUnigrams != null || Ordered != null || MaximumNumberOfInverts != null) { return false; @@ -123,8 +123,8 @@ internal sealed class Options [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), - ShortName = "all", SortOrder = 4)] - public bool AllLengths = NgramHashingEstimator.Defaults.AllLengths; + Name = "AllLengths", ShortName = "all", SortOrder = 4)] + public bool UseAllLengths = NgramHashingEstimator.Defaults.UseAllLengths; [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of tokens to skip when constructing an ngram", @@ -352,7 +352,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat item.Source ?? new string[] { item.Name }, item.NgramLength ?? options.NgramLength, item.SkipLength ?? options.SkipLength, - item.AllLengths ?? options.AllLengths, + item.UseAllLengths ?? options.UseAllLengths, item.NumberOfBits ?? options.NumberOfBits, item.Seed ?? options.Seed, item.Ordered ?? options.Ordered, @@ -418,7 +418,7 @@ private NgramIdFinder GetNgramIdFinder(int iinfo) int ngramLength = _parent._columns[iinfo].NgramLength; bool rehash = _parent._columns[iinfo].RehashUnigrams; bool ordered = _parent._columns[iinfo].UseOrderedHashing; - bool all = _parent._columns[iinfo].AllLengths; + bool all = _parent._columns[iinfo].UseAllLengths; uint seed = _parent._columns[iinfo].Seed; // REVIEW: Consider the case when: @@ -885,7 +885,7 @@ public sealed class ColumnOptions /// Maximum number of tokens to skip when constructing an ngram. public readonly int SkipLength; /// Whether to store all ngram lengths up to , or only . - public readonly bool AllLengths; + public readonly bool UseAllLengths; /// Number of bits to hash into. Must be between 1 and 31, inclusive. public readonly int NumberOfBits; /// Hashing seed. @@ -913,7 +913,7 @@ public sealed class ColumnOptions /// Names of the columns to transform. /// Maximum ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to store all ngram lengths up to , or only . + /// Whether to store all ngram lengths up to , or only . /// Number of bits to hash into. Must be between 1 and 31, inclusive. /// Hashing seed. /// Whether the position of each term should be included in the hash. 
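For readers skimming the rename, the semantics of UseAllLengths (previously AllLengths) are unchanged: it controls whether every ngram length up to NgramLength is emitted, or only NgramLength itself. A small worked example, with an arbitrary separator in the comments:

    // Token sequence: ["the", "quick", "brown"], ngramLength = 2, skipLength = 0
    //   useAllLengths = true  -> the, quick, brown, the|quick, quick|brown
    //   useAllLengths = false -> the|quick, quick|brown
    // Raising skipLength to 1 additionally admits the skip-bigram the|brown.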
@@ -927,7 +927,7 @@ public ColumnOptions(string name, string[] inputColumnNames, int ngramLength = NgramHashingEstimator.Defaults.NgramLength, int skipLength = NgramHashingEstimator.Defaults.SkipLength, - bool allLengths = NgramHashingEstimator.Defaults.AllLengths, + bool useAllLengths = NgramHashingEstimator.Defaults.UseAllLengths, int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, uint seed = NgramHashingEstimator.Defaults.Seed, bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, @@ -959,7 +959,7 @@ public ColumnOptions(string name, InputColumnNamesArray = inputColumnNames; NgramLength = ngramLength; SkipLength = skipLength; - AllLengths = allLengths; + UseAllLengths = useAllLengths; NumberOfBits = numberOfBits; Seed = seed; UseOrderedHashing = useOrderedHashing; @@ -997,7 +997,7 @@ internal ColumnOptions(ModelLoadContext ctx) Seed = ctx.Reader.ReadUInt32(); RehashUnigrams = ctx.Reader.ReadBoolByte(); UseOrderedHashing = ctx.Reader.ReadBoolByte(); - AllLengths = ctx.Reader.ReadBoolByte(); + UseAllLengths = ctx.Reader.ReadBoolByte(); } internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNames) @@ -1027,7 +1027,7 @@ internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNa Seed = ctx.Reader.ReadUInt32(); RehashUnigrams = ctx.Reader.ReadBoolByte(); UseOrderedHashing = ctx.Reader.ReadBoolByte(); - AllLengths = ctx.Reader.ReadBoolByte(); + UseAllLengths = ctx.Reader.ReadBoolByte(); } internal void Save(ModelSaveContext ctx) @@ -1061,14 +1061,14 @@ internal void Save(ModelSaveContext ctx) ctx.Writer.Write(Seed); ctx.Writer.WriteBoolByte(RehashUnigrams); ctx.Writer.WriteBoolByte(UseOrderedHashing); - ctx.Writer.WriteBoolByte(AllLengths); + ctx.Writer.WriteBoolByte(UseAllLengths); } } internal static class Defaults { internal const int NgramLength = 2; - internal const bool AllLengths = true; + internal const bool UseAllLengths = true; internal const int SkipLength = 0; internal const int NumberOfBits = 16; internal const uint Seed = 314489979; @@ -1093,7 +1093,7 @@ internal static class Defaults /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. @@ -1106,11 +1106,11 @@ internal NgramHashingEstimator(IHostEnvironment env, int numberOfBits = 16, int ngramLength = 2, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, uint seed = 314489979, bool useOrderedHashing = true, int maximumNumberOfInverts = 0) - : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, maximumNumberOfInverts) + : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts) { } @@ -1127,7 +1127,7 @@ internal NgramHashingEstimator(IHostEnvironment env, /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . 
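The per-column form of the same options can be sketched as below; only parameters visible in the hunk above are spelled out, the column names are hypothetical, and the remaining arguments are left at their defaults. End users would normally reach this through ProduceHashedNgrams rather than building the options directly.

    var column = new NgramHashingEstimator.ColumnOptions(
        name: "NgramHash",
        inputColumnNames: new[] { "Tokens" },
        ngramLength: 2,
        skipLength: 0,
        useAllLengths: true,
        numberOfBits: 16,                 // output vector has 1 << 16 slots
        useOrderedHashing: true);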
+ /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. @@ -1140,11 +1140,11 @@ internal NgramHashingEstimator(IHostEnvironment env, int numberOfBits = 16, int ngramLength = 2, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, uint seed = 314489979, bool useOrderedHashing = true, int maximumNumberOfInverts = 0) - : this(env, new ColumnOptions(outputColumnName, inputColumnNames, ngramLength, skipLength, allLengths, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts)) + : this(env, new ColumnOptions(outputColumnName, inputColumnNames, ngramLength, skipLength, useAllLengths, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts)) { } diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 8bfad84278..6e0cfb35f3 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -42,8 +42,8 @@ internal sealed class Column : OneToOneColumn public int? NgramLength; [Argument(ArgumentType.AtMostOnce, HelpText = - "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), ShortName = "all")] - public bool? AllLengths; + "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), Name = "AllLengths", ShortName = "all")] + public bool? UseAllLengths; [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of tokens to skip when constructing an ngram", @@ -69,7 +69,7 @@ internal static Column Parse(string str) internal bool TryUnparse(StringBuilder sb) { Contracts.AssertValue(sb); - if (NgramLength != null || AllLengths != null || SkipLength != null || Utils.Size(MaxNumTerms) != 0) + if (NgramLength != null || UseAllLengths != null || SkipLength != null || Utils.Size(MaxNumTerms) != 0) return false; return TryUnparseCore(sb); } @@ -84,8 +84,8 @@ internal sealed class Options : TransformInputBase public int NgramLength = NgramExtractingEstimator.Defaults.NgramLength; [Argument(ArgumentType.AtMostOnce, HelpText = - "Whether to store all ngram lengths up to ngramLength, or only ngramLength", ShortName = "all")] - public bool AllLengths = NgramExtractingEstimator.Defaults.AllLengths; + "Whether to store all ngram lengths up to ngramLength, or only ngramLength", Name = "AllLengths", ShortName = "all")] + public bool UseAllLengths = NgramExtractingEstimator.Defaults.UseAllLengths; [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of tokens to skip when constructing an ngram", @@ -424,7 +424,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa item.Name, item.NgramLength ?? options.NgramLength, item.SkipLength ?? options.SkipLength, - item.AllLengths ?? options.AllLengths, + item.UseAllLengths ?? options.UseAllLengths, item.Weighting ?? options.Weighting, maxNumTerms, item.Source ?? 
item.Name); @@ -693,7 +693,7 @@ public enum WeightingCriteria internal static class Defaults { public const int NgramLength = 2; - public const bool AllLengths = true; + public const bool UseAllLengths = true; public const int SkipLength = 0; public const int MaximumNgramsCount = 10000000; public const WeightingCriteria Weighting = WeightingCriteria.Tf; @@ -711,17 +711,17 @@ internal static class Defaults /// Name of the column to transform. If set to , the value of the will be used as source. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal NgramExtractingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, - bool allLengths = Defaults.AllLengths, + bool useAllLengths = Defaults.UseAllLengths, int maximumNgramsCount = Defaults.MaximumNgramsCount, WeightingCriteria weighting = Defaults.Weighting) - : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting) + : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting) { } @@ -733,17 +733,17 @@ internal NgramExtractingEstimator(IHostEnvironment env, /// Pairs of columns to compute bag of word vector. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal NgramExtractingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, - bool allLengths = Defaults.AllLengths, + bool useAllLengths = Defaults.UseAllLengths, int maximumNgramsCount = Defaults.MaximumNgramsCount, WeightingCriteria weighting = Defaults.Weighting) - : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maximumNgramsCount)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, useAllLengths, weighting, maximumNgramsCount)).ToArray()) { } @@ -805,7 +805,7 @@ public sealed class ColumnOptions /// Maximum number of tokens to skip when constructing an ngram. public readonly int SkipLength; /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. - public readonly bool AllLengths; + public readonly bool UseAllLengths; /// The weighting criteria. public readonly WeightingCriteria Weighting; /// @@ -825,23 +825,23 @@ public sealed class ColumnOptions /// Name of column to transform. If set to , the value of the will be used as source. /// Maximum ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. + /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. 
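The dictionary-based counterpart, NgramExtractingEstimator, is exposed through ProduceNgrams with the same renames (useAllLengths, maximumNgramsCount). A sketch of a pipeline using it follows; the tokenize-then-MapValueToKey prefix and the column names are assumptions, since ProduceNgrams consumes a vector of keys.

    var mlContext = new MLContext();
    var ngrams = mlContext.Transforms.Text.TokenizeWords("Tokens", "ReviewText")
        .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
        .Append(mlContext.Transforms.Text.ProduceNgrams(
            "Ngrams", "Tokens",
            ngramLength: 3,
            skipLength: 0,
            useAllLengths: true,                                           // uni-, bi- and trigrams
            maximumNgramsCount: 10000000,
            weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf));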
/// The weighting criteria. /// Maximum number of n-grams to store in the dictionary. public ColumnOptions(string name, string inputColumnName = null, int ngramLength = Defaults.NgramLength, int skipLength = Defaults.SkipLength, - bool allLengths = Defaults.AllLengths, + bool useAllLengths = Defaults.UseAllLengths, WeightingCriteria weighting = Defaults.Weighting, int maximumNgramsCount = Defaults.MaximumNgramsCount) - : this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maximumNgramsCount }, inputColumnName ?? name) + : this(name, ngramLength, skipLength, useAllLengths, weighting, new int[] { maximumNgramsCount }, inputColumnName ?? name) { } internal ColumnOptions(string name, int ngramLength, int skipLength, - bool allLengths, + bool useAllLengths, WeightingCriteria weighting, int[] maximumNgramsCounts, string inputColumnName = null) @@ -855,7 +855,7 @@ internal ColumnOptions(string name, Contracts.CheckUserArg(0 < ngramLength && ngramLength <= NgramBufferBuilder.MaxSkipNgramLength, nameof(ngramLength)); var limits = new int[ngramLength]; - if (!allLengths) + if (!useAllLengths) { Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) == 0 || Utils.Size(maximumNgramsCounts) == 1 && maximumNgramsCounts[0] > 0, nameof(maximumNgramsCounts)); @@ -874,7 +874,7 @@ internal ColumnOptions(string name, InputColumnName = inputColumnName ?? name; NgramLength = ngramLength; SkipLength = skipLength; - AllLengths = allLengths; + UseAllLengths = useAllLengths; Weighting = weighting; } } diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index 8eec4ef097..3aa10978ac 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -192,7 +192,7 @@ public static WordTokenizingEstimator TokenizeWords(this TransformsCatalog.TextT /// Name of the column to transform. If set to , the value of the will be used as source. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of n-grams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. /// @@ -207,11 +207,11 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text string inputColumnName = null, int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, - bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, + bool useAllLengths = NgramExtractingEstimator.Defaults.UseAllLengths, int maximumNgramsCount = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.Defaults.Weighting) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, - ngramLength, skipLength, allLengths, maximumNgramsCount, weighting); + ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -308,7 +308,7 @@ public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCa /// Name of the column to transform. If set to , the value of the will be used as source. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. 
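ProduceWordBags takes the raw text column directly (word tokenization happens inside the estimator), so the corresponding sketch is shorter; column names are again hypothetical.

    var mlContext = new MLContext();
    var wordBags = mlContext.Transforms.Text.ProduceWordBags(
        "BagOfWords", "ReviewText",
        ngramLength: 2,
        useAllLengths: true,            // count unigrams and bigrams
        maximumNgramsCount: 200000);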
- /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransforms catalog, @@ -316,11 +316,11 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf string inputColumnName = null, int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, - bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, + bool useAllLengths = NgramExtractingEstimator.Defaults.UseAllLengths, int maximumNgramsCount = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, ngramLength, skipLength, allLengths, maximumNgramsCount); + outputColumnName, inputColumnName, ngramLength, skipLength, useAllLengths, maximumNgramsCount); /// /// Produces a bag of counts of ngrams (sequences of consecutive words) in @@ -331,7 +331,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf /// Name of the columns to transform. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransforms catalog, @@ -339,11 +339,11 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf string[] inputColumnNames, int ngramLength = NgramExtractingEstimator.Defaults.NgramLength, int skipLength = NgramExtractingEstimator.Defaults.SkipLength, - bool allLengths = NgramExtractingEstimator.Defaults.AllLengths, + bool useAllLengths = NgramExtractingEstimator.Defaults.UseAllLengths, int maximumNgramsCount = NgramExtractingEstimator.Defaults.MaximumNgramsCount, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) => new WordBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnNames, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting); + outputColumnName, inputColumnNames, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting); /// /// Produces a bag of counts of hashed ngrams in @@ -355,7 +355,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. @@ -368,13 +368,13 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. 
int numberOfBits = NgramHashExtractingTransformer.DefaultArguments.NumberOfBits, int ngramLength = NgramHashExtractingTransformer.DefaultArguments.NgramLength, int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, - bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, + bool useAllLengths = NgramHashExtractingTransformer.DefaultArguments.UseAllLengths, uint seed = NgramHashExtractingTransformer.DefaultArguments.Seed, bool useOrderedHashing = NgramHashExtractingTransformer.DefaultArguments.Ordered, int maximumNumberOfInverts = NgramHashExtractingTransformer.DefaultArguments.MaximumNumberOfInverts) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, numberOfBits: numberOfBits, ngramLength: ngramLength, - skipLength: skipLength, allLengths: allLengths, seed: seed, useOrderedHashing: useOrderedHashing, + skipLength: skipLength, useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts); /// @@ -387,7 +387,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. @@ -400,13 +400,13 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. int numberOfBits = NgramHashExtractingTransformer.DefaultArguments.NumberOfBits, int ngramLength = NgramHashExtractingTransformer.DefaultArguments.NgramLength, int skipLength = NgramHashExtractingTransformer.DefaultArguments.SkipLength, - bool allLengths = NgramHashExtractingTransformer.DefaultArguments.AllLengths, + bool useAllLengths = NgramHashExtractingTransformer.DefaultArguments.UseAllLengths, uint seed = NgramHashExtractingTransformer.DefaultArguments.Seed, bool useOrderedHashing = NgramHashExtractingTransformer.DefaultArguments.Ordered, int maximumNumberOfInverts = NgramHashExtractingTransformer.DefaultArguments.MaximumNumberOfInverts) => new WordHashBagEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnNames, numberOfBits: numberOfBits, ngramLength: ngramLength, - skipLength: skipLength, allLengths: allLengths, seed: seed, useOrderedHashing: useOrderedHashing, + skipLength: skipLength, useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts); /// @@ -422,7 +422,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. 
@@ -435,13 +435,13 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, int ngramLength = NgramHashingEstimator.Defaults.NgramLength, int skipLength = NgramHashingEstimator.Defaults.SkipLength, - bool allLengths = NgramHashingEstimator.Defaults.AllLengths, + bool useAllLengths = NgramHashingEstimator.Defaults.UseAllLengths, uint seed = NgramHashingEstimator.Defaults.Seed, bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, numberOfBits: numberOfBits, ngramLength: ngramLength, skipLength: skipLength, - allLengths: allLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts); + useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts); /// /// Produces a bag of counts of hashed ngrams for each . For each column, diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index 6c89740117..d7cca7752a 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -141,7 +141,7 @@ public WordBagEstimator.Options WordFeatureExtractor extractor = new NgramExtractorTransform.NgramExtractorArguments(); extractor.NgramLength = _wordFeatureExtractor.NgramLength; extractor.SkipLength = _wordFeatureExtractor.SkipLength; - extractor.AllLengths = _wordFeatureExtractor.AllLengths; + extractor.UseAllLengths = _wordFeatureExtractor.UseAllLengths; extractor.MaxNumTerms = _wordFeatureExtractor.MaximumNgramsCount; extractor.Weighting = _wordFeatureExtractor.Weighting; } @@ -173,7 +173,7 @@ public WordBagEstimator.Options CharFeatureExtractor extractor = new NgramExtractorTransform.NgramExtractorArguments(); extractor.NgramLength = _charFeatureExtractor.NgramLength; extractor.SkipLength = _charFeatureExtractor.SkipLength; - extractor.AllLengths = _charFeatureExtractor.AllLengths; + extractor.UseAllLengths = _charFeatureExtractor.UseAllLengths; extractor.MaxNumTerms = _charFeatureExtractor.MaximumNgramsCount; extractor.Weighting = _charFeatureExtractor.Weighting; } @@ -187,7 +187,7 @@ public WordBagEstimator.Options CharFeatureExtractor public Options() { WordFeatureExtractor = new WordBagEstimator.Options(); - CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, AllLengths = false }; + CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, UseAllLengths = false }; } } diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index c2fa970088..934a2253ee 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -54,8 +54,8 @@ internal sealed class Column : ManyToOneColumn [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), - ShortName = "all")] - public bool? AllLengths; + Name = "AllLengths", ShortName = "all")] + public bool? 
UseAllLengths; [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] public int[] MaxNumTerms = null; @@ -76,7 +76,7 @@ internal static Column Parse(string str) internal bool TryUnparse(StringBuilder sb) { Contracts.AssertValue(sb); - if (NgramLength != null || SkipLength != null || AllLengths != null || Utils.Size(MaxNumTerms) > 0 || + if (NgramLength != null || SkipLength != null || UseAllLengths != null || Utils.Size(MaxNumTerms) > 0 || Weighting != null) { return false; @@ -123,7 +123,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa MaxNumTerms = options.MaxNumTerms, NgramLength = options.NgramLength, SkipLength = options.SkipLength, - AllLengths = options.AllLengths, + UseAllLengths = options.UseAllLengths, Weighting = options.Weighting, Columns = new NgramExtractorTransform.Column[options.Columns.Length] }; @@ -146,7 +146,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa NgramLength = column.NgramLength, SkipLength = column.SkipLength, Weighting = column.Weighting, - AllLengths = column.AllLengths + UseAllLengths = column.UseAllLengths }; } @@ -175,8 +175,9 @@ internal sealed class Column : OneToOneColumn public int? SkipLength; [Argument(ArgumentType.AtMostOnce, HelpText = - "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), ShortName = "all")] - public bool? AllLengths; + "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), + Name = "AllLengths", ShortName = "all")] + public bool? UseAllLengths; // REVIEW: This argument is actually confusing. If you set only one value we will use this value for all ngrams respectfully for example, // if we specify 3 ngrams we will have maxNumTerms * 3. And it also pick first value from this array to run term transform, so if you specify @@ -200,7 +201,7 @@ internal static Column Parse(string str) internal bool TryUnparse(StringBuilder sb) { Contracts.AssertValue(sb); - if (NgramLength != null || SkipLength != null || AllLengths != null || Utils.Size(MaxNumTerms) > 0 || + if (NgramLength != null || SkipLength != null || UseAllLengths != null || Utils.Size(MaxNumTerms) > 0 || Weighting != null) { return false; @@ -225,8 +226,8 @@ internal abstract class ArgumentsBase [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), - ShortName = "all")] - public bool AllLengths = NgramExtractingEstimator.Defaults.AllLengths; + Name = "AllLengths", ShortName = "all")] + public bool UseAllLengths = NgramExtractingEstimator.Defaults.UseAllLengths; [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumNgramsCount }; @@ -347,7 +348,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa ngramColumns[iinfo] = new NgramExtractingEstimator.ColumnOptions(column.Name, column.NgramLength ?? options.NgramLength, column.SkipLength ?? options.SkipLength, - column.AllLengths ?? options.AllLengths, + column.UseAllLengths ?? options.UseAllLengths, column.Weighting ?? options.Weighting, column.MaxNumTerms ?? options.MaxNumTerms, isTermCol[iinfo] ? 
column.Name : column.Source @@ -380,7 +381,7 @@ internal static IDataTransform Create(IHostEnvironment env, NgramExtractorArgume Columns = extractorCols, NgramLength = extractorArgs.NgramLength, SkipLength = extractorArgs.SkipLength, - AllLengths = extractorArgs.AllLengths, + UseAllLengths = extractorArgs.UseAllLengths, MaxNumTerms = extractorArgs.MaxNumTerms, Weighting = extractorArgs.Weighting }; diff --git a/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs b/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs index 1fe0c21b09..7a5641e2a2 100644 --- a/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordHashBagProducingTransform.cs @@ -129,7 +129,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa Ordered = column.Ordered, MaximumNumberOfInverts = column.MaximumNumberOfInverts, FriendlyNames = options.Columns[iinfo].Source, - AllLengths = column.AllLengths + UseAllLengths = column.UseAllLengths }; } @@ -138,7 +138,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa var featurizeArgs = new NgramHashExtractingTransformer.Options { - AllLengths = options.AllLengths, + UseAllLengths = options.UseAllLengths, NumberOfBits = options.NumberOfBits, NgramLength = options.NgramLength, SkipLength = options.SkipLength, @@ -189,8 +189,8 @@ internal abstract class ColumnBase : ManyToOneColumn [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to include all ngram lengths up to " + nameof(NgramLength) + " or only " + nameof(NgramLength), - ShortName = "all", SortOrder = 4)] - public bool? AllLengths; + Name = "AllLengths", ShortName = "all", SortOrder = 4)] + public bool? UseAllLengths; } internal sealed class Column : ColumnBase @@ -279,8 +279,8 @@ internal abstract class ArgumentsBase [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to include all ngram lengths up to ngramLength or only ngramLength", - ShortName = "all", SortOrder = 4)] - public bool AllLengths = true; + Name = "AllLengths", ShortName = "all", SortOrder = 4)] + public bool UseAllLengths = true; } internal static class DefaultArguments @@ -291,7 +291,7 @@ internal static class DefaultArguments public const uint Seed = 314489979; public const bool Ordered = true; public const int MaximumNumberOfInverts = 0; - public const bool AllLengths = true; + public const bool UseAllLengths = true; } [TlcModule.Component(Name = "NGramHash", FriendlyName = "NGram Hash Extractor Transform", Alias = "NGramHashExtractorTransform,NGramHashExtractor", @@ -369,7 +369,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa new NgramHashingEstimator.ColumnOptions(column.Name, tmpColNames[iinfo], column.NgramLength ?? options.NgramLength, column.SkipLength ?? options.SkipLength, - column.AllLengths ?? options.AllLengths, + column.UseAllLengths ?? options.UseAllLengths, column.NumberOfBits ?? options.NumberOfBits, column.Seed ?? options.Seed, column.Ordered ?? 
options.Ordered, @@ -439,7 +439,7 @@ internal static IDataTransform Create(NgramHashExtractorArguments extractorArgs, MaximumNumberOfInverts = extractorArgs.MaximumNumberOfInverts, Ordered = extractorArgs.Ordered, Seed = extractorArgs.Seed, - AllLengths = extractorArgs.AllLengths + UseAllLengths = extractorArgs.UseAllLengths }; return Create(h, options, input, termLoaderArgs); diff --git a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs index 55fc359957..4f91ea42c3 100644 --- a/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs +++ b/src/Microsoft.ML.Transforms/Text/WrappedTextTransformers.cs @@ -22,7 +22,7 @@ public sealed class WordBagEstimator : IEstimator private readonly (string outputColumnName, string[] sourceColumnsNames)[] _columns; private readonly int _ngramLength; private readonly int _skipLength; - private readonly bool _allLengths; + private readonly bool _useAllLengths; private readonly int _maxNumTerms; private readonly NgramExtractingEstimator.WeightingCriteria _weighting; @@ -44,7 +44,7 @@ public class Options /// /// Whether to store all ngram lengths up to ngramLength, or only ngramLength. /// - public bool AllLengths; + public bool UseAllLengths; /// /// The maximum number of grams to store in the dictionary, for each level of ngrams, @@ -61,7 +61,7 @@ public Options() { NgramLength = 1; SkipLength = NgramExtractingEstimator.Defaults.SkipLength; - AllLengths = NgramExtractingEstimator.Defaults.AllLengths; + UseAllLengths = NgramExtractingEstimator.Defaults.UseAllLengths; MaximumNgramsCount = new int[] { NgramExtractingEstimator.Defaults.MaximumNgramsCount }; Weighting = NgramExtractingEstimator.Defaults.Weighting; } @@ -76,7 +76,7 @@ public Options() /// Name of the column to transform. If set to , the value of the will be used as source. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal WordBagEstimator(IHostEnvironment env, @@ -84,10 +84,10 @@ internal WordBagEstimator(IHostEnvironment env, string inputColumnName = null, int ngramLength = 1, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting) + : this(env, outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting) { } @@ -100,7 +100,7 @@ internal WordBagEstimator(IHostEnvironment env, /// The columns containing text to compute bag of word vector. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. 
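WordBagEstimator.Options now carries the renamed UseAllLengths member alongside the existing knobs. A hedged sketch of filling it in; MaximumNgramsCount takes one limit per ngram length, and per the REVIEW note earlier in this diff a single value is reused for all lengths.

    var wordBagOptions = new WordBagEstimator.Options
    {
        NgramLength = 2,
        SkipLength = 0,
        UseAllLengths = true,
        MaximumNgramsCount = new[] { 200000 },
        Weighting = NgramExtractingEstimator.WeightingCriteria.Tf
    };

This Options type is what TextFeaturizingEstimator's WordFeatureExtractor and CharFeatureExtractor accept, as in the FeaturizeText sketch further below.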
internal WordBagEstimator(IHostEnvironment env, @@ -108,10 +108,10 @@ internal WordBagEstimator(IHostEnvironment env, string[] inputColumnNames, int ngramLength = 1, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) - : this(env, new[] { (outputColumnName, inputColumnNames) }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting) + : this(env, new[] { (outputColumnName, inputColumnNames) }, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting) { } @@ -123,14 +123,14 @@ internal WordBagEstimator(IHostEnvironment env, /// Pairs of columns to compute bag of word vector. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Maximum number of ngrams to store in the dictionary. /// Statistical measure used to evaluate how important a word is to a document in a corpus. internal WordBagEstimator(IHostEnvironment env, (string outputColumnName, string[] inputColumnNames)[] columns, int ngramLength = 1, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, int maximumNgramsCount = 10000000, NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf) { @@ -146,7 +146,7 @@ internal WordBagEstimator(IHostEnvironment env, _columns = columns; _ngramLength = ngramLength; _skipLength = skipLength; - _allLengths = allLengths; + _useAllLengths = useAllLengths; _maxNumTerms = maximumNgramsCount; _weighting = weighting; } @@ -160,7 +160,7 @@ public ITransformer Fit(IDataView input) Columns = _columns.Select(x => new WordBagBuildingTransformer.Column { Name = x.outputColumnName, Source = x.sourceColumnsNames }).ToArray(), NgramLength = _ngramLength, SkipLength = _skipLength, - AllLengths = _allLengths, + UseAllLengths = _useAllLengths, MaxNumTerms = new[] { _maxNumTerms }, Weighting = _weighting }; @@ -193,7 +193,7 @@ public sealed class WordHashBagEstimator : IEstimator private readonly int _numberOfBits; private readonly int _ngramLength; private readonly int _skipLength; - private readonly bool _allLengths; + private readonly bool _useAllLengths; private readonly uint _seed; private readonly bool _ordered; private readonly int _maximumNumberOfInverts; @@ -208,7 +208,7 @@ public sealed class WordHashBagEstimator : IEstimator /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. @@ -221,12 +221,12 @@ internal WordHashBagEstimator(IHostEnvironment env, int numberOfBits = 16, int ngramLength = 1, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, uint seed = 314489979, bool useOrderedHashing = true, int maximumNumberOfInverts = 0) : this(env, new[] { (outputColumnName, new[] { inputColumnName ?? 
outputColumnName }) }, numberOfBits: numberOfBits, - ngramLength: ngramLength, skipLength: skipLength, allLengths: allLengths, seed: seed, + ngramLength: ngramLength, skipLength: skipLength, useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts) { } @@ -241,7 +241,7 @@ internal WordHashBagEstimator(IHostEnvironment env, /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. @@ -254,12 +254,12 @@ internal WordHashBagEstimator(IHostEnvironment env, int numberOfBits = 16, int ngramLength = 1, int skipLength = 0, - bool allLengths = true, + bool useAllLengths = true, uint seed = 314489979, bool useOrderedHashing = true, int maximumNumberOfInverts = 0) : this(env, new[] { (outputColumnName, inputColumnNames) }, numberOfBits: numberOfBits, - ngramLength: ngramLength, skipLength: skipLength, allLengths: allLengths, seed: seed, + ngramLength: ngramLength, skipLength: skipLength, useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts) { } @@ -273,7 +273,7 @@ internal WordHashBagEstimator(IHostEnvironment env, /// Number of bits to hash into. Must be between 1 and 30, inclusive. /// Ngram length. /// Maximum number of tokens to skip when constructing an ngram. - /// Whether to include all ngram lengths up to or only . + /// Whether to include all ngram lengths up to or only . /// Hashing seed. /// Whether the position of each source column should be included in the hash (when there are multiple source columns). /// During hashing we constuct mappings between original values and the produced hash values. 
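The cookbook and test samples in this series already show the renamed argument in context; the bigram hashing step there reads roughly as below, assuming an existing MLContext named mlContext and an upstream transform that produced the NormalizedMessage column.

    // Bag of bigrams over normalized text, using hashes instead of dictionary indices.
    var bagOfBigrams = new WordHashBagEstimator(mlContext, "BagOfBigrams", "NormalizedMessage",
        ngramLength: 2, useAllLengths: false);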
diff --git a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs
index e020dd740f..50d4a38f63 100644
--- a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs
+++ b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs
@@ -138,7 +138,7 @@ void ExtensibilityModifyTextFeaturization()
 var pipeline = mlContext.Transforms.Text.FeaturizeText("Features",
 new TextFeaturizingEstimator.Options
 {
- CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, AllLengths = false },
+ CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, UseAllLengths = false },
 WordFeatureExtractor = new WordBagEstimator.Options(),
 VectorNormalizer = TextFeaturizingEstimator.NormFunction.L1
 }, "SentimentText")
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
index 84b864c84f..50c0439112 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
@@ -302,7 +302,7 @@ private void TextFeaturizationOn(string dataPath)
 // NLP pipeline 2: bag of bigrams, using hashes instead of dictionary indices.
 .Append(new WordHashBagEstimator(mlContext, "BagOfBigrams","NormalizedMessage",
- ngramLength: 2, allLengths: false))
+ ngramLength: 2, useAllLengths: false))
 // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
 .Append(mlContext.Transforms.Text.TokenizeCharacters("MessageChars", "Message"))
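Before the follow-up patch below extends the rename to the static-pipe extensions, a minimal sketch of the hashed variant as it reads after this patch. It is not part of either patch; the constructor and all column names are copied from the cookbook sample above, and the sizing comment is a general property of bit-based hashing:

    // Sketch only, modeled on the cookbook sample above; "NormalizedMessage" is
    // the column produced earlier in that pipeline.
    // numberOfBits keeps its default of 16 here, so the hashed bag of words has
    // 1 << 16 = 65,536 slots; useAllLengths: false emits bigrams only rather
    // than unigrams plus bigrams.
    var bagOfBigrams = new WordHashBagEstimator(mlContext, "BagOfBigrams", "NormalizedMessage",
        ngramLength: 2, useAllLengths: false);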
From 8a5c0c28f77d805f8978a1df189ceaeb7a807f5c Mon Sep 17 00:00:00 2001
From: Wei-Sheng Chin
Date: Tue, 12 Mar 2019 23:28:34 -0700
Subject: [PATCH 12/12] Finish replacement of allLengths

---
 .../TextStaticExtensions.cs                | 48 +++++++++----------
 .../Api/CookbookSamples/CookbookSamples.cs |  2 +-
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs
index 4d597837f8..c4ef323c97 100644
--- a/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs
+++ b/src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs
@@ -309,16 +309,16 @@ public override IEstimator Reconcile(IHostEnvironment env,
 /// The column to apply to.
 /// Ngram length.
 /// Maximum number of tokens to skip when constructing an ngram.
- /// Whether to include all ngram lengths up to or only .
+ /// Whether to include all ngram lengths up to or only .
 /// Maximum number of ngrams to store in the dictionary.
 /// Statistical measure used to evaluate how important a word is to a document in a corpus.
 public static Vector ProduceWordBags(this Scalar input,
 int ngramLength = 1,
 int skipLength = 0,
- bool allLengths = true,
+ bool useAllLengths = true,
 int maximumNgramsCount = 10000000,
 NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf)
- => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting);
+ => new OutPipelineColumn(input, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting);
 }
 ///
@@ -334,11 +334,11 @@ public OutPipelineColumn(Scalar input,
 int numberOfBits,
 int ngramLength,
 int skipLength,
- bool allLengths,
+ bool useAllLengths,
 uint seed,
 bool useOrderedHashing,
 int maximumNumberOfInverts)
- : base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, maximumNumberOfInverts), input)
+ : base(new Reconciler(numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts), input)
 {
 Input = input;
 }
@@ -354,12 +354,12 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable
 private readonly bool _useOrderedHashing;
 private readonly int _maximumNumberOfInverts;
- public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int maximumNumberOfInverts)
+ public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool useAllLengths, uint seed, bool useOrderedHashing, int maximumNumberOfInverts)
 {
 _numberOfBits = numberOfBits;
 _ngramLength = ngramLength;
 _skipLength = skipLength;
- _useAllLengths = allLengths;
+ _useAllLengths = useAllLengths;
 _seed = seed;
 _useOrderedHashing = useOrderedHashing;
 _maximumNumberOfInverts = maximumNumberOfInverts;
@@ -400,7 +400,7 @@ public override IEstimator Reconcile(IHostEnvironment env,
 /// Number of bits to hash into. Must be between 1 and 30, inclusive.
 /// Ngram length.
 /// Maximum number of tokens to skip when constructing an ngram.
- /// Whether to include all ngram lengths up to or only .
+ /// Whether to include all ngram lengths up to or only .
 /// Hashing seed.
 /// Whether the position of each source column should be included in the hash (when there are multiple source columns).
 /// During hashing we constuct mappings between original values and the produced hash values.
@@ -411,10 +411,10 @@ public static Vector ProduceHashedWordBags(this Scalar input,
 int numberOfBits = 16,
 int ngramLength = 1,
 int skipLength = 0,
- bool allLengths = true,
+ bool useAllLengths = true,
 uint seed = 314489979,
 bool useOrderedHashing = true,
- int maximumNumberOfInverts = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, maximumNumberOfInverts);
+ int maximumNumberOfInverts = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts);
 }
 ///
@@ -429,10 +429,10 @@ private sealed class OutPipelineColumn : Vector
 public OutPipelineColumn(PipelineColumn input,
 int ngramLength,
 int skipLength,
- bool allLengths,
+ bool useAllLengths,
 int maxNumTerms,
 NgramExtractingEstimator.WeightingCriteria weighting)
- : base(new Reconciler(ngramLength, skipLength, allLengths, maxNumTerms, weighting), input)
+ : base(new Reconciler(ngramLength, skipLength, useAllLengths, maxNumTerms, weighting), input)
 {
 Input = input;
 }
@@ -446,11 +446,11 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable
 private readonly int _maxNgramsCount;
 private readonly NgramExtractingEstimator.WeightingCriteria _weighting;
- public Reconciler(int ngramLength, int skipLength, bool allLengths, int maxNumTerms, NgramExtractingEstimator.WeightingCriteria weighting)
+ public Reconciler(int ngramLength, int skipLength, bool useAllLengths, int maxNumTerms, NgramExtractingEstimator.WeightingCriteria weighting)
 {
 _ngramLength = ngramLength;
 _skipLength = skipLength;
- _useAllLengths = allLengths;
+ _useAllLengths = useAllLengths;
 _maxNgramsCount = maxNumTerms;
 _weighting = weighting;
@@ -491,16 +491,16 @@ public override IEstimator Reconcile(IHostEnvironment env,
 /// The column to apply to.
 /// Ngram length.
 /// Maximum number of tokens to skip when constructing an ngram.
- /// Whether to include all ngram lengths up to or only .
+ /// Whether to include all ngram lengths up to or only .
 /// Maximum number of n-grams to store in the dictionary.
 /// Statistical measure used to evaluate how important a word is to a document in a corpus.
 public static Vector ProduceNgrams(this VarVector> input,
 int ngramLength = 1,
 int skipLength = 0,
- bool allLengths = true,
+ bool useAllLengths = true,
 int maximumNgramsCount = 10000000,
 NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf)
- => new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting);
+ => new OutPipelineColumn(input, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting);
 }
 ///
@@ -512,8 +512,8 @@ private sealed class OutPipelineColumn : Vector
 {
 public readonly VarVector> Input;
- public OutPipelineColumn(VarVector> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int maximumNumberOfInverts)
- : base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, maximumNumberOfInverts), input)
+ public OutPipelineColumn(VarVector> input, int numberOfBits, int ngramLength, int skipLength, bool useAllLengths, uint seed, bool useOrderedHashing, int maximumNumberOfInverts)
+ : base(new Reconciler(numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts), input)
 {
 Input = input;
 }
@@ -529,12 +529,12 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable
 private readonly bool _useOrderedHashing;
 private readonly int _maximumNumberOfInverts;
- public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int maximumNumberOfInverts)
+ public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool useAllLengths, uint seed, bool useOrderedHashing, int maximumNumberOfInverts)
 {
 _numberOfBits = numberOfBits;
 _ngramLength = ngramLength;
 _skipLength = skipLength;
- _useAllLengths = allLengths;
+ _useAllLengths = useAllLengths;
 _seed = seed;
 _useOrderedHashing = useOrderedHashing;
 _maximumNumberOfInverts = maximumNumberOfInverts;
@@ -578,7 +578,7 @@ public override IEstimator Reconcile(IHostEnvironment env,
 /// Number of bits to hash into. Must be between 1 and 30, inclusive.
 /// Ngram length.
 /// Maximum number of tokens to skip when constructing an ngram.
- /// Whether to include all ngram lengths up to or only .
+ /// Whether to include all ngram lengths up to or only .
 /// Hashing seed.
 /// Whether the position of each source column should be included in the hash (when there are multiple source columns).
 /// During hashing we constuct mappings between original values and the produced hash values.
@@ -589,9 +589,9 @@ public static Vector ProduceHashedNgrams(this VarVector
 int numberOfBits = 16,
 int ngramLength = 2,
 int skipLength = 0,
- bool allLengths = true,
+ bool useAllLengths = true,
 uint seed = 314489979,
 bool useOrderedHashing = true,
- int maximumNumberOfInverts = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, maximumNumberOfInverts);
+ int maximumNumberOfInverts = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts);
 }
 }
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
index 0261377a49..e2bacb6309 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
@@ -464,7 +464,7 @@ private void TextFeaturizationOn(string dataPath)
 BagOfWords: r.Message.NormalizeText().ProduceWordBags(),
 // NLP pipeline 2: bag of bigrams, using hashes instead of dictionary indices.
- BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, allLengths: false),
+ BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, useAllLengths: false),
 // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
 BagOfTrichar: r.Message.TokenizeIntoCharacters().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),