dotnet · wschin · Feb 22, 2019 · Feb 19, 2019 · Feb 19, 2019 · Feb 19, 2019
diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md
@@ -219,10 +219,10 @@ private class AdultData
 
 // Read the data into a data view.
 var trainData = mlContext.Data.ReadFromTextFile<AdultData>(trainDataPath,
-                // First line of the file is a header, not a data row.
-                hasHeader: true,
                 // Default separator is tab, but we need a semicolon.
-                separatorChar: ';'
+                separatorChar: ';',
+                // First line of the file is a header, not a data row.
+                hasHeader: true
 );		
 
 ```
@@ -328,7 +328,7 @@ In the file above, the last column (12th) is label that we predict, and all the
 // First, we define the reader: specify the data columns and where to find them in the text file.
 // Read the data into a data view. Remember though, readers are lazy, so the actual reading will happen when the data is accessed.
 var trainData = mlContext.Data.ReadFromTextFile<AdultData>(dataPath,
-    // First line of the file is a header, not a data row.
+    // Default separator is tab, but this dataset is comma-separated.
     separatorChar: ','
 );
 
@@ -372,7 +372,7 @@ Assuming the example above was used to train the model, here's how you calculate
 ```csharp
 // Read the test dataset.
 var testData = mlContext.Data.ReadFromTextFile<AdultData>(testDataPath,
-    // First line of the file is a header, not a data row.
+    // Default separator is tab, but this dataset is comma-separated.
     separatorChar: ','
 );
 // Calculate metrics of the model on the test data.

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs
@@ -34,12 +34,12 @@ public static void Example()
 
             // This is the dictionary to convert words into the integer indexes.
             var lookupMap = mlContext.Data.ReadFromTextFile(Path.Combine(modelLocation, "imdb_word_index.csv"),
-                   columns: new[]
+                columns: new[]
                    {
                         new TextLoader.Column("Words", DataKind.TX, 0),
                         new TextLoader.Column("Ids", DataKind.I4, 1),
                    },
-                   separatorChar: ','
+                separatorChar: ','
                );
 
             // Load the TensorFlow model once.

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -13,7 +13,6 @@
 using Microsoft.ML.Data;
 using Microsoft.ML.Internal.Utilities;
 using Microsoft.ML.Model;
-using Float = System.Single;
 
 [assembly: LoadableClass(TextLoader.Summary, typeof(IDataLoader), typeof(TextLoader), typeof(TextLoader.Options), typeof(SignatureDataLoader),
     "Text Loader", "TextLoader", "Text", DocName = "loader/TextLoader.md")]
@@ -487,8 +486,8 @@ internal bool IsValid()
 
         internal static class Defaults
         {
-            internal const bool AllowQuoting = true;
-            internal const bool AllowSparse = true;
+            internal const bool AllowQuoting = false;
+            internal const bool AllowSparse = false;
             internal const char Separator = '\t';
             internal const bool HasHeader = false;
             internal const bool TrimWhitespace = false;
@@ -1065,18 +1064,22 @@ private bool HasHeader
         /// </summary>
         /// <param name="env">The environment to use.</param>
         /// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
-        /// <param name="hasHeader">Whether the file has a header.</param>
         /// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
+        /// <param name="hasHeader">Whether the file has a header.</param>
+        /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format.</param>
+        /// <param name="allowQuoting">Whether the content of a column can be parsed from a string starting and ending with a quote.</param>
         /// <param name="dataSample">Allows to expose items that can be used for reading.</param>
-        internal TextLoader(IHostEnvironment env, Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
-            : this(env, MakeArgs(columns, hasHeader, new[] { separatorChar }), dataSample)
+        internal TextLoader(IHostEnvironment env, Column[] columns, char separatorChar = Defaults.Separator,
+            bool hasHeader = Defaults.HasHeader, bool allowSparse = Defaults.AllowSparse,
+            bool allowQuoting = Defaults.AllowQuoting, IMultiStreamSource dataSample = null)
+            : this(env, MakeArgs(columns, hasHeader, new[] { separatorChar }, allowSparse, allowQuoting), dataSample)
         {
         }
 
-        private static Options MakeArgs(Column[] columns, bool hasHeader, char[] separatorChars)
+        private static Options MakeArgs(Column[] columns, bool hasHeader, char[] separatorChars, bool allowSparse, bool allowQuoting)
         {
             Contracts.AssertValue(separatorChars);
-            var result = new Options { Columns = columns, HasHeader = hasHeader, Separators = separatorChars};
+            var result = new Options { Columns = columns, HasHeader = hasHeader, Separators = separatorChars, AllowSparse = allowSparse, AllowQuoting = allowQuoting };
             return result;
         }
 
@@ -1345,7 +1348,7 @@ private TextLoader(IHost host, ModelLoadContext ctx)
             // char[]: separators
             // bindings
             int cbFloat = ctx.Reader.ReadInt32();
-            host.CheckDecode(cbFloat == sizeof(Float));
+            host.CheckDecode(cbFloat == sizeof(float));
             _maxRows = ctx.Reader.ReadInt64();
             host.CheckDecode(_maxRows > 0);
             _flags = (OptionFlags)ctx.Reader.ReadUInt32();
@@ -1408,7 +1411,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx)
             // int: number of separators
             // char[]: separators
             // bindings
-            ctx.Writer.Write(sizeof(Float));
+            ctx.Writer.Write(sizeof(float));
             ctx.Writer.Write(_maxRows);
             _host.Assert((_flags & ~OptionFlags.All) == 0);
             ctx.Writer.Write((uint)_flags);

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
@@ -16,15 +16,19 @@ public static class TextLoaderSaverCatalog
         /// </summary>
         /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
         /// <param name="columns">Array of columns <see cref="TextLoader.Column"/> defining the schema.</param>
-        /// <param name="hasHeader">Whether the file has a header.</param>
         /// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
+        /// <param name="hasHeader">Whether the file has a header.</param>
+        /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format.</param>
+        /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string.</param>
         /// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
         public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
             TextLoader.Column[] columns,
-            bool hasHeader = TextLoader.Defaults.HasHeader,
             char separatorChar = TextLoader.Defaults.Separator,
+            bool hasHeader = TextLoader.Defaults.HasHeader,
+            bool allowSparse = TextLoader.Defaults.AllowSparse,
+            bool allowQuoting = TextLoader.Defaults.AllowQuoting,
             IMultiStreamSource dataSample = null)
-            => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);
+            => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, separatorChar, hasHeader, allowSparse, allowQuoting, dataSample);
 
         /// <summary>
         /// Create a text loader <see cref="TextLoader"/>.
@@ -41,24 +45,24 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
         /// Create a text loader <see cref="TextLoader"/> by inferencing the dataset schema from a data model type.
         /// </summary>
         /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
-        /// <param name="hasHeader">Does the file contains header?</param>
         /// <param name="separatorChar">Column separator character. Default is '\t'</param>
-        /// <param name="allowQuotedStrings">Whether the input may include quoted values,
+        /// <param name="hasHeader">Does the file contain a header?</param>
+        /// <param name="allowQuoting">Whether the input may include quoted values,
         /// which can contain separator characters, colons,
         /// and distinguish empty values from missing values. When true, consecutive separators
         /// denote a missing value and an empty value is denoted by \"\".
         /// When false, consecutive separators denote an empty value.</param>
-        /// <param name="supportSparse">Whether the input may include sparse representations for example,
+        /// <param name="allowSparse">Whether the input may include sparse representations for example,
         /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero
         /// except for 3rd and 5th columns which have values 6 and 3</param>
         /// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
         public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog catalog,
-            bool hasHeader = TextLoader.Defaults.HasHeader,
             char separatorChar = TextLoader.Defaults.Separator,
-            bool allowQuotedStrings = TextLoader.Defaults.AllowQuoting,
-            bool supportSparse = TextLoader.Defaults.AllowSparse,
+            bool hasHeader = TextLoader.Defaults.HasHeader,
+            bool allowQuoting = TextLoader.Defaults.AllowQuoting,
+            bool allowSparse = TextLoader.Defaults.AllowSparse,
             bool trimWhitespace = TextLoader.Defaults.TrimWhitespace)
-            => TextLoader.CreateTextReader<TInput>(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace);
+            => TextLoader.CreateTextReader<TInput>(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace);
 
         /// <summary>
         /// Read a data view from a text file using <see cref="TextLoader"/>.
@@ -72,16 +76,16 @@ public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog cat
         public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
             string path,
             TextLoader.Column[] columns,
-            bool hasHeader = TextLoader.Defaults.HasHeader,
-            char separatorChar = TextLoader.Defaults.Separator)
+            char separatorChar = TextLoader.Defaults.Separator,
+            bool hasHeader = TextLoader.Defaults.HasHeader)
         {
             Contracts.CheckNonEmpty(path, nameof(path));
 
             var env = catalog.GetEnvironment();
 
             // REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
             // Therefore, we are going to disallow data sample.
-            var reader = new TextLoader(env, columns, hasHeader, separatorChar, dataSample: null);
+            var reader = new TextLoader(env, columns, separatorChar, hasHeader, dataSample: null);
             return reader.Read(new MultiFileSource(path));
         }
 
@@ -91,30 +95,30 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
         /// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
         /// <param name="hasHeader">Does the file contains header?</param>
         /// <param name="separatorChar">Column separator character. Default is '\t'</param>
-        /// <param name="allowQuotedStrings">Whether the input may include quoted values,
+        /// <param name="allowQuoting">Whether the input may include quoted values,
         /// which can contain separator characters, colons,
         /// and distinguish empty values from missing values. When true, consecutive separators
         /// denote a missing value and an empty value is denoted by \"\".
         /// When false, consecutive separators denote an empty value.</param>
-        /// <param name="supportSparse">Whether the input may include sparse representations for example,
+        /// <param name="allowSparse">Whether the input may include sparse representations for example,
         /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero
         /// except for 3rd and 5th columns which have values 6 and 3</param>
         /// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
         /// <param name="path">The path to the file.</param>
         /// <returns>The data view.</returns>
         public static IDataView ReadFromTextFile<TInput>(this DataOperationsCatalog catalog,
             string path,
-            bool hasHeader = TextLoader.Defaults.HasHeader,
             char separatorChar = TextLoader.Defaults.Separator,
-            bool allowQuotedStrings = TextLoader.Defaults.AllowQuoting,
-            bool supportSparse = TextLoader.Defaults.AllowSparse,
+            bool hasHeader = TextLoader.Defaults.HasHeader,
+            bool allowQuoting = TextLoader.Defaults.AllowQuoting,
+            bool allowSparse = TextLoader.Defaults.AllowSparse,
             bool trimWhitespace = TextLoader.Defaults.TrimWhitespace)
         {
             Contracts.CheckNonEmpty(path, nameof(path));
 
             // REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
             // Therefore, we are going to disallow data sample.
-            return TextLoader.CreateTextReader<TInput>(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace)
+            return TextLoader.CreateTextReader<TInput>(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace)
                              .Read(new MultiFileSource(path));
         }
 
@@ -144,20 +148,22 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, str
         /// <param name="headerRow">Whether to write the header row.</param>
         /// <param name="schema">Whether to write the header comment with the schema.</param>
         /// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
+        /// <param name="forceDense">Whether to save columns in dense format even if they are sparse vectors.</param>
         public static void SaveAsText(this DataOperationsCatalog catalog,
             IDataView data,
             Stream stream,
-            char separatorChar = TextLoader.Defaults.Separator,
-            bool headerRow = TextLoader.Defaults.HasHeader,
-            bool schema = true,
-            bool keepHidden = false)
+            char separatorChar = TextSaver.Defaults.Separator,
+            bool headerRow = TextSaver.Defaults.OutputHeader,
+            bool schema = TextSaver.Defaults.OutputSchema,
+            bool keepHidden = false,
+            bool forceDense = TextSaver.Defaults.ForceDense)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             Contracts.CheckValue(data, nameof(data));
             Contracts.CheckValue(stream, nameof(stream));
 
             var env = catalog.GetEnvironment();
-            var saver = new TextSaver(env, new TextSaver.Arguments { Separator = separatorChar.ToString(), OutputHeader = headerRow, OutputSchema = schema });
+            var saver = new TextSaver(env, new TextSaver.Arguments { Dense = forceDense, Separator = separatorChar.ToString(), OutputHeader = headerRow, OutputSchema = schema });
 
             using (var ch = env.Start("Saving data"))
                 DataSaverUtils.SaveDataView(ch, saver, data, stream, keepHidden);

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs
@@ -22,25 +22,33 @@ namespace Microsoft.ML.Data.IO
     [BestFriend]
     internal sealed class TextSaver : IDataSaver
     {
+        internal static class Defaults
+        {
+            internal const char Separator = '\t';
+            internal const bool ForceDense = false;
+            internal const bool OutputSchema = true;
+            internal const bool OutputHeader = true;
+        }
+
         // REVIEW: consider saving a command line in a separate file.
         public sealed class Arguments
         {
             [Argument(ArgumentType.AtMostOnce, HelpText = "Separator", ShortName = "sep")]
-            public string Separator = "tab";
+            public string Separator = Defaults.Separator.ToString();
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Force dense format", ShortName = "dense")]
-            public bool Dense;
+            public bool Dense = Defaults.ForceDense;
 
             // REVIEW: This and the corresponding BinarySaver option should be removed,
             // with the silence being handled, somehow, at the environment level. (Task 6158846.)
             [Argument(ArgumentType.LastOccurenceWins, HelpText = "Suppress any info output (not warnings or errors)", Hide = true)]
             public bool Silent;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Output the comment containing the loader settings", ShortName = "schema")]
-            public bool OutputSchema = true;
+            public bool OutputSchema = Defaults.OutputSchema;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Output the header", ShortName = "header")]
-            public bool OutputHeader = true;
+            public bool OutputHeader = Defaults.OutputHeader;
         }
 
         internal const string Summary = "Writes data into a text file.";

diff --git a/src/Microsoft.ML.StaticPipe/DataLoadSaveOperationsExtensions.cs b/src/Microsoft.ML.StaticPipe/DataLoadSaveOperationsExtensions.cs
@@ -36,6 +36,6 @@ public static DataReader<IMultiStreamSource, TShape> CreateTextReader<[IsShape]
             this DataOperationsCatalog catalog, Func<Context, TShape> func, IMultiStreamSource files = null,
             bool hasHeader = false, char separator = '\t', bool allowQuoting = true, bool allowSparse = true,
             bool trimWhitspace = false)
-         => CreateReader(catalog.Environment, func, files, hasHeader, separator, allowQuoting, allowSparse, trimWhitspace);
+         => CreateReader(catalog.Environment, func, files, separator, hasHeader, allowQuoting, allowSparse, trimWhitspace);
     }
 }