diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 6e8168041c..c83e9e4d9f 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -479,6 +479,10 @@ public class Options /// /// Whether the data file has a header with feature names. + /// Note: If a TextLoader is created with hasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name + /// annotations (slots being the elements of the given vector column), because the output schema is made when the TextLoader is made, and not when + /// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and hasHeader = true indicates to the + /// loader that when it is given a file when TextLoader.Load(IMultiStreamSource source) is called, it needs to skip the first line. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] @@ -1557,4 +1561,4 @@ public DataViewRowCursor[] GetRowCursorSet(IEnumerable co void ICanSaveModel.Save(ModelSaveContext ctx) => ((ICanSaveModel)_loader).Save(ctx); } } -} \ No newline at end of file +} diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 8962b550bf..490464c5f3 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -21,7 +21,11 @@ public static class TextLoaderSaverCatalog /// The catalog. /// Array of columns defining the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header. + /// Whether the file has a header with feature names. 
Note: If a TextLoader is created with hasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines @@ -67,7 +71,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// names and their data types in the schema of the loaded data. /// The catalog. /// Column separator character. Default is '\t' - /// Does the file contains header? + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer information /// about the columns, such as slot names. /// Whether the input may include quoted values, @@ -97,7 +105,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat /// The path to the file. /// The columns of the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. 
- /// Whether the file has a header. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines /// Whether the file can contain numerical vectors in sparse format. @@ -134,7 +146,11 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, /// The catalog. /// The path to the file. /// Column separator character. Default is '\t' - /// Does the file contains header? + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators