@@ -16,15 +16,19 @@ public static class TextLoaderSaverCatalog
16
16
/// </summary>
17
17
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
18
18
/// <param name="columns">Array of columns <see cref="TextLoader.Column"/> defining the schema.</param>
19
- /// <param name="hasHeader">Whether the file has a header.</param>
20
19
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
20
+ /// <param name="hasHeader">Whether the file has a header.</param>
21
+ /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format.</param>
22
+ /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string.</param>
21
23
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
22
24
public static TextLoader CreateTextLoader ( this DataOperationsCatalog catalog ,
23
25
TextLoader . Column [ ] columns ,
24
- bool hasHeader = TextLoader . Defaults . HasHeader ,
25
26
char separatorChar = TextLoader . Defaults . Separator ,
27
+ bool hasHeader = TextLoader . Defaults . HasHeader ,
28
+ bool allowSparse = TextLoader . Defaults . AllowSparse ,
29
+ bool allowQuoting = TextLoader . Defaults . AllowQuoting ,
26
30
IMultiStreamSource dataSample = null )
27
- => new TextLoader ( CatalogUtils . GetEnvironment ( catalog ) , columns , hasHeader , separatorChar , dataSample ) ;
31
+ => new TextLoader ( CatalogUtils . GetEnvironment ( catalog ) , columns , separatorChar , hasHeader , allowSparse , allowQuoting , dataSample ) ;
28
32
29
33
/// <summary>
30
34
/// Create a text loader <see cref="TextLoader"/>.
@@ -41,24 +45,24 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
41
45
/// Create a text loader <see cref="TextLoader"/> by inferring the dataset schema from a data model type.
42
46
/// </summary>
43
47
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
44
- /// <param name="hasHeader">Does the file contains header?</param>
45
48
/// <param name="separatorChar">Column separator character. Default is '\t'</param>
46
- /// <param name="allowQuotedStrings">Whether the input may include quoted values,
49
+ /// <param name="hasHeader">Whether the file contains a header.</param>
50
+ /// <param name="allowQuoting">Whether the input may include quoted values,
47
51
/// which can contain separator characters, colons,
48
52
/// and distinguish empty values from missing values. When true, consecutive separators
49
53
/// denote a missing value and an empty value is denoted by \"\".
50
54
/// When false, consecutive separators denote an empty value.</param>
51
- /// <param name="supportSparse ">Whether the input may include sparse representations for example,
55
+ /// <param name="allowSparse">Whether the input may include sparse representations. For example,
52
56
/// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
53
57
/// except for 3rd and 5th columns which have values 6 and 3</param>
54
58
/// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
55
59
public static TextLoader CreateTextLoader < TInput > ( this DataOperationsCatalog catalog ,
56
- bool hasHeader = TextLoader . Defaults . HasHeader ,
57
60
char separatorChar = TextLoader . Defaults . Separator ,
58
- bool allowQuotedStrings = TextLoader . Defaults . AllowQuoting ,
59
- bool supportSparse = TextLoader . Defaults . AllowSparse ,
61
+ bool hasHeader = TextLoader . Defaults . HasHeader ,
62
+ bool allowQuoting = TextLoader . Defaults . AllowQuoting ,
63
+ bool allowSparse = TextLoader . Defaults . AllowSparse ,
60
64
bool trimWhitespace = TextLoader . Defaults . TrimWhitespace )
61
- => TextLoader . CreateTextReader < TInput > ( CatalogUtils . GetEnvironment ( catalog ) , hasHeader , separatorChar , allowQuotedStrings , supportSparse , trimWhitespace ) ;
65
+ => TextLoader . CreateTextReader < TInput > ( CatalogUtils . GetEnvironment ( catalog ) , hasHeader , separatorChar , allowQuoting , allowSparse , trimWhitespace ) ;
62
66
63
67
/// <summary>
64
68
/// Read a data view from a text file using <see cref="TextLoader"/>.
@@ -72,16 +76,16 @@ public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog cat
72
76
public static IDataView ReadFromTextFile ( this DataOperationsCatalog catalog ,
73
77
string path ,
74
78
TextLoader . Column [ ] columns ,
75
- bool hasHeader = TextLoader . Defaults . HasHeader ,
76
- char separatorChar = TextLoader . Defaults . Separator )
79
+ char separatorChar = TextLoader . Defaults . Separator ,
80
+ bool hasHeader = TextLoader . Defaults . HasHeader )
77
81
{
78
82
Contracts . CheckNonEmpty ( path , nameof ( path ) ) ;
79
83
80
84
var env = catalog . GetEnvironment ( ) ;
81
85
82
86
// REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
83
87
// Therefore, we are going to disallow data sample.
84
- var reader = new TextLoader ( env , columns , hasHeader , separatorChar , dataSample : null ) ;
88
+ var reader = new TextLoader ( env , columns , separatorChar , hasHeader , dataSample : null ) ;
85
89
return reader . Read ( new MultiFileSource ( path ) ) ;
86
90
}
87
91
@@ -91,30 +95,30 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
91
95
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
92
96
/// <param name="hasHeader">Whether the file contains a header.</param>
93
97
/// <param name="separatorChar">Column separator character. Default is '\t'</param>
94
- /// <param name="allowQuotedStrings ">Whether the input may include quoted values,
98
+ /// <param name="allowQuoting">Whether the input may include quoted values,
95
99
/// which can contain separator characters, colons,
96
100
/// and distinguish empty values from missing values. When true, consecutive separators
97
101
/// denote a missing value and an empty value is denoted by \"\".
98
102
/// When false, consecutive separators denote an empty value.</param>
99
- /// <param name="supportSparse ">Whether the input may include sparse representations for example,
103
+ /// <param name="allowSparse">Whether the input may include sparse representations. For example,
100
104
/// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
101
105
/// except for 3rd and 5th columns which have values 6 and 3</param>
102
106
/// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
103
107
/// <param name="path">The path to the file.</param>
104
108
/// <returns>The data view.</returns>
105
109
public static IDataView ReadFromTextFile < TInput > ( this DataOperationsCatalog catalog ,
106
110
string path ,
107
- bool hasHeader = TextLoader . Defaults . HasHeader ,
108
111
char separatorChar = TextLoader . Defaults . Separator ,
109
- bool allowQuotedStrings = TextLoader . Defaults . AllowQuoting ,
110
- bool supportSparse = TextLoader . Defaults . AllowSparse ,
112
+ bool hasHeader = TextLoader . Defaults . HasHeader ,
113
+ bool allowQuoting = TextLoader . Defaults . AllowQuoting ,
114
+ bool allowSparse = TextLoader . Defaults . AllowSparse ,
111
115
bool trimWhitespace = TextLoader . Defaults . TrimWhitespace )
112
116
{
113
117
Contracts . CheckNonEmpty ( path , nameof ( path ) ) ;
114
118
115
119
// REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
116
120
// Therefore, we are going to disallow data sample.
117
- return TextLoader . CreateTextReader < TInput > ( CatalogUtils . GetEnvironment ( catalog ) , hasHeader , separatorChar , allowQuotedStrings , supportSparse , trimWhitespace )
121
+ return TextLoader . CreateTextReader < TInput > ( CatalogUtils . GetEnvironment ( catalog ) , hasHeader , separatorChar , allowQuoting , allowSparse , trimWhitespace )
118
122
. Read ( new MultiFileSource ( path ) ) ;
119
123
}
120
124
@@ -144,20 +148,22 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, str
144
148
/// <param name="headerRow">Whether to write the header row.</param>
145
149
/// <param name="schema">Whether to write the header comment with the schema.</param>
146
150
/// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
151
+ /// <param name="forceDense">Whether to save columns in dense format even if they are sparse vectors.</param>
147
152
public static void SaveAsText ( this DataOperationsCatalog catalog ,
148
153
IDataView data ,
149
154
Stream stream ,
150
- char separatorChar = TextLoader . Defaults . Separator ,
151
- bool headerRow = TextLoader . Defaults . HasHeader ,
152
- bool schema = true ,
153
- bool keepHidden = false )
155
+ char separatorChar = TextSaver . Defaults . Separator ,
156
+ bool headerRow = TextSaver . Defaults . OutputHeader ,
157
+ bool schema = TextSaver . Defaults . OutputSchema ,
158
+ bool keepHidden = false ,
159
+ bool forceDense = TextSaver . Defaults . ForceDense )
154
160
{
155
161
Contracts . CheckValue ( catalog , nameof ( catalog ) ) ;
156
162
Contracts . CheckValue ( data , nameof ( data ) ) ;
157
163
Contracts . CheckValue ( stream , nameof ( stream ) ) ;
158
164
159
165
var env = catalog . GetEnvironment ( ) ;
160
- var saver = new TextSaver ( env , new TextSaver . Arguments { Separator = separatorChar . ToString ( ) , OutputHeader = headerRow , OutputSchema = schema } ) ;
166
+ var saver = new TextSaver ( env , new TextSaver . Arguments { Dense = forceDense , Separator = separatorChar . ToString ( ) , OutputHeader = headerRow , OutputSchema = schema } ) ;
161
167
162
168
using ( var ch = env . Start ( "Saving data" ) )
163
169
DataSaverUtils . SaveDataView ( ch , saver , data , stream , keepHidden ) ;
0 commit comments