Skip to content

Remove IMultiStreamSource when path (type: string) exists in text loader APIs #2745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 27, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 4 additions & 12 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog cat
/// <param name="columns">The columns of the schema.</param>
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="hasHeader">Whether the file has a header.</param>
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
/// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string.</param>
/// <param name="trimWhitespace">Whether to remove trailing whitespace from lines.</param>
/// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format.</param>
Expand All @@ -99,7 +98,6 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
TextLoader.Column[] columns,
char separatorChar = TextLoader.Defaults.Separator,
bool hasHeader = TextLoader.Defaults.HasHeader,
IMultiStreamSource dataSample = null,
bool allowQuoting = TextLoader.Defaults.AllowQuoting,
bool trimWhitespace = TextLoader.Defaults.TrimWhitespace,
bool allowSparse = TextLoader.Defaults.AllowSparse)
Expand All @@ -116,7 +114,7 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
AllowSparse = allowSparse
};

var reader = new TextLoader(CatalogUtils.GetEnvironment(catalog), options: options, dataSample: dataSample);
var reader = new TextLoader(CatalogUtils.GetEnvironment(catalog), options: options);
return reader.Read(new MultiFileSource(path));
}

Expand All @@ -127,7 +125,6 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
/// <param name="path">The path to the file.</param>
/// <param name="separatorChar">Column separator character. Default is '\t'</param>
/// <param name="hasHeader">Does the file contain a header?</param>
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
/// <param name="allowQuoting">Whether the input may include quoted values,
/// which can contain separator characters, colons,
/// and distinguish empty values from missing values. When true, consecutive separators
Expand All @@ -142,7 +139,6 @@ public static IDataView ReadFromTextFile<TInput>(this DataOperationsCatalog cata
string path,
char separatorChar = TextLoader.Defaults.Separator,
bool hasHeader = TextLoader.Defaults.HasHeader,
IMultiStreamSource dataSample = null,
bool allowQuoting = TextLoader.Defaults.AllowQuoting,
bool trimWhitespace = TextLoader.Defaults.TrimWhitespace,
bool allowSparse = TextLoader.Defaults.AllowSparse)
Expand All @@ -152,7 +148,7 @@ public static IDataView ReadFromTextFile<TInput>(this DataOperationsCatalog cata
// REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
// Therefore, we are going to disallow data sample.
return TextLoader.CreateTextReader<TInput>(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar,
allowQuoting, allowSparse, trimWhitespace, dataSample: dataSample).Read(new MultiFileSource(path));
allowQuoting, allowSparse, trimWhitespace).Read(new MultiFileSource(path));
}

/// <summary>
Expand All @@ -161,19 +157,15 @@ public static IDataView ReadFromTextFile<TInput>(this DataOperationsCatalog cata
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">Specifies a file from which to read.</param>
/// <param name="options">Defines the settings of the load operation.</param>
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, string path,
TextLoader.Options options = null, IMultiStreamSource dataSample = null)
TextLoader.Options options = null)
{
Contracts.CheckNonEmpty(path, nameof(path));

var env = catalog.GetEnvironment();
var source = new MultiFileSource(path);

if (dataSample == null)
return new TextLoader(env, options, source).Read(source);
else
return new TextLoader(env, options, dataSample).Read(source);
return new TextLoader(env, options, dataSample: source).Read(source);
}

/// <summary>
Expand Down