Skip to content

Rename CreateTextReader to CreateTextLoader #2125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public static void Calibration()
var mlContext = new MLContext();

// Create a text loader.
var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments()
var reader = mlContext.Data.CreateTextLoader(new TextLoader.Arguments()
{
Separator = "tab",
HasHeader = true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public static void FeatureContributionCalculationTransform_Regression()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextReader(
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public static void FeatureSelectionTransform()

// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
// all the feature columns into entries of a vector of a single column named "Features".
var reader = ml.Data.CreateTextReader(
var reader = ml.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static void FFM_BinaryClassification()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextReader(
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static void RunExample()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextReader(
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static IDataView GetHousingRegressionIDataView(MLContext mlContext, out s
// First, we define the reader: specify the data columns and where to find them in the text file.
// The data file is composed of rows of data, with each row having 11 numerical columns
// separated by whitespace.
var reader = mlContext.Data.CreateTextReader(
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
// Read the first column (indexed by 0) in the data file as an R4 (float)
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public static void SDCA_BinaryClassification()

// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextReader(
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,33 @@ namespace Microsoft.ML
public static class TextLoaderSaverCatalog
{
/// <summary>
/// Create a text reader <see cref="TextLoader"/>.
/// Create a text loader <see cref="TextLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperations"/> catalog.</param>
/// <param name="columns">The columns of the schema.</param>
/// <param name="columns">Array of columns <see cref="TextLoader.Column"/> defining the schema.</param>
/// <param name="hasHeader">Whether the file has a header.</param>
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="dataSample">The optional location of a data sample.</param>
public static TextLoader CreateTextReader(this DataOperations catalog,
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
public static TextLoader CreateTextLoader(this DataOperations catalog,
Copy link
Member

@sfilipi sfilipi Jan 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CreateTextLoader [](start = 33, length = 16)

My personal take on this is: TextReader is a much better name than TextLoader. I would rather rename the TextLoader class to TextReader.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct, we are replacing the IDataLoader idiom with the IDataReader idiom, as part of #581 and much subsequent work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TextReader is conflated with System.IO.TextReader and should be avoided. I've filed #2144 for renaming the class. I propose making the method name and return type consistent in this PR and revisiting the name when we rename the class.

cc: @glebuk @eerhardt

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. Good point. Maybe we should actually rename IDataReader to IDataLoader? And call the existing legacy IDataLoader interface something like IDataLoaderLegacy? What do we all think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar renames would apply to IDataReaderEstimator, which I suppose would become IDataLoaderEstimator.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point is to be consistent between the method's name and its return type, so either one of the following would be good for me: 👍

public static TextLoader CreateTextLoader()

public static TextReader CreateTextReader()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. Can't name it reader, guess we have to reuse the old "loader" thing.

TextLoader.Column[] columns,
bool hasHeader = TextLoader.DefaultArguments.HasHeader,
char separatorChar = TextLoader.DefaultArguments.Separator,
IMultiStreamSource dataSample = null)
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);

/// <summary>
/// Create a text reader <see cref="TextLoader"/>.
/// Create a text loader <see cref="TextLoader"/>.
/// </summary>
/// <param name="catalog">The <see cref="DataOperations"/> catalog.</param>
/// <param name="args">Defines the settings of the load operation.</param>
/// <param name="dataSample">Allows to expose items that can be used for reading.</param>
public static TextLoader CreateTextReader(this DataOperations catalog,
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
public static TextLoader CreateTextLoader(this DataOperations catalog,
TextLoader.Arguments args,
IMultiStreamSource dataSample = null)
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);

/// <summary>
/// Create a text reader <see cref="TextLoader"/> by inferencing the dataset schema from a data model type.
/// Create a text loader <see cref="TextLoader"/> by inferencing the dataset schema from a data model type.
/// </summary>
/// <param name="catalog">The <see cref="DataOperations"/> catalog.</param>
/// <param name="hasHeader">Does the file contains header?</param>
Expand All @@ -51,7 +51,7 @@ public static TextLoader CreateTextReader(this DataOperations catalog,
/// if one of the rows contains "5 2:6 4:3" that means there are 5 columns, all zero
/// except for 3rd and 5th columns which have values 6 and 3</param>
/// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
public static TextLoader CreateTextReader<TInput>(this DataOperations catalog,
public static TextLoader CreateTextLoader<TInput>(this DataOperations catalog,
bool hasHeader = TextLoader.DefaultArguments.HasHeader,
char separatorChar = TextLoader.DefaultArguments.Separator,
bool allowQuotedStrings = TextLoader.DefaultArguments.AllowQuoting,
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.Benchmarks/RffTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public void SetupTrainingSpeedTests()
public void CV_Multiclass_Digits_RffTransform_OVAAveragedPerceptron()
{
var mlContext = new MLContext();
var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments
var reader = mlContext.Data.CreateTextLoader(new TextLoader.Arguments
{
Column = new[]
{
Expand Down
4 changes: 2 additions & 2 deletions test/Microsoft.ML.Predictor.Tests/TestIniModels.cs
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ public TestIniModels(ITestOutputHelper output) : base(output)
public void TestGamRegressionIni()
{
var mlContext = new MLContext(seed: 0);
var idv = mlContext.Data.CreateTextReader(
var idv = mlContext.Data.CreateTextLoader(
new TextLoader.Arguments()
{
HasHeader = false,
Expand Down Expand Up @@ -560,7 +560,7 @@ public void TestGamRegressionIni()
public void TestGamBinaryClassificationIni()
{
var mlContext = new MLContext(seed: 0);
var idv = mlContext.Data.CreateTextReader(
var idv = mlContext.Data.CreateTextLoader(
new TextLoader.Arguments()
{
HasHeader = false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ private void TextFeaturizationOn(string dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextReader(new[]
var reader = mlContext.Data.CreateTextLoader(new[]
{
new TextLoader.Column("IsToxic", DataKind.BL, 0),
new TextLoader.Column("Message", DataKind.TX, 1),
Expand Down Expand Up @@ -316,7 +316,7 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextReader(new[]
var reader = mlContext.Data.CreateTextLoader(new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
// We will load all the categorical features into one vector column of size 8.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public void Evaluation()
var ml = new MLContext(seed: 1, conc: 1);

// Pipeline.
var pipeline = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
.Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features"))
.Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void Extensibility()
var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);

var ml = new MLContext();
var data = ml.Data.CreateTextReader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',')
var data = ml.Data.CreateTextLoader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',')
.Read(dataPath);

Action<IrisData, IrisData> action = (i, j) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ void FileBasedSavingOfData()

var ml = new MLContext(seed: 1, conc: 1);
var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
var trainData = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
var trainData = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
.Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features"))
.Fit(src).Read(src);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public partial class ApiScenariosTests
void Visibility()
{
var ml = new MLContext(seed: 1, conc: 1);
var pipeline = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
.Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features", s => s.OutputTokens = true));

var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public void TrainAndPredictIrisModelTest()
{
var mlContext = new MLContext(seed: 1, conc: 1);

var reader = mlContext.Data.CreateTextReader(columns: new[]
var reader = mlContext.Data.CreateTextLoader(columns: new[]
{
new TextLoader.Column("Label", DataKind.R4, 0),
new TextLoader.Column("SepalLength", DataKind.R4, 1),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest()
{
var mlContext = new MLContext(seed: 1, conc: 1);

var reader = mlContext.Data.CreateTextReader(columns: new[]
var reader = mlContext.Data.CreateTextLoader(columns: new[]
{
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
{
var mlContext = new MLContext(seed: 1, conc: 1);

var reader = mlContext.Data.CreateTextReader(columns: new[]
var reader = mlContext.Data.CreateTextLoader(columns: new[]
{
new TextLoader.Column("Label", DataKind.R4, 0),
new TextLoader.Column("SepalLength", DataKind.R4, 1),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ public void TensorFlowInputsOutputsSchemaTest()
public void TensorFlowTransformMNISTConvTest()
{
var mlContext = new MLContext(seed: 1, conc: 1);
var reader = mlContext.Data.CreateTextReader(
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Label", DataKind.U4 , new [] { new TextLoader.Range(0) }, new KeyRange(0, 9)),
Expand Down Expand Up @@ -262,7 +262,7 @@ public void TensorFlowTransformMNISTLRTrainingTest()
try
{
var mlContext = new MLContext(seed: 1, conc: 1);
var reader = mlContext.Data.CreateTextReader(columns: new[]
var reader = mlContext.Data.CreateTextLoader(columns: new[]
{
new TextLoader.Column("Label", DataKind.I8, 0),
new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) })
Expand Down Expand Up @@ -352,7 +352,7 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS
{
var mlContext = new MLContext(seed: 1, conc: 1);

var reader = mlContext.Data.CreateTextReader(new[]
var reader = mlContext.Data.CreateTextLoader(new[]
{
new TextLoader.Column("Label", DataKind.U4, new []{ new TextLoader.Range(0) }, new KeyRange(0, 9)),
new TextLoader.Column("TfLabel", DataKind.I8, 0),
Expand Down Expand Up @@ -441,7 +441,7 @@ public void TensorFlowTransformMNISTConvSavedModelTest()
// of predicted label of a single in-memory example.

var mlContext = new MLContext(seed: 1, conc: 1);
var reader = mlContext.Data.CreateTextReader(columns: new[]
var reader = mlContext.Data.CreateTextLoader(columns: new[]
{
new TextLoader.Column("Label", DataKind.U4 , new [] { new TextLoader.Range(0) }, new KeyRange(0, 9)),
new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) })
Expand Down
6 changes: 3 additions & 3 deletions test/Microsoft.ML.Tests/TextLoaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ public void LoaderColumnsFromIrisData()
var irisFirstRowValues = irisFirstRow.Values.GetEnumerator();

// Simple load
var dataIris = mlContext.Data.CreateTextReader<Iris>(separatorChar: ',').Read(dataPath);
var dataIris = mlContext.Data.CreateTextLoader<Iris>(separatorChar: ',').Read(dataPath);
var previewIris = dataIris.Preview(1);

Assert.Equal(5, previewIris.ColumnView.Length);
Expand All @@ -736,7 +736,7 @@ public void LoaderColumnsFromIrisData()
Assert.Equal("Iris-setosa", previewIris.RowView[0].Values[index].Value.ToString());

// Load with start and end indexes
var dataIrisStartEnd = mlContext.Data.CreateTextReader<IrisStartEnd>(separatorChar: ',').Read(dataPath);
var dataIrisStartEnd = mlContext.Data.CreateTextLoader<IrisStartEnd>(separatorChar: ',').Read(dataPath);
var previewIrisStartEnd = dataIrisStartEnd.Preview(1);

Assert.Equal(2, previewIrisStartEnd.ColumnView.Length);
Expand All @@ -753,7 +753,7 @@ public void LoaderColumnsFromIrisData()
}

// load setting the distinct columns. Loading column 0 and 2
var dataIrisColumnIndices = mlContext.Data.CreateTextReader<IrisColumnIndices>(separatorChar: ',').Read(dataPath);
var dataIrisColumnIndices = mlContext.Data.CreateTextLoader<IrisColumnIndices>(separatorChar: ',').Read(dataPath);
var previewIrisColumnIndices = dataIrisColumnIndices.Preview(1);

Assert.Equal(2, previewIrisColumnIndices.ColumnView.Length);
Expand Down
4 changes: 2 additions & 2 deletions test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public void TestCustomTransformer()
{
string dataPath = GetDataPath("adult.tiny.with-schema.txt");
var source = new MultiFileSource(dataPath);
var loader = ML.Data.CreateTextReader(new[] {
var loader = ML.Data.CreateTextLoader(new[] {
new TextLoader.Column("Float1", DataKind.R4, 9),
new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) })
}, hasHeader: true);
Expand Down Expand Up @@ -90,7 +90,7 @@ public void TestSchemaPropagation()
{
string dataPath = GetDataPath("adult.test");
var source = new MultiFileSource(dataPath);
var loader = ML.Data.CreateTextReader(new[] {
var loader = ML.Data.CreateTextLoader(new[] {
new TextLoader.Column("Float1", DataKind.R4, 0),
new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
new TextLoader.Column("Text1", DataKind.Text, 0)
Expand Down