diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs index 78c62668ec..8084c31413 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs @@ -28,7 +28,7 @@ public static void Calibration() var mlContext = new MLContext(); // Create a text loader. - var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments() + var reader = mlContext.Data.CreateTextLoader(new TextLoader.Arguments() { Separator = "tab", HasHeader = true, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs index 3af22fba9b..e100a0fea7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs @@ -18,7 +18,7 @@ public static void FeatureContributionCalculationTransform_Regression() // Step 1: Read the data as an IDataView. // First, we define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextReader( + var reader = mlContext.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("MedianHomeValue", DataKind.R4, 0), diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs index aa3d4446bb..fdeaf6f42c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs @@ -30,7 +30,7 @@ public static void FeatureSelectionTransform() // First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from // all the feature columns into entries of a vector of a single column named "Features". - var reader = ml.Data.CreateTextReader( + var reader = ml.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("Label", DataKind.BL, 0), diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs index 812de0fd27..9c8d9d6039 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs @@ -22,7 +22,7 @@ public static void FFM_BinaryClassification() // Step 1: Read the data as an IDataView. // First, we define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextReader( + var reader = mlContext.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("Sentiment", DataKind.BL, 0), diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs index acd08979a2..9b27c88d5c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs @@ -19,7 +19,7 @@ public static void RunExample() // Step 1: Read the data as an IDataView. // First, we define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextReader( + var reader = mlContext.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("MedianHomeValue", DataKind.R4, 0), diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs index ebb4def616..f8f1fab34e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs @@ -19,7 +19,7 @@ public static IDataView GetHousingRegressionIDataView(MLContext mlContext, out s // First, we define the reader: specify the data columns and where to find them in the text file. // The data file is composed of rows of data, with each row having 11 numerical columns // separated by whitespace. - var reader = mlContext.Data.CreateTextReader( + var reader = mlContext.Data.CreateTextLoader( columns: new[] { // Read the first column (indexed by 0) in the data file as an R4 (float) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs index 5f08dee906..1d2b04280c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs @@ -24,7 +24,7 @@ public static void SDCA_BinaryClassification() // Step 1: Read the data as an IDataView. // First, we define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextReader( + var reader = mlContext.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("Sentiment", DataKind.BL, 0), diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 882c6d5998..05800fdec8 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -11,14 +11,14 @@ namespace Microsoft.ML public static class TextLoaderSaverCatalog { /// - /// Create a text reader . + /// Create a text loader . /// /// The catalog. - /// The columns of the schema. + /// Array of columns defining the schema. /// Whether the file has a header. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// The optional location of a data sample. - public static TextLoader CreateTextReader(this DataOperations catalog, + /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. + public static TextLoader CreateTextLoader(this DataOperations catalog, TextLoader.Column[] columns, bool hasHeader = TextLoader.DefaultArguments.HasHeader, char separatorChar = TextLoader.DefaultArguments.Separator, @@ -26,18 +26,18 @@ public static TextLoader CreateTextReader(this DataOperations catalog, => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample); /// - /// Create a text reader . + /// Create a text loader . /// /// The catalog. /// Defines the settings of the load operation. - /// Allows to expose items that can be used for reading. - public static TextLoader CreateTextReader(this DataOperations catalog, + /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. + public static TextLoader CreateTextLoader(this DataOperations catalog, TextLoader.Arguments args, IMultiStreamSource dataSample = null) => new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample); /// - /// Create a text reader by inferencing the dataset schema from a data model type. + /// Create a text loader by inferencing the dataset schema from a data model type. /// /// The catalog. /// Does the file contains header? @@ -51,7 +51,7 @@ public static TextLoader CreateTextReader(this DataOperations catalog, /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero /// except for 3rd and 5th columns which have values 6 and 3 /// Remove trailing whitespace from lines - public static TextLoader CreateTextReader(this DataOperations catalog, + public static TextLoader CreateTextLoader(this DataOperations catalog, bool hasHeader = TextLoader.DefaultArguments.HasHeader, char separatorChar = TextLoader.DefaultArguments.Separator, bool allowQuotedStrings = TextLoader.DefaultArguments.AllowQuoting, diff --git a/test/Microsoft.ML.Benchmarks/RffTransform.cs b/test/Microsoft.ML.Benchmarks/RffTransform.cs index 1b486a1551..7ca9c76c83 100644 --- a/test/Microsoft.ML.Benchmarks/RffTransform.cs +++ b/test/Microsoft.ML.Benchmarks/RffTransform.cs @@ -27,7 +27,7 @@ public void SetupTrainingSpeedTests() public void CV_Multiclass_Digits_RffTransform_OVAAveragedPerceptron() { var mlContext = new MLContext(); - var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments + var reader = mlContext.Data.CreateTextLoader(new TextLoader.Arguments { Column = new[] { diff --git a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs index edb6bd5e86..eab8280104 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs @@ -521,7 +521,7 @@ public TestIniModels(ITestOutputHelper output) : base(output) public void TestGamRegressionIni() { var mlContext = new MLContext(seed: 0); - var idv = mlContext.Data.CreateTextReader( + var idv = mlContext.Data.CreateTextLoader( new TextLoader.Arguments() { HasHeader = false, @@ -560,7 +560,7 @@ public void TestGamRegressionIni() public void TestGamBinaryClassificationIni() { var mlContext = new MLContext(seed: 0); - var idv = mlContext.Data.CreateTextReader( + var idv = mlContext.Data.CreateTextLoader( new TextLoader.Arguments() { HasHeader = false, diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 2d00f36957..2e1ff30ad8 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -249,7 +249,7 @@ private void TextFeaturizationOn(string dataPath) var mlContext = new MLContext(); // Define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextReader(new[] + var reader = mlContext.Data.CreateTextLoader(new[] { new TextLoader.Column("IsToxic", DataKind.BL, 0), new TextLoader.Column("Message", DataKind.TX, 1), @@ -316,7 +316,7 @@ private void CategoricalFeaturizationOn(params string[] dataPath) var mlContext = new MLContext(); // Define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextReader(new[] + var reader = mlContext.Data.CreateTextLoader(new[] { new TextLoader.Column("Label", DataKind.BL, 0), // We will load all the categorical features into one vector column of size 8. diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs index 5d890cf7b8..142e2f96ad 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs @@ -22,7 +22,7 @@ public void Evaluation() var ml = new MLContext(seed: 1, conc: 1); // Pipeline. - var pipeline = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) + var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) .Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features")) .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs index 84bd6691e9..71d05f175f 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs @@ -26,7 +26,7 @@ void Extensibility() var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); var ml = new MLContext(); - var data = ml.Data.CreateTextReader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',') + var data = ml.Data.CreateTextLoader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',') .Read(dataPath); Action action = (i, j) => diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs index afae98455c..b9108239da 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs @@ -25,7 +25,7 @@ void FileBasedSavingOfData() var ml = new MLContext(seed: 1, conc: 1); var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)); - var trainData = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) + var trainData = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) .Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features")) .Fit(src).Read(src); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs index 2ef382e5d5..4d39f774c8 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs @@ -23,7 +23,7 @@ public partial class ApiScenariosTests void Visibility() { var ml = new MLContext(seed: 1, conc: 1); - var pipeline = ml.Data.CreateTextReader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) + var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true) .Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features", s => s.OutputTokens = true)); var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index f3906ca806..1056cdf16e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -17,7 +17,7 @@ public void TrainAndPredictIrisModelTest() { var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader(columns: new[] + var reader = mlContext.Data.CreateTextLoader(columns: new[] { new TextLoader.Column("Label", DataKind.R4, 0), new TextLoader.Column("SepalLength", DataKind.R4, 1), diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index ff38fbebe5..43fca7f4df 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -14,7 +14,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() { var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader(columns: new[] + var reader = mlContext.Data.CreateTextLoader(columns: new[] { new TextLoader.Column("SepalLength", DataKind.R4, 0), new TextLoader.Column("SepalWidth", DataKind.R4, 1), diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index 646eb7b148..4b4326bc12 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -15,7 +15,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest() { var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader(columns: new[] + var reader = mlContext.Data.CreateTextLoader(columns: new[] { new TextLoader.Column("Label", DataKind.R4, 0), new TextLoader.Column("SepalLength", DataKind.R4, 1), diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 4699131680..33b3b5beb6 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -219,7 +219,7 @@ public void TensorFlowInputsOutputsSchemaTest() public void TensorFlowTransformMNISTConvTest() { var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader( + var reader = mlContext.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("Label", DataKind.U4 , new [] { new TextLoader.Range(0) }, new KeyRange(0, 9)), @@ -262,7 +262,7 @@ public void TensorFlowTransformMNISTLRTrainingTest() try { var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader(columns: new[] + var reader = mlContext.Data.CreateTextLoader(columns: new[] { new TextLoader.Column("Label", DataKind.I8, 0), new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) }) @@ -352,7 +352,7 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS { var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader(new[] + var reader = mlContext.Data.CreateTextLoader(new[] { new TextLoader.Column("Label", DataKind.U4, new []{ new TextLoader.Range(0) }, new KeyRange(0, 9)), new TextLoader.Column("TfLabel", DataKind.I8, 0), @@ -441,7 +441,7 @@ public void TensorFlowTransformMNISTConvSavedModelTest() // of predicted label of a single in-memory example. var mlContext = new MLContext(seed: 1, conc: 1); - var reader = mlContext.Data.CreateTextReader(columns: new[] + var reader = mlContext.Data.CreateTextLoader(columns: new[] { new TextLoader.Column("Label", DataKind.U4 , new [] { new TextLoader.Range(0) }, new KeyRange(0, 9)), new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) }) diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index cffc70a22e..5c6b5841de 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -720,7 +720,7 @@ public void LoaderColumnsFromIrisData() var irisFirstRowValues = irisFirstRow.Values.GetEnumerator(); // Simple load - var dataIris = mlContext.Data.CreateTextReader(separatorChar: ',').Read(dataPath); + var dataIris = mlContext.Data.CreateTextLoader(separatorChar: ',').Read(dataPath); var previewIris = dataIris.Preview(1); Assert.Equal(5, previewIris.ColumnView.Length); @@ -736,7 +736,7 @@ public void LoaderColumnsFromIrisData() Assert.Equal("Iris-setosa", previewIris.RowView[0].Values[index].Value.ToString()); // Load with start and end indexes - var dataIrisStartEnd = mlContext.Data.CreateTextReader(separatorChar: ',').Read(dataPath); + var dataIrisStartEnd = mlContext.Data.CreateTextLoader(separatorChar: ',').Read(dataPath); var previewIrisStartEnd = dataIrisStartEnd.Preview(1); Assert.Equal(2, previewIrisStartEnd.ColumnView.Length); @@ -753,7 +753,7 @@ public void LoaderColumnsFromIrisData() } // load setting the distinct columns. Loading column 0 and 2 - var dataIrisColumnIndices = mlContext.Data.CreateTextReader(separatorChar: ',').Read(dataPath); + var dataIrisColumnIndices = mlContext.Data.CreateTextLoader(separatorChar: ',').Read(dataPath); var previewIrisColumnIndices = dataIrisColumnIndices.Preview(1); Assert.Equal(2, previewIrisColumnIndices.ColumnView.Length); diff --git a/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs index 0688a09aea..8c67a109a0 100644 --- a/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs @@ -51,7 +51,7 @@ public void TestCustomTransformer() { string dataPath = GetDataPath("adult.tiny.with-schema.txt"); var source = new MultiFileSource(dataPath); - var loader = ML.Data.CreateTextReader(new[] { + var loader = ML.Data.CreateTextLoader(new[] { new TextLoader.Column("Float1", DataKind.R4, 9), new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) }) }, hasHeader: true); @@ -90,7 +90,7 @@ public void TestSchemaPropagation() { string dataPath = GetDataPath("adult.test"); var source = new MultiFileSource(dataPath); - var loader = ML.Data.CreateTextReader(new[] { + var loader = ML.Data.CreateTextLoader(new[] { new TextLoader.Column("Float1", DataKind.R4, 0), new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }), new TextLoader.Column("Text1", DataKind.Text, 0)