-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Rename IDataLoader, IDataReader and IDataReaderEstimator #2731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Codecov Report
@@ Coverage Diff @@
## master #2731 +/- ##
=======================================
Coverage 71.64% 71.64%
=======================================
Files 807 807
Lines 142337 142337
Branches 16117 16117
=======================================
Hits 101983 101983
+ Misses 35918 35917 -1
- Partials 4436 4437 +1
|
where TLastTransformer : class, ITransformer | ||
{ | ||
/// <summary> | ||
/// The underlying data reader. | ||
/// </summary> | ||
public readonly IDataReader<TSource> Reader; | ||
public readonly IDataLoader<TSource> Reader; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
public readonly IDataLoader<TSource> Reader; | |
public readonly IDataLoader<TSource> Loader; |
Please double-check other public fields and public functions' arguments. #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// <summary> | ||
/// The chain of transformers (possibly empty) that are applied to data upon reading. | ||
/// </summary> | ||
public readonly TransformerChain<TLastTransformer> Transformer; | ||
|
||
public CompositeDataReader(IDataReader<TSource> reader, TransformerChain<TLastTransformer> transformerChain = null) | ||
public CompositeDataReader(IDataLoader<TSource> reader, TransformerChain<TLastTransformer> transformerChain = null) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
public CompositeDataReader(IDataLoader<TSource> reader, TransformerChain<TLastTransformer> transformerChain = null) | |
public CompositeDataReader(IDataLoader<TSource> loader, TransformerChain<TLastTransformer> transformerChain = null) | |
``` #Resolved |
@@ -90,7 +90,7 @@ public static class CompositeDataReader | |||
/// <summary> | |||
/// Save the contents to a stream, as a "model file". | |||
/// </summary> | |||
public static void SaveTo<TSource>(this IDataReader<TSource> reader, IHostEnvironment env, Stream outputStream) | |||
public static void SaveTo<TSource>(this IDataLoader<TSource> reader, IHostEnvironment env, Stream outputStream) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
public static void SaveTo<TSource>(this IDataLoader<TSource> reader, IHostEnvironment env, Stream outputStream) | |
public static void SaveTo<TSource>(this IDataLoader<TSource> loader, IHostEnvironment env, Stream outputStream) | |
``` #Resolved |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -106,7 +106,7 @@ public static void SaveTo<TSource>(this IDataReader<TSource> reader, IHostEnviro | |||
using (var ch = env.Start("Loading pipeline")) | |||
{ | |||
ch.Trace("Loading data reader"); | |||
ModelLoadContext.LoadModel<IDataReader<IMultiStreamSource>, SignatureLoadModel>(env, out var reader, rep, "Reader"); | |||
ModelLoadContext.LoadModel<IDataLoader<IMultiStreamSource>, SignatureLoadModel>(env, out var reader, rep, "Reader"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ModelLoadContext.LoadModel<IDataLoader<IMultiStreamSource>, SignatureLoadModel>(env, out var reader, rep, "Reader"); | |
ModelLoadContext.LoadModel<IDataLoader<IMultiStreamSource>, SignatureLoadModel>(env, out var loader, rep, "Reader"); | |
``` #Resolved |
@@ -106,7 +106,7 @@ public static void SaveTo<TSource>(this IDataReader<TSource> reader, IHostEnviro | |||
using (var ch = env.Start("Loading pipeline")) | |||
{ | |||
ch.Trace("Loading data reader"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ch.Trace("Loading data reader"); | |
ch.Trace("Loading data loader"); | |
``` #Resolved |
@@ -8,13 +8,13 @@ namespace Microsoft.ML.Data | |||
/// An estimator class for composite data reader. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// An estimator class for composite data reader. | |
/// An estimator class for composite data loader. |
So many `reader`s. Is it possible to do a global text replacement? #Resolved
@@ -14,7 +14,7 @@ public static class DataReaderExtensions | |||
/// </summary> | |||
/// <param name="reader">The reader to use.</param> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// <param name="reader">The reader to use.</param> | |
/// <param name="loader">The loader to use.</param> | |
``` #Resolved |
public interface IDataReaderEstimator<in TSource, out TReader> | ||
where TReader : IDataReader<TSource> | ||
public interface IDataLoaderEstimator<in TSource, out TLoader> | ||
where TLoader : IDataLoader<TSource> | ||
{ | ||
// REVIEW: you could consider the transformer to take a different <typeparamref name="TSource"/>, but we don't have such components | ||
// yet, so why complicate matters? | ||
/// <summary> | ||
/// Train and return a data reader. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reader [](start = 36, length = 6)
loader
#Resolved
@@ -225,7 +225,7 @@ internal bool TryFindColumn(string name, out Column column) | |||
/// The 'data reader' takes a certain kind of input and turns it into an <see cref="IDataView"/>. | |||
/// </summary> | |||
/// <typeparam name="TSource">The type of input the reader takes.</typeparam> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reader [](start = 56, length = 6)
loader - I would do a pass over the comments replacing reader with loader. #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -58,7 +58,7 @@ public static DataDebuggerPreview Preview(this ITransformer transformer, IDataVi | |||
/// <param name="reader">The data reader to preview</param> | |||
/// <param name="source">The source to pull the data from</param> | |||
/// <param name="maxRows">Maximum number of rows to pull</param> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change reader to loader? If changed, update the paramref in the comments. #Resolved
@@ -18,7 +18,7 @@ public static class LearningPipelineExtensions | |||
/// Create a new composite reader estimator, by appending another estimator to the end of this data reader estimator. | |||
/// </summary> | |||
public static CompositeReaderEstimator<TSource, TTrans> Append<TSource, TTrans>( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CompositeReaderEstimator [](start = 22, length = 24)
So we now have a DataLoaderEstimator but still have a CompositeReaderEstimator. Should this also become a CompositeLoaderEstimator? And TrivialReaderEstimator->TrivialLoaderEstimator? #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -66,9 +66,9 @@ public static IDataView LoadPipeline(IHostEnvironment env, RepositoryReader rep, | |||
env.CheckValue(files, nameof(files)); | |||
using (var ent = rep.OpenEntry(DirDataLoaderModel, ModelLoadContext.ModelStreamName)) | |||
{ | |||
IDataLoader loader; | |||
ILegacyDataLoader loader; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ILegacyDataLoader [](start = 16, length = 17)
Just curious - will all code that uses the ILegacyDataLoader also be moved to legacy? Or does it need to be updated to use the DataLoader? #Resolved
@@ -33,7 +33,7 @@ namespace Microsoft.ML.Data | |||
/// Loads a parquet file into an IDataView. Supports basic mapping from Parquet input column data types to framework data types. | |||
/// </summary> | |||
[BestFriend] | |||
internal sealed class ParquetLoader : IDataLoader, IDisposable | |||
internal sealed class ParquetLoader : ILegacyDataLoader, IDisposable |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ILegacyDataLoader [](start = 42, length = 17)
Ah! I can't see ParquetLoader going to legacy, so this will need to be updated to DataLoader, right? Are there any issues tracking this work? I would think this could happen post 1.0 but would still be good to track.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will open an issue regarding this. You are right, I wonder if @yaeldekel's work related to #2735 will touch this.
In reply to: 260398136 [](ancestors = 260398136)
@artidoro I approve - but left comments on naming #Resolved |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @artidoro, and also thank you for making sure your commits were structured logically in the natural order of renamings. It made it much easier to review. I feel like we will probably need to do more work in the future to make sure documentation, parameters, and whatnot, are appropriately structured.
d55999f
to
59b4492
Compare
Label: c.LoadBool(0), | ||
Features: c.LoadFloat(1, 9) | ||
), | ||
separator: '\t', hasHeader: true); | ||
|
||
// Then, we use the reader to read the data as an IDataView. | ||
var data = reader.Read(dataFilePath); | ||
// Then, we use the reader to load the data as an IDataView. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reader [](start = 32, length = 6)
loader #Resolved
} | ||
|
||
private static IDataLoader CreateCore(IHost host, IDataLoader srcLoader, | ||
KeyValuePair<string, IComponentFactory<IDataView, IDataTransform>>[] transformArgs) | ||
public IDataView Load(TSource input) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Load [](start = 25, length = 4)
Add a line of comment similar to the IDataLoader.Load that explains this is lazy. #Resolved
@@ -7,14 +7,14 @@ | |||
|
|||
namespace Microsoft.ML | |||
{ | |||
public static class DataReaderExtensions | |||
public static class DataLoaderExtensions | |||
{ | |||
/// <summary> | |||
/// Reads data from one or more file <paramref name="path"/> into an <see cref="IDataView"/>. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reads [](start = 12, length = 5)
Load. #Resolved
{ | ||
/// <summary> | ||
/// Reads data from one or more file <paramref name="path"/> into an <see cref="IDataView"/>. | ||
/// </summary> | ||
/// <param name="reader">The reader to use.</param> | ||
/// <param name="loader">The loader to use.</param> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add one line saying that this is lazy. #Resolved
@@ -12,33 +12,33 @@ namespace Microsoft.ML | |||
public static class BinaryLoaderSaverCatalog | |||
{ | |||
/// <summary> | |||
/// Read a data view from an <see cref="IMultiStreamSource"/> on a binary file using <see cref="BinaryLoader"/>. | |||
/// Load a data view from an <see cref="IMultiStreamSource"/> on a binary file. | |||
/// </summary> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a line explaining that this is lazy. #Resolved
} | ||
|
||
/// <summary> | ||
/// Read a data view from a binary file using <see cref="BinaryLoader"/>. | ||
/// Load a data view from a binary file. | ||
/// </summary> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a line explaining that this is lazy. #Resolved
allowSparse, trimWhitespace, dataSample: dataSample); | ||
|
||
/// <summary> | ||
/// Read a data view from a text file using <see cref="TextLoader"/>. | ||
/// Load a data view from a text file using <see cref="TextLoader"/>. | ||
/// </summary> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a line explaining that this is lazy. #Resolved
} | ||
|
||
/// <summary> | ||
/// Read a data view from a text file using <see cref="TextLoader"/>. | ||
/// Load a data view from a text file using <see cref="TextLoader"/>. | ||
/// </summary> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a line explaining that this is lazy. #Resolved
} | ||
|
||
/// <summary> | ||
/// Read a data view from a text file using <see cref="TextLoader"/>. | ||
/// Load a data view from a text file using <see cref="TextLoader"/>. | ||
/// </summary> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a line explaining that this is lazy. #Resolved
"Composite Data Loader", "CompositeDataLoader", "Composite", "PipeData", "Pipe", "PipeDataLoader")] | ||
|
||
[assembly: LoadableClass(typeof(IDataLoader), typeof(CompositeDataLoader), null, typeof(SignatureLoadDataLoader), | ||
"Pipe DataL Loader", CompositeDataLoader.LoaderSignature)] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why remove the entrypoints? Shouldn't these remain?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see -- this is moved to LegacyCompositeDataLoader.
In reply to: 260564833 [](ancestors = 260564833)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be better to git rename CompositeDataLoader -> LegacyCompositeDataLoader and then add a new file of CompositeDataLoader? The diff looks like you replaced the code in this file and added a LegacyCompositeDataLoader. The rename would help maintain history with the file.
In reply to: 260565188 [](ancestors = 260565188,260564833)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do, thank you for pointing it out and taking another look at this.
In reply to: 260565382 [](ancestors = 260565382,260565188,260564833)
Also need to rename ReadFromEnumerable to LoadFromEnumerable. #Resolved |
59b4492
to
0915702
Compare
0915702
to
ad6374a
Compare
Fixes #2144.
As discussed in the issue, it was agreed that `TextReader`, `BinaryReader` and `IDataReader` were bad names because they overlap with .NET concepts.
In this PR I:
- Rename `IDataLoader` to `ILegacyDataLoader` (commit 1).
- Rename `IDataReader` to `IDataLoader` and `IDataReaderEstimator` to `IDataLoaderEstimator` (commit 2).