-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Internalize IDataLoader #2309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Internalize IDataLoader #2309
Changes from 7 commits
5c39b91
9efd092
8a94acc
5de2fa5
88500f4
66de22e
7e7e257
a875b7b
49fc434
255b344
7dad3a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -126,7 +126,8 @@ public static IDataView LoadTransforms(this IHostEnvironment env, Stream modelSt | |
/// <summary> | ||
/// Creates a data loader from the arguments object. | ||
/// </summary> | ||
public static IDataLoader CreateLoader<TArgs>(this IHostEnvironment env, TArgs arguments, IMultiStreamSource files) | ||
[BestFriend] | ||
internal static IDataLoader CreateLoader<TArgs>(this IHostEnvironment env, TArgs arguments, IMultiStreamSource files) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Thanks Ivan. I wouldn't necessarily do this now, but this entire assembly seems like a lot of stuff we do not want public. But not as part of this PR. #Resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
It's hard to have […]
In reply to: 252375310 [](ancestors = 252375310)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Oh, what I meant was the whole class. But like I said, not part of this PR. |
||
where TArgs : class, new() | ||
{ | ||
Contracts.CheckValue(env, nameof(env)); | ||
|
@@ -137,7 +138,8 @@ public static IDataLoader CreateLoader<TArgs>(this IHostEnvironment env, TArgs a | |
/// <summary> | ||
/// Creates a data loader from the 'LoadName{settings}' string. | ||
/// </summary> | ||
public static IDataLoader CreateLoader(this IHostEnvironment env, string settings, IMultiStreamSource files) | ||
[BestFriend] | ||
internal static IDataLoader CreateLoader(this IHostEnvironment env, string settings, IMultiStreamSource files) | ||
{ | ||
Contracts.CheckValue(env, nameof(env)); | ||
Contracts.CheckValue(files, nameof(files)); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,12 +33,14 @@ namespace Microsoft.ML.Data | |
/// <summary> | ||
/// Delegate signature for a partitioned path parser. | ||
/// </summary> | ||
public delegate void PartitionedPathParser(); | ||
[BestFriend] | ||
internal delegate void PartitionedPathParser(); | ||
|
||
/// <summary> | ||
/// Supports extracting column names and values from a path string. | ||
/// </summary> | ||
public interface IPartitionedPathParser | ||
[BestFriend] | ||
internal interface IPartitionedPathParser | ||
{ | ||
/// <summary> | ||
/// Extract the column definitions from a file path. | ||
|
@@ -58,12 +60,13 @@ public interface IPartitionedPathParser | |
} | ||
|
||
[TlcModule.ComponentKind("PartitionedPathParser")] | ||
public interface IPartitionedPathParserFactory : IComponentFactory<IPartitionedPathParser> | ||
[BestFriend] | ||
internal interface IPartitionedPathParserFactory : IComponentFactory<IPartitionedPathParser> | ||
{ | ||
new IPartitionedPathParser CreateComponent(IHostEnvironment env); | ||
} | ||
|
||
public sealed class SimplePartitionedPathParser : IPartitionedPathParser, ICanSaveModel | ||
internal sealed class SimplePartitionedPathParser : IPartitionedPathParser, ICanSaveModel | ||
{ | ||
internal const string Summary = "A simple parser that extracts directory names as column values. Column names are defined as arguments."; | ||
internal const string UserName = "Simple Partitioned Path Parser"; | ||
|
@@ -193,12 +196,13 @@ public IEnumerable<string> ParseValues(string path) | |
|
||
[TlcModule.Component(Name = ParquetPartitionedPathParser.LoadName, FriendlyName = ParquetPartitionedPathParser.UserName, | ||
Desc = ParquetPartitionedPathParser.Summary, Alias = ParquetPartitionedPathParser.ShortName)] | ||
public class ParquetPartitionedPathParserFactory : IPartitionedPathParserFactory | ||
internal class ParquetPartitionedPathParserFactory : IPartitionedPathParserFactory | ||
{ | ||
public IPartitionedPathParser CreateComponent(IHostEnvironment env) => new ParquetPartitionedPathParser(); | ||
} | ||
|
||
public sealed class ParquetPartitionedPathParser : IPartitionedPathParser, ICanSaveModel | ||
[BestFriend] | ||
internal sealed class ParquetPartitionedPathParser : IPartitionedPathParser, ICanSaveModel | ||
{ | ||
internal const string Summary = "Extract name/value pairs from Parquet formatted directory names. Example path: Year=2018/Month=12/data1.parquet"; | ||
internal const string UserName = "Parquet Partitioned Path Parser"; | ||
|
@@ -276,8 +280,7 @@ public void Save(ModelSaveContext ctx) | |
ctx.SaveString(sb.ToString()); | ||
}; | ||
} | ||
|
||
public IEnumerable<PartitionedFileLoader.Column> ParseColumns(string path) | ||
IEnumerable<PartitionedFileLoader.Column> IPartitionedPathParser.ParseColumns(string path) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Why this change if the outer class is internal? Also you didn't change […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I made this change first, and only after that made the whole class internal. Thanks for the catch!
In reply to: 252397204 [](ancestors = 252397204) |
||
{ | ||
if (!TryParseNames(path, out List<string> names)) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System.Runtime.CompilerServices; | ||
using Microsoft.ML; | ||
|
||
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TestFramework" + PublicKey.TestValue)] | ||
[assembly: WantsToBeBestFriends] |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For consistency, I would either make this public and make only specific fields internal, or make the
Arguments
internal in the other transforms too. #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In other places we use
Arguments
classes as EntryPoint arguments. I don't want to get into the business of cleaning up entry points in this PR. For this transform we don't have an entry point, so I prefer to just hide it completely.
In reply to: 252398379 [](ancestors = 252398379)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok sounds good!
In reply to: 252399795 [](ancestors = 252399795,252398379)