diff --git a/src/Microsoft.ML.Core/Utilities/PathUtils.cs b/src/Microsoft.ML.Core/Utilities/PathUtils.cs index e1129bf9c5..74ccec30c0 100644 --- a/src/Microsoft.ML.Core/Utilities/PathUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/PathUtils.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -67,13 +67,17 @@ public static string FindExistentFileOrNull(string fileName, string folderPrefix // 1. Search in customSearchDir. if (!string.IsNullOrWhiteSpace(customSearchDir) && TryFindFile(fileName, folderPrefix, customSearchDir, out candidate)) - return candidate; + { + return candidate; + } // 2. Search in the path specified by the environment variable. var envDir = Environment.GetEnvironmentVariable(CustomSearchDirEnvVariable); if (!string.IsNullOrWhiteSpace(envDir) && TryFindFile(fileName, folderPrefix, envDir, out candidate)) - return candidate; + { + return candidate; + } // 3. Search in the path specified by the assemblyForBasePath. if (assemblyForBasePath != null) diff --git a/src/Microsoft.ML.Data/Commands/DataCommand.cs b/src/Microsoft.ML.Data/Commands/DataCommand.cs index a68dd2022a..083695825a 100644 --- a/src/Microsoft.ML.Data/Commands/DataCommand.cs +++ b/src/Microsoft.ML.Data/Commands/DataCommand.cs @@ -396,6 +396,20 @@ public static void SaveLoader(IDataLoader loader, IFileHandle file) Contracts.CheckParam(file.CanWrite, nameof(file), "Must be writable"); using (var stream = file.CreateWriteStream()) + { + SaveLoader(loader, stream); + } + } + + /// + /// Saves to the specified . 
+ /// + public static void SaveLoader(IDataLoader loader, Stream stream) + { + Contracts.CheckValue(loader, nameof(loader)); + Contracts.CheckValue(stream, nameof(stream)); + Contracts.CheckParam(stream.CanWrite, nameof(stream), "Must be writable"); + using (var rep = RepositoryWriter.CreateNew(stream)) { ModelSaveContext.SaveModel(rep, loader, ModelFileUtils.DirDataLoaderModel); diff --git a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs new file mode 100644 index 0000000000..69eb3bbb3b --- /dev/null +++ b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs @@ -0,0 +1,755 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Data.Conversion; +using Microsoft.ML.Runtime.Data.IO; +using Microsoft.ML.Runtime.Data.Utilities; +using Microsoft.ML.Runtime.Internal.Utilities; +using Microsoft.ML.Runtime.Model; + +[assembly: LoadableClass(PartitionedFileLoader.Summary, typeof(PartitionedFileLoader), typeof(PartitionedFileLoader.Arguments), typeof(SignatureDataLoader), + PartitionedFileLoader.UserName, PartitionedFileLoader.LoadName, PartitionedFileLoader.ShortName)] + +[assembly: LoadableClass(PartitionedFileLoader.Summary, typeof(PartitionedFileLoader), null, typeof(SignatureLoadDataLoader), + PartitionedFileLoader.UserName, PartitionedFileLoader.LoadName, PartitionedFileLoader.ShortName)] + +namespace Microsoft.ML.Runtime.Data +{ + /// + /// Loads a set of directory partitioned files into an IDataView. 
+ /// The directories of the file will treated as column data and the underlying files are loaded using the data loader. + /// The first file will be used as the basis for all follow-up file paths and schemas. Any files that don't match + /// the expected path or schema will be skipped. + /// + /// + /// Sample directory structure: + /// + /// Data/ + /// Year=2017/ + /// Month=01/ + /// data1.parquet + /// data1.parquet + /// Month=02/ + /// data1.parquet + /// data1.parquet + /// Year=2018/ + /// Month=01/ + /// data1.parquet + /// data1.parquet + /// + public sealed class PartitionedFileLoader : IDataLoader + { + internal const string Summary = "Loads a horizontally partitioned file set."; + internal const string UserName = "Partitioned Loader"; + public const string LoadName = "PartitionedLoader"; + public const string ShortName = "Part"; + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "PARTLOAD", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoadName); + } + + public class Arguments + { + [Argument(ArgumentType.Required, HelpText = "Base path to the directory of your partitioned files.", ShortName = "bp")] + public string BasePath; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Append a column with the file path.", ShortName = "path")] + public bool IncludePathColumn = false; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Path parser to extract column name/value pairs from the file path.", ShortName = "parser")] + public IPartitionedPathParserFactory PathParserFactory = new ParquetPartitionedPathParserFactory(); + + [Argument(ArgumentType.Multiple, HelpText = "The data loader.")] + public SubComponent Loader; + } + + public sealed class Column + { + [Argument(ArgumentType.Required, HelpText = "Name of the column.")] + public string Name; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Data type of the column.")] + public 
DataKind? Type; + + [Argument(ArgumentType.Required, HelpText = "Index of the directory representing this column.")] + public int Source; + + public static Column Parse(string str) + { + Contracts.AssertNonEmpty(str); + + if (TryParse(str, out Column column)) + { + return column; + } + + return null; + } + + public static bool TryParse(string str, out Column column) + { + column = null; + + if (string.IsNullOrEmpty(str)) + { + return false; + } + + if (!ColumnParsingUtils.TryParse(str, out string name, out string sourceStr, out string kindStr)) + { + return false; + } + + DataKind? kind = null; + if (kindStr != null && TypeParsingUtils.TryParseDataKind(kindStr, out DataKind parsedKind, out KeyRange range)) + { + kind = parsedKind; + } + + if (!int.TryParse(sourceStr, out int source)) + { + return false; + } + + column = new Column() + { + Name = name, + Source = source, + Type = kind + }; + + return true; + } + + public bool TryUnparse(StringBuilder sb) + { + Contracts.AssertValue(sb); + + sb.Append($"{Name}"); + + if (Type.HasValue) + { + sb.Append($":{Type}"); + } + + sb.Append($":{Source}"); + + return true; + } + } + + private readonly IHost _host; + private readonly IMultiStreamSource _files; + private readonly int[] _srcDirIndex; + private readonly byte[] _subLoaderBytes; + + // Number of tailing directories to include. 
+ private readonly int _tailingDirCount; + + private readonly IPartitionedPathParser _pathParser; + + private const string RegistrationName = LoadName; + private const string FilePathSpecCtxName = "FilePathSpec"; + private const string SchemaCtxName = "Schema.idv"; + private const int FilePathColIndex = -1; + + public PartitionedFileLoader(IHostEnvironment env, Arguments args, IMultiStreamSource files) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(RegistrationName); + _host.CheckValue(args, nameof(args)); + _host.CheckValue(files, nameof(files)); + + _pathParser = args.PathParserFactory.CreateComponent(_host); + _host.CheckUserArg(_pathParser != null, nameof(args.PathParserFactory), "Failed to create the FilePathSpec."); + + _files = files; + + var subLoader = args.Loader.CreateInstance(_host, _files); + _subLoaderBytes = SaveLoaderToBytes(subLoader); + + string relativePath = GetRelativePath(args.BasePath, files); + var columns = ParseColumns(relativePath).ToArray(); + _tailingDirCount = GetDirectoryCount(relativePath); + + if (args.IncludePathColumn) + { + var pathCol = new Column() + { + Name = "Path", + Source = FilePathColIndex, + Type = DataKind.Text + }; + + columns = columns.Concat(new[] { pathCol }).ToArray(); + } + + _srcDirIndex = columns.Select(c => c.Source).ToArray(); + Schema = CreateSchema(_host, columns, subLoader); + } + + private PartitionedFileLoader(IHost host, ModelLoadContext ctx, IMultiStreamSource files) + { + Contracts.AssertValue(host); + _host = host; + _host.AssertValue(ctx); + _host.AssertValue(files); + + // ** Binary format ** + // int: tailing directory count + // Schema of the loader + // int[]: srcColumns + // byte[]: subloader + // model: file path spec + + _tailingDirCount = ctx.Reader.ReadInt32(); + + // Load the schema + byte[] buffer = null; + if (!ctx.TryLoadBinaryStream(SchemaCtxName, r => buffer = r.ReadByteArray())) + throw _host.ExceptDecode(); + BinaryLoader loader = null; + var strm = new 
MemoryStream(buffer, writable: false); + loader = new BinaryLoader(_host, new BinaryLoader.Arguments(), strm); + Schema = loader.Schema; + + _srcDirIndex = ctx.Reader.ReadIntArray(); + _subLoaderBytes = ctx.Reader.ReadByteArray(); + + ctx.LoadModel(_host, out _pathParser, FilePathSpecCtxName); + + _files = files; + } + + public static PartitionedFileLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files) + { + Contracts.CheckValue(env, nameof(env)); + IHost host = env.Register(RegistrationName); + + env.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + env.CheckValue(files, nameof(files)); + + return host.Apply("Loading Model", + ch => new PartitionedFileLoader(host, ctx, files)); + } + + public void Save(ModelSaveContext ctx) + { + Contracts.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + + // ** Binary format ** + // int: tailing directory count + // Schema of the loader + // int[]: srcColumns + // byte[]: subloader + // model: file path spec + + ctx.Writer.Write(_tailingDirCount); + + // Save the schema + var noRows = new EmptyDataView(_host, Schema); + var saverArgs = new BinarySaver.Arguments(); + saverArgs.Silent = true; + var saver = new BinarySaver(_host, saverArgs); + using (var strm = new MemoryStream()) + { + var allColumns = Enumerable.Range(0, Schema.ColumnCount).ToArray(); + saver.SaveData(strm, noRows, allColumns); + ctx.SaveBinaryStream(SchemaCtxName, w => w.WriteByteArray(strm.ToArray())); + } + ctx.Writer.WriteIntArray(_srcDirIndex); + + ctx.Writer.WriteByteArray(_subLoaderBytes); + ctx.SaveModel(_pathParser, FilePathSpecCtxName); + } + + public bool CanShuffle => true; + + public ISchema Schema { get; } + + public long? 
GetRowCount(bool lazy = true) + { + return null; + } + + public IRowCursor GetRowCursor(Func needCol, IRandom rand = null) + { + return new Cursor(_host, this, _files, needCol, rand); + } + + public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func needCol, int n, IRandom rand = null) + { + consolidator = null; + var cursor = new Cursor(_host, this, _files, needCol, rand); + return new IRowCursor[] { cursor }; + } + + /// + /// Create a composite schema of both the partitioned columns and the underlying loader columns. + /// + /// The exception context. + /// The partitioned columns. + /// The sub loader. + /// The resulting schema. + private ISchema CreateSchema(IExceptionContext ectx, Column[] cols, IDataLoader subLoader) + { + Contracts.AssertValue(cols); + Contracts.AssertValue(subLoader); + + var columnNameTypes = cols.Select((col) => new KeyValuePair(col.Name, PrimitiveType.FromKind(col.Type.Value))); + var colSchema = new SimpleSchema(ectx, columnNameTypes.ToArray()); + + var subSchema = subLoader.Schema; + + if (subSchema.ColumnCount == 0) + { + return colSchema; + } + else + { + var schemas = new ISchema[] + { + subSchema, + colSchema + }; + + return new CompositeSchema(schemas); + } + } + + private byte [] SaveLoaderToBytes(IDataLoader loader) + { + Contracts.CheckValue(loader, nameof(loader)); + + using (var stream = new MemoryStream()) + { + LoaderUtils.SaveLoader(loader, stream); + return stream.GetBuffer(); + } + } + + private IDataLoader CreateLoaderFromBytes(byte [] loaderBytes, IMultiStreamSource files) + { + Contracts.CheckValue(loaderBytes, nameof(loaderBytes)); + Contracts.CheckValue(files, nameof(files)); + + using (var stream = new MemoryStream(loaderBytes)) + using (var rep = RepositoryReader.Open(stream, _host)) + { + return ModelFileUtils.LoadLoader(_host, rep, files, false); + } + } + + private sealed class Cursor : RootCursorBase, IRowCursor + { + private PartitionedFileLoader _parent; + + private bool[] _active; 
+ private bool[] _subActive; // Active columns of the sub-cursor. + private Delegate[] _getters; + private Delegate[] _subGetters; // Cached getters of the sub-cursor. + + private DvText[] _colValues; // Column values cached from the file path. + private IRowCursor _subCursor; // Sub cursor of the current file. + + private IEnumerator _fileOrder; + + public Cursor(IChannelProvider provider, PartitionedFileLoader parent, IMultiStreamSource files, Func predicate, IRandom rand) + : base(provider) + { + Contracts.AssertValue(parent); + Contracts.AssertValue(files); + Contracts.AssertValue(predicate); + + _parent = parent; + + _active = Utils.BuildArray(Schema.ColumnCount, predicate); + _subActive = _active.Take(SubColumnCount).ToArray(); + _colValues = new DvText[Schema.ColumnCount - SubColumnCount]; + + _subGetters = new Delegate[SubColumnCount]; + _getters = CreateGetters(); + + _fileOrder = CreateFileOrder(rand).GetEnumerator(); + } + + public override long Batch => 0; + + public ISchema Schema => _parent.Schema; + + public ValueGetter GetGetter(int col) + { + Ch.Check(IsColumnActive(col)); + + var getter = _getters[col] as ValueGetter; + if (getter == null) + { + throw Ch.Except("Invalid TValue: '{0}'", typeof(TValue)); + } + + return getter; + } + + public override ValueGetter GetIdGetter() + { + return + (ref UInt128 val) => + { + Ch.Check(IsGood, "Cannot call ID getter in current state"); + + val = new UInt128(0, (ulong)Position); + }; + } + + public bool IsColumnActive(int col) + { + Ch.Check(0 <= col && col < Schema.ColumnCount); + return _active[col]; + } + + protected override bool MoveNextCore() + { + // Iterate sub cursor or move to the next file. 
+ while (_subCursor == null || !_subCursor.MoveNext()) + { + // Cleanup old sub cursor + if (_subCursor != null) + { + _subCursor.Dispose(); + _subCursor = null; + } + + if (!TryGetNextPathAndValues(out string path, out string relativePath, out List values)) + { + return false; + } + + IDataLoader loader = null; + try + { + // Load the sub cursor and reset the data. + loader = _parent.CreateLoaderFromBytes(_parent._subLoaderBytes, new MultiFileSource(path)); + } + catch (Exception e) + { + Ch.Warning($"Failed to load file {path} due to a loader exception. Moving on to the next file. Ex: {e.Message}"); + continue; + } + + _subCursor = loader.GetRowCursor(col => _subActive[col]); + + try + { + UpdateSubGetters(); + UpdateColumnValues(relativePath, values); + } + catch (InvalidOperationException e) + { + // Failed to load this file so skip. + Ch.Warning(MessageSensitivity.Schema, e.Message); + if (_subCursor != null) + { + _subCursor.Dispose(); + _subCursor = null; + } + } + } + + return true; + } + + private bool TryGetNextPathAndValues(out string path, out string relativePath, out List values) + { + path = null; + relativePath = null; + values = null; + + do + { + // No more files to load. + if (!_fileOrder.MoveNext()) + { + return false; + } + + // Get next file and parse the column values from the file path. + string curPath = _parent._files.GetPathOrNull(_fileOrder.Current); + if (String.IsNullOrEmpty(curPath)) + { + Ch.Warning($"File at index {_fileOrder.Current} is missing a path. Loading of file is being skipped."); + continue; + } + + if (!TryTruncatePath(_parent._tailingDirCount, curPath, out relativePath)) + { + continue; + } + + if (!TryParseValuesFromPath(relativePath, out values)) + { + continue; + } + + path = curPath; + + } while (String.IsNullOrEmpty(path)); + + return true; + } + + private void UpdateSubGetters() + { + // Reset getters for the subcursor. 
+ for (int i = 0; i < SubColumnCount; i++) + { + if (_subActive[i]) + { + var type = _subCursor.Schema.GetColumnType(i); + _subGetters[i] = MarshalGetter(_subCursor.GetGetter, type.RawType, i); + } + } + } + + private void UpdateColumnValues(string path, List values) + { + // Cache the column values for future Getter calls. + for (int i = 0; i < _colValues.Length; i++) + { + var source = _parent._srcDirIndex[i]; + if (source >= 0 && source < values.Count) + { + _colValues[i] = new DvText(values[source]); + } + else if (source == FilePathColIndex) + { + // Force Unix path for consistency. + var cleanPath = path.Replace(@"\", @"/"); + _colValues[i] = new DvText(cleanPath); + } + } + } + + private Delegate[] CreateGetters() + { + Delegate[] getters = new Delegate[Schema.ColumnCount]; + for (int i = 0; i < getters.Length; i++) + { + if (!_active[i]) + { + continue; + } + + var type = Schema.GetColumnType(i); + + // Use sub-cursor for all sub-columns. + if (IsSubColumn(i)) + { + getters[i] = Utils.MarshalInvoke(CreateSubGetterDelegateCore, type.RawType, i); + } + else + { + int idx = i - SubColumnCount; + getters[i] = Utils.MarshalInvoke(CreateGetterDelegateCore, type.RawType, idx, type); + } + } + + return getters; + } + + private Delegate CreateSubGetterDelegateCore(int col) + { + return (Delegate)SubGetterDelegateCore(col); + } + + private ValueGetter SubGetterDelegateCore(int col) + { + Ch.Check(col >= 0 && col < SubColumnCount); + + return (ref TValue value) => + { + // SubCursor may change so always requery the getter. 
+ ValueGetter getter = _subGetters[col] as ValueGetter; + getter?.Invoke(ref value); + }; + } + + private Delegate CreateGetterDelegateCore(int col, ColumnType type) + { + return (Delegate)GetterDelegateCore(col, type); + } + + private ValueGetter GetterDelegateCore(int col, ColumnType type) + { + Ch.Check(col >= 0 && col < _colValues.Length); + Ch.AssertValue(type); + + var conv = Conversions.Instance.GetStandardConversion(TextType.Instance, type) as ValueMapper; + if (conv == null) + { + throw Ch.Except("Invalid TValue: '{0}' of the conversion.", typeof(TValue)); + } + + return (ref TValue value) => + { + conv(ref _colValues[col], ref value); + }; + } + + private bool IsSubColumn(int col) + { + return col < SubColumnCount; + } + + private int SubColumnCount => Schema.ColumnCount - _parent._srcDirIndex.Length; + + private IEnumerable CreateFileOrder(IRandom rand) + { + if (rand == null) + { + return Enumerable.Range(0, _parent._files.Count); + } + else + { + return Utils.GetRandomPermutation(rand, _parent._files.Count); + } + } + + private bool SchemasMatch(ISchema schema1, ISchema schema2) + { + if (schema1.ColumnCount != schema2.ColumnCount) + { + return false; + } + + int colLim = schema1.ColumnCount; + for (int col = 0; col < colLim; col++) + { + var type1 = schema1.GetColumnType(col); + var type2 = schema2.GetColumnType(col); + if (!type1.Equals(type2)) + { + return false; + } + } + + return true; + } + + private Delegate MarshalGetter(Func> func, Type type, int col) + { + var returnType = typeof(ValueGetter<>).MakeGenericType(type); + var meth = func.Method; + + var typedMeth = meth.GetGenericMethodDefinition().MakeGenericMethod(type); + return (Delegate)typedMeth.Invoke(func.Target, new object[] { col }); + } + + /// + /// Truncate path to the specified number of trailing directories. + /// + /// Number of directories to retain. + /// Path to truncate. + /// The resulting truncated path. + /// true if the truncation was successful. 
+ private bool TryTruncatePath(int dirCount, string path, out string truncPath) + { + truncPath = null; + + // Remove directories that shouldn't be parsed. + var segments = PartitionedPathUtils.SplitDirectories(path); + segments = segments.Skip(segments.Count() - dirCount - 1); + + if (segments.Count() < dirCount - 1) + { + Ch.Warning($"Path {path} did not have {dirCount} directories necessary for parsing."); + return false; + } + + // Rejoin segments to create a valid path. + truncPath = String.Join(Path.DirectorySeparatorChar.ToString(), segments); + return true; + } + + + /// + /// Parse all column values from the directory path. + /// + /// The directory path to parse for name/value pairs. + /// The resulting name value pairs. + /// true if the parsing was successfull. + private bool TryParseValuesFromPath(string path, out List results) + { + Contracts.CheckNonWhiteSpace(path, nameof(path)); + + results = null; + + try + { + results = _parent._pathParser.ParseValues(path).ToList(); + return true; + } + catch (InvalidOperationException e) + { + Ch.Warning($"Could not parse column values from the path {path}. Ex: {e.Message}"); + results = null; + return false; + } + } + } + + /// + /// Get a path relative to the base path. + /// + /// A base path. + /// A list of files under the base path. + /// A realtive file path. + private string GetRelativePath(string basepath, IMultiStreamSource files) + { + Contracts.CheckNonEmpty(basepath, nameof(basepath)); + + string path = files.GetPathOrNull(0); + _host.CheckNonEmpty(path, nameof(path)); + + var relativePath = PartitionedPathUtils.MakePathRelative(basepath, path); + return relativePath; + } + + /// + /// Parse the column definitions using a path parser. + /// + /// The path to a file. + /// The resulting Columns. + private IEnumerable ParseColumns(string path) + { + return _pathParser.ParseColumns(path).ToArray(); + } + + /// + /// Get the number of directories in the file path. + /// + /// A file path. 
+ /// The number of directories + private int GetDirectoryCount(string path) + { + return PartitionedPathUtils.SplitDirectories(path).Count() - 1; + } + } +} diff --git a/src/Microsoft.ML.Data/DataLoadSave/PartitionedPathParser.cs b/src/Microsoft.ML.Data/DataLoadSave/PartitionedPathParser.cs new file mode 100644 index 0000000000..ca3aa075ab --- /dev/null +++ b/src/Microsoft.ML.Data/DataLoadSave/PartitionedPathParser.cs @@ -0,0 +1,385 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Web; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Data.Utilities; +using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Runtime.Model; + +[assembly: LoadableClass(SimplePartitionedPathParser.Summary, typeof(SimplePartitionedPathParser), typeof(SimplePartitionedPathParser.Arguments), typeof(PartitionedPathParser), + SimplePartitionedPathParser.UserName, SimplePartitionedPathParser.LoadName, SimplePartitionedPathParser.ShortName)] +[assembly: LoadableClass(ParquetPartitionedPathParser.Summary, typeof(ParquetPartitionedPathParser), null, typeof(PartitionedPathParser), + ParquetPartitionedPathParser.UserName, ParquetPartitionedPathParser.LoadName, ParquetPartitionedPathParser.ShortName)] + +// This is for deserialization +[assembly: LoadableClass(SimplePartitionedPathParser.Summary, typeof(SimplePartitionedPathParser), null, typeof(SignatureLoadModel), + SimplePartitionedPathParser.UserName, SimplePartitionedPathParser.LoadName, SimplePartitionedPathParser.ShortName)] +[assembly: LoadableClass(ParquetPartitionedPathParser.Summary, typeof(ParquetPartitionedPathParser), null, typeof(SignatureLoadModel), + 
ParquetPartitionedPathParser.UserName, ParquetPartitionedPathParser.LoadName, ParquetPartitionedPathParser.ShortName)] + +[assembly: EntryPointModule(typeof(SimplePartitionedPathParser.Arguments))] +[assembly: EntryPointModule(typeof(ParquetPartitionedPathParserFactory))] + +namespace Microsoft.ML.Runtime.Data +{ + /// + /// Delegate signature for a partitioned path parser. + /// + public delegate void PartitionedPathParser(); + + /// + /// Supports extracting column names and values from a path string. + /// + public interface IPartitionedPathParser + { + /// + /// Extract the column definitions from a file path. + /// + /// The file path. + /// The resulting column definitions. + /// Thrown when parsing fails. + IEnumerable ParseColumns(string path); + + /// + /// Extract the column values from a file path. + /// + /// The file path. + /// The resulting column values. + /// Thrown when parsing fails. + IEnumerable ParseValues(string path); + } + + [TlcModule.ComponentKind("PartitionedPathParser")] + public interface IPartitionedPathParserFactory : IComponentFactory + { + new IPartitionedPathParser CreateComponent(IHostEnvironment env); + } + + public sealed class SimplePartitionedPathParser : IPartitionedPathParser, ICanSaveModel + { + internal const string Summary = "A simple parser that extracts directory names as column values. Column names are defined as arguments."; + internal const string UserName = "Simple Partitioned Path Parser"; + public const string LoadName = "SimplePathParser"; + public const string ShortName = "SmplPP"; + + [TlcModule.Component(Name = SimplePartitionedPathParser.LoadName, FriendlyName = SimplePartitionedPathParser.UserName, + Desc = SimplePartitionedPathParser.Summary, Alias = SimplePartitionedPathParser.ShortName)] + public class Arguments : IPartitionedPathParserFactory + { + [Argument(ArgumentType.Multiple, HelpText = "Column definitions used to override the Partitioned Path Parser. 
Expected with the format name:type:numeric-source, e.g. col=MyFeature:R4:1", + ShortName = "col", SortOrder = 1)] + public Microsoft.ML.Runtime.Data.PartitionedFileLoader.Column[] Columns; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Data type of each column.")] + public DataKind Type = DataKind.Text; + + public IPartitionedPathParser CreateComponent(IHostEnvironment env) => new SimplePartitionedPathParser(env, this); + } + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "SMPLPARS", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoadName); + } + + private IHost _host; + private PartitionedFileLoader.Column[] _columns; + + public SimplePartitionedPathParser(IHostEnvironment env, Arguments args) + { + _host = env.Register(LoadName); + + _columns = args.Columns; + foreach (var col in _columns) + { + if (!col.Type.HasValue) + { + col.Type = args.Type; + } + } + } + + private SimplePartitionedPathParser(IHost host, ModelLoadContext ctx) + { + Contracts.AssertValue(host); + _host = host; + _host.AssertValue(ctx); + + // ** Binary format ** + // int: number of columns + // foreach column: + // string: column representation + + int numColumns = ctx.Reader.ReadInt32(); + _host.CheckDecode(numColumns >= 0); + + _columns = new PartitionedFileLoader.Column[numColumns]; + for (int i = 0; i < numColumns; i++) + { + var column = PartitionedFileLoader.Column.Parse(ctx.LoadString()); + _host.CheckDecode(column != null); + _columns[i] = column; + } + } + + public static SimplePartitionedPathParser Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(ctx, nameof(ctx)); + IHost host = env.Register(LoadName); + ctx.CheckAtModel(GetVersionInfo()); + + return host.Apply("Loading Parser", + ch => new SimplePartitionedPathParser(host, ctx)); + } + + public void Save(ModelSaveContext ctx) + { + Contracts.CheckValue(ctx, nameof(ctx)); + 
ctx.SetVersionInfo(GetVersionInfo()); + + // ** Binary format ** + // int: number of columns + // foreach column: + // string: column representation + + ctx.Writer.Write(_columns.Length); + StringBuilder sb = new StringBuilder(); + foreach (var col in _columns) + { + sb.Clear(); + _host.Check(col.TryUnparse(sb)); + ctx.SaveString(sb.ToString()); + } + } + + public IEnumerable ParseColumns(string path) + { + Contracts.AssertNonEmpty(path); + + // Verify that path matches the columns expectations. + var values = ParseValues(path); + foreach (var col in _columns) + { + if (col.Source < 0 || col.Source >= values.Count()) + { + throw Contracts.Except($"Column definition {col} is outside the bounds of path {path}."); + } + } + + return _columns; + } + + public IEnumerable ParseValues(string path) + { + Contracts.AssertNonEmpty(path); + + var dirs = PartitionedPathUtils.SplitDirectories(path); + return dirs.Take(dirs.Count() - 1); // Ignore last directory which is the file name. + } + } + + [TlcModule.Component(Name = ParquetPartitionedPathParser.LoadName, FriendlyName = ParquetPartitionedPathParser.UserName, + Desc = ParquetPartitionedPathParser.Summary, Alias = ParquetPartitionedPathParser.ShortName)] + public class ParquetPartitionedPathParserFactory : IPartitionedPathParserFactory + { + public IPartitionedPathParser CreateComponent(IHostEnvironment env) => new ParquetPartitionedPathParser(); + } + + public sealed class ParquetPartitionedPathParser : IPartitionedPathParser, ICanSaveModel + { + internal const string Summary = "Extract name/value pairs from Parquet formatted directory names. 
Example path: Year=2018/Month=12/data1.parquet"; + internal const string UserName = "Parquet Partitioned Path Parser"; + public const string LoadName = "ParquetPathParser"; + public const string ShortName = "ParqPP"; + + private IHost _host; + private PartitionedFileLoader.Column[] _columns; + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "PARQPARS", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoadName); + } + + public ParquetPartitionedPathParser() + { + _columns = new PartitionedFileLoader.Column[0]; + } + + private ParquetPartitionedPathParser(IHost host, ModelLoadContext ctx) + { + Contracts.AssertValue(host); + _host = host; + _host.AssertValue(ctx); + + // ** Binary format ** + // int: number of columns + // foreach column: + // string: column representation + + int numColumns = ctx.Reader.ReadInt32(); + _host.CheckDecode(numColumns >= 0); + + _columns = new PartitionedFileLoader.Column[numColumns]; + for (int i = 0; i < numColumns; i++) + { + var column = PartitionedFileLoader.Column.Parse(ctx.LoadString()); + _host.CheckDecode(column != null); + _columns[i] = column; + } + } + + public static ParquetPartitionedPathParser Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(ctx, nameof(ctx)); + IHost host = env.Register(LoadName); + ctx.CheckAtModel(GetVersionInfo()); + + return host.Apply("Loading Parser", + ch => new ParquetPartitionedPathParser(host, ctx)); + } + + public void Save(ModelSaveContext ctx) + { + Contracts.CheckValue(ctx, nameof(ctx)); + ctx.SetVersionInfo(GetVersionInfo()); + + // ** Binary format ** + // int: number of columns + // foreach column: + // string: column representation + + ctx.Writer.Write(_columns.Length); + StringBuilder sb = new StringBuilder(); + foreach (var col in _columns) + { + sb.Clear(); + _host.Check(col.TryUnparse(sb)); + ctx.SaveString(sb.ToString()); + }; + } + + 
+        /// <summary>
+        /// Rebuilds the parser's column definitions from the <c>Name=Value</c> directories of
+        /// <paramref name="path"/>: one text column per directory, with <c>Source</c> set to the
+        /// directory's position.
+        /// </summary>
+        /// <param name="path">The path to extract column names from.</param>
+        /// <returns>The newly created column definitions.</returns>
+        public IEnumerable<PartitionedFileLoader.Column> ParseColumns(string path)
+        {
+            if (!TryParseNames(path, out List<string> names))
+            {
+                throw Contracts.Except($"Failed to parse names from path {path}. Expected directory names with the format 'Name=Value'.");
+            }
+
+            _columns = new PartitionedFileLoader.Column[names.Count];
+            for (int i = 0; i < names.Count; i++)
+            {
+                _columns[i] = new PartitionedFileLoader.Column()
+                {
+                    Name = names[i],
+                    Source = i,
+                    Type = DataKind.Text
+                };
+            }
+
+            return _columns;
+        }
+
+        /// <summary>
+        /// Extracts the values from the <c>Name=Value</c> directories of <paramref name="path"/>.
+        /// </summary>
+        /// <param name="path">The path to extract values from.</param>
+        /// <returns>The extracted values, one per current column.</returns>
+        /// <remarks>
+        /// Throws (via <c>Contracts.Except</c>) when the path cannot be parsed or when the number of
+        /// values differs from the current number of columns.
+        /// </remarks>
+        public IEnumerable<string> ParseValues(string path)
+        {
+            if (!TryParseValues(path, out List<string> values))
+            {
+                throw Contracts.Except($"Failed to parse values from path {path}. Expected directory names with the format 'Name=Value'.");
+            }
+
+            if (values.Count != _columns.Length)
+            {
+                throw Contracts.Except($"The extracted value count of {values.Count} does not match the expected Column count of {_columns.Length} for path {path}");
+            }
+
+            return values;
+        }
+
+        /// <summary>
+        /// Tries to extract only the names from the directories of <paramref name="path"/>.
+        /// </summary>
+        public bool TryParseNames(string path, out List<string> names)
+        {
+            return TryParseNamesAndValues(path, out names, out _);
+        }
+
+        /// <summary>
+        /// Tries to extract only the values from the directories of <paramref name="path"/>.
+        /// </summary>
+        public bool TryParseValues(string path, out List<string> values)
+        {
+            return TryParseNamesAndValues(path, out _, out values);
+        }
+
+        /// <summary>
+        /// Tries to split every directory of <paramref name="path"/> (the last entry, the file
+        /// name, is ignored) into a name and a value.
+        /// </summary>
+        /// <param name="path">The path to parse.</param>
+        /// <param name="names">The parsed names, or null when parsing fails.</param>
+        /// <param name="values">The parsed values, or null when parsing fails.</param>
+        /// <returns>true when the path is non-empty and every directory could be parsed.</returns>
+        public bool TryParseNamesAndValues(string path, out List<string> names, out List<string> values)
+        {
+            names = null;
+            values = null;
+
+            if (string.IsNullOrEmpty(path))
+            {
+                return false;
+            }
+
+            // Materialize the directories once instead of re-enumerating a deferred sequence.
+            string[] dirs = PartitionedPathUtils.SplitDirectories(path).ToArray();
+
+            // Ignore last directory which is the file name.
+            int dirCount = dirs.Length > 0 ? dirs.Length - 1 : 0;
+
+            var parsedNames = new List<string>(dirCount);
+            var parsedValues = new List<string>(dirCount);
+
+            for (int i = 0; i < dirCount; i++)
+            {
+                if (!TryParseNameValueFromDir(dirs[i], out string name, out string value))
+                {
+                    // The out parameters stay null so a failed parse never exposes partial results.
+                    return false;
+                }
+
+                parsedNames.Add(name);
+                parsedValues.Add(value);
+            }
+
+            names = parsedNames;
+            values = parsedValues;
+            return true;
+        }
+
+        /// <summary>
+        /// Parse the name/value pair from a partitioned directory name.
+        /// </summary>
+        /// <param name="dir">The directory name.</param>
+        /// <param name="name">The resulting name.</param>
+        /// <param name="value">The resulting value.</param>
+        /// <returns>true if the parsing was successful.</returns>
+        private static bool TryParseNameValueFromDir(string dir, out string name, out string value)
+        {
+            const char nameValueSeparator = '=';
+
+            name = null;
+            value = null;
+
+            if (string.IsNullOrEmpty(dir))
+            {
+                return false;
+            }
+
+            // A valid directory contains the separator exactly once.
+            int separatorIndex = dir.IndexOf(nameValueSeparator);
+            bool hasSingleSeparator = separatorIndex >= 0
+                && dir.IndexOf(nameValueSeparator, separatorIndex + 1) < 0;
+            if (!hasSingleSeparator)
+            {
+                return false;
+            }
+
+            // The name is used as written; only the value is URL-decoded.
+            name = dir.Substring(0, separatorIndex);
+            value = HttpUtility.UrlDecode(dir.Substring(separatorIndex + 1));
+
+            return true;
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs
new file mode 100644
index 0000000000..4d387de1d5
--- /dev/null
+++ b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs
@@ -0,0 +1,119 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using Microsoft.ML.Runtime.Internal.Utilities;
+
+namespace Microsoft.ML.Runtime.Data
+{
+    /// <summary>
+    /// A convenience class for concatenating several schemas together.
+    /// This would be necessary when combining IDataViews through any type of combining operation, e.g. zip.
+    /// </summary>
+    internal sealed class CompositeSchema : ISchema
+    {
+        private readonly ISchema[] _sources;
+
+        // Zero followed by cumulative column counts. Zero being used for the empty case. 
+        private readonly int[] _cumulativeColCounts;
+
+        /// <summary>
+        /// Builds a schema that exposes the columns of <paramref name="sources"/> back to back,
+        /// in the order the sources are given.
+        /// </summary>
+        /// <param name="sources">The schemas to concatenate.</param>
+        public CompositeSchema(ISchema[] sources)
+        {
+            Contracts.AssertNonEmpty(sources);
+            _sources = sources;
+            _cumulativeColCounts = new int[_sources.Length + 1];
+            _cumulativeColCounts[0] = 0;
+
+            // Entry i + 1 holds the total number of columns in sources 0..i.
+            for (int i = 0; i < sources.Length; i++)
+            {
+                var schema = sources[i];
+                _cumulativeColCounts[i + 1] = _cumulativeColCounts[i] + schema.ColumnCount;
+            }
+        }
+
+        /// <summary>
+        /// The total number of columns across all sources.
+        /// </summary>
+        public int ColumnCount => _cumulativeColCounts[_cumulativeColCounts.Length - 1];
+
+        /// <summary>
+        /// Returns an array of input predicates for the sources, corresponding to the input predicate.
+        /// The returned array size is equal to the number of sources. Each entry maps a source-local
+        /// column index to the composite index (by adding the column count of all preceding sources)
+        /// and forwards it to <paramref name="predicate"/>. Every entry is non-null.
+        /// </summary>
+        /// <param name="predicate">The predicate over composite column indices.</param>
+        public Func<int, bool>[] GetInputPredicates(Func<int, bool> predicate)
+        {
+            Contracts.AssertValue(predicate);
+            var result = new Func<int, bool>[_sources.Length];
+            for (int i = 0; i < _sources.Length; i++)
+            {
+                // Copy into a local so that each lambda captures its own offset.
+                var lastColCount = _cumulativeColCounts[i];
+                result[i] = srcCol => predicate(srcCol + lastColCount);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Checks whether the column index is in range.
+        /// </summary>
+        /// <param name="col">The composite column index to check.</param>
+        public void CheckColumnInRange(int col)
+        {
+            Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col), "Column index out of range");
+        }
+
+        /// <summary>
+        /// Translates a composite column index into the index of the owning source and the
+        /// column index within that source.
+        /// </summary>
+        /// <param name="col">The composite column index.</param>
+        /// <param name="srcIndex">The index into the sources array.</param>
+        /// <param name="srcCol">The column index within that source.</param>
+        public void GetColumnSource(int col, out int srcIndex, out int srcCol)
+        {
+            CheckColumnInRange(col);
+            // NOTE(review): presumably a failed search yields the insertion position, hence the
+            // step back to the preceding entry; confirm against TryFindIndexSorted.
+            if (!_cumulativeColCounts.TryFindIndexSorted(0, _cumulativeColCounts.Length, col, out srcIndex))
+                srcIndex--;
+            // srcIndex is used to index _sources below, so bound it by the number of sources.
+            Contracts.Assert(0 <= srcIndex && srcIndex < _sources.Length);
+            srcCol = col - _cumulativeColCounts[srcIndex];
+            Contracts.Assert(0 <= srcCol && srcCol < _sources[srcIndex].ColumnCount);
+        }
+
+        /// <summary>
+        /// Looks up a column by name. Sources are searched from last to first, so a name present in
+        /// several sources resolves to the last one.
+        /// </summary>
+        public bool TryGetColumnIndex(string name, out int col)
+        {
+            for (int i = _sources.Length; --i >= 0;)
+            {
+                if (_sources[i].TryGetColumnIndex(name, out col))
+                {
+                    // Shift the source-local index into the composite index space.
+                    col += _cumulativeColCounts[i];
+                    return true;
+                }
+            }
+
+            col = -1;
+            return false;
+        }
+
+        // The members below resolve the owning source and forward the call with the
+        // source-local column index.
+
+        public string GetColumnName(int col)
+        {
+            GetColumnSource(col, out int dv, out int srcCol);
+            return _sources[dv].GetColumnName(srcCol);
+        }
+
+        public ColumnType GetColumnType(int col)
+        {
+            GetColumnSource(col, out int dv, out int srcCol);
+            return _sources[dv].GetColumnType(srcCol);
+        }
+
+        public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
+        {
+            GetColumnSource(col, out int dv, out int srcCol);
+            return _sources[dv].GetMetadataTypes(srcCol);
+        }
+
+        public ColumnType GetMetadataTypeOrNull(string kind, int col)
+        {
+            GetColumnSource(col, out int dv, out int srcCol);
+            return _sources[dv].GetMetadataTypeOrNull(kind, srcCol);
+        }
+
+        public void GetMetadata<TValue>(string kind, int col, ref TValue value)
+        {
+            GetColumnSource(col, out int dv, out int srcCol);
+            _sources[dv].GetMetadata(kind, srcCol, ref value);
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Data/DataView/ZipDataView.cs b/src/Microsoft.ML.Data/DataView/ZipDataView.cs
index d10e4f3223..9a7e79bab8 100644
--- a/src/Microsoft.ML.Data/DataView/ZipDataView.cs
+++ b/src/Microsoft.ML.Data/DataView/ZipDataView.cs
@@ -1,4 +1,4 @@
-// Licensed to the .NET 
Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -25,7 +25,7 @@ public sealed class ZipDataView : IDataView private readonly IHost _host; private readonly IDataView[] _sources; - private readonly ZipSchema _schema; + private readonly CompositeSchema _schema; public static IDataView Create(IHostEnvironment env, IEnumerable sources) { @@ -47,7 +47,7 @@ private ZipDataView(IHost host, IDataView[] sources) _host.Assert(Utils.Size(sources) > 1); _sources = sources; - _schema = new ZipSchema(_sources.Select(x => x.Schema).ToArray()); + _schema = new CompositeSchema(_sources.Select(x => x.Schema).ToArray()); } public bool CanShuffle { get { return false; } } @@ -104,127 +104,10 @@ public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Fun return new IRowCursor[] { GetRowCursor(predicate, rand) }; } - /// - /// This is a result of appending several schema together. - /// - internal sealed class ZipSchema : ISchema - { - private readonly ISchema[] _sources; - // Zero followed by cumulative column counts. - private readonly int[] _cumulativeColCounts; - - public ZipSchema(ISchema[] sources) - { - Contracts.AssertNonEmpty(sources); - _sources = sources; - _cumulativeColCounts = new int[_sources.Length + 1]; - _cumulativeColCounts[0] = 0; - - for (int i = 0; i < sources.Length; i++) - { - var schema = sources[i]; - _cumulativeColCounts[i + 1] = _cumulativeColCounts[i] + schema.ColumnCount; - } - } - - /// - /// Returns an array of input predicated for sources, corresponding to the input predicate. - /// The returned array size is equal to the number of sources, but if a given source is not needed at all, - /// the corresponding predicate will be null. 
- /// - public Func[] GetInputPredicates(Func predicate) - { - Contracts.AssertValue(predicate); - var result = new Func[_sources.Length]; - for (int i = 0; i < _sources.Length; i++) - { - var lastColCount = _cumulativeColCounts[i]; - result[i] = srcCol => predicate(srcCol + lastColCount); - } - - return result; - } - - /// - /// Checks whether the column index is in range. - /// - public void CheckColumnInRange(int col) - { - Contracts.CheckParam(0 <= col && col < _cumulativeColCounts[_cumulativeColCounts.Length - 1], nameof(col), "Column index out of range"); - } - - public void GetColumnSource(int col, out int srcIndex, out int srcCol) - { - CheckColumnInRange(col); - if (!_cumulativeColCounts.TryFindIndexSorted(0, _cumulativeColCounts.Length, col, out srcIndex)) - srcIndex--; - Contracts.Assert(0 <= srcIndex && srcIndex < _cumulativeColCounts.Length); - srcCol = col - _cumulativeColCounts[srcIndex]; - Contracts.Assert(0 <= srcCol && srcCol < _sources[srcIndex].ColumnCount); - } - - public int ColumnCount { get { return _cumulativeColCounts[_cumulativeColCounts.Length - 1]; } } - - public bool TryGetColumnIndex(string name, out int col) - { - for (int i = _sources.Length; --i >= 0; ) - { - if (_sources[i].TryGetColumnIndex(name, out col)) - { - col += _cumulativeColCounts[i]; - return true; - } - } - - col = -1; - return false; - } - - public string GetColumnName(int col) - { - int dv; - int srcCol; - GetColumnSource(col, out dv, out srcCol); - return _sources[dv].GetColumnName(srcCol); - } - - public ColumnType GetColumnType(int col) - { - int dv; - int srcCol; - GetColumnSource(col, out dv, out srcCol); - return _sources[dv].GetColumnType(srcCol); - } - - public IEnumerable> GetMetadataTypes(int col) - { - int dv; - int srcCol; - GetColumnSource(col, out dv, out srcCol); - return _sources[dv].GetMetadataTypes(srcCol); - } - - public ColumnType GetMetadataTypeOrNull(string kind, int col) - { - int dv; - int srcCol; - GetColumnSource(col, out dv, out srcCol); - 
return _sources[dv].GetMetadataTypeOrNull(kind, srcCol); - } - - public void GetMetadata(string kind, int col, ref TValue value) - { - int dv; - int srcCol; - GetColumnSource(col, out dv, out srcCol); - _sources[dv].GetMetadata(kind, srcCol, ref value); - } - } - private sealed class Cursor : RootCursorBase, IRowCursor { private readonly IRowCursor[] _cursors; - private readonly ZipSchema _schema; + private readonly CompositeSchema _schema; private readonly bool[] _isColumnActive; public override long Batch { get { return 0; } }
diff --git a/src/Microsoft.ML.Data/Utilities/PartitionedPathUtils.cs b/src/Microsoft.ML.Data/Utilities/PartitionedPathUtils.cs
new file mode 100644
index 0000000000..b13a0d5cee
--- /dev/null
+++ b/src/Microsoft.ML.Data/Utilities/PartitionedPathUtils.cs
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Microsoft.ML.Runtime.Data.Utilities
+{
+    internal static class PartitionedPathUtils
+    {
+        /// <summary>
+        /// Make a full path relative to a base path.
+        /// </summary>
+        /// <param name="basepath">The base path, assumed to be a directory.</param>
+        /// <param name="path">The full path.</param>
+        /// <returns>The relative path.</returns>
+        /// <remarks>
+        /// Throws (via <c>Contracts.ExceptParam</c>) when the two paths have different URI schemes,
+        /// and (via <c>Contracts.Except</c>) when the relative URI cannot be computed.
+        /// For file paths the result uses <see cref="Path.DirectorySeparatorChar"/>.
+        /// </remarks>
+        internal static string MakePathRelative(string basepath, string path)
+        {
+            Contracts.AssertNonEmpty(basepath);
+            Contracts.AssertNonEmpty(path);
+
+            Uri baseUri = new Uri(basepath);
+            Uri uri = new Uri(path);
+
+            if (baseUri.Scheme != uri.Scheme)
+            {
+                throw Contracts.ExceptParam(nameof(basepath), "Paths cannot be made relative as they are of different schemes.");
+            }
+
+            string relativePath;
+            try
+            {
+                // A trailing slash makes the base resolve as a directory rather than a file.
+                // Ordinal comparison: this is a structural check, not a linguistic one.
+                if (!baseUri.AbsoluteUri.EndsWith("/", StringComparison.Ordinal))
+                {
+                    baseUri = new Uri(baseUri.AbsoluteUri + "/");
+                }
+
+                relativePath = Uri.UnescapeDataString(baseUri.MakeRelativeUri(uri).ToString());
+            }
+            catch (Exception e) when (e is ArgumentNullException || e is InvalidOperationException)
+            {
+                throw Contracts.Except(e, "Paths could not be made relative.");
+            }
+
+            if (uri.Scheme.Equals("file", StringComparison.OrdinalIgnoreCase))
+            {
+                // URIs always use '/'; convert back to the platform separator for file paths.
+                relativePath = relativePath.Replace(Path.AltDirectorySeparatorChar, Path.DirectorySeparatorChar);
+            }
+
+            return relativePath;
+        }
+
+        /// <summary>
+        /// Split a path string into an enumerable list of the directories.
+        /// </summary>
+        /// <param name="path">The path string to split.</param>
+        /// <returns>An enumerable list of all non-empty directories.</returns>
+        internal static IEnumerable<string> SplitDirectories(string path)
+        {
+            Contracts.AssertValue(path);
+
+            // Splitting on both separators gives the same result as normalizing the alternate
+            // separator first, without allocating an intermediate string.
+            char[] separators = { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
+            return path.Split(separators, StringSplitOptions.RemoveEmptyEntries);
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Data/Utilities/StreamUtils.cs b/src/Microsoft.ML.Data/Utilities/StreamUtils.cs index 45a808c55e..ac05684a8e 100644 --- a/src/Microsoft.ML.Data/Utilities/StreamUtils.cs +++ b/src/Microsoft.ML.Data/Utilities/StreamUtils.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.IO; +using System.Linq; namespace Microsoft.ML.Runtime.Internal.Utilities { @@ -94,7 +95,7 @@ private static string[] Expand(string pattern) try { // this is actually incorrect, for 3-char extensions: *** - var files = Directory.GetFiles(dir, right); + var files = Directory.GetFiles(dir, right).OrderBy(f => f).ToArray(); if (pathEmpty) { for (int i = 0; i < files.Length; i++) @@ -104,7 +105,7 @@ private static string[] Expand(string pattern) } } matchList.AddRange(files); - var subs = Directory.GetDirectories(dir); + var subs = Directory.GetDirectories(dir).OrderBy(f => f).ToArray(); for (var i = subs.Length - 1; i >= 0; i--) dirsLeft.Push(subs[i]); } @@ -125,7 +126,7 @@ private static string[] Expand(string pattern) // watch for lack of access: try { - var files = Directory.GetFiles(path, Path.GetFileName(currentPattern)); + var files = Directory.GetFiles(path, Path.GetFileName(currentPattern)).OrderBy(f => f).ToArray(); if (pathEmpty) { for (int i = 0; i < files.Length; i++) diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 058e8bafe3..5cf81ab1b8 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -1906,12 +1906,12 @@ public sealed partial class BinaryCrossValidator /// /// The training subgraph inputs /// - public Models.CrossValidationBinaryMacroSubGraphInput Inputs { get; set; } = new 
Models.CrossValidationBinaryMacroSubGraphInput(); + public Microsoft.ML.Models.CrossValidationBinaryMacroSubGraphInput Inputs { get; set; } = new Microsoft.ML.Models.CrossValidationBinaryMacroSubGraphInput(); /// /// The training subgraph outputs /// - public Models.CrossValidationBinaryMacroSubGraphOutput Outputs { get; set; } = new Models.CrossValidationBinaryMacroSubGraphOutput(); + public Microsoft.ML.Models.CrossValidationBinaryMacroSubGraphOutput Outputs { get; set; } = new Microsoft.ML.Models.CrossValidationBinaryMacroSubGraphOutput(); /// /// Column to use for stratification @@ -2248,12 +2248,12 @@ public sealed partial class CrossValidator /// /// The training subgraph inputs /// - public Models.CrossValidationMacroSubGraphInput Inputs { get; set; } = new Models.CrossValidationMacroSubGraphInput(); + public Microsoft.ML.Models.CrossValidationMacroSubGraphInput Inputs { get; set; } = new Microsoft.ML.Models.CrossValidationMacroSubGraphInput(); /// /// The training subgraph outputs /// - public Models.CrossValidationMacroSubGraphOutput Outputs { get; set; } = new Models.CrossValidationMacroSubGraphOutput(); + public Microsoft.ML.Models.CrossValidationMacroSubGraphOutput Outputs { get; set; } = new Microsoft.ML.Models.CrossValidationMacroSubGraphOutput(); /// /// Column to use for stratification @@ -2268,7 +2268,7 @@ public sealed partial class CrossValidator /// /// Specifies the trainer kind, which determines the evaluator to be used. /// - public Models.MacroUtilsTrainerKinds Kind { get; set; } = Models.MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; + public Microsoft.ML.Models.MacroUtilsTrainerKinds Kind { get; set; } = Microsoft.ML.Models.MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; public sealed class Output @@ -2662,7 +2662,7 @@ public sealed partial class OneVersusAll : Microsoft.ML.Runtime.EntryPoints.Comm /// /// The training subgraph output. 
/// - public Models.OneVersusAllMacroSubGraphOutput OutputForSubGraph { get; set; } = new Models.OneVersusAllMacroSubGraphOutput(); + public Microsoft.ML.Models.OneVersusAllMacroSubGraphOutput OutputForSubGraph { get; set; } = new Microsoft.ML.Models.OneVersusAllMacroSubGraphOutput(); /// /// Use probabilities in OVA combiner @@ -2692,12 +2692,12 @@ public sealed partial class OneVersusAll : Microsoft.ML.Runtime.EntryPoints.Comm /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output @@ -2780,12 +2780,12 @@ public sealed partial class OvaModelCombiner : Microsoft.ML.Runtime.EntryPoints. 
/// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output @@ -3339,12 +3339,12 @@ public sealed partial class TrainTestBinaryEvaluator /// /// The training subgraph inputs /// - public Models.TrainTestBinaryMacroSubGraphInput Inputs { get; set; } = new Models.TrainTestBinaryMacroSubGraphInput(); + public Microsoft.ML.Models.TrainTestBinaryMacroSubGraphInput Inputs { get; set; } = new Microsoft.ML.Models.TrainTestBinaryMacroSubGraphInput(); /// /// The training subgraph outputs /// - public Models.TrainTestBinaryMacroSubGraphOutput Outputs { get; set; } = new Models.TrainTestBinaryMacroSubGraphOutput(); + public Microsoft.ML.Models.TrainTestBinaryMacroSubGraphOutput Outputs { get; set; } = new Microsoft.ML.Models.TrainTestBinaryMacroSubGraphOutput(); public sealed class Output @@ -3434,17 +3434,17 @@ public sealed partial class TrainTestEvaluator /// /// The training subgraph inputs /// - public Models.TrainTestMacroSubGraphInput Inputs { get; set; } = new Models.TrainTestMacroSubGraphInput(); + public Microsoft.ML.Models.TrainTestMacroSubGraphInput Inputs { get; set; } = new Microsoft.ML.Models.TrainTestMacroSubGraphInput(); /// /// The training subgraph outputs /// - public Models.TrainTestMacroSubGraphOutput Outputs { get; set; } = new Models.TrainTestMacroSubGraphOutput(); + public Microsoft.ML.Models.TrainTestMacroSubGraphOutput Outputs { get; set; } = new Microsoft.ML.Models.TrainTestMacroSubGraphOutput(); /// /// Specifies the trainer kind, which determines the evaluator to be 
used. /// - public Models.MacroUtilsTrainerKinds Kind { get; set; } = Models.MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; + public Microsoft.ML.Models.MacroUtilsTrainerKinds Kind { get; set; } = Microsoft.ML.Models.MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; /// /// Identifies which pipeline was run for this train test. @@ -3634,12 +3634,12 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -3791,7 +3791,7 @@ public sealed partial class FastForestBinaryClassifier : Microsoft.ML.Runtime.En /// /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
/// - public Trainers.Bundle Bundling { get; set; } = Trainers.Bundle.None; + public Microsoft.ML.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Trainers.Bundle.None; /// /// Maximum number of distinct values (bins) per feature @@ -3934,12 +3934,12 @@ public sealed partial class FastForestBinaryClassifier : Microsoft.ML.Runtime.En /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -4073,7 +4073,7 @@ public sealed partial class FastForestRegressor : Microsoft.ML.Runtime.EntryPoin /// /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
/// - public Trainers.Bundle Bundling { get; set; } = Trainers.Bundle.None; + public Microsoft.ML.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Trainers.Bundle.None; /// /// Maximum number of distinct values (bins) per feature @@ -4216,12 +4216,12 @@ public sealed partial class FastForestRegressor : Microsoft.ML.Runtime.EntryPoin /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -4306,7 +4306,7 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) /// - public Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; + public Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; /// /// Early stopping rule. (Validation set (/valid) is required.) @@ -4471,7 +4471,7 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
/// - public Trainers.Bundle Bundling { get; set; } = Trainers.Bundle.None; + public Microsoft.ML.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Trainers.Bundle.None; /// /// Maximum number of distinct values (bins) per feature @@ -4614,12 +4614,12 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -4732,7 +4732,7 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) /// - public Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; + public Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; /// /// Early stopping rule. (Validation set (/valid) is required.) @@ -4897,7 +4897,7 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
/// - public Trainers.Bundle Bundling { get; set; } = Trainers.Bundle.None; + public Microsoft.ML.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Trainers.Bundle.None; /// /// Maximum number of distinct values (bins) per feature @@ -5040,12 +5040,12 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRankingOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -5118,7 +5118,7 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) /// - public Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; + public Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; /// /// Early stopping rule. (Validation set (/valid) is required.) @@ -5283,7 +5283,7 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
/// - public Trainers.Bundle Bundling { get; set; } = Trainers.Bundle.None; + public Microsoft.ML.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Trainers.Bundle.None; /// /// Maximum number of distinct values (bins) per feature @@ -5426,12 +5426,12 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -5509,7 +5509,7 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// Optimization algorithm to be used (GradientDescent, AcceleratedGradientDescent) /// - public Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; + public Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType OptimizationAlgorithm { get; set; } = Microsoft.ML.Trainers.BoostedTreeArgsOptimizationAlgorithmType.GradientDescent; /// /// Early stopping rule. (Validation set (/valid) is required.) @@ -5674,7 +5674,7 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// Bundle low population bins. Bundle.None(0): no bundling, Bundle.AggregateLowPopulation(1): Bundle low population, Bundle.Adjacent(2): Neighbor low population bundle. 
/// - public Trainers.Bundle Bundling { get; set; } = Trainers.Bundle.None; + public Microsoft.ML.Trainers.Bundle Bundling { get; set; } = Microsoft.ML.Trainers.Bundle.None; /// /// Maximum number of distinct values (bins) per feature @@ -5817,12 +5817,12 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -5974,12 +5974,12 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -6115,12 +6115,12 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// Normalize option for the feature column /// - public Models.NormalizeOption 
NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -6356,12 +6356,12 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -6656,12 +6656,12 @@ public sealed partial class LogisticRegressionClassifier : Microsoft.ML.Runtime. 
/// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -6729,12 +6729,12 @@ public sealed partial class NaiveBayesClassifier : Microsoft.ML.Runtime.EntryPoi /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -6884,12 +6884,12 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = 
Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -6973,12 +6973,12 @@ public sealed partial class OrdinaryLeastSquaresRegressor : Microsoft.ML.Runtime /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -7214,12 +7214,12 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. 
/// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -7355,12 +7355,12 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -7480,12 +7480,12 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public 
Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IMulticlassClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -7605,12 +7605,12 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -7744,12 +7744,12 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// Normalize option for the feature column /// - public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto; /// /// Whether learner should cache input training data /// - public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto; public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput @@ -8000,15 +8000,15 @@ public BinNormalizer(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null 
? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -8016,7 +8016,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NormalizeTransformBinColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NormalizeTransformBinColumn[] Column { get; set; } /// /// Max number of bins, power of 2 recommended @@ -8119,7 +8119,7 @@ public sealed partial class CategoricalHashTransformColumn : OneToOneColumn /// Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index) /// - public Transforms.CategoricalTransformOutputKind? OutputKind { get; set; } + public Microsoft.ML.Transforms.CategoricalTransformOutputKind? OutputKind { get; set; } /// /// Name of the new column @@ -8167,15 +8167,15 @@ public CategoricalHashOneHotVectorizer(params ValueTuple[] input public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -8183,7 +8183,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:hashBits:src) /// - public Transforms.CategoricalHashTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.CategoricalHashTransformColumn[] Column { get; set; } /// /// Number of bits to hash into. Must be between 1 and 30, inclusive. @@ -8208,7 +8208,7 @@ public void AddColumn(string name, string source) /// /// Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index) /// - public Transforms.CategoricalTransformOutputKind OutputKind { get; set; } = Transforms.CategoricalTransformOutputKind.Bag; + public Microsoft.ML.Transforms.CategoricalTransformOutputKind OutputKind { get; set; } = Microsoft.ML.Transforms.CategoricalTransformOutputKind.Bag; /// /// Input dataset @@ -8274,7 +8274,7 @@ public sealed partial class CategoricalTransformColumn : OneToOneColumn /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector /// - public Transforms.CategoricalTransformOutputKind? OutputKind { get; set; } + public Microsoft.ML.Transforms.CategoricalTransformOutputKind? OutputKind { get; set; } /// /// Maximum number of terms to keep when auto-training @@ -8289,7 +8289,7 @@ public sealed partial class CategoricalTransformColumn : OneToOneColumn /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a'). /// - public Transforms.TermTransformSortOrder? Sort { get; set; } + public Microsoft.ML.Transforms.TermTransformSortOrder? 
Sort { get; set; } /// /// Whether key value metadata should be text, regardless of the actual input type @@ -8342,15 +8342,15 @@ public CategoricalOneHotVectorizer(params ValueTuple[] inputOutp public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -8358,12 +8358,12 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.CategoricalTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.CategoricalTransformColumn[] Column { get; set; } /// /// Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index) /// - public Transforms.CategoricalTransformOutputKind OutputKind { get; set; } = Transforms.CategoricalTransformOutputKind.Ind; + public Microsoft.ML.Transforms.CategoricalTransformOutputKind OutputKind { get; set; } = Microsoft.ML.Transforms.CategoricalTransformOutputKind.Ind; /// /// Maximum number of terms to keep per column when auto-training @@ -8378,7 +8378,7 @@ public void AddColumn(string name, string source) /// /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a'). 
/// - public Transforms.TermTransformSortOrder Sort { get; set; } = Transforms.TermTransformSortOrder.Occurrence; + public Microsoft.ML.Transforms.TermTransformSortOrder Sort { get; set; } = Microsoft.ML.Transforms.TermTransformSortOrder.Occurrence; /// /// Whether key value metadata should be text, regardless of the actual input type @@ -8486,15 +8486,15 @@ public CharacterTokenizer(params ValueTuple[] inputOutputColumns public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -8502,7 +8502,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.CharTokenizeTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.CharTokenizeTransformColumn[] Column { get; set; } /// /// Whether to mark the beginning/end of each row/slot with start of text character (0x02)/end of text character (0x03) @@ -8593,8 +8593,8 @@ public ColumnConcatenator(string outputColumn, params string[] inputColumns) public void AddColumn(string name, params string[] source) { - var list = Column == null ? new List() : new List(Column); - list.Add(ManyToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(ManyToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -8602,7 +8602,7 @@ public void AddColumn(string name, params string[] source) /// /// New column definition(s) (optional form: name:srcs) /// - public Transforms.ConcatTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.ConcatTransformColumn[] Column { get; set; } /// /// Input dataset @@ -8705,15 +8705,15 @@ public ColumnCopier(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -8721,7 +8721,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.CopyColumnsTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.CopyColumnsTransformColumn[] Column { get; set; } /// /// Input dataset @@ -8939,7 +8939,7 @@ public sealed partial class ConvertTransformColumn : OneToOneColumn /// The result type /// - public Transforms.DataKind? ResultType { get; set; } + public Microsoft.ML.Transforms.DataKind? ResultType { get; set; } /// /// For a key column, this defines the range of values @@ -8992,15 +8992,15 @@ public ColumnTypeConverter(params ValueTuple[] inputOutputColumn public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -9008,12 +9008,12 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:type:src) /// - public Transforms.ConvertTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.ConvertTransformColumn[] Column { get; set; } /// /// The result type /// - public Transforms.DataKind? ResultType { get; set; } + public Microsoft.ML.Transforms.DataKind? ResultType { get; set; } /// /// For a key column, this defines the range of values @@ -9201,15 +9201,15 @@ public ConditionalNormalizer(params ValueTuple[] inputOutputColu public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -9217,7 +9217,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NormalizeTransformAffineColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NormalizeTransformAffineColumn[] Column { get; set; } /// /// Whether to map zero to zero, preserving sparsity @@ -9298,7 +9298,7 @@ public sealed partial class DataCache : Microsoft.ML.Runtime.EntryPoints.CommonI /// /// Caching strategy /// - public Transforms.CacheCachingType Caching { get; set; } = Transforms.CacheCachingType.Memory; + public Microsoft.ML.Transforms.CacheCachingType Caching { get; set; } = Microsoft.ML.Transforms.CacheCachingType.Memory; /// /// Input dataset @@ -9441,7 +9441,7 @@ public sealed partial class TermTransformColumn : OneToOneColumn /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a'). /// - public Transforms.TermTransformSortOrder? Sort { get; set; } + public Microsoft.ML.Transforms.TermTransformSortOrder? Sort { get; set; } /// /// Whether key value metadata should be text, regardless of the actual input type @@ -9494,15 +9494,15 @@ public Dictionarizer(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -9510,7 +9510,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.TermTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.TermTransformColumn[] Column { get; set; } /// /// Maximum number of terms to keep per column when auto-training @@ -9525,7 +9525,7 @@ public void AddColumn(string name, string source) /// /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a'). /// - public Transforms.TermTransformSortOrder Sort { get; set; } = Transforms.TermTransformSortOrder.Occurrence; + public Microsoft.ML.Transforms.TermTransformSortOrder Sort { get; set; } = Microsoft.ML.Transforms.TermTransformSortOrder.Occurrence; /// /// Whether key value metadata should be text, regardless of the actual input type @@ -9863,15 +9863,15 @@ public GlobalContrastNormalizer(params ValueTuple[] inputOutputC public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -9879,7 +9879,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.LpNormNormalizerTransformGcnColumn[] Column { get; set; } + public Microsoft.ML.Transforms.LpNormNormalizerTransformGcnColumn[] Column { get; set; } /// /// Subtract mean from each value before normalizing @@ -10022,15 +10022,15 @@ public HashConverter(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10038,7 +10038,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.HashJoinTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.HashJoinTransformColumn[] Column { get; set; } /// /// Whether the values need to be combined for a single hash @@ -10161,15 +10161,15 @@ public KeyToTextConverter(params ValueTuple[] inputOutputColumns public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10177,7 +10177,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.KeyToValueTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.KeyToValueTransformColumn[] Column { get; set; } /// /// Input dataset @@ -10355,15 +10355,15 @@ public LabelIndicator(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10371,7 +10371,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.LabelIndicatorTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.LabelIndicatorTransformColumn[] Column { get; set; } /// /// Label of the positive class. @@ -10549,15 +10549,15 @@ public LogMeanVarianceNormalizer(params ValueTuple[] inputOutput public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10570,7 +10570,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NormalizeTransformLogNormalColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NormalizeTransformLogNormalColumn[] Column { get; set; } /// /// Max number of examples used to train the normalizer @@ -10643,7 +10643,7 @@ public sealed partial class LpNormNormalizerTransformColumn : OneToOneColumn /// The norm to use to normalize each sample /// - public Transforms.LpNormNormalizerTransformNormalizerKind? NormKind { get; set; } + public Microsoft.ML.Transforms.LpNormNormalizerTransformNormalizerKind? NormKind { get; set; } /// /// Subtract mean from each value before normalizing @@ -10696,15 +10696,15 @@ public LpNormalizer(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10712,12 +10712,12 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.LpNormNormalizerTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.LpNormNormalizerTransformColumn[] Column { get; set; } /// /// The norm to use to normalize each sample /// - public Transforms.LpNormNormalizerTransformNormalizerKind NormKind { get; set; } = Transforms.LpNormNormalizerTransformNormalizerKind.L2Norm; + public Microsoft.ML.Transforms.LpNormNormalizerTransformNormalizerKind NormKind { get; set; } = Microsoft.ML.Transforms.LpNormNormalizerTransformNormalizerKind.L2Norm; /// /// Subtract mean from each value before normalizing @@ -10843,15 +10843,15 @@ public MeanVarianceNormalizer(params ValueTuple[] inputOutputCol public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10864,7 +10864,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NormalizeTransformAffineColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NormalizeTransformAffineColumn[] Column { get; set; } /// /// Whether to map zero to zero, preserving sparsity @@ -10963,15 +10963,15 @@ public MinMaxNormalizer(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -10979,7 +10979,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NormalizeTransformAffineColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NormalizeTransformAffineColumn[] Column { get; set; } /// /// Whether to map zero to zero, preserving sparsity @@ -11061,7 +11061,7 @@ public sealed partial class NAHandleTransformColumn : OneToOneColumn /// The replacement method to utilize /// - public Transforms.NAHandleTransformReplacementKind? Kind { get; set; } + public Microsoft.ML.Transforms.NAHandleTransformReplacementKind? Kind { get; set; } /// /// Whether to impute values by slot @@ -11119,15 +11119,15 @@ public MissingValueHandler(params ValueTuple[] inputOutputColumn public void AddColumn(string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -11135,12 +11135,12 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:rep:src) /// - public Transforms.NAHandleTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NAHandleTransformColumn[] Column { get; set; } /// /// The replacement method to utilize /// - public Transforms.NAHandleTransformReplacementKind ReplaceWith { get; set; } = Transforms.NAHandleTransformReplacementKind.Def; + public Microsoft.ML.Transforms.NAHandleTransformReplacementKind ReplaceWith { get; set; } = Microsoft.ML.Transforms.NAHandleTransformReplacementKind.Def; /// /// Whether to impute values by slot @@ -11253,15 +11253,15 @@ public MissingValueIndicator(params ValueTuple[] inputOutputColu public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -11269,7 +11269,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NAIndicatorTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NAIndicatorTransformColumn[] Column { get; set; } /// /// Input dataset @@ -11372,15 +11372,15 @@ public MissingValuesDropper(params ValueTuple[] inputOutputColum public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -11388,7 +11388,7 @@ public void AddColumn(string name, string source) /// /// Columns to drop the NAs for /// - public Transforms.NADropTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NADropTransformColumn[] Column { get; set; } /// /// Input dataset @@ -11538,7 +11538,7 @@ public sealed partial class NAReplaceTransformColumn : OneToOneColumn /// The replacement method to utilize /// - public Transforms.NAReplaceTransformReplacementKind? Kind { get; set; } + public Microsoft.ML.Transforms.NAReplaceTransformReplacementKind? Kind { get; set; } /// /// Whether to impute values by slot @@ -11591,15 +11591,15 @@ public MissingValueSubstitutor(params ValueTuple[] inputOutputCo public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? 
new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -11607,12 +11607,12 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:rep:src) /// - public Transforms.NAReplaceTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NAReplaceTransformColumn[] Column { get; set; } /// /// The replacement method to utilize /// - public Transforms.NAReplaceTransformReplacementKind ReplacementKind { get; set; } = Transforms.NAReplaceTransformReplacementKind.Def; + public Microsoft.ML.Transforms.NAReplaceTransformReplacementKind ReplacementKind { get; set; } = Microsoft.ML.Transforms.NAReplaceTransformReplacementKind.Def; /// /// Whether to impute values by slot @@ -11731,7 +11731,7 @@ public sealed partial class NgramTransformColumn : OneToOneColumn /// Statistical measure used to evaluate how important a word is to a document in a corpus /// - public Transforms.NgramTransformWeightingCriteria? Weighting { get; set; } + public Microsoft.ML.Transforms.NgramTransformWeightingCriteria? Weighting { get; set; } /// /// Name of the new column @@ -11779,15 +11779,15 @@ public NGramTranslator(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -11795,7 +11795,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NgramTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NgramTransformColumn[] Column { get; set; } /// /// Maximum ngram length @@ -11820,7 +11820,7 @@ public void AddColumn(string name, string source) /// /// The weighting criteria /// - public Transforms.NgramTransformWeightingCriteria Weighting { get; set; } = Transforms.NgramTransformWeightingCriteria.Tf; + public Microsoft.ML.Transforms.NgramTransformWeightingCriteria Weighting { get; set; } = Microsoft.ML.Transforms.NgramTransformWeightingCriteria.Tf; /// /// Input dataset @@ -12263,7 +12263,7 @@ public sealed partial class RandomNumberGenerator : Microsoft.ML.Runtime.EntryPo /// /// New column definition(s) (optional form: name:seed) /// - public Transforms.GenerateNumberTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.GenerateNumberTransformColumn[] Column { get; set; } /// /// Use an auto-incremented integer starting at zero instead of a random number @@ -12737,7 +12737,7 @@ public sealed partial class Segregator : Microsoft.ML.Runtime.EntryPoints.Common /// /// Specifies how to unroll multiple pivot columns of different size. /// - public Transforms.UngroupTransformUngroupMode Mode { get; set; } = Transforms.UngroupTransformUngroupMode.Inner; + public Microsoft.ML.Transforms.UngroupTransformUngroupMode Mode { get; set; } = Microsoft.ML.Transforms.UngroupTransformUngroupMode.Inner; /// /// Input dataset @@ -12896,15 +12896,15 @@ public SupervisedBinNormalizer(params ValueTuple[] inputOutputCo public void AddColumn(string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -12922,7 +12922,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.NormalizeTransformBinColumn[] Column { get; set; } + public Microsoft.ML.Transforms.NormalizeTransformBinColumn[] Column { get; set; } /// /// Max number of bins, power of 2 recommended @@ -13042,7 +13042,7 @@ public sealed partial class TermLoaderArguments /// /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a'). /// - public Transforms.TermTransformSortOrder Sort { get; set; } = Transforms.TermTransformSortOrder.Occurrence; + public Microsoft.ML.Transforms.TermTransformSortOrder Sort { get; set; } = Microsoft.ML.Transforms.TermTransformSortOrder.Occurrence; /// /// Drop unknown terms instead of mapping them to NA term. @@ -13068,19 +13068,19 @@ public TextFeaturizer(string outputColumn, params string[] inputColumns) public void AddColumn(string name, params string[] source) { - Column = ManyToOneColumn.Create(name, source); + Column = ManyToOneColumn.Create(name, source); } /// /// New column definition (optional form: name:srcs). 
/// - public Transforms.TextTransformColumn Column { get; set; } + public Microsoft.ML.Transforms.TextTransformColumn Column { get; set; } /// /// Dataset language or 'AutoDetect' to detect language per row. /// - public Transforms.TextTransformLanguage Language { get; set; } = Transforms.TextTransformLanguage.English; + public Microsoft.ML.Transforms.TextTransformLanguage Language { get; set; } = Microsoft.ML.Transforms.TextTransformLanguage.English; /// /// Stopwords remover. @@ -13091,7 +13091,7 @@ public void AddColumn(string name, params string[] source) /// /// Casing text using the rules of the invariant culture. /// - public Transforms.TextNormalizerTransformCaseNormalizationMode TextCase { get; set; } = Transforms.TextNormalizerTransformCaseNormalizationMode.Lower; + public Microsoft.ML.Transforms.TextNormalizerTransformCaseNormalizationMode TextCase { get; set; } = Microsoft.ML.Transforms.TextNormalizerTransformCaseNormalizationMode.Lower; /// /// Whether to keep diacritical marks or remove them. @@ -13116,7 +13116,7 @@ public void AddColumn(string name, params string[] source) /// /// A dictionary of whitelisted terms. /// - public Transforms.TermLoaderArguments Dictionary { get; set; } + public Microsoft.ML.Transforms.TermLoaderArguments Dictionary { get; set; } /// /// Ngram feature extractor to use for words (WordBag/WordHashBag). @@ -13133,7 +13133,7 @@ public void AddColumn(string name, params string[] source) /// /// Normalize vectors (rows) individually by rescaling them to unit norm. /// - public Transforms.TextTransformTextNormKind VectorNormalizer { get; set; } = Transforms.TextTransformTextNormKind.L2; + public Microsoft.ML.Transforms.TextTransformTextNormKind VectorNormalizer { get; set; } = Microsoft.ML.Transforms.TextTransformTextNormKind.L2; /// /// Input dataset @@ -13222,15 +13222,15 @@ public TextToKeyConverter(params ValueTuple[] inputOutputColumns public void AddColumn(string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -13238,7 +13238,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) (optional form: name:src) /// - public Transforms.TermTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.TermTransformColumn[] Column { get; set; } /// /// Maximum number of terms to keep per column when auto-training @@ -13253,7 +13253,7 @@ public void AddColumn(string name, string source) /// /// How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a'). /// - public Transforms.TermTransformSortOrder Sort { get; set; } = Transforms.TermTransformSortOrder.Occurrence; + public Microsoft.ML.Transforms.TermTransformSortOrder Sort { get; set; } = Microsoft.ML.Transforms.TermTransformSortOrder.Occurrence; /// /// Whether key value metadata should be text, regardless of the actual input type @@ -13515,15 +13515,15 @@ public WordTokenizer(params ValueTuple[] inputOutputColumns) public void AddColumn(string source) { - var list = Column == null ? new List() : new List(Column); - list.Add(OneToOneColumn.Create(source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(source)); Column = list.ToArray(); } public void AddColumn(string name, string source) { - var list = Column == null ? 
new List() : new List(Column); - list.Add(OneToOneColumn.Create(name, source)); + var list = Column == null ? new List() : new List(Column); + list.Add(OneToOneColumn.Create(name, source)); Column = list.ToArray(); } @@ -13531,7 +13531,7 @@ public void AddColumn(string name, string source) /// /// New column definition(s) /// - public Transforms.DelimitedTokenizeTransformColumn[] Column { get; set; } + public Microsoft.ML.Transforms.DelimitedTokenizeTransformColumn[] Column { get; set; } /// /// Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character. @@ -15454,6 +15454,57 @@ public sealed class SingleParallelTraining : ParallelTraining internal override string ComponentName => "Single"; } + public abstract class PartitionedPathParser : ComponentKind {} + + + + /// + /// Extract name/value pairs from Parquet formatted directory names. Example path: Year=2018/Month=12/data1.parquet + /// + public sealed class ParquetPathParserPartitionedPathParser : PartitionedPathParser + { + internal override string ComponentName => "ParquetPathParser"; + } + + + public sealed class PartitionedFileLoaderColumn + { + /// + /// Name of the column. + /// + public string Name { get; set; } + + /// + /// Data type of the column. + /// + public Microsoft.ML.Transforms.DataKind? Type { get; set; } + + /// + /// Source index of the column. + /// + public int Source { get; set; } + + } + + + /// + /// A simple parser that extracts directory names as column values. Column names are defined as arguments. + /// + public sealed class SimplePathParserPartitionedPathParser : PartitionedPathParser + { + /// + /// Column definitions used to override the Partitioned Path Parser. Expected with the format name:type:numeric-source, e.g. col=MyFeature:R4:1 + /// + public Microsoft.ML.Runtime.PartitionedFileLoaderColumn[] Columns { get; set; } + + /// + /// Data type of each column. 
+ /// + public Microsoft.ML.Transforms.DataKind Type { get; set; } = Microsoft.ML.Transforms.DataKind.TX; + + internal override string ComponentName => "SimplePathParser"; + } + public abstract class RegressionLossFunction : ComponentKind {} diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index c893f469ae..234c87fade 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -595,7 +595,7 @@ private static string GetSymbolFromType(Dictionary typesSymbolTa Contracts.Assert(typesSymbolTable.Select(kvp => kvp.Value).All(str => string.Compare(str, name) != 0)); - return name; + return "Microsoft.ML." + name; } private void GenerateEnums(IndentingTextWriter writer, Type inputType, string currentNamespace) diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 7e06c77821..980fb903cb 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -22196,6 +22196,140 @@ } ] }, + { + "Kind": "PartitionedPathParser", + "Components": [ + { + "Name": "ParquetPathParser", + "Desc": "Extract name/value pairs from Parquet formatted directory names. Example path: Year=2018/Month=12/data1.parquet", + "FriendlyName": "Parquet Partitioned Path Parser", + "Aliases": [ + "ParqPP" + ], + "Settings": [] + }, + { + "Name": "SimplePathParser", + "Desc": "A simple parser that extracts directory names as column values. 
Column names are defined as arguments.", + "FriendlyName": "Simple Partitioned Path Parser", + "Aliases": [ + "SmplPP" + ], + "Settings": [ + { + "Name": "Columns", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Name", + "Type": "String", + "Desc": "Name of the column.", + "Required": true, + "SortOrder": 150.0, + "IsNullable": false + }, + { + "Name": "Type", + "Type": { + "Kind": "Enum", + "Values": [ + "I1", + "U1", + "I2", + "U2", + "I4", + "U4", + "I8", + "U8", + "R4", + "Num", + "R8", + "TX", + "Text", + "TXT", + "BL", + "Bool", + "TimeSpan", + "TS", + "DT", + "DateTime", + "DZ", + "DateTimeZone", + "UG", + "U16" + ] + }, + "Desc": "Data type of the column.", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Source", + "Type": "Int", + "Desc": "Index of the directory representing this column.", + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + } + ] + } + }, + "Desc": "Column definitions used to override the Partitioned Path Parser. Expected with the format name:type:numeric-source, e.g. 
col=MyFeature:R4:1", + "Aliases": [ + "col" + ], + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "Type", + "Type": { + "Kind": "Enum", + "Values": [ + "I1", + "U1", + "I2", + "U2", + "I4", + "U4", + "I8", + "U8", + "R4", + "Num", + "R8", + "TX", + "Text", + "TXT", + "BL", + "Bool", + "TimeSpan", + "TS", + "DT", + "DateTime", + "DZ", + "DateTimeZone", + "UG", + "U16" + ] + }, + "Desc": "Data type of each column.", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": "TX" + } + ] + } + ] + }, { "Kind": "RegressionLossFunction", "Components": [ diff --git a/test/BaselineOutput/SingleDebug/SavePipe/PartitionedNamedDirectories-Data.txt b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedNamedDirectories-Data.txt new file mode 100644 index 0000000000..68888f43f4 --- /dev/null +++ b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedNamedDirectories-Data.txt @@ -0,0 +1,14 @@ +#@ TextLoader{ +#@ header+ +#@ sep=tab +#@ col=L0:TX:0 +#@ col=Year:TX:1 +#@ col=Month:TX:2 +#@ } +L0 Year Month +0 2017 01 +4 2017 01 +6 2017 01 +21 2017 02 +23 2017 02 +25 2017 02 diff --git a/test/BaselineOutput/SingleDebug/SavePipe/PartitionedNamedDirectories-Schema.txt b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedNamedDirectories-Schema.txt new file mode 100644 index 0000000000..0220433ff0 --- /dev/null +++ b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedNamedDirectories-Schema.txt @@ -0,0 +1,5 @@ +---- PartitionedFileLoader ---- +3 columns: + L0: Text + Year: Text + Month: Text diff --git a/test/BaselineOutput/SingleDebug/SavePipe/PartitionedUnnamedDirectories-Data.txt b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedUnnamedDirectories-Data.txt new file mode 100644 index 0000000000..4c7f650844 --- /dev/null +++ b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedUnnamedDirectories-Data.txt @@ -0,0 +1,16 @@ +#@ TextLoader{ +#@ header+ +#@ sep=tab +#@ col=L0:I4:0 +#@ col=Month:I4:1 +#@ 
col=Path:TX:2 +#@ } +L0 Month Path +1 1 2017/01/data1.csv +5 1 2017/01/data2.csv +7 1 2017/01/data2.csv +0 1 2017/01/dataBadSchema.csv +0 1 2017/01/dataBadSchema.csv +22 2 2017/02/data1.csv +24 2 2017/02/data1.csv +26 2 2017/02/data1.csv diff --git a/test/BaselineOutput/SingleDebug/SavePipe/PartitionedUnnamedDirectories-Schema.txt b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedUnnamedDirectories-Schema.txt new file mode 100644 index 0000000000..5eeac7698b --- /dev/null +++ b/test/BaselineOutput/SingleDebug/SavePipe/PartitionedUnnamedDirectories-Schema.txt @@ -0,0 +1,5 @@ +---- PartitionedFileLoader ---- +3 columns: + L0: I4 + Month: I4 + Path: Text diff --git a/test/BaselineOutput/SingleRelease/SavePipe/PartitionedNamedDirectories-Data.txt b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedNamedDirectories-Data.txt new file mode 100644 index 0000000000..68888f43f4 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedNamedDirectories-Data.txt @@ -0,0 +1,14 @@ +#@ TextLoader{ +#@ header+ +#@ sep=tab +#@ col=L0:TX:0 +#@ col=Year:TX:1 +#@ col=Month:TX:2 +#@ } +L0 Year Month +0 2017 01 +4 2017 01 +6 2017 01 +21 2017 02 +23 2017 02 +25 2017 02 diff --git a/test/BaselineOutput/SingleRelease/SavePipe/PartitionedNamedDirectories-Schema.txt b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedNamedDirectories-Schema.txt new file mode 100644 index 0000000000..0220433ff0 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedNamedDirectories-Schema.txt @@ -0,0 +1,5 @@ +---- PartitionedFileLoader ---- +3 columns: + L0: Text + Year: Text + Month: Text diff --git a/test/BaselineOutput/SingleRelease/SavePipe/PartitionedUnnamedDirectories-Data.txt b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedUnnamedDirectories-Data.txt new file mode 100644 index 0000000000..4c7f650844 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedUnnamedDirectories-Data.txt @@ -0,0 +1,16 @@ +#@ TextLoader{ +#@ header+ +#@ 
sep=tab +#@ col=L0:I4:0 +#@ col=Month:I4:1 +#@ col=Path:TX:2 +#@ } +L0 Month Path +1 1 2017/01/data1.csv +5 1 2017/01/data2.csv +7 1 2017/01/data2.csv +0 1 2017/01/dataBadSchema.csv +0 1 2017/01/dataBadSchema.csv +22 2 2017/02/data1.csv +24 2 2017/02/data1.csv +26 2 2017/02/data1.csv diff --git a/test/BaselineOutput/SingleRelease/SavePipe/PartitionedUnnamedDirectories-Schema.txt b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedUnnamedDirectories-Schema.txt new file mode 100644 index 0000000000..5eeac7698b --- /dev/null +++ b/test/BaselineOutput/SingleRelease/SavePipe/PartitionedUnnamedDirectories-Schema.txt @@ -0,0 +1,5 @@ +---- PartitionedFileLoader ---- +3 columns: + L0: I4 + Month: I4 + Path: Text diff --git a/test/Microsoft.ML.Tests/PartitionedFileLoaderTests.cs b/test/Microsoft.ML.Tests/PartitionedFileLoaderTests.cs new file mode 100644 index 0000000000..4b5371a98b --- /dev/null +++ b/test/Microsoft.ML.Tests/PartitionedFileLoaderTests.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.ML.Runtime.RunTests; +using System.IO; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Tests +{ + public class PartitionedFileLoaderTests : TestDataPipeBase + { + public PartitionedFileLoaderTests(ITestOutputHelper output) + : base(output) + { + + } + + [Fact] + public void PartitionedNamedDirectories() + { + string basePath = GetDataPath("Partitioned", "Named"); + string pathData = Path.Combine(basePath, "...", "*.csv"); + + TestCore(pathData, false, + new[] { + "loader=Part{bp=" + basePath + " loader=Text{header+ sep=comma col=L0:TX:0}}" + }); + + Done(); + } + + [Fact] + public void PartitionedUnnamedDirectories() + { + string basePath = GetDataPath("Partitioned", "Unnamed"); ; + string pathData = Path.Combine(basePath, "...", "*.csv"); + + TestCore(pathData, false, + new[] { + "loader=Part{parser=SmplPP{col=Month:I4:1} path+ bp=" + basePath + " loader=Text{header+ sep=comma col=L0:I4:1}}" + }); + + // Test again with global parser data type. + TestCore(pathData, false, + new[] { + "loader=Part{parser=SmplPP{type=I4 col=Month:1} path+ bp=" + basePath + " loader=Text{header+ sep=comma col=L0:I4:1}}" + }); + + Done(); + } + } +} diff --git a/test/data/Partitioned/Named/Year=2017/Month=01/data1.csv b/test/data/Partitioned/Named/Year=2017/Month=01/data1.csv new file mode 100644 index 0000000000..c69b170df5 --- /dev/null +++ b/test/data/Partitioned/Named/Year=2017/Month=01/data1.csv @@ -0,0 +1,2 @@ +col1, col2 +0, 1 diff --git a/test/data/Partitioned/Named/Year=2017/Month=01/data2.csv b/test/data/Partitioned/Named/Year=2017/Month=01/data2.csv new file mode 100644 index 0000000000..23cb4a7b19 --- /dev/null +++ b/test/data/Partitioned/Named/Year=2017/Month=01/data2.csv @@ -0,0 +1,3 @@ +col1, col2 +4, 5 +6, 7 \ No newline at end of file diff --git a/test/data/Partitioned/Named/Year=2017/Month=01/dataEmpty.csv b/test/data/Partitioned/Named/Year=2017/Month=01/dataEmpty.csv new file mode 100644 index 0000000000..d55d30cf89 --- 
/dev/null +++ b/test/data/Partitioned/Named/Year=2017/Month=01/dataEmpty.csv @@ -0,0 +1 @@ +col1, col2 \ No newline at end of file diff --git a/test/data/Partitioned/Named/Year=2017/Month=02/data1.csv b/test/data/Partitioned/Named/Year=2017/Month=02/data1.csv new file mode 100644 index 0000000000..6d80a7f679 --- /dev/null +++ b/test/data/Partitioned/Named/Year=2017/Month=02/data1.csv @@ -0,0 +1,4 @@ +col1, col2 +21, 22 +23, 24 +25, 26 diff --git a/test/data/Partitioned/Named/Year=2017/TestBadDir/data1.csv b/test/data/Partitioned/Named/Year=2017/TestBadDir/data1.csv new file mode 100644 index 0000000000..6d80a7f679 --- /dev/null +++ b/test/data/Partitioned/Named/Year=2017/TestBadDir/data1.csv @@ -0,0 +1,4 @@ +col1, col2 +21, 22 +23, 24 +25, 26 diff --git a/test/data/Partitioned/Unnamed/2017/01/data1.csv b/test/data/Partitioned/Unnamed/2017/01/data1.csv new file mode 100644 index 0000000000..c69b170df5 --- /dev/null +++ b/test/data/Partitioned/Unnamed/2017/01/data1.csv @@ -0,0 +1,2 @@ +col1, col2 +0, 1 diff --git a/test/data/Partitioned/Unnamed/2017/01/data2.csv b/test/data/Partitioned/Unnamed/2017/01/data2.csv new file mode 100644 index 0000000000..23cb4a7b19 --- /dev/null +++ b/test/data/Partitioned/Unnamed/2017/01/data2.csv @@ -0,0 +1,3 @@ +col1, col2 +4, 5 +6, 7 \ No newline at end of file diff --git a/test/data/Partitioned/Unnamed/2017/01/dataBadSchema.csv b/test/data/Partitioned/Unnamed/2017/01/dataBadSchema.csv new file mode 100644 index 0000000000..43dbd9e3f6 --- /dev/null +++ b/test/data/Partitioned/Unnamed/2017/01/dataBadSchema.csv @@ -0,0 +1,3 @@ +col1 +11 +12 diff --git a/test/data/Partitioned/Unnamed/2017/02/data1.csv b/test/data/Partitioned/Unnamed/2017/02/data1.csv new file mode 100644 index 0000000000..6d80a7f679 --- /dev/null +++ b/test/data/Partitioned/Unnamed/2017/02/data1.csv @@ -0,0 +1,4 @@ +col1, col2 +21, 22 +23, 24 +25, 26