Skip to content

Commit 62e650b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into parsingPerf
2 parents d10e521 + c31c2c1 commit 62e650b

File tree

251 files changed

+35169
-2429
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

251 files changed

+35169
-2429
lines changed

README.md

+18-17
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ Along with these ML capabilities, this first release of ML.NET also brings the f
1616

1717
[![NuGet Status](https://img.shields.io/nuget/v/Microsoft.ML.svg?style=flat)](https://www.nuget.org/packages/Microsoft.ML/)
1818

19-
ML.NET runs on Windows, Linux, and macOS - any platform where x64 [.NET Core](https://github.com/dotnet/core) or later is available. In addition, .NET Framework on Windows x64 is also supported.
19+
ML.NET runs on Windows, Linux, and macOS using [.NET Core](https://github.com/dotnet/core), or Windows using .NET Framework. 64-bit is supported on all platforms. 32-bit is supported on Windows, except for TensorFlow, LightGBM, and ONNX-related functionality.
2020

21-
The current release is 0.6. Check out the [release notes](docs/release-notes/0.6/release-0.6.md) to see what's new.
21+
The current release is 0.7. Check out the [release notes](docs/release-notes/0.7/release-0.7.md) and [blog post](https://blogs.msdn.microsoft.com/dotnet/2018/11/08/announcing-ml-net-0-7-machine-learning-net/) to see what's new.
2222

23-
First, ensure you have installed [.NET Core 2.0](https://www.microsoft.com/net/learn/get-started) or later. ML.NET also works on the .NET Framework. Note that ML.NET currently must run in a 64-bit process.
23+
First, ensure you have installed [.NET Core 2.1](https://www.microsoft.com/net/learn/get-started) or later. ML.NET also works on the .NET Framework 4.6.1 or later, but 4.7.2 or later is recommended.
2424

2525
Once you have an app, you can install the ML.NET NuGet package from the .NET Core CLI using:
2626
```
@@ -65,31 +65,32 @@ Here's an example of code to train a model to predict sentiment from text sample
6565
(You can find a sample of the legacy API [here](test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs)):
6666

6767
```C#
68-
var env = new LocalEnvironment();
69-
var reader = TextLoader.CreateReader(env, ctx => (
70-
Target: ctx.LoadFloat(2),
71-
FeatureVector: ctx.LoadFloat(3, 6)),
72-
separator: ',',
73-
hasHeader: true);
74-
var data = reader.Read(new MultiFileSource(dataPath));
75-
var classification = new MulticlassClassificationContext(env);
76-
var learningPipeline = reader.MakeNewEstimator()
77-
.Append(r => (
78-
r.Target,
79-
Prediction: classification.Trainers.Sdca(r.Target.ToKey(), r.FeatureVector)));
68+
var mlContext = new MLContext();
69+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
70+
{
71+
Column = new[] {
72+
new TextLoader.Column("SentimentText", DataKind.Text, 1),
73+
new TextLoader.Column("Label", DataKind.Bool, 0),
74+
},
75+
HasHeader = true,
76+
Separator = ","
77+
});
78+
var data = reader.Read(dataPath);
79+
var learningPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
80+
.Append(mlContext.BinaryClassification.Trainers.FastTree());
8081
var model = learningPipeline.Fit(data);
8182

8283
```
8384

8485
Now from the model we can make inferences (predictions):
8586

8687
```C#
87-
var predictionFunc = model.MakePredictionFunction<SentimentInput, SentimentPrediction>(env);
88+
var predictionFunc = model.MakePredictionFunction<SentimentData, SentimentPrediction>(mlContext);
8889
var prediction = predictionFunc.Predict(new SentimentData
8990
{
9091
SentimentText = "Today is a great day!"
9192
});
92-
Console.WriteLine("prediction: " + prediction.Sentiment);
93+
Console.WriteLine("prediction: " + prediction.Prediction);
9394
```
9495
A cookbook that shows how to use these APIs for a variety of existing and new scenarios can be found [here](docs/code/MlNetCookBook.md).
9596

build/ci/phase-template.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ phases:
1111
_phaseName: ${{ parameters.name }}
1212
_arch: ${{ parameters.architecture }}
1313
queue:
14-
timeoutInMinutes: 40
14+
timeoutInMinutes: 45
1515
parallel: 99
1616
matrix:
1717
Build_Debug:

docs/code/MlNetCookBook.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1331,7 +1331,7 @@ var learningPipeline = reader.MakeNewEstimator()
13311331
IEstimator<ITransformer> dynamicPipe = learningPipeline.AsDynamic;
13321332

13331333
// Create a binary classification trainer.
1334-
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
1334+
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features");
13351335

13361336
// Append the OVA learner to the pipeline.
13371337
dynamicPipe = dynamicPipe.Append(new Ova(mlContext, binaryTrainer));

docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs

+4-2
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,10 @@ public static void MatrixFactorizationInMemoryData()
7171
// Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
7272
// matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field
7373
// names in the MatrixElement class.
74-
var pipeline = new MatrixFactorizationTrainer(mlContext, nameof(MatrixElement.Value),
75-
nameof(MatrixElement.MatrixColumnIndex), nameof(MatrixElement.MatrixRowIndex),
74+
var pipeline = new MatrixFactorizationTrainer(mlContext,
75+
nameof(MatrixElement.MatrixColumnIndex),
76+
nameof(MatrixElement.MatrixRowIndex),
77+
nameof(MatrixElement.Value),
7678
advancedSettings: s =>
7779
{
7880
s.NumIterations = 10;

docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public static void SDCA_BinaryClassification()
4848
// Then append a binary classifier, setting the "Label" column as the label of the dataset, and
4949
// the "Features" column produced by FeaturizeText as the features column.
5050
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
51-
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(label: "Sentiment", features: "Features", l2Const: 0.001f));
51+
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Sentiment", featureColumn: "Features", l2Const: 0.001f));
5252

5353
// Step 3: Run Cross-Validation on this pipeline.
5454
var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment");
@@ -60,8 +60,8 @@ public static void SDCA_BinaryClassification()
6060
// we could do so by tweaking the 'advancedSetting'.
6161
var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
6262
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent
63-
(label: "Sentiment",
64-
features: "Features",
63+
(labelColumn: "Sentiment",
64+
featureColumn: "Features",
6565
advancedSettings: s=>
6666
{
6767
s.ConvergenceTolerance = 0.01f; // The tolerance for the ratio between duality gap and primal loss, used for convergence checking

docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public static void FastTreeBinaryClassification()
8989
row.Features,
9090
numTrees: 100, // try: (int) 20-2000
9191
numLeaves: 20, // try: (int) 2-128
92-
minDatapointsInLeafs: 10, // try: (int) 1-100
92+
minDatapointsInLeaves: 10, // try: (int) 1-100
9393
learningRate: 0.2))) // try: (float) 0.025-0.4
9494
.Append(row => (
9595
Label: row.Label,

docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public static void FastTreeRegression()
4747
r.features,
4848
numTrees: 100, // try: (int) 20-2000
4949
numLeaves: 20, // try: (int) 2-128
50-
minDatapointsInLeafs: 10, // try: (int) 1-100
50+
minDatapointsInLeaves: 10, // try: (int) 1-100
5151
learningRate: 0.2, // try: (float) 0.025-0.4
5252
onFit: p => pred = p)
5353
)

src/Microsoft.ML.Core/CommandLine/ArgumentAttribute.cs

+16-64
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
// This is separated from CmdParser.cs
6-
75
using System;
86
using System.Linq;
97

@@ -15,7 +13,8 @@ namespace Microsoft.ML.Runtime.CommandLine
1513
/// as the destination of command line argument parsing.
1614
/// </summary>
1715
[AttributeUsage(AttributeTargets.Field)]
18-
public class ArgumentAttribute : Attribute
16+
[BestFriend]
17+
internal class ArgumentAttribute : Attribute
1918
{
2019
public enum VisibilityType
2120
{
@@ -24,35 +23,23 @@ public enum VisibilityType
2423
EntryPointsOnly
2524
}
2625

27-
private ArgumentType _type;
2826
private string _shortName;
29-
private string _helpText;
30-
private bool _hide;
31-
private double _sortOrder;
32-
private string _nullName;
33-
private bool _isInputFileName;
34-
private string _specialPurpose;
35-
private VisibilityType _visibility;
3627
private string _name;
37-
private Type _signatureType;
3828

3929
/// <summary>
4030
/// Allows control of command line parsing.
4131
/// </summary>
4232
/// <param name="type"> Specifies the error checking to be done on the argument. </param>
4333
public ArgumentAttribute(ArgumentType type)
4434
{
45-
_type = type;
46-
_sortOrder = 150;
35+
Type = type;
36+
SortOrder = 150;
4737
}
4838

4939
/// <summary>
5040
/// The error checking to be done on the argument.
5141
/// </summary>
52-
public ArgumentType Type
53-
{
54-
get { return _type; }
55-
}
42+
public ArgumentType Type { get; }
5643

5744
/// <summary>
5845
/// The short name(s) of the argument.
@@ -64,7 +51,7 @@ public ArgumentType Type
6451
/// </summary>
6552
public string ShortName
6653
{
67-
get { return _shortName; }
54+
get => _shortName;
6855
set
6956
{
7057
Contracts.Check(value == null || !(this is DefaultArgumentAttribute));
@@ -75,54 +62,26 @@ public string ShortName
7562
/// <summary>
7663
/// The help text for the argument.
7764
/// </summary>
78-
public string HelpText
79-
{
80-
get { return _helpText; }
81-
set { _helpText = value; }
82-
}
65+
public string HelpText { get; set; }
8366

84-
public bool Hide
85-
{
86-
get { return _hide; }
87-
set { _hide = value; }
88-
}
67+
public bool Hide { get; set; }
8968

90-
public double SortOrder
91-
{
92-
get { return _sortOrder; }
93-
set { _sortOrder = value; }
94-
}
69+
public double SortOrder { get; set; }
9570

96-
public string NullName
97-
{
98-
get { return _nullName; }
99-
set { _nullName = value; }
100-
}
71+
public string NullName { get; set; }
10172

102-
public bool IsInputFileName
103-
{
104-
get { return _isInputFileName; }
105-
set { _isInputFileName = value; }
106-
}
73+
public bool IsInputFileName { get; set; }
10774

10875
/// <summary>
10976
/// Allows the GUI or other tools to inspect the intended purpose of the argument and pick a correct custom control.
11077
/// </summary>
111-
public string Purpose
112-
{
113-
get { return _specialPurpose; }
114-
set { _specialPurpose = value; }
115-
}
78+
public string Purpose { get; set; }
11679

117-
public VisibilityType Visibility
118-
{
119-
get { return _visibility; }
120-
set { _visibility = value; }
121-
}
80+
public VisibilityType Visibility { get; set; }
12281

12382
public string Name
12483
{
125-
get { return _name; }
84+
get => _name;
12685
set { _name = string.IsNullOrWhiteSpace(value) ? null : value; }
12786
}
12887

@@ -136,15 +95,8 @@ public string[] Aliases
13695
}
13796
}
13897

139-
public bool IsRequired
140-
{
141-
get { return ArgumentType.Required == (_type & ArgumentType.Required); }
142-
}
98+
public bool IsRequired => ArgumentType.Required == (Type & ArgumentType.Required);
14399

144-
public Type SignatureType
145-
{
146-
get { return _signatureType; }
147-
set { _signatureType = value; }
148-
}
100+
public Type SignatureType { get; set; }
149101
}
150102
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
7+
namespace Microsoft.ML.Runtime.CommandLine
8+
{
9+
/// <summary>
10+
/// Used to control parsing of command line arguments.
11+
/// </summary>
12+
[Flags]
13+
[BestFriend]
14+
internal enum ArgumentType
15+
{
16+
/// <summary>
17+
/// Indicates that this field is required. An error will be displayed
18+
/// if it is not present when parsing arguments.
19+
/// </summary>
20+
Required = 0x01,
21+
22+
/// <summary>
23+
/// Only valid in conjunction with Multiple.
24+
/// Duplicate values will result in an error.
25+
/// </summary>
26+
Unique = 0x02,
27+
28+
/// <summary>
29+
/// Indicates that the argument may be specified more than once.
30+
/// Only valid if the argument is a collection.
31+
/// </summary>
32+
Multiple = 0x04,
33+
34+
/// <summary>
35+
/// The default type for non-collection arguments.
36+
/// The argument is not required, but an error will be reported if it is specified more than once.
37+
/// </summary>
38+
AtMostOnce = 0x00,
39+
40+
/// <summary>
41+
/// For non-collection arguments, when the argument is specified more than
42+
/// once no error is reported and the value of the argument is the last
43+
/// value which occurs in the argument list.
44+
/// </summary>
45+
LastOccurenceWins = Multiple,
46+
47+
/// <summary>
48+
/// The default type for collection arguments.
49+
/// The argument is permitted to occur multiple times, but duplicate
50+
/// values will cause an error to be reported.
51+
/// </summary>
52+
MultipleUnique = Multiple | Unique,
53+
}
54+
}

src/Microsoft.ML.Core/CommandLine/CharCursor.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
namespace Microsoft.ML.Runtime.CommandLine
88
{
9-
public sealed class CharCursor
9+
internal sealed class CharCursor
1010
{
1111
private readonly string _text;
1212
private readonly int _ichLim;

src/Microsoft.ML.Core/CommandLine/CmdLexer.cs

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using System;
65
using System.Text;
7-
using Microsoft.ML.Runtime.Internal.Utilities;
86

97
namespace Microsoft.ML.Runtime.CommandLine
108
{
11-
public sealed class CmdLexer
9+
[BestFriend]
10+
internal sealed class CmdLexer
1211
{
1312
private CharCursor _curs;
1413

0 commit comments

Comments
 (0)