Skip to content

Commit d3004e6

Browse files
authored
XML strings for the documentation should live outside of the src code, in xml files. (#510)
* Moving from xml strings to having the documentation details in xml files. For the summary text that is common between several learners, the examples will be added on a separate node. An example of what that will look like is in the LogisticRegressionBinaryClassifier and LogisticRegressionClassifier. * fixing the aftermath of renaming the XML file. * removing the Desc from the EntryPoint attribute is a bad idea. * removing the XML docs from the doc folder, and adding them under the respective projects. * Some OSes get picky about casing. * file name should be vanilla * Fixing comment
1 parent 9d9d74e commit d3004e6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+556
-619
lines changed

src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,9 +529,9 @@ public sealed class EntryPointAttribute : Attribute
529529
public string ShortName { get; set; }
530530

531531
/// <summary>
532-
/// Remarks on the Entry Point, for more extensive XML documentation on the C#API
532+
/// The XML documentation include elements (each naming a doc file and the path of the nodes within it) for the CSharpAPI component
533533
/// </summary>
534-
public string Remarks { get; set; }
534+
public string[] XmlInclude { get; set; }
535535
}
536536

537537
/// <summary>

src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public sealed class EntryPointInfo
4444
public readonly string Description;
4545
public readonly string ShortName;
4646
public readonly string FriendlyName;
47-
public readonly string Remarks;
47+
public readonly string[] XmlInclude;
4848
public readonly MethodInfo Method;
4949
public readonly Type InputType;
5050
public readonly Type OutputType;
@@ -64,7 +64,7 @@ internal EntryPointInfo(IExceptionContext ectx, MethodInfo method,
6464
Method = method;
6565
ShortName = attribute.ShortName;
6666
FriendlyName = attribute.UserName;
67-
Remarks = attribute.Remarks;
67+
XmlInclude = attribute.XmlInclude;
6868
ObsoleteAttribute = obsoleteAttribute;
6969

7070
// There are supposed to be 2 parameters, env and input for non-macro nodes.

src/Microsoft.ML.FastTree/FastTree.cs

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -82,31 +82,6 @@ public abstract class FastTreeTrainerBase<TArgs, TPredictor> :
8282

8383
protected string InnerArgs => CmdParser.GetSettings(Host, Args, new TArgs());
8484

85-
internal const string Remarks = @"<remarks>
86-
<para>FastTrees is an efficient implementation of the <a href='https://arxiv.org/abs/1505.01866'>MART</a> gradient boosting algorithm.
87-
Gradient boosting is a machine learning technique for regression problems.
88-
It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
89-
So this prediction model is actually an ensemble of weaker prediction models. In regression problems, boosting builds a series of of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
90-
</para>
91-
<para>
92-
MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
93-
A decision (or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
94-
At each leaf node, a value is returned. In the interior nodes, the decision is based on the test 'x <= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
95-
The functions that can be produced by a regression tree are all the piece-wise constant functions.
96-
</para>
97-
<para>
98-
The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
99-
The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
100-
</para>
101-
<list type='bullet'>
102-
<item>In case of a binary classification problem, the output is converted to a probability by using some form of calibration.</item>
103-
<item>In case of a regression problem, the output is the predicted value of the function.</item>
104-
<item>In case of a ranking problem, the instances are ordered by the output value of the ensemble.</item>
105-
</list>
106-
<a href='https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting'>Wikipedia: Gradient boosting (Gradient tree boosting)</a>.
107-
<a href='http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aos/1013203451'>Greedy function approximation: A gradient boosting machine.</a>.
108-
</remarks>";
109-
11085
public override bool NeedNormalization => false;
11186

11287
public override bool WantCaching => false;

src/Microsoft.ML.FastTree/FastTreeArguments.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ public interface IFastTreeTrainerFactory : IComponentFactory<ITrainer>
2020
{
2121
}
2222

23+
/// <include file='./doc.xml' path='docs/members/member[@name="FastTree"]/*' />
2324
public sealed partial class FastTreeBinaryClassificationTrainer
2425
{
2526
[TlcModule.Component(Name = LoadNameValue, FriendlyName = UserNameValue, Desc = Summary)]

src/Microsoft.ML.FastTree/FastTreeClassification.cs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ public static IPredictorProducing<Float> Create(IHostEnvironment env, ModelLoadC
100100
public override PredictionKind PredictionKind { get { return PredictionKind.BinaryClassification; } }
101101
}
102102

103+
/// <include file='./doc.xml' path='docs/members/member[@name="FastTree"]/*' />
103104
public sealed partial class FastTreeBinaryClassificationTrainer :
104105
BoostingFastTreeTrainerBase<FastTreeBinaryClassificationTrainer.Arguments, IPredictorWithFeatureWeights<Float>>
105106
{
@@ -336,13 +337,16 @@ public void AdjustTreeOutputs(IChannel ch, RegressionTree tree,
336337
}
337338
}
338339

340+
/// <summary>
341+
/// The Entry Point for the FastTree Binary Classifier.
342+
/// </summary>
339343
public static partial class FastTree
340344
{
341345
[TlcModule.EntryPoint(Name = "Trainers.FastTreeBinaryClassifier",
342346
Desc = FastTreeBinaryClassificationTrainer.Summary,
343-
Remarks = FastTreeBinaryClassificationTrainer.Remarks,
344347
UserName = FastTreeBinaryClassificationTrainer.UserNameValue,
345-
ShortName = FastTreeBinaryClassificationTrainer.ShortName)]
348+
ShortName = FastTreeBinaryClassificationTrainer.ShortName,
349+
XmlInclude = new[] { @"<include file='../Microsoft.ML.FastTree/doc.xml' path='docs/members/member[@name=""FastTree""]/*' />" })]
346350
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, FastTreeBinaryClassificationTrainer.Arguments input)
347351
{
348352
Contracts.CheckValue(env, nameof(env));

src/Microsoft.ML.FastTree/FastTreeRanking.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
namespace Microsoft.ML.Runtime.FastTree
4040
{
41+
/// <include file='./doc.xml' path='docs/members/member[@name="FastTree"]/*' />
4142
public sealed partial class FastTreeRankingTrainer : BoostingFastTreeTrainerBase<FastTreeRankingTrainer.Arguments, FastTreeRankingPredictor>,
4243
IHasLabelGains
4344
{
@@ -1098,9 +1099,9 @@ public static partial class FastTree
10981099
{
10991100
[TlcModule.EntryPoint(Name = "Trainers.FastTreeRanker",
11001101
Desc = FastTreeRankingTrainer.Summary,
1101-
Remarks = FastTreeRankingTrainer.Remarks,
11021102
UserName = FastTreeRankingTrainer.UserNameValue,
1103-
ShortName = FastTreeRankingTrainer.ShortName)]
1103+
ShortName = FastTreeRankingTrainer.ShortName,
1104+
XmlInclude = new[] { @"<include file='../Microsoft.ML.FastTree/doc.xml' path='docs/members/member[@name=""FastTree""]/*' />" })]
11041105
public static CommonOutputs.RankingOutput TrainRanking(IHostEnvironment env, FastTreeRankingTrainer.Arguments input)
11051106
{
11061107
Contracts.CheckValue(env, nameof(env));

src/Microsoft.ML.FastTree/FastTreeRegression.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
namespace Microsoft.ML.Runtime.FastTree
3333
{
34+
/// <include file='./doc.xml' path='docs/members/member[@name="FastTree"]/*' />
3435
public sealed partial class FastTreeRegressionTrainer : BoostingFastTreeTrainerBase<FastTreeRegressionTrainer.Arguments, FastTreeRegressionPredictor>
3536
{
3637
public const string LoadNameValue = "FastTreeRegression";
@@ -450,9 +451,9 @@ public static partial class FastTree
450451
{
451452
[TlcModule.EntryPoint(Name = "Trainers.FastTreeRegressor",
452453
Desc = FastTreeRegressionTrainer.Summary,
453-
Remarks = FastTreeRegressionTrainer.Remarks,
454454
UserName = FastTreeRegressionTrainer.UserNameValue,
455-
ShortName = FastTreeRegressionTrainer.ShortName)]
455+
ShortName = FastTreeRegressionTrainer.ShortName,
456+
XmlInclude = new[] { @"<include file='../Microsoft.ML.FastTree/doc.xml' path='docs/members/member[@name=""FastTree""]/*' />" })]
456457
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, FastTreeRegressionTrainer.Arguments input)
457458
{
458459
Contracts.CheckValue(env, nameof(env));

src/Microsoft.ML.FastTree/FastTreeTweedie.cs

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,15 @@
2727

2828
namespace Microsoft.ML.Runtime.FastTree
2929
{
30-
/// <summary>
31-
/// The Tweedie boosting model follows the mathematics established in:
32-
/// Yang, Quan, and Zou. "Insurance Premium Prediction via Gradient Tree-Boosted Tweedie Compound Poisson Models."
33-
/// https://arxiv.org/pdf/1508.06378.pdf
34-
/// </summary>
30+
// The Tweedie boosting model follows the mathematics established in:
31+
// Yang, Quan, and Zou. "Insurance Premium Prediction via Gradient Tree-Boosted Tweedie Compound Poisson Models."
32+
// https://arxiv.org/pdf/1508.06378.pdf
33+
/// <include file='./doc.xml' path='docs/members/member[@name="FastTreeTweedieRegression"]/*' />
3534
public sealed partial class FastTreeTweedieTrainer : BoostingFastTreeTrainerBase<FastTreeTweedieTrainer.Arguments, FastTreeTweediePredictor>
3635
{
3736
public const string LoadNameValue = "FastTreeTweedieRegression";
3837
public const string UserNameValue = "FastTree (Boosted Trees) Tweedie Regression";
3938
public const string Summary = "Trains gradient boosted decision trees to fit target values using a Tweedie loss function. This learner is a generalization of Poisson, compound Poisson, and gamma regression.";
40-
new public const string Remarks = @"<remarks>
41-
<a href='https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting'>Wikipedia: Gradient boosting (Gradient tree boosting)</a>
42-
<a href='http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aos/1013203451'>Greedy function approximation: A gradient boosting machine</a>
43-
</remarks>";
44-
4539
public const string ShortName = "fttweedie";
4640

4741
private TestHistory _firstTestSetHistory;
@@ -466,7 +460,8 @@ public static partial class FastTree
466460
[TlcModule.EntryPoint(Name = "Trainers.FastTreeTweedieRegressor",
467461
Desc = FastTreeTweedieTrainer.Summary,
468462
UserName = FastTreeTweedieTrainer.UserNameValue,
469-
ShortName = FastTreeTweedieTrainer.ShortName)]
463+
ShortName = FastTreeTweedieTrainer.ShortName,
464+
XmlInclude = new [] { @"<include file='../Microsoft.ML.FastTree/doc.xml' path='docs/members/member[@name=""FastTreeTweedieRegression""]/*' />" })]
470465
public static CommonOutputs.RegressionOutput TrainTweedieRegression(IHostEnvironment env, FastTreeTweedieTrainer.Arguments input)
471466
{
472467
Contracts.CheckValue(env, nameof(env));

src/Microsoft.ML.FastTree/RandomForest.cs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,6 @@ public abstract class RandomForestTrainerBase<TArgs, TPredictor> : FastTreeTrain
1212
where TArgs : FastForestArgumentsBase, new()
1313
where TPredictor : IPredictorProducing<Float>
1414
{
15-
new internal const string Remarks = @"<remarks>
16-
Decision trees are non-parametric models that perform a sequence of simple tests on inputs.
17-
This decision procedure maps them to outputs found in the training dataset whose inputs were similar to the instance being processed.
18-
A decision is made at each node of the binary tree data structure based on a measure of similarity that maps each instance recursively through the branches of the tree until the appropriate leaf node is reached and the output decision returned.
19-
<para>Decision trees have several advantages:</para>
20-
<list type='bullet'>
21-
<item>They are efficient in both computation and memory usage during training and prediction. </item>
22-
<item>They can represent non-linear decision boundaries.</item>
23-
<item>They perform integrated feature selection and classification. </item>
24-
<item>They are resilient in the presence of noisy features.</item>
25-
</list>
26-
Fast forest is a random forest implementation.
27-
The model consists of an ensemble of decision trees. Each tree in a decision forest outputs a Gaussian distribution by way of prediction.
28-
An aggregation is performed over the ensemble of trees to find a Gaussian distribution closest to the combined distribution for all trees in the model.
29-
This decision forest classifier consists of an ensemble of decision trees.
30-
Generally, ensemble models provide better coverage and accuracy than single decision trees.
31-
Each tree in a decision forest outputs a Gaussian distribution.
32-
<a href='http://en.wikipedia.org/wiki/Random_forest'>Wikipedia: Random forest</a>
33-
<a href='http://jmlr.org/papers/volume7/meinshausen06a/meinshausen06a.pdf'>Quantile regression forest</a>
34-
<a href='https://blogs.technet.microsoft.com/machinelearning/2014/09/10/from-stumps-to-trees-to-forests/'>From Stumps to Trees to Forests</a>
35-
</remarks>";
36-
3715
private readonly bool _quantileEnabled;
3816

3917
protected RandomForestTrainerBase(IHostEnvironment env, TArgs args, bool quantileEnabled = false)

src/Microsoft.ML.FastTree/RandomForestClassification.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ public static IPredictorProducing<Float> Create(IHostEnvironment env, ModelLoadC
106106
}
107107
}
108108

109+
/// <include file='./doc.xml' path='docs/members/member[@name="FastForest"]/*' />
109110
public sealed partial class FastForestClassification :
110111
RandomForestTrainerBase<FastForestClassification.Arguments, IPredictorWithFeatureWeights<Float>>
111112
{
@@ -210,9 +211,9 @@ public static partial class FastForest
210211
{
211212
[TlcModule.EntryPoint(Name = "Trainers.FastForestBinaryClassifier",
212213
Desc = FastForestClassification.Summary,
213-
Remarks = FastForestClassification.Remarks,
214214
UserName = FastForestClassification.UserNameValue,
215-
ShortName = FastForestClassification.ShortName)]
215+
ShortName = FastForestClassification.ShortName,
216+
XmlInclude = new[] { @"<include file='../Microsoft.ML.FastTree/doc.xml' path='docs/members/member[@name=""FastForest""]/*' />" })]
216217
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, FastForestClassification.Arguments input)
217218
{
218219
Contracts.CheckValue(env, nameof(env));

0 commit comments

Comments
 (0)