Docs 2nd pass for NaiveBayes, KMeans, OVA, Pairwise and OnnxTransformer #3387


Merged · 13 commits · Apr 21, 2019
7 changes: 7 additions & 0 deletions docs/api-reference/io-columns-clustering.md
@@ -0,0 +1,7 @@
### Input and Output Columns
The input features column data must be <xref:System.Single>. No label column is needed. This trainer outputs the following columns:

| Output Column Name | Column Type | Description|
| -- | -- | -- |
| `Score` | Vector of <xref:System.Single> | The distances of the given data point to all cluster centroids.|
| `PredictedLabel` | <xref:System.UInt32> | The index of the closest cluster predicted by the trainer.|
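As an illustrative sketch (not part of this PR), the output columns above can be surfaced through a prediction class. The class and property names here are assumptions for demonstration; the column shapes follow the usual ML.NET clustering output (a key-typed label and a vector of per-cluster scores):

```csharp
using Microsoft.ML.Data;

// Hypothetical prediction class matching the clustering output columns.
public class ClusterPrediction
{
    // The id of the predicted (closest) cluster.
    [ColumnName("PredictedLabel")]
    public uint PredictedLabel { get; set; }

    // Per-cluster scores computed by the trainer, one entry per cluster.
    [ColumnName("Score")]
    public float[] Distances { get; set; }
}
```

An instance of this class would typically be produced by a `PredictionEngine` created from the trained model.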
37 changes: 36 additions & 1 deletion src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs
@@ -26,7 +26,38 @@

namespace Microsoft.ML.Trainers
{
/// <include file='./doc.xml' path='doc/members/member[@name="KMeans++"]/*' />
/// <summary>
@wschin (Member), Apr 18, 2019:
Do you think the scoring function of the trained model should be explained? #WontFix

@ganik (Member, Author):
You mean the Predict function? The one that predicts which cluster a data point goes to?
In reply to: 276525272

@ganik (Member, Author):
Or do you mean how the distance is calculated when the algorithm converges to centroids?
In reply to: 277107516

@ganik (Member, Author), Apr 19, 2019:
In either case I don't think so. The former is not specific to KMeans++. For the latter, how am I supposed to do that? Should I deduce it from the code? Secondly, I don't think this information is important for .NET devs. All they need to know is that this is an improved version of KMeans, with the high-level details of the improvements described in the wiki links.
In reply to: 277107690

@wschin (Member), Apr 20, 2019:
That's what I did. Debugging an ML model is debugging math equations; without them, how can users improve their models? We keep saying that .NET devs can ignore the details, but that doesn't seem entirely true in ML.NET's gitter channel.

Anyway, I am not blocking your PR. Just want to pass along some of my observations. :) #Resolved

@ganik (Member, Author), Apr 21, 2019:
Sure. Once we have more time we could prioritize such tasks; currently this is lower priority than the rest of the things that need to be done.
In reply to: 277148107

/// The <see cref="IEstimator{TTransformer}"/> for training a KMeans clusterer
/// </summary>
/// <remarks>
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [KMeansTrainer](xref:Microsoft.ML.Trainers.KMeansTrainer).
///
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-clustering.md)]
///
/// ### Trainer Characteristics
@shmoradims, Apr 20, 2019:
The input/output table is missing. #Resolved

/// | | |
/// | -- | -- |
/// | Machine learning task | Clustering |
/// | Is normalization required? | Yes |
@codemzs (Member), Apr 19, 2019:
On "Yes": are you sure? #Resolved

@ganik (Member, Author):
Yep. By default it uses the default TrainerInfo(), which has these default settings.
In reply to: 277113743

/// | Is caching required? | Yes |
/// | Required NuGet in addition to Microsoft.ML | None |
@codemzs (Member), Apr 19, 2019:
On "None": please check this. #Resolved

@ganik (Member, Author):
Yep, verified; it's true.
In reply to: 277113763

///
/// ### Training Algorithm Details
/// K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified
/// number of clusters in order to minimize the within-cluster sum of squares.
/// This trainer improves upon plain K-means in two ways: it uses the K-means++ method to choose the
/// initial cluster centers, and it uses the [Yinyang K-Means](https://research.microsoft.com/apps/pubs/default.aspx?id=252149)
/// technique to accelerate the clustering iterations.
/// Yinyang K-Means speeds up K-Means by up to an order of magnitude while producing exactly the same clustering results (modulo floating-point precision issues).
/// It observes that there is a lot of redundancy across iterations of the K-Means algorithm and that most points do not change their clusters during an iteration.
/// It uses various bounding techniques to identify this redundancy, eliminate many distance computations, and optimize centroid computations.
/// For more information on K-means and K-means++, see:
/// [K-means](https://en.wikipedia.org/wiki/K-means_clustering)
/// [K-means++](https://en.wikipedia.org/wiki/K-means%2b%2b)
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="Microsoft.ML.Trainers.KMeansTrainer" />
public class KMeansTrainer : TrainerEstimatorBase<ClusteringPredictionTransformer<KMeansModelParameters>, KMeansModelParameters>
{
internal const string LoadNameValue = "KMeansPlusPlus";
@@ -50,6 +81,10 @@ internal static class Defaults
public const int NumberOfClusters = 5;
}

/// <summary>
/// Options for the <see cref="KMeansTrainer"/> as used in
/// [KMeansTrainer(Options)](xref:Microsoft.ML.KMeansClusteringExtensions.KMeans(Microsoft.ML.ClusteringCatalog.ClusteringTrainers,Microsoft.ML.Trainers.KMeansTrainer.Options)).
/// </summary>
public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
{
/// <summary>
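A minimal usage sketch of the trainer documented above. The file path, column name, and cluster count are assumptions for illustration; the catalog method is the standard ML.NET entry point:

```csharp
using Microsoft.ML;

var mlContext = new MLContext(seed: 0);

// Assumes a saved data view whose "Features" column is a known-sized
// vector of floats.
IDataView trainingData = mlContext.Data.LoadFromBinary("data.idv");

// KMeansTrainer via the clustering catalog; no label column is needed,
// and normalization and caching are requested by the trainer as described
// in the characteristics table.
var pipeline = mlContext.Clustering.Trainers.KMeans(
    featureColumnName: "Features",
    numberOfClusters: 5);

var model = pipeline.Fit(trainingData);
```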
36 changes: 0 additions & 36 deletions src/Microsoft.ML.KMeansClustering/doc.xml

This file was deleted.

7 changes: 7 additions & 0 deletions src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs
@@ -23,6 +23,13 @@ public static class OnnxCatalog
/// <param name="modelFile">The path of the file containing the ONNX model.</param>
/// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null" /> to run on CPU.</param>
/// <param name="fallbackToCpu">If a GPU error occurs, whether to raise an exception or fall back to CPU execution.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ApplyOnnxModel](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyOnnxModel.cs)]
/// ]]>
/// </format>
/// </example>
public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog,
string modelFile,
int? gpuDeviceId = null,
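A hedged usage sketch of the catalog method above (the model path is a placeholder; scoring runs on CPU by default):

```csharp
using Microsoft.ML;

var mlContext = new MLContext();

// Apply a pretrained ONNX model. With this overload the input and output
// column names are taken from the ONNX model itself.
var pipeline = mlContext.Transforms.ApplyOnnxModel("model.onnx");
```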
47 changes: 25 additions & 22 deletions src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs
@@ -33,30 +33,33 @@
namespace Microsoft.ML.Transforms.Onnx
{
/// <summary>
/// <p>A transform for scoring ONNX models in the ML.NET framework.</p>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs)]
/// ]]>
/// </format>
/// <see cref="IEstimator{TTransformer}"/> for scoring ONNX models in the ML.NET framework.
/// </summary>
/// <remarks>
/// <p>Supports inferencing of models in ONNX 1.2 and 1.3 format (opset 7, 8 and 9), using the
/// <a href='https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime/'>Microsoft.ML.OnnxRuntime</a> library.
/// </p>
/// <p>Models are scored on CPU by default. If GPU execution is needed (optional), use the
/// NuGet package available at
/// <a href='https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.Gpu/'>Microsoft.ML.OnnxRuntime.Gpu</a>
/// and download
/// <a href='https://developer.nvidia.com/cuda-downloads'>CUDA 9.1 Toolkit</a>
/// and
/// <a href='https://developer.nvidia.com/cudnn'>cuDNN</a>.
/// Set parameter 'gpuDeviceId' to a valid non-negative integer. Typical device ID values are 0 or 1.
/// </p>
/// <p>The inputs and outputs of the ONNX models must be Tensor type. Sequence and Maps are not yet supported.</p>
/// <p>OnnxRuntime currently works on Windows and Ubuntu 16.04 Linux 64-bit platforms. Mac OS to be supported soon.</p>
/// <p>Visit https://github.com/onnx/models to see a list of readily available models to get started with.</p>
/// <p>Refer to http://onnx.ai for more information about ONNX.</p>
/// <format type="text/markdown"><![CDATA[
///
/// ### Estimator Characteristics
/// | | |
/// | -- | -- |
/// | Does this estimator need to look at the data to train its parameters? | No |
/// | Input column data type | Known-sized vector of <xref:System.Single> or <xref:System.Double> types. |
/// | Output column data type | The same data type as the input column |
///
/// Supports inferencing of models in ONNX 1.2 and 1.3 format (opset 7, 8 and 9), using the
/// [Microsoft.ML.OnnxRuntime](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime/) library.
/// Models are scored on CPU by default. If GPU execution is needed (optional), use the
/// NuGet package available at [Microsoft.ML.OnnxRuntime.Gpu](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.Gpu/)
/// and download [CUDA 9.1 Toolkit](https://developer.nvidia.com/cuda-downloads) and [cuDNN](https://developer.nvidia.com/cudnn).
/// Set parameter 'gpuDeviceId' to a valid non-negative integer. Typical device ID values are 0 or 1.
/// The inputs and outputs of the ONNX models must be Tensor type. Sequence and Maps are not yet supported.
/// OnnxRuntime currently works on Windows and Ubuntu 16.04 Linux 64-bit platforms. Mac OS to be supported soon.
/// Visit [ONNX Models](https://github.com/onnx/models) to see a list of readily available models to get started with.
/// Refer to [ONNX](http://onnx.ai) for more information.
///
/// To create this estimator use the following:
/// [ApplyOnnxModel](xref:Microsoft.ML.OnnxCatalog.ApplyOnnxModel*)
/// ]]>
/// </format>
/// </remarks>
public sealed class OnnxTransformer : RowToRowTransformerBase
{
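Following the GPU notes in the remarks above, a sketch of GPU scoring. This assumes the Microsoft.ML.OnnxRuntime.Gpu package, CUDA, and cuDNN are installed, and the model path is a placeholder:

```csharp
using Microsoft.ML;

var mlContext = new MLContext();

// Run on GPU device 0; fall back to CPU instead of throwing if GPU
// initialization fails.
var pipeline = mlContext.Transforms.ApplyOnnxModel(
    modelFile: "model.onnx",
    gpuDeviceId: 0,
    fallbackToCpu: true);
```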
@@ -27,11 +27,38 @@
namespace Microsoft.ML.Trainers
{
/// <summary>
/// Naive Bayes classifier is based on Bayes' theorem. It assumes independence among the presence of features
/// in a class even though they may be dependent on each other. It is a multi-class trainer that accepts
/// binary feature values of type float, i.e., feature values are either true or false, specifically a
/// feature value greater than zero is treated as true.
/// The <see cref="IEstimator{TTransformer}"/> for training a multiclass Naive Bayes predictor that supports binary feature values.
/// </summary>
/// <remarks>
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [NaiveBayes](xref:Microsoft.ML.StandardTrainersCatalog.NaiveBayes(Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers,System.String,System.String)).
///
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-multiclass-classification.md)]
///
/// ### Trainer Characteristics
/// | | |
/// | -- | -- |
/// | Machine learning task | Multiclass classification |
/// | Is normalization required? | Yes |
/// | Is caching required? | No |
/// | Required NuGet in addition to Microsoft.ML | None |
///
/// ### Training Algorithm Details
/// [Naive Bayes](https://en.wikipedia.org/wiki/Naive_Bayes_classifier)
/// is a probabilistic classifier that can be used for multiclass problems.
/// Using Bayes' theorem, the conditional probability for a sample belonging to a class
/// can be calculated based on the sample count for each feature combination group.
@wschin (Member), Apr 18, 2019:
I'd recommend adding a math equation to describe the scoring function. It's not easy to figure out how this model does prediction given only a text description. #ByDesign

@ganik (Member, Author):
I'll need to read a K-Means++ paper to figure that out. I don't think users care about the exact formula. What they would care about is that it is an improved version of K-Means and can be used as such (wherever K-Means is used).
In reply to: 276525896

/// However, a Naive Bayes classifier is feasible only if the number of features and
/// the number of values each feature can take are relatively small.
/// It assumes independence among the presence of features in a class even though
/// they may be dependent on each other.
/// This multi-class trainer accepts "binary" feature values of type float:
/// feature values that are greater than zero are treated as true, and feature values
/// that are less than or equal to zero are treated as false.
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="StandardTrainersCatalog.NaiveBayes(Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers,System.String,System.String)"/>
public sealed class NaiveBayesMulticlassTrainer : TrainerEstimatorBase<MulticlassPredictionTransformer<NaiveBayesMulticlassModelParameters>, NaiveBayesMulticlassModelParameters>
{
internal const string LoadName = "MultiClassNaiveBayes";
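As a hedged sketch (the exact ML.NET scoring code may differ), the standard count-based Naive Bayes decision rule that the doc comment above describes is:

```latex
\hat{y} \;=\; \arg\max_{c}\; \log P(c) \;+\; \sum_{i=1}^{n} \log P(x_i \mid c),
\qquad
P(x_i = 1 \mid c) \;\approx\; \frac{\mathrm{count}(x_i > 0,\; c)}{\mathrm{count}(c)}
```

That is, the class prior and the per-feature conditional probabilities are estimated from training-sample counts, and the predicted class maximizes their (log) product.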
@@ -24,7 +24,7 @@
[assembly: LoadableClass(OneVersusAllTrainer.Summary, typeof(OneVersusAllTrainer), typeof(OneVersusAllTrainer.Options),
new[] { typeof(SignatureMulticlassClassifierTrainer), typeof(SignatureTrainer) },
OneVersusAllTrainer.UserNameValue,
OneVersusAllTrainer.LoadNameValue, DocName = "trainer/OvaPkpd.md")]
OneVersusAllTrainer.LoadNameValue)]

[assembly: LoadableClass(typeof(OneVersusAllModelParameters), null, typeof(SignatureLoadModel),
"OVA Executor",
@@ -37,7 +37,49 @@ namespace Microsoft.ML.Trainers
using TDistPredictor = IDistPredictorProducing<float, float>;
using TScalarPredictor = IPredictorProducing<float>;
using TScalarTrainer = ITrainerEstimator<ISingleFeaturePredictionTransformer<IPredictorProducing<float>>, IPredictorProducing<float>>;

/// <summary>
/// The <see cref="IEstimator{TTransformer}"/> for training a one-versus-all multi-class classifier on top of the specified binary classifier.
/// </summary>
/// <remarks>
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [OneVersusAll](xref:Microsoft.ML.StandardTrainersCatalog.OneVersusAll``1(Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers,Microsoft.ML.Trainers.ITrainerEstimator{Microsoft.ML.Data.BinaryPredictionTransformer{``0},``0},System.String,System.Boolean,Microsoft.ML.IEstimator{Microsoft.ML.ISingleFeaturePredictionTransformer{Microsoft.ML.Calibrators.ICalibrator}},System.Int32,System.Boolean)).
///
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-multiclass-classification.md)]
///
/// ### Trainer Characteristics
/// | | |
/// | -- | -- |
/// | Machine learning task | Multiclass classification |
/// | Is normalization required? | Depends on the underlying binary classifier |
/// | Is caching required? | Depends on the underlying binary classifier |
/// | Required NuGet in addition to Microsoft.ML | None |
///
/// ### Training Algorithm Details
/// In this strategy, a binary classification algorithm is used to train one classifier for each class,
/// which distinguishes that class from all other classes. Prediction is then performed by running
/// these binary classifiers and choosing the prediction with the highest confidence score.
/// This algorithm can be used with any of the binary classifiers in ML.NET. A few binary classifiers
/// already have implementations for multi-class problems, so users can choose either one depending on the context.
/// The OVA version of a binary classifier, such as wrapping LightGbmBinaryClassifier,
/// can be different from LightGbmClassifier, which develops a multi-class classifier directly.
/// Note that even if the classifier indicates that it does not need caching, OneVersusAll will always
/// request caching, as it will be performing multiple passes over the data set.
/// This trainer will request normalization from the data pipeline if the classifier indicates it would benefit from it.
///
/// This can allow you to exploit trainers that do not naturally have a
/// multiclass option, for example, using the FastTree Binary Classification
/// to solve a multiclass problem.
/// Alternatively, it can allow ML.NET to solve a "simpler" problem even in the cases
/// where the trainer has a multiclass option, but using it directly is not
/// practical due to, usually, memory constraints. For example, while a multiclass
/// logistic regression is a more principled way to solve a multiclass problem, it
/// requires that the trainer store a lot more intermediate state in the form of
/// L-BFGS history for all classes *simultaneously*, rather than just one-by-one
/// as would be needed for a one-versus-all classification model.
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="StandardTrainersCatalog.OneVersusAll{TModel}(MulticlassClassificationCatalog.MulticlassClassificationTrainers, ITrainerEstimator{BinaryPredictionTransformer{TModel}, TModel}, string, bool, IEstimator{ISingleFeaturePredictionTransformer{ICalibrator}}, int, bool)" />
public sealed class OneVersusAllTrainer : MetaMulticlassTrainer<MulticlassPredictionTransformer<OneVersusAllModelParameters>, OneVersusAllModelParameters>
{
internal const string LoadNameValue = "OVA";
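A minimal usage sketch of the strategy described above (the choice of binary trainer is an arbitrary example):

```csharp
using Microsoft.ML;

var mlContext = new MLContext();

// Wrap a binary classifier (here AveragedPerceptron) so it can solve a
// multiclass problem via the one-versus-all strategy: one binary model is
// trained per class, and the highest-scoring model wins at prediction time.
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
var pipeline = mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer);
```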