diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 0ece3697a9..f51d6dae4e 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -26,25 +26,115 @@ public ScorerPipelineStep(Var data, Var model) public Var Model { get; } } + + /// + /// The LearningPipeline class is used to define the steps needed to perform a desired machine learning task. + /// The steps are defined by adding a data loader (e.g. ) followed by zero or more transforms (e.g. ) + /// and at most one trainer/learner (e.g. ) in the pipeline. + /// + /// Data can be analyzed at every step by inspecting the LearningPipeline object in the VS.Net debugger. + /// + /// + /// For example, + /// + /// var pipeline = new LearningPipeline(); + /// pipeline.Add(new TextLoader <SentimentData> (dataPath, separator: ",")); + /// pipeline.Add(new TextFeaturizer("Features", "SentimentText")); + /// pipeline.Add(new FastTreeBinaryClassifier()); + /// + /// var model = pipeline.Train<SentimentData, SentimentPrediction>(); + /// + /// + /// [DebuggerTypeProxy(typeof(LearningPipelineDebugProxy))] public class LearningPipeline : ICollection { private List Items { get; } = new List(); + /// + /// Construct an empty LearningPipeline object. + /// public LearningPipeline() { } + /// + /// Get the count of ML components in the LearningPipeline object. + /// public int Count => Items.Count; public bool IsReadOnly => false; + + /// + /// Add a data loader, transform or trainer into the pipeline. + /// Possible data loader, transform and trainer options are: + /// + /// Data Loader: + /// + /// etc. + /// + /// + /// Transforms: + /// , + /// + /// , + /// , + /// , + /// , + /// etc. + /// + /// + /// Trainers: + /// , + /// , + /// , + /// , + /// etc. + /// + /// For a complete list of transforms and trainers, please see the "Microsoft.ML.Transforms" and "Microsoft.ML.Trainers" namespaces. 
+ /// + /// public void Add(ILearningPipelineItem item) => Items.Add(item); + + /// + /// Remove all the items (data loaders, transforms and trainers) from the pipeline. + /// public void Clear() => Items.Clear(); + + /// + /// Check whether a specific loader/transform/trainer is in the pipeline. + /// + /// Any ML component (data loader, transform or trainer) defined as ILearningPipelineItem. + /// true if the item is in the pipeline; otherwise, false. public bool Contains(ILearningPipelineItem item) => Items.Contains(item); + + /// + /// Copy the pipeline items into an array. + /// + /// Array the items are copied to. + /// Zero-based index in the destination array at which copying begins. public void CopyTo(ILearningPipelineItem[] array, int arrayIndex) => Items.CopyTo(array, arrayIndex); public IEnumerator GetEnumerator() => Items.GetEnumerator(); + + /// + /// Remove an item from the pipeline. + /// + /// ILearningPipelineItem to remove. + /// true if the item was removed; otherwise, false. public bool Remove(ILearningPipelineItem item) => Items.Remove(item); IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + /// + /// Train the model using the ML components in the pipeline. + /// + /// Type of data instances the model will be trained on. It's a custom type defined by the user according to the structure of the data. + /// + /// E.g. please see "Microsoft.ML.Scenarios.ScenarioTests.SentimentData" in "Microsoft.ML.Tests.csproj" for the input type definition for a sentiment classification task. + /// The type is defined for a .csv file that contains sentiment classification data with Sentiment and SentimentText as two columns in the .csv file. + /// + /// Output type. The prediction will be returned based on this type. + /// E.g. for the sentiment classification scenario, the prediction type is defined at "Microsoft.ML.Scenarios.ScenarioTests.SentimentPrediction" in "Microsoft.ML.Tests.csproj". + /// + /// PredictionModel object. This is the model object used for prediction on new instances. public PredictionModel Train() where TInput : class where TOutput : class, new()