diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlowTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs similarity index 93% rename from docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlowTransform.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs index bd8c0ee6d1..b832d3e828 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlowTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs @@ -2,14 +2,14 @@ using System.Linq; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Microsoft.ML.Samples.Dynamic.TensorFlow { - class TensorFlowTransformExample + class ImageClassification { /// - /// Example use of the TensorFlowEstimator in a ML.NET pipeline. + /// Example use of the TensorFlow image model in a ML.NET pipeline. /// - public static void TensorFlowScoringSample() + public static void ScoringWithImageClassificationModelSample() { // Download the ResNet 101 model from the location below. // https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs new file mode 100644 index 0000000000..b0e9a62991 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -0,0 +1,136 @@ +using System; +using System.IO; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TensorFlow; + +namespace Microsoft.ML.Samples.Dynamic.TensorFlow +{ + class TextClassification + { + public const int MaxSentenceLenth = 600; + /// + /// Example use of the TensorFlow sentiment classification model. 
+ /// + public static void ScoringWithTextClassificationModelSample() + { + string modelLocation = SamplesUtils.DatasetUtils.DownloadTensorFlowSentimentModel(); + + var mlContext = new MLContext(); + var data = new[] { new IMDBSentiment() { + Sentiment_Text = "this film was just brilliant casting location scenery story direction " + + "everyone's really suited the part they played and you could just imagine being there robert " + + "is an amazing actor and now the same being director father came from the same scottish " + + "island as myself so i loved the fact there was a real connection with this film the witty " + + "remarks throughout the film were great it was just brilliant so much that i bought the " + + "film as soon as it was released for and would recommend it to everyone to watch and the " + + "fly fishing was amazing really cried at the end it was so sad and you know what they say " + + "if you cry at a film it must have been good and this definitely was also to the two " + + "little boy's that played the of norman and paul they were just brilliant children are " + + "often left out of the list i think because the stars that play them all grown up are " + + "such a big profile for the whole film but these children are amazing and should be praised " + + "for what they have done don't you think the whole story was so lovely because it was true " + + "and was someone's life after all that was shared with us all" } }; + var dataView = mlContext.Data.ReadFromEnumerable(data); + + // This is the dictionary to convert words into the integer indexes. + var lookupMap = mlContext.Data.ReadFromTextFile(Path.Combine(modelLocation, "imdb_word_index.csv"), + columns: new[] + { + new TextLoader.Column("Words", DataKind.TX, 0), + new TextLoader.Column("Ids", DataKind.I4, 1), + }, + separatorChar: ',' + ); + + // Load the TensorFlow model once. + // - Use it for quering the schema for input and output in the model + // - Use it for prediction in the pipeline. 
+ var modelInfo = TensorFlowUtils.LoadTensorFlowModel(mlContext, modelLocation); + var schema = modelInfo.GetModelSchema(); + var featuresType = (VectorType)schema["Features"].Type; + Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Features", featuresType.ItemType.RawType, featuresType.Dimensions[0]); + var predictionType = (VectorType)schema["Prediction/Softmax"].Type; + Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Prediction/Softmax", predictionType.ItemType.RawType, predictionType.Dimensions[0]); + + // The model expects the input feature vector to be a fixed length vector. + // In this sample, CustomMappingEstimator is used to resize variable length vector to fixed length vector. + // The following ML.NET pipeline + // 1. tokenzies the string into words, + // 2. maps each word to an integer which is an index in the dictionary ('lookupMap'), + // 3. Resizes the integer vector to a fixed length vector using CustomMappingEstimator ('ResizeFeaturesAction') + // 4. Passes the data to TensorFlow for scoring. + // 5. Retreives the 'Prediction' from TensorFlow and put it into ML.NET Pipeline + + Action ResizeFeaturesAction = (i, j) => + { + j.Sentiment_Text = i.Sentiment_Text; + var features = i.VariableLenghtFeatures; + Array.Resize(ref features, MaxSentenceLenth); + j.Features = features; + }; + + var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text") + .Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new[] { ("VariableLenghtFeatures", "TokenizedWords") })) + .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) + .Append(mlContext.Transforms.ScoreTensorFlowModel(modelInfo, new[] { "Prediction/Softmax" }, new[] { "Features" })) + .Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax"))) + .Fit(dataView) + .CreatePredictionEngine(mlContext); + + // Predict with TensorFlow pipeline. 
+ var prediction = engine.Predict(data[0]); + + Console.WriteLine("Number of classes: {0}", prediction.Prediction.Length); + Console.WriteLine("Is sentiment/review positive? {0}", prediction.Prediction[1] > 0.5 ? "Yes." : "No."); + Console.WriteLine("Prediction Confidence: {0}", prediction.Prediction[1].ToString("0.00")); + + /////////////////////////////////// Expected output /////////////////////////////////// + // + // Name: Features, Type: System.Int32, Shape: (-1, 600) + // Name: Prediction/Softmax, Type: System.Single, Shape: (-1, 2) + // + // Number of classes: 2 + // Is sentiment/review positive ? Yes + // Prediction Confidence: 0.65 + } + + + /// + /// Class to hold original sentiment data. + /// + public class IMDBSentiment + { + public string Sentiment_Text { get; set; } + + /// + /// This is a variable length vector designated by VectorType(0) attribute. + /// Variable length vectors are produced by applying operations such as 'TokenizeWords' on strings + /// resulting in vectors of tokens of variable lengths. + /// + [VectorType(0)] + public int[] VariableLenghtFeatures { get; set; } + } + + /// + /// Class to hold intermediate data. Mostly used by CustomMapping Estimator + /// + public class IntermediateFeatures + { + public string Sentiment_Text { get; set; } + + [VectorType(MaxSentenceLenth)] + public int[] Features { get; set; } + } + + /// + /// Class to contain the output values from the transformation. 
+ /// + class OutputScores + { + [VectorType(2)] + public float[] Prediction { get; set; } + } + + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 44b673640f..9c4b494a6a 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -23,7 +23,7 @@ - + false diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 3df23dff41..ea62414178 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -56,6 +56,35 @@ public static string DownloadImages() return $"{path}{Path.DirectorySeparatorChar}images.tsv"; } + /// + /// Downloads sentiment_model from the dotnet/machinelearning-testdata repo. + /// + /// + /// The model is downloaded from + /// https://github.com/dotnet/machinelearning-testdata/blob/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model + /// The model is in 'SavedModel' format. For further explanation on how was the `sentiment_model` created + /// c.f. 
https://github.com/dotnet/machinelearning-testdata/blob/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model/README.md + /// + public static string DownloadTensorFlowSentimentModel() + { + string remotePath = "https://github.com/dotnet/machinelearning-testdata/raw/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model/"; + + string path = "sentiment_model"; + if(!Directory.Exists(path)) + Directory.CreateDirectory(path); + + string varPath = Path.Combine(path, "variables"); + if (!Directory.Exists(varPath)) + Directory.CreateDirectory(varPath); + + Download(Path.Combine(remotePath, "saved_model.pb"), Path.Combine(path,"saved_model.pb")); + Download(Path.Combine(remotePath, "imdb_word_index.csv"), Path.Combine(path, "imdb_word_index.csv")); + Download(Path.Combine(remotePath, "variables", "variables.data-00000-of-00001"), Path.Combine(varPath, "variables.data-00000-of-00001")); + Download(Path.Combine(remotePath, "variables", "variables.index"), Path.Combine(varPath, "variables.index")); + + return path; + } + private static string Download(string baseGitPath, string dataFile) { using (WebClient client = new WebClient()) diff --git a/src/Microsoft.ML.TensorFlow/TensorFlowModelInfo.cs b/src/Microsoft.ML.TensorFlow/TensorFlowModelInfo.cs index b8be1f91a5..b3966d3d4a 100644 --- a/src/Microsoft.ML.TensorFlow/TensorFlowModelInfo.cs +++ b/src/Microsoft.ML.TensorFlow/TensorFlowModelInfo.cs @@ -43,7 +43,7 @@ internal TensorFlowModelInfo(IHostEnvironment env, TFSession session, string mod /// /// Get for complete model. Every node in the TensorFlow model will be included in the object. 
/// - internal Schema GetModelSchema() + public Schema GetModelSchema() { return TensorFlowUtils.GetModelSchema(_env, Session.Graph); } diff --git a/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs b/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs index 2b46731076..a04c732633 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs @@ -12,7 +12,7 @@ namespace Microsoft.ML public static class TensorflowCatalog { /// - /// Scores a dataset using a pre-traiend TensorFlow model located in . + /// Scores a dataset using a pre-traiend TensorFlow model located in . /// /// The transform's catalog. /// Location of the TensorFlow model. @@ -21,7 +21,7 @@ public static class TensorflowCatalog /// /// /// /// /// @@ -32,12 +32,19 @@ public static TensorFlowEstimator ScoreTensorFlowModel(this TransformsCatalog ca => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, modelLocation); /// - /// Scores a dataset using a pre-traiend TensorFlow model specified via . + /// Scores a dataset using a pre-traiend TensorFlow model specified via . /// /// The transform's catalog. /// The pre-trained TensorFlow model. /// The names of the model inputs. /// The names of the requested model outputs. + /// + /// + /// + /// + /// public static TensorFlowEstimator ScoreTensorFlowModel(this TransformsCatalog catalog, TensorFlowModelInfo tensorFlowModel, string[] outputColumnNames, diff --git a/src/Microsoft.ML.TensorFlow/doc.xml b/src/Microsoft.ML.TensorFlow/doc.xml index fafdeb17a6..5a05508918 100644 --- a/src/Microsoft.ML.TensorFlow/doc.xml +++ b/src/Microsoft.ML.TensorFlow/doc.xml @@ -8,12 +8,12 @@ - Scoring with pretrained TensorFlow model: In this mode, the transform extracts hidden layers' values from a pre-trained Tensorflow model and uses outputs as features in ML.Net pipeline. 
+ Scoring with pretrained TensorFlow model: In this mode, the transform extracts hidden layers' values from a pre-trained TensorFlow model and uses outputs as features in ML.Net pipeline. - Retraining of TensorFlow model: In this mode, the transform retrains a TensorFlow model using the user data passed through ML.Net pipeline. Once the model is trained, it's outputs can be used as features for scoring. + Retraining of TensorFlow model: In this mode, the transform retrains a TensorFlow model using the user data passed through ML.Net pipeline. Once the model is trained, its outputs can be used as features for scoring.