diff --git a/src/Microsoft.ML.TensorFlow/AssemblyInfo.cs b/src/Microsoft.ML.TensorFlow/AssemblyInfo.cs new file mode 100644 index 0000000000..ea6400c0d1 --- /dev/null +++ b/src/Microsoft.ML.TensorFlow/AssemblyInfo.cs @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TensorFlow.StaticPipe" + PublicKey.Value)] \ No newline at end of file diff --git a/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs b/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs index 7c5eac8f4a..517c6687c7 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs @@ -45,23 +45,23 @@ public static TensorFlowEstimator ScoreTensorFlowModel(this TransformsCatalog ca => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, tensorFlowModel); /// - /// Score or Retrain a tensorflow model (based on setting of the ) setting. - /// The model is specified in the . + /// Score or Retrain a tensorflow model (based on setting of the ) setting. + /// The model is specified in the . /// /// The transform's catalog. - /// The specifying the inputs and the settings of the . + /// The specifying the inputs and the settings of the . public static TensorFlowEstimator TensorFlow(this TransformsCatalog catalog, - TensorFlowTransformer.Arguments args) + TensorFlowTransformer.Options args) => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), args); /// - /// Scores or retrains (based on setting of the ) a pre-traiend TensorFlow model specified via . 
+ /// Scores or retrains (based on setting of the ) a pre-trained TensorFlow model specified via . /// /// The transform's catalog. /// The specifying the inputs and the settings of the . /// The pre-trained TensorFlow model. public static TensorFlowEstimator TensorFlow(this TransformsCatalog catalog, - TensorFlowTransformer.Arguments args, + TensorFlowTransformer.Options args, TensorFlowModelInfo tensorFlowModel) => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), args, tensorFlowModel); } diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index c5d7600e7f..ad28f48525 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -19,7 +19,7 @@ using Microsoft.ML.Transforms.TensorFlow; [assembly: LoadableClass(TensorFlowTransformer.Summary, typeof(IDataTransform), typeof(TensorFlowTransformer), - typeof(TensorFlowTransformer.Arguments), typeof(SignatureDataTransform), TensorFlowTransformer.UserName, TensorFlowTransformer.ShortName)] + typeof(TensorFlowTransformer.Options), typeof(SignatureDataTransform), TensorFlowTransformer.UserName, TensorFlowTransformer.ShortName)] [assembly: LoadableClass(TensorFlowTransformer.Summary, typeof(IDataTransform), typeof(TensorFlowTransformer), null, typeof(SignatureLoadDataTransform), TensorFlowTransformer.UserName, TensorFlowTransformer.LoaderSignature)] @@ -37,7 +37,7 @@ namespace Microsoft.ML.Transforms /// public sealed class TensorFlowTransformer : RowToRowTransformerBase { - public sealed class Arguments : TransformInputBase + public sealed class Options : TransformInputBase { /// /// Location of the TensorFlow model. @@ -297,7 +297,7 @@ private static TensorFlowTransformer Create(IHostEnvironment env, ModelLoadConte } // Factory method for SignatureDataTransform. 
- internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) + internal static IDataTransform Create(IHostEnvironment env, Options args, IDataView input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(args, nameof(args)); @@ -308,12 +308,12 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat return new TensorFlowTransformer(env, args, input).MakeDataTransform(input); } - internal TensorFlowTransformer(IHostEnvironment env, Arguments args, IDataView input) + internal TensorFlowTransformer(IHostEnvironment env, Options args, IDataView input) : this(env, args, TensorFlowUtils.LoadTensorFlowModel(env, args.ModelLocation), input) { } - internal TensorFlowTransformer(IHostEnvironment env, Arguments args, TensorFlowModelInfo tensorFlowModel, IDataView input) + internal TensorFlowTransformer(IHostEnvironment env, Options args, TensorFlowModelInfo tensorFlowModel, IDataView input) : this(env, tensorFlowModel.Session, args.OutputColumns, args.InputColumns, TensorFlowUtils.IsSavedModel(env, args.ModelLocation) ? 
args.ModelLocation : null, false) { @@ -332,7 +332,7 @@ internal TensorFlowTransformer(IHostEnvironment env, Arguments args, TensorFlowM } } - private void CheckTrainingParameters(Arguments args) + private void CheckTrainingParameters(Options args) { Host.CheckNonWhiteSpace(args.LabelColumn, nameof(args.LabelColumn)); Host.CheckNonWhiteSpace(args.OptimizationOperation, nameof(args.OptimizationOperation)); @@ -401,7 +401,7 @@ private void CheckTrainingParameters(Arguments args) return (inputColIndex, isInputVector, tfInputType, tfInputShape); } - private void TrainCore(Arguments args, IDataView input) + private void TrainCore(Options args, IDataView input) { var inputsForTraining = new string[Inputs.Length + 1]; var inputColIndices = new int[inputsForTraining.Length]; @@ -479,7 +479,7 @@ private void TrainCore(Arguments args, IDataView input) string[] inputsForTraining, ITensorValueGetter[] srcTensorGetters, List fetchList, - Arguments args) + Options args) { float loss = 0; float metric = 0; @@ -509,7 +509,7 @@ private void TrainCore(Arguments args, IDataView input) /// After retraining Session and Graphs are both up-to-date /// However model on disk is not which is used to serialzed to ML.Net stream /// - private void UpdateModelOnDisk(string modelDir, Arguments args) + private void UpdateModelOnDisk(string modelDir, Options args) { try { @@ -957,7 +957,7 @@ protected override Schema.DetachedColumn[] GetOutputColumnsCore() Desc = Summary, UserName = UserName, ShortName = ShortName)] - internal static CommonOutputs.TransformOutput TensorFlowScorer(IHostEnvironment env, Arguments input) + internal static CommonOutputs.TransformOutput TensorFlowScorer(IHostEnvironment env, Options input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(input, nameof(input)); @@ -1080,28 +1080,28 @@ public TFTensor GetBufferedBatchTensor() public sealed class TensorFlowEstimator : IEstimator { private readonly IHost _host; - private readonly TensorFlowTransformer.Arguments 
_args; + private readonly TensorFlowTransformer.Options _args; private readonly TensorFlowModelInfo _tensorFlowModel; private readonly TFDataType[] _tfInputTypes; private readonly ColumnType[] _outputTypes; private TensorFlowTransformer _transformer; - public TensorFlowEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelLocation) + internal TensorFlowEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelLocation) : this(env, outputColumnNames, inputColumnNames, TensorFlowUtils.LoadTensorFlowModel(env, modelLocation)) { } - public TensorFlowEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, TensorFlowModelInfo tensorFlowModel) + internal TensorFlowEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, TensorFlowModelInfo tensorFlowModel) : this(env, CreateArguments(tensorFlowModel, outputColumnNames, inputColumnNames), tensorFlowModel) { } - public TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Arguments args) + internal TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Options args) : this(env, args, TensorFlowUtils.LoadTensorFlowModel(env, args.ModelLocation)) { } - public TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Arguments args, TensorFlowModelInfo tensorFlowModel) + internal TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Options args, TensorFlowModelInfo tensorFlowModel) { _host = Contracts.CheckRef(env, nameof(env)).Register(nameof(TensorFlowEstimator)); _args = args; @@ -1112,15 +1112,19 @@ public TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Arguments _outputTypes = outputTuple.outputTypes; } - private static TensorFlowTransformer.Arguments CreateArguments(TensorFlowModelInfo tensorFlowModel, string[] outputColumnNames, string[] inputColumnName) + private static TensorFlowTransformer.Options 
CreateArguments(TensorFlowModelInfo tensorFlowModel, string[] outputColumnNames, string[] inputColumnName) { - var args = new TensorFlowTransformer.Arguments(); + var args = new TensorFlowTransformer.Options(); args.ModelLocation = tensorFlowModel.ModelPath; args.InputColumns = inputColumnName; args.OutputColumns = outputColumnNames; args.ReTrain = false; return args; } + + /// + /// Returns the output schema shape of the estimator, if the input schema shape is like the one provided. + /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { _host.CheckValue(inputSchema, nameof(inputSchema)); @@ -1146,6 +1150,9 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(resultDic.Values); } + /// + /// Train and return a transformer. + /// public TensorFlowTransformer Fit(IDataView input) { _host.CheckValue(input, nameof(input)); diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 9df43c0e84..f9004ac8c6 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -126,7 +126,7 @@ Transforms.ScoreColumnSelector Selects only the last score columns and the extra Transforms.Scorer Turn the predictor model into a transform model Microsoft.ML.EntryPoints.ScoreModel MakeScoringTransform Microsoft.ML.EntryPoints.ScoreModel+ModelInput Microsoft.ML.EntryPoints.ScoreModel+Output Transforms.Segregator Un-groups vector columns into sequences of rows, inverse of Group transform Microsoft.ML.Transforms.GroupingOperations Ungroup Microsoft.ML.Transforms.UngroupTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.SentimentAnalyzer Uses a pretrained sentiment model to score input strings Microsoft.ML.Transforms.Text.TextAnalytics AnalyzeSentiment Microsoft.ML.Transforms.Text.SentimentAnalyzingTransformer+Arguments 
Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput -Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransformer TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransformer TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.TextFeaturizer A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. Microsoft.ML.Transforms.Text.TextAnalytics TextTransform Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.TextToKeyConverter Converts input values (words, numbers, etc.) to index in a dictionary. 
Microsoft.ML.Transforms.Categorical.Categorical TextToKey Microsoft.ML.Transforms.Conversions.ValueToKeyMappingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.TrainTestDatasetSplitter Split the dataset into train and test sets Microsoft.ML.EntryPoints.TrainTestSplit Split Microsoft.ML.EntryPoints.TrainTestSplit+Input Microsoft.ML.EntryPoints.TrainTestSplit+Output diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index bb46984a38..d6e0448db3 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -433,7 +433,7 @@ public void TensorFlowTransformMNISTLRTrainingTest() var pipe = mlContext.Transforms.Categorical.OneHotEncoding("OneHotLabel", "Label") .Append(mlContext.Transforms.Normalize(new NormalizingEstimator.MinMaxColumn("Features", "Placeholder"))) - .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransformer.Arguments() + .Append(mlContext.Transforms.TensorFlow(new TensorFlowTransformer.Options() { ModelLocation = model_location, InputColumns = new[] { "Features" }, @@ -547,7 +547,7 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS } var pipe = mlContext.Transforms.CopyColumns(("Features", "Placeholder")) - .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransformer.Arguments() + .Append(new TensorFlowEstimator(mlContext, new TensorFlowTransformer.Options() { ModelLocation = modelLocation, InputColumns = new[] { "Features" },