
Input output swap #2239


Merged: 13 commits, Jan 29, 2019
@@ -48,14 +48,14 @@ public static void FeatureSelectionTransform()
// In this example we define a CountFeatureSelectingEstimator, that selects slots in a feature vector that have more non-default
// values than the specified count. This transformation can be used to remove slots with too many missing values.
var countSelectEst = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
inputColumn: "Features", outputColumn: "FeaturesCountSelect", count: 695);
outputColumnName: "FeaturesCountSelect", inputColumnName: "Features", count: 695);

// We also define a MutualInformationFeatureSelectingEstimator that selects the top k slots in a feature
// vector based on highest mutual information between that slot and a specified label. Notice that it is possible to
// specify the parameter `numBins', which controls the number of bins used in the approximation of the mutual information
// between features and label.
var mutualInfoEst = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(
inputColumn: "FeaturesCountSelect", outputColumn: "FeaturesMISelect", labelColumn: "Label", slotsInOutput: 5);
outputColumnName: "FeaturesMISelect", inputColumnName: "FeaturesCountSelect", labelColumn: "Label", slotsInOutput: 5);

// Now, we can put the previous two transformations together in a pipeline.
var pipeline = countSelectEst.Append(mutualInfoEst);
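
For readers skimming the new convention, here is a minimal self-contained sketch of the post-swap calls: the output column name always comes first, the input column name second. The data class, vector size, and thresholds are illustrative, not part of this PR.

using Microsoft.ML;
using Microsoft.ML.Data;

public class SampleRow
{
    // A small fixed-size feature vector plus a label, just to give the pipeline a schema.
    [VectorType(10)]
    public float[] Features { get; set; }
    public bool Label { get; set; }
}

public static class FeatureSelectionSketch
{
    public static void Run()
    {
        var ml = new MLContext();
        // An empty enumerable is enough to illustrate how the estimators are wired.
        var trainData = ml.Data.ReadFromEnumerable(new SampleRow[0]);

        // Keep slots that have more than `count` non-default values; output name first.
        var countSelect = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
            outputColumnName: "FeaturesCountSelect", inputColumnName: "Features", count: 5);

        // Keep the top slots by mutual information with the label; output name first.
        var miSelect = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(
            outputColumnName: "FeaturesMISelect", inputColumnName: "FeaturesCountSelect",
            labelColumn: "Label", slotsInOutput: 5);

        var pipeline = countSelect.Append(miSelect);
        // pipeline.Fit(trainData).Transform(trainData) would then produce the selected features.
    }
}
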
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs
@@ -32,15 +32,15 @@ public static void KeyToValue_Term()
string defaultColumnName = "DefaultKeys";
// REVIEW create through the catalog extension
var default_pipeline = new WordTokenizingEstimator(ml, "Review")
.Append(new ValueToKeyMappingEstimator(ml, "Review", defaultColumnName));
.Append(new ValueToKeyMappingEstimator(ml, defaultColumnName, "Review"));

// Another pipeline, that customizes the advanced settings of the TermEstimator.
// We can change the maxNumTerm to limit how many keys will get generated out of the set of words,
// and condition the order in which they get evaluated by changing sort from the default Occurence (order in which they get encountered)
// to value/alphabetically.
string customizedColumnName = "CustomizedKeys";
var customized_pipeline = new WordTokenizingEstimator(ml, "Review")
.Append(new ValueToKeyMappingEstimator(ml, "Review", customizedColumnName, maxNumTerms: 10, sort: ValueToKeyMappingTransformer.SortOrder.Value));
.Append(new ValueToKeyMappingEstimator(ml,customizedColumnName, "Review", maxNumTerms: 10, sort: ValueToKeyMappingTransformer.SortOrder.Value));

// The transformed data.
var transformedData_default = default_pipeline.Fit(trainData).Transform(trainData);
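
The REVIEW note above asks for a catalog-based version. A hedged sketch of what that could look like under the same name-first convention, assuming ml.Transforms.Conversion.MapValueToKey is available (it is in later ML.NET releases) and reusing the sample's ml and trainData:

// Tokenize the "Review" text in place, then map each token to a key;
// the output column ("DefaultKeys") is named first.
var catalogPipeline = new WordTokenizingEstimator(ml, "Review")
    .Append(ml.Transforms.Conversion.MapValueToKey("DefaultKeys", "Review"));
// var keyed = catalogPipeline.Fit(trainData).Transform(trainData);
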
6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs
@@ -26,9 +26,9 @@ public static void NgramTransform()
// A pipeline to tokenize text as characters and then combine them together into ngrams
// The pipeline uses the default settings to featurize.

var charsPipeline = ml.Transforms.Text.TokenizeCharacters("SentimentText", "Chars", useMarkerCharacters:false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("Chars", "CharsUnigrams", ngramLength:1);
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("Chars", "CharsTwograms");
var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters:false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength:1);
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
var twoCharsPipeline = charsPipeline.Append(ngramTwpPipeline);
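
A condensed restatement of the chain under the new ordering, reusing the sample's ml, trainData, and "SentimentText" column; ngramLength: 2 is written out only to make the bigram case explicit:

var chars = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters: false);
var unigramPipeline = chars.Append(ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1));
var bigramPipeline = chars.Append(ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars", ngramLength: 2));
// var featurized = unigramPipeline.Fit(trainData).Transform(trainData);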

6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
@@ -33,7 +33,7 @@ public static void Normalizer()
var transformer = pipeline.Fit(trainData);

var modelParams = transformer.Columns
.First(x => x.Output == "Induced")
.First(x => x.Name == "Induced")
.ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters<float>;

Console.WriteLine($"The normalization parameters are: Scale = {modelParams.Scale} and Offset = {modelParams.Offset}");
@@ -66,7 +66,7 @@ public static void Normalizer()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using log scale as the normalization mode.
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new[] { ("Induced", "LogInduced"), ("Spontaneous", "LogSpontaneous") });
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
@@ -97,7 +97,7 @@ public static void Normalizer()

// Inspect the weights of normalizing the columns
var multiColModelParams = multiColtransformer.Columns
.First(x=> x.Output == "LogInduced")
.First(x=> x.Name == "LogInduced")
.ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters<float>;

Console.WriteLine($"The normalization parameters are: Mean = {multiColModelParams.Mean} and Stddev = {multiColModelParams.Stddev}");
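
Putting this file's two changes together (multi-column tuples are now (outputColumnName, inputColumnName), and fitted columns are looked up by Name), a condensed sketch reusing the sample's ml and trainData; it assumes the usual System.Linq and normalizer usings:

// Each tuple names the output column first, then the column it is computed from.
var logPipeline = ml.Transforms.Normalize(
    NormalizingEstimator.NormalizerMode.LogMeanVariance,
    new[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });

var logTransformer = logPipeline.Fit(trainData);

// Fitted columns are now found by their output Name.
var cdfParams = logTransformer.Columns
    .First(x => x.Name == "LogInduced")
    .ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters<float>;
Console.WriteLine($"Mean = {cdfParams.Mean}, Stddev = {cdfParams.Stddev}");
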
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs
@@ -35,7 +35,7 @@ public static void OnnxTransformSample()
var mlContext = new MLContext();
var data = GetTensorData();
var idv = mlContext.Data.ReadFromEnumerable(data);
var pipeline = new OnnxScoringEstimator(mlContext, modelPath, new[] { inputInfo.Key }, new[] { outputInfo.Key });
var pipeline = new OnnxScoringEstimator(mlContext, new[] { outputInfo.Key }, new[] { inputInfo.Key }, modelPath);

// Run the pipeline and get the transformed values
var transformedValues = pipeline.Fit(idv).Transform(idv);
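
The same call with the role of each argument spelled out; this is only a restatement of the line above (inputInfo, outputInfo, and modelPath come from the surrounding sample), not a different API:

var pipeline = new OnnxScoringEstimator(
    mlContext,
    new[] { outputInfo.Key },   // names of the model outputs to surface as columns
    new[] { inputInfo.Key },    // names of the IDataView columns fed to the model
    modelPath);                 // path to the .onnx model file, now the last argument
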
@@ -22,8 +22,8 @@ public static void TensorFlowScoringSample()
// Create a ML pipeline.
var pipeline = mlContext.Transforms.ScoreTensorFlowModel(
modelLocation,
new[] { nameof(TensorData.input) },
new[] { nameof(OutputScores.output) });
new[] { nameof(OutputScores.output) },
new[] { nameof(TensorData.input) });

// Run the pipeline and get the transformed values.
var estimator = pipeline.Fit(idv);
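
For orientation, the nameof(...) arguments above refer to members of the sample's schema classes. A hedged sketch of what such classes look like; the vector sizes are placeholders, not the sample's real tensor shapes:

// VectorTypeAttribute comes from Microsoft.ML.Data.
public class TensorData
{
    [VectorType(784)]   // placeholder shape
    public float[] input { get; set; }
}

public class OutputScores
{
    [VectorType(10)]    // placeholder shape
    public float[] output { get; set; }
}
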
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
@@ -27,11 +27,11 @@ public static void TextTransform()
// A pipeline for featurization of the "SentimentText" column, and placing the output in a new column named "DefaultTextFeatures"
// The pipeline uses the default settings to featurize.
string defaultColumnName = "DefaultTextFeatures";
var default_pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", defaultColumnName);
var default_pipeline = ml.Transforms.Text.FeaturizeText(defaultColumnName , "SentimentText");

// Another pipeline, that customizes the advanced settings of the FeaturizeText transformer.
string customizedColumnName = "CustomizedTextFeatures";
var customized_pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", customizedColumnName, s =>
var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, "SentimentText", s =>
{
s.KeepPunctuations = false;
s.KeepNumbers = false;
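
The hunk above ends mid-statement; for readability, here is the complete customized call under the new convention, including only the options visible above:

var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, "SentimentText", s =>
{
    s.KeepPunctuations = false;
    s.KeepNumbers = false;
});
// customized_pipeline.Fit(trainData).Transform(trainData) yields the "CustomizedTextFeatures" column.
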
18 changes: 9 additions & 9 deletions src/Microsoft.ML.Core/Data/IEstimator.cs
@@ -75,29 +75,29 @@ internal Column(string name, VectorKind vecKind, ColumnType itemType, bool isKey
}

/// <summary>
/// Returns whether <paramref name="inputColumn"/> is a valid input, if this object represents a
/// Returns whether <paramref name="source"/> is a valid input, if this object represents a
/// requirement.
///
/// Namely, it returns true iff:
/// - The <see cref="Name"/>, <see cref="Kind"/>, <see cref="ItemType"/>, <see cref="IsKey"/> fields match.
/// - The columns of <see cref="Metadata"/> of <paramref name="inputColumn"/> is a superset of our <see cref="Metadata"/> columns.
/// - The columns of <see cref="Metadata"/> of <paramref name="source"/> is a superset of our <see cref="Metadata"/> columns.
/// - Each such metadata column is itself compatible with the input metadata column.
/// </summary>
[BestFriend]
internal bool IsCompatibleWith(Column inputColumn)
internal bool IsCompatibleWith(Column source)
[Review comment by @sfilipi (Member, Author), Jan 25, 2019, on "Column source)": unnecessary change.. #Resolved]
{
Contracts.Check(inputColumn.IsValid, nameof(inputColumn));
if (Name != inputColumn.Name)
Contracts.Check(source.IsValid, nameof(source));
if (Name != source.Name)
return false;
if (Kind != inputColumn.Kind)
if (Kind != source.Kind)
return false;
if (!ItemType.Equals(inputColumn.ItemType))
if (!ItemType.Equals(source.ItemType))
return false;
if (IsKey != inputColumn.IsKey)
if (IsKey != source.IsKey)
return false;
foreach (var metaCol in Metadata)
{
if (!inputColumn.Metadata.TryFindColumn(metaCol.Name, out var inputMetaCol))
if (!source.Metadata.TryFindColumn(metaCol.Name, out var inputMetaCol))
return false;
if (!metaCol.IsCompatibleWith(inputMetaCol))
return false;
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs
@@ -708,11 +708,11 @@ private protected override void PrintFoldResultsCore(IChannel ch, Dictionary<str
var pFormatName = string.Format(FoldDrAtPFormat, _p);
var numAnomName = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies);

(string Source, string Name)[] cols =
(string name, string source)[] cols =
{
(AnomalyDetectionEvaluator.OverallMetrics.DrAtK, kFormatName),
(AnomalyDetectionEvaluator.OverallMetrics.DrAtPFpr, pFormatName),
(AnomalyDetectionEvaluator.OverallMetrics.DrAtNumPos, numAnomName)
(kFormatName, AnomalyDetectionEvaluator.OverallMetrics.DrAtK),
(pFormatName, AnomalyDetectionEvaluator.OverallMetrics.DrAtPFpr),
(numAnomName, AnomalyDetectionEvaluator.OverallMetrics.DrAtNumPos)
};

// List of columns to keep, note that the order specified determines the order of the output
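
The (name, source) tuple ordering used by the evaluators mirrors the public column-copying API after this change. A minimal hedged sketch with illustrative column names:

var ml = new MLContext();
// The new (output) column name comes first, the existing (input) column second.
var copyEstimator = ml.Transforms.CopyColumns("RenamedMetric", "OriginalMetric");
// copyEstimator.Fit(data).Transform(data) would add the renamed column.
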
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs
@@ -1198,11 +1198,11 @@ private protected override void PrintFoldResultsCore(IChannel ch, Dictionary<str
if (!metrics.TryGetValue(MetricKinds.ConfusionMatrix, out conf))
throw ch.Except("No overall metrics found");

(string Source, string Name)[] cols =
(string name, string source)[] cols =
{
(BinaryClassifierEvaluator.Accuracy, FoldAccuracy),
(BinaryClassifierEvaluator.LogLoss, FoldLogLoss),
(BinaryClassifierEvaluator.LogLossReduction, FoldLogLosRed)
(FoldAccuracy, BinaryClassifierEvaluator.Accuracy),
(FoldLogLoss, BinaryClassifierEvaluator.LogLoss),
(FoldLogLosRed, BinaryClassifierEvaluator.LogLossReduction)
};

var colsToKeep = new List<string>();
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs
@@ -230,7 +230,7 @@ private IDataView WrapPerInstance(RoleMappedData perInst)

// Make a list of column names that Maml outputs as part of the per-instance data view, and then wrap
// the per-instance data computed by the evaluator in a SelectColumnsTransform.
var cols = new List<(string Source, string Name)>();
var cols = new List<(string name, string source)>();
var colsToKeep = new List<string>();

// If perInst is the result of cross-validation and contains a fold Id column, include it.
@@ -241,7 +241,7 @@ private IDataView WrapPerInstance(RoleMappedData perInst)
// Maml always outputs a name column, if it doesn't exist add a GenerateNumberTransform.
if (perInst.Schema.Name?.Name is string nameName)
{
cols.Add((nameName, "Instance"));
cols.Add(("Instance", nameName));
colsToKeep.Add("Instance");
}
else
@@ -950,7 +950,7 @@ private protected override IDataView GetOverallResultsCore(IDataView overall)

private IDataView ChangeTopKAccColumnName(IDataView input)
{
input = new ColumnCopyingTransformer(Host, (MultiClassClassifierEvaluator.TopKAccuracy, string.Format(TopKAccuracyFormat, _outputTopKAcc))).Transform(input);
input = new ColumnCopyingTransformer(Host, (string.Format(TopKAccuracyFormat, _outputTopKAcc), MultiClassClassifierEvaluator.TopKAccuracy)).Transform(input);
return ColumnSelectingTransformer.CreateDrop(Host, input, MultiClassClassifierEvaluator.TopKAccuracy);
}

4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/TrainCatalog.cs
@@ -154,9 +154,9 @@ private void EnsureStratificationColumn(ref IDataView data, ref string stratific
stratificationColumn = string.Format("{0}_{1:000}", origStratCol, ++inc);
HashingTransformer.ColumnInfo columnInfo;
if (seed.HasValue)
columnInfo = new HashingTransformer.ColumnInfo(origStratCol, stratificationColumn, 30, seed.Value);
columnInfo = new HashingTransformer.ColumnInfo(stratificationColumn, origStratCol, 30, seed.Value);
else
columnInfo = new HashingTransformer.ColumnInfo(origStratCol, stratificationColumn, 30);
columnInfo = new HashingTransformer.ColumnInfo(stratificationColumn, origStratCol, 30);
data = new HashingEstimator(Host, columnInfo).Fit(data).Transform(data);
}
}
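
The ColumnInfo constructor now also takes the new (hashed) column name before the source column. Through the public catalog the same idea looks roughly like the sketch below, assuming the Conversion.Hash extension (which follows the same ordering); column names are illustrative and the 30-bit, seeded overload above is not shown:

var ml = new MLContext();
// Hashed output column is named first, the source column second.
var hashEstimator = ml.Transforms.Conversion.Hash("StratificationKey", "Group");
// hashEstimator.Fit(data).Transform(data) would add the hashed stratification column.
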
16 changes: 8 additions & 8 deletions src/Microsoft.ML.Data/Transforms/ColumnConcatenatingEstimator.cs
@@ -21,20 +21,20 @@ public sealed class ColumnConcatenatingEstimator : IEstimator<ITransformer>
/// Initializes a new instance of <see cref="ColumnConcatenatingEstimator"/>
/// </summary>
/// <param name="env">The local instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="outputColumn">The name of the resulting column.</param>
/// <param name="inputColumns">The columns to concatenate together.</param>
public ColumnConcatenatingEstimator (IHostEnvironment env, string outputColumn, params string[] inputColumns)
/// <param name="outputColumnName">The name of the resulting column.</param>
/// <param name="inputColumnNames">The columns to concatenate together.</param>
public ColumnConcatenatingEstimator(IHostEnvironment env, string outputColumnName, params string[] inputColumnNames)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register("ColumnConcatenatingEstimator ");

_host.CheckNonEmpty(outputColumn, nameof(outputColumn));
_host.CheckValue(inputColumns, nameof(inputColumns));
_host.CheckParam(!inputColumns.Any(r => string.IsNullOrEmpty(r)), nameof(inputColumns),
_host.CheckNonEmpty(outputColumnName, nameof(outputColumnName));
_host.CheckValue(inputColumnNames, nameof(inputColumnNames));
_host.CheckParam(!inputColumnNames.Any(r => string.IsNullOrEmpty(r)), nameof(inputColumnNames),
"Contained some null or empty items");

_name = outputColumn;
_source = inputColumns;
_name = outputColumnName;
_source = inputColumnNames;
}

public ITransformer Fit(IDataView input)
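
Through the catalog, this estimator is typically created with Transforms.Concatenate, which follows the same convention. A minimal hedged sketch with illustrative column names:

var ml = new MLContext();
// The concatenated output column is named first; the input columns to combine follow.
var concat = ml.Transforms.Concatenate("Features", "Age", "Parity", "Induced");
// concat.Fit(data).Transform(data) would add a single "Features" vector column.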