TrainTestSplit should be inside MLContext.Data #2905

Closed · wants to merge 7 commits
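This change moves TrainTestSplit off the per-task catalogs (BinaryClassification, MulticlassClassification, Regression, Ranking) and onto the shared MLContext.Data catalog, so every sample below now calls mlContext.Data.TrainTestSplit. As a quick orientation, here is a minimal before/after sketch of the call site (not part of the diff; it reuses the featurized adult dataset helper from the samples below):

    var mlContext = new MLContext();
    // Any IDataView works; the samples below load the featurized adult dataset this way.
    IDataView data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

    // Before: the split lived on each task catalog.
    // var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

    // After: a single entry point on the data operations catalog.
    var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
    IDataView trainSet = trainTestData.TrainSet;
    IDataView testSet = trainTestData.TestSet;

For grouped scenarios such as ranking, the optional samplingKeyColumn parameter (see the two ranking samples below) keeps rows that share a key, e.g. GroupId, on the same side of the split.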
@@ -57,7 +57,7 @@ public static void Example()

IDataView data = loader.Load(dataFilePath);

- var split = ml.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
+ var split = ml.Data.TrainTestSplit(data, testFraction: 0.2);

var pipeline = ml.Transforms.Concatenate("Text", "workclass", "education", "marital-status",
"relationship", "ethnicity", "sex", "native-country")

@@ -16,7 +16,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10);

@@ -18,7 +18,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new AveragedPerceptronTrainer.Options()

@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 30% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create the data training pipeline for a non-calibrated trainer and train a Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();

@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 30% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create the data training pipeline for a non-calibrated trainer and train a Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();

@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 30% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create the data training pipeline for a non-calibrated trainer and train a Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();

@@ -15,7 +15,7 @@ public static void Example()
// Download and featurize the dataset.
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
// Leave out 30% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

// Create the data training pipeline for a non-calibrated trainer and train a Naive calibrator on top of it.
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();

@@ -14,7 +14,7 @@ public static void Example()
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1);

// Create the Estimator.
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm();

@@ -15,7 +15,7 @@ public static void Example()
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1);

// Create the pipeline with LightGbm Estimator using advanced options.
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(

@@ -19,7 +19,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new SdcaBinaryTrainer.Options()

@@ -18,7 +18,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescent();

@@ -20,7 +20,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.StochasticGradientDescentNonCalibrated();

@@ -21,7 +21,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create data training pipeline.
var pipeline = mlContext.BinaryClassification

@@ -19,7 +19,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Define the trainer options.
var options = new SgdBinaryTrainer.Options()

@@ -17,7 +17,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline.
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(labelColumnName: "IsOver50K", numberOfIterations: 25);
var model = pipeline.Fit(split.TrainSet);

@@ -17,7 +17,7 @@ public static void Example()
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

// Leave out 10% of data for testing.
- var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
new ML.Trainers.SymbolicStochasticGradientDescentClassificationTrainer.Options()

@@ -37,7 +37,7 @@ public static void Example()

// Split the static-typed data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.5);

// Train the model.
var model = pipeline.Fit(split.TrainSet);

@@ -48,7 +48,7 @@ public static void Example()

// Split the static-typed data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.5);

// Train the model.
var model = pipeline.Fit(split.TrainSet);

@@ -34,7 +34,7 @@ public static void Example()

// Split the data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Train the model.
var model = pipeline.Fit(split.TrainSet);

@@ -45,7 +45,7 @@ public static void Example()

// Split the data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Train the model.
var model = pipeline.Fit(split.TrainSet);

@@ -15,8 +15,8 @@ public static void Example()

// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
- // the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
- var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
+ // the test split. The samplingKeyColumn parameter in Data.TrainTestSplit is used for this purpose.
+ var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(

@@ -16,8 +16,8 @@ public static void Example()

// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
- // the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
- var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
+ // the test split. The samplingKeyColumn parameter in Data.TrainTestSplit is used for this purpose.
+ var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(

@@ -23,7 +23,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ...
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ...

- var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Create the estimator, here we only need LightGbm trainer
// as data is already processed in a form consumable by the trainer.

@@ -25,7 +25,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ...
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ...

- var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Create a pipeline with LightGbm estimator with advanced options.
// Here we only need LightGbm trainer as data is already processed

@@ -39,7 +39,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10

- var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.2);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer

@@ -40,7 +40,7 @@ public static void Example()
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10

- var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.2);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer

@@ -19,7 +19,7 @@ public static void Example()

// Split the data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Train the model.
var pipeline = mlContext.Regression.Trainers.StochasticDualCoordinateAscent();

@@ -18,7 +18,7 @@ public static void Example()

// Split the data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.1);
+ var split = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

// Create trainer options.
var options = new SdcaRegressionTrainer.Options

@@ -55,7 +55,7 @@ public static void AveragedPerceptronBinaryClassification()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
- var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()

@@ -57,7 +57,7 @@ public static void FastTreeBinaryClassification()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
- var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()

@@ -57,7 +57,7 @@ public static void LightGbmBinaryClassification()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
- var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()

@@ -53,7 +53,7 @@ public void MultiClassLightGbmStaticPipelineWithInMemoryData()

// Split the static-typed data into training and test sets. Only the training set is used in fitting
// the created pipeline. Metrics are computed on the test set.
- var (trainingData, testingData) = mlContext.MulticlassClassification.TrainTestSplit(staticDataView, testFraction: 0.5);
+ var (trainingData, testingData) = mlContext.Data.TrainTestSplit(staticDataView, testFraction: 0.5);

// Train the model.
var model = pipe.Fit(trainingData);

@@ -28,7 +28,7 @@ public static void LightGbmRegression()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(new MultiFileSource(dataFile));
- var (trainData, testData) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.1);
+ var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// The predictor that gets produced out of training
LightGbmRegressionModelParameters pred = null;

@@ -55,7 +55,7 @@ public static void SdcaBinaryClassification()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFilePath);
- var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+ var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// Create the Estimator
var learningPipeline = loader.MakeNewEstimator()

docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs (2 changes: 1 addition & 1 deletion)
@@ -26,7 +26,7 @@ public static void SdcaRegression()

// Load the data, and leave 10% out, so we can use them for testing
var data = loader.Load(dataFile);
- var (trainData, testData) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.1);
+ var (trainData, testData) = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);

// The predictor that gets produced out of training
LinearRegressionModelParameters pred = null;