Skip to content

Commit 033d27d

Browse files
authored
Added RffBenchmark (dotnet#1855)
* Added rffBenchmark * scikit removed and another reference added * Converting to api * adding cacheCheck point to correct place * comment added about dataset
1 parent 337cc55 commit 033d27d

File tree

5 files changed

+69
-1
lines changed

5 files changed

+69
-1
lines changed

build.proj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878

7979
<ItemGroup Condition="'$(IncludeBenchmarkData)' == 'true'">
8080
<BenchmarkFile Update="@(BenchmarkFile)">
81-
<Url>https://aka.ms/tlc-resources/benchmarks/%(Identity)</Url>
81+
<Url>https://aka.ms/mlnet-resources/benchmarks/%(Identity)</Url>
8282
<DestinationFile>$(MSBuildThisFileDirectory)/test/data/external/%(Identity)</DestinationFile>
8383
</BenchmarkFile>
8484

build/ExternalBenchmarkDataFiles.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<Project>
22
<ItemGroup>
3+
<BenchmarkFile Include="digits.csv" />
34
<BenchmarkFile Include="MSLRWeb10KTest240kRows.tsv" />
45
<BenchmarkFile Include="MSLRWeb10KTrain720kRows.tsv" />
56
<BenchmarkFile Include="MSLRWeb10KValidate240kRows.tsv" />
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using BenchmarkDotNet.Attributes;
6+
using Microsoft.ML.Runtime.Data;
7+
using Microsoft.ML.Runtime.RunTests;
8+
using Microsoft.ML.Transforms.Conversions;
9+
using System.IO;
10+
11+
namespace Microsoft.ML.Benchmarks
12+
{
13+
/// <summary>
/// BenchmarkDotNet benchmark that cross-validates a multiclass pipeline
/// (random Fourier features followed by a one-versus-all averaged perceptron)
/// on the Digits dataset.
/// </summary>
public class RffTransformTrain
14+
{
15+
// Full path to the Digits training file; assigned in SetupTrainingSpeedTests.
private string _dataPath_Digits;
16+
17+
/// <summary>
/// Resolves the Digits dataset file name to a full path and verifies the file exists.
/// </summary>
/// <exception cref="FileNotFoundException">Thrown when the resolved dataset file does not exist.</exception>
[GlobalSetup]
18+
public void SetupTrainingSpeedTests()
19+
{
20+
// trainFilename is a relative path, so GetFullPath resolves it against the current working directory.
_dataPath_Digits = Path.GetFullPath(TestDatasets.Digits.trainFilename);
21+
22+
if (!File.Exists(_dataPath_Digits))
23+
throw new FileNotFoundException(string.Format(Errors.DatasetNotFound, _dataPath_Digits));
24+
}
25+
26+
/// <summary>
/// Loads the Digits file, builds the RFF + one-versus-all averaged perceptron pipeline,
/// and runs 5-fold cross-validation over it.
/// </summary>
[Benchmark]
27+
public void CV_Multiclass_Digits_RffTransform_OVAAveragedPerceptron()
28+
{
29+
var mlContext = new MLContext();
30+
// Comma-separated file without a header row: columns 0..63 are read as the
// "Features" vector and column 64 as "Label", all as R4 values.
var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments
31+
{
32+
Column = new[]
33+
{
34+
new TextLoader.Column("Label", DataKind.R4, 64),
35+
new TextLoader.Column("Features", DataKind.R4, new [] { new TextLoader.Range() { Min = 0, Max = 63 }})
36+
},
37+
HasHeader = false,
38+
Separator = ","
39+
});
40+
41+
var data = reader.Read(_dataPath_Digits);
42+
43+
// NOTE(review): presumably the first argument is the input column and the second the
// output column, i.e. "Features" -> "FeaturesRFF" — confirm against the catalog signature.
var pipeline = mlContext.Transforms.Projection.CreateRandomFourierFeatures("Features", "FeaturesRFF")
44+
// Cache checkpoint placed after the RFF transform and before the remaining steps.
.AppendCacheCheckpoint(mlContext)
45+
// NOTE(review): looks like this rebinds "Features" to the RFF output so the trainer
// consumes the transformed column — confirm Concatenate's (output, inputs) ordering.
.Append(mlContext.Transforms.Concatenate("Features", "FeaturesRFF"))
46+
.Append(new ValueToKeyMappingEstimator(mlContext, "Label"))
47+
.Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10)));
48+
49+
// The cross-validation result is assigned but never read in this method.
var cvResults = mlContext.MulticlassClassification.CrossValidate(data, pipeline, numFolds: 5);
50+
}
51+
}
52+
}

test/Microsoft.ML.TestFramework/Datasets.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,15 @@ public static class TestDatasets
137137
loaderSettings = "xf=expr{col=Features expr=x:float(x>4?1:0)}"
138138
};
139139

140+
// The data set contains images of hand-written digits.
141+
// The input is given in the form of an 8x8 matrix where
142+
// each element is an integer in the range 0..16
143+
public static TestDataset Digits = new TestDataset
144+
{
145+
name = "Digits",
146+
// Relative path under "external/"; only the training file is specified for this dataset.
// NOTE(review): presumably resolved against the test data root where external benchmark
// files are downloaded — confirm against the build configuration.
trainFilename = @"external/digits.csv",
147+
};
148+
140149
public static TestDataset vw = new TestDataset
141150
{
142151
name = "vw",

test/data/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ The datasets are provided under the original terms that Microsoft received such
1616
>
1717
>Original readme: https://meta.wikimedia.org/wiki/Research:Detox
1818
19+
### Digits
20+
> This dataset is provided under http://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits.
21+
>
22+
> References: C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their Applications to Handwritten Digit Recognition, MSc Thesis, Institute of Graduate Studies in Science and Engineering, Bogazici University.
23+
> E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika.
24+
1925
### UCI Adult Dataset
2026

2127
>Dua, D. and Karra Taniskidou, E. (2017). UCI Machine Learning Repository [https://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.

0 commit comments

Comments
 (0)