diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 4e42fd722c..054399cdba 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -1560,9 +1560,9 @@ private readonly struct Config public readonly int NewDim; public readonly bool UseSin; public readonly int? Seed; - public readonly IComponentFactory Generator; + public readonly KernelBase Generator; - public Config(int newDim, bool useSin, IComponentFactory generator, int? seed = null) + public Config(int newDim, bool useSin, KernelBase generator, int? seed = null) { NewDim = newDim; UseSin = useSin; @@ -1612,11 +1612,11 @@ public override IEstimator Reconcile(IHostEnvironment env, Pipelin /// The column to apply Random Fourier transfomration. /// Expected size of new vector. /// Create two features for every random Fourier frequency? (one for cos and one for sin) - /// Which kernel to use. ( by default) + /// Which kernel to use. (if it is null, is used.) /// The seed of the random number generator for generating the new features. If not specified global random would be used. public static Vector LowerVectorSizeWithRandomFourierTransformation(this Vector input, int newDim = RandomFourierFeaturizingEstimator.Defaults.NewDim, bool useSin = RandomFourierFeaturizingEstimator.Defaults.UseSin, - IComponentFactory generator = null, int? seed = null) + KernelBase generator = null, int? seed = null) { Contracts.CheckValue(input, nameof(input)); return new ImplVector(input, new Config(newDim, useSin, generator, seed)); diff --git a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs index 9bc6b4220e..d3ea00f5e0 100644 --- a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs +++ b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs @@ -5,205 +5,297 @@ using System; using Microsoft.ML; using Microsoft.ML.CommandLine; -using Microsoft.ML.EntryPoints; +using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.Transforms; +using Microsoft.ML.Numeric; +using Microsoft.ML.Transforms.Projections; -[assembly: LoadableClass(typeof(GaussianFourierSampler), typeof(GaussianFourierSampler.Options), typeof(SignatureFourierDistributionSampler), - "Gaussian Kernel", GaussianFourierSampler.LoadName, "Gaussian")] +[assembly: LoadableClass(typeof(GaussianKernel), typeof(GaussianKernel.Options), typeof(SignatureKernelBase), + "Gaussian Kernel", GaussianKernel.LoadName, "Gaussian")] -[assembly: LoadableClass(typeof(LaplacianFourierSampler), typeof(LaplacianFourierSampler.Options), typeof(SignatureFourierDistributionSampler), - "Laplacian Kernel", LaplacianFourierSampler.RegistrationName, "Laplacian")] +[assembly: LoadableClass(typeof(LaplacianKernel), typeof(LaplacianKernel.Options), typeof(SignatureKernelBase), + "Laplacian Kernel", LaplacianKernel.LoadName, "Laplacian")] // This is for deserialization from a binary model file. -[assembly: LoadableClass(typeof(GaussianFourierSampler), null, typeof(SignatureLoadModel), - "Gaussian Fourier Sampler Executor", "GaussianSamplerExecutor", GaussianFourierSampler.LoaderSignature)] +[assembly: LoadableClass(typeof(GaussianKernel.RandomNumberGenerator), null, typeof(SignatureLoadModel), + "Gaussian Fourier Sampler Executor", "GaussianSamplerExecutor", GaussianKernel.RandomNumberGenerator.LoaderSignature)] // This is for deserialization from a binary model file. -[assembly: LoadableClass(typeof(LaplacianFourierSampler), null, typeof(SignatureLoadModel), - "Laplacian Fourier Sampler Executor", "LaplacianSamplerExecutor", LaplacianFourierSampler.LoaderSignature)] +[assembly: LoadableClass(typeof(LaplacianKernel.RandomNumberGenerator), null, typeof(SignatureLoadModel), + "Laplacian Fourier Sampler Executor", "LaplacianSamplerExecutor", LaplacianKernel.RandomNumberGenerator.LoaderSignature)] -// REVIEW: Roll all of this in with the RffTransform. -namespace Microsoft.ML.Transforms +namespace Microsoft.ML.Transforms.Projections { /// - /// Signature for an IFourierDistributionSampler constructor. + /// Signature for a constructor. /// [BestFriend] - internal delegate void SignatureFourierDistributionSampler(float avgDist); + internal delegate void SignatureKernelBase(); - public interface IFourierDistributionSampler : ICanSaveModel + /// + /// This class indicates which kernel should be approximated by the . + /// . + /// + public abstract class KernelBase { - float Next(Random rand); + // Private protected constructor, so that external devs cannot inherit from this class. + private protected KernelBase() + { + } + + /// + /// The kernels deriving from this class are shift-invariant, and each of them depends on a different distance between + /// its inputs. The depends on the L2 distance, and the depends + /// on the L1 distance. + /// + internal abstract float Distance(in VBuffer first, in VBuffer second); + + /// + /// This method returns an object that can sample from the non-negative measure that is the Fourier transform of this kernel. + /// + internal abstract FourierRandomNumberGeneratorBase GetRandomNumberGenerator(float averageDistance); } - [TlcModule.ComponentKind("FourierDistributionSampler")] - internal interface IFourierDistributionSamplerFactory : IComponentFactory + /// + /// The Fourier transform of a continuous positive definite kernel is a non-negative measure + /// (Bochner's theorem). This class + /// samples numbers from the non-negative measure corresponding to the given kernel. + /// + internal abstract class FourierRandomNumberGeneratorBase { + public abstract float Next(Random rand); } - public sealed class GaussianFourierSampler : IFourierDistributionSampler + /// + /// The Gaussian kernel is defined as k(x,y)=exp(-gamma*|x-y|_2^2). The distribution that is the Fourier transform of + /// this kernel is the Normal distribution with variance 2*gamma. + /// + public sealed class GaussianKernel : KernelBase { - private readonly IHost _host; - - public sealed class Options : IFourierDistributionSamplerFactory + internal sealed class Options : IComponentFactory { [Argument(ArgumentType.AtMostOnce, HelpText = "gamma in the kernel definition: exp(-gamma*||x-y||^2 / r^2). r is an estimate of the average intra-example distance", ShortName = "g")] public float Gamma = 1; - IFourierDistributionSampler IComponentFactory.CreateComponent(IHostEnvironment env, float avgDist) - => new GaussianFourierSampler(env, this, avgDist); - } - - internal const string LoaderSignature = "RandGaussFourierExec"; - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "RND GAUS", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(GaussianFourierSampler).Assembly.FullName); + public KernelBase CreateComponent(IHostEnvironment env) => new GaussianKernel(env, this); } internal const string LoadName = "GaussianRandom"; private readonly float _gamma; - public GaussianFourierSampler(IHostEnvironment env, Options options, float avgDist) + /// + /// Create a new instance of a GaussianKernel. + /// + /// The coefficient in the exponent of the kernel function. It should be positive. + public GaussianKernel(float gamma = 1) { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoadName); - _host.CheckValue(options, nameof(options)); - - _gamma = options.Gamma / avgDist; + Contracts.CheckParam(gamma > 0, nameof(gamma)); + _gamma = gamma; } - private static GaussianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx) + internal GaussianKernel(IHostEnvironment env, Options options) { - Contracts.CheckValue(env, nameof(env)); - env.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(GetVersionInfo()); - return new GaussianFourierSampler(env, ctx); + Contracts.CheckValueOrNull(env, nameof(env)); + env.CheckValue(options, nameof(options)); + + _gamma = options.Gamma; } - private GaussianFourierSampler(IHostEnvironment env, ModelLoadContext ctx) + internal override float Distance(in VBuffer first, in VBuffer second) { - Contracts.AssertValue(env); - _host = env.Register(LoadName); - _host.AssertValue(ctx); - - // *** Binary format *** - // int: sizeof(Float) - // Float: gamma - - int cbFloat = ctx.Reader.ReadInt32(); - _host.CheckDecode(cbFloat == sizeof(float)); - - _gamma = ctx.Reader.ReadFloat(); - _host.CheckDecode(FloatUtils.IsFinite(_gamma)); + return VectorUtils.L2DistSquared(in first, in second); } - void ICanSaveModel.Save(ModelSaveContext ctx) + internal override FourierRandomNumberGeneratorBase GetRandomNumberGenerator(float averageDistance) { - ctx.SetVersionInfo(GetVersionInfo()); - - // *** Binary format *** - // int: sizeof(Float) - // Float: gamma - - ctx.Writer.Write(sizeof(float)); - _host.Assert(FloatUtils.IsFinite(_gamma)); - ctx.Writer.Write(_gamma); + Contracts.Assert(averageDistance > 0); + return new RandomNumberGenerator(_gamma, averageDistance); } - public float Next(Random rand) + internal sealed class RandomNumberGenerator : FourierRandomNumberGeneratorBase, ICanSaveModel { - return (float)Stats.SampleFromGaussian(rand) * MathUtils.Sqrt(2 * _gamma); + internal const string LoaderSignature = "RandGaussFourierExec"; + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "RND GAUS", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(RandomNumberGenerator).Assembly.FullName); + } + + private readonly float _gamma; + + public RandomNumberGenerator(float gamma, float averageDistance) + : base() + { + Contracts.Assert(gamma > 0); + Contracts.Assert(averageDistance > 0); + _gamma = gamma / averageDistance; + } + + private static RandomNumberGenerator Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + return new RandomNumberGenerator(env, ctx); + } + + private RandomNumberGenerator(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.AssertValue(env); + env.AssertValue(ctx); + + // *** Binary format *** + // int: sizeof(Float) + // Float: gamma + + int cbFloat = ctx.Reader.ReadInt32(); + env.CheckDecode(cbFloat == sizeof(float)); + + _gamma = ctx.Reader.ReadFloat(); + env.CheckDecode(FloatUtils.IsFinite(_gamma)); + } + + void ICanSaveModel.Save(ModelSaveContext ctx) + { + ctx.SetVersionInfo(GetVersionInfo()); + + // *** Binary format *** + // int: sizeof(Float) + // Float: gamma + + ctx.Writer.Write(sizeof(float)); + Contracts.Assert(FloatUtils.IsFinite(_gamma)); + ctx.Writer.Write(_gamma); + } + + public override float Next(Random rand) + { + return (float)Stats.SampleFromGaussian(rand) * MathUtils.Sqrt(2 * _gamma); + } } } - public sealed class LaplacianFourierSampler : IFourierDistributionSampler + /// + /// The Laplacian kernel is defined as k(x,y)=exp(-a*|x-y|_1). The distribution that is the Fourier transform of this + /// kernel is the Cauchy distribution with parameters (0, a). + /// + public sealed class LaplacianKernel : KernelBase { - public sealed class Options : IFourierDistributionSamplerFactory + internal sealed class Options : IComponentFactory { [Argument(ArgumentType.AtMostOnce, HelpText = "a in the term exp(-a|x| / r). r is an estimate of the average intra-example L1 distance")] public float A = 1; - IFourierDistributionSampler IComponentFactory.CreateComponent(IHostEnvironment env, float avgDist) - => new LaplacianFourierSampler(env, this, avgDist); + public KernelBase CreateComponent(IHostEnvironment env) => new LaplacianKernel(env, this); } - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "RND LPLC", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(LaplacianFourierSampler).Assembly.FullName); - } + internal const string LoadName = "LaplacianRandom"; - internal const string LoaderSignature = "RandLaplacianFourierExec"; - internal const string RegistrationName = "LaplacianRandom"; - - private readonly IHost _host; private readonly float _a; - public LaplacianFourierSampler(IHostEnvironment env, Options options, float avgDist) + /// + /// Create a new instance of a LaplacianKernel. + /// + /// The coefficient in the exponent of the kernel function + public LaplacianKernel(float a = 1) { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(RegistrationName); - _host.CheckValue(options, nameof(options)); - - _a = options.A / avgDist; + Contracts.CheckParam(a > 0, nameof(a)); + _a = a; } - private static LaplacianFourierSampler Create(IHostEnvironment env, ModelLoadContext ctx) + internal LaplacianKernel(IHostEnvironment env, Options options) { Contracts.CheckValue(env, nameof(env)); - env.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(GetVersionInfo()); + env.CheckValue(options, nameof(options)); - return new LaplacianFourierSampler(env, ctx); + _a = options.A; } - private LaplacianFourierSampler(IHostEnvironment env, ModelLoadContext ctx) + internal override float Distance(in VBuffer first, in VBuffer second) { - Contracts.AssertValue(env); - _host = env.Register(RegistrationName); - _host.AssertValue(ctx); - - // *** Binary format *** - // int: sizeof(Float) - // Float: a - - int cbFloat = ctx.Reader.ReadInt32(); - _host.CheckDecode(cbFloat == sizeof(float)); - - _a = ctx.Reader.ReadFloat(); - _host.CheckDecode(FloatUtils.IsFinite(_a)); + return VectorUtils.L1Distance(in first, in second); } - void ICanSaveModel.Save(ModelSaveContext ctx) + internal override FourierRandomNumberGeneratorBase GetRandomNumberGenerator(float averageDistance) { - ctx.SetVersionInfo(GetVersionInfo()); - - // *** Binary format *** - // int: sizeof(Float) - // Float: a - - ctx.Writer.Write(sizeof(float)); - _host.Assert(FloatUtils.IsFinite(_a)); - ctx.Writer.Write(_a); + Contracts.Assert(averageDistance > 0); + return new RandomNumberGenerator(_a, averageDistance); } - public float Next(Random rand) + internal sealed class RandomNumberGenerator : FourierRandomNumberGeneratorBase, ICanSaveModel { - return _a * Stats.SampleFromCauchy(rand); + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "RND LPLC", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(RandomNumberGenerator).Assembly.FullName); + } + + internal const string LoaderSignature = "RandLaplacianFourierExec"; + internal const string RegistrationName = "LaplacianRandom"; + + private readonly float _a; + + public RandomNumberGenerator(float a, float averageDistance) + { + Contracts.Assert(a > 0); + Contracts.Assert(averageDistance > 0); + _a = a / averageDistance; + } + + private static RandomNumberGenerator Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + + return new RandomNumberGenerator(env, ctx); + } + + private RandomNumberGenerator(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.AssertValue(env); + env.AssertValue(ctx); + + // *** Binary format *** + // int: sizeof(Float) + // Float: a + + int cbFloat = ctx.Reader.ReadInt32(); + env.CheckDecode(cbFloat == sizeof(float)); + + _a = ctx.Reader.ReadFloat(); + env.CheckDecode(FloatUtils.IsFinite(_a)); + } + + void ICanSaveModel.Save(ModelSaveContext ctx) + { + ctx.SetVersionInfo(GetVersionInfo()); + + // *** Binary format *** + // int: sizeof(Float) + // Float: a + + ctx.Writer.Write(sizeof(float)); + Contracts.Assert(FloatUtils.IsFinite(_a)); + ctx.Writer.Write(_a); + } + + public override float Next(Random rand) + { + return _a * Stats.SampleFromCauchy(rand); + } } } } diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 693b0263b1..5890ef09da 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -13,7 +13,6 @@ using Microsoft.ML.Internal.CpuMath; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Model; -using Microsoft.ML.Numeric; using Microsoft.ML.Transforms.Projections; [assembly: LoadableClass(RandomFourierFeaturizingTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierFeaturizingTransformer), typeof(RandomFourierFeaturizingTransformer.Options), typeof(SignatureDataTransform), @@ -31,7 +30,11 @@ namespace Microsoft.ML.Transforms.Projections { /// - /// Maps vector columns to a low -dimensional feature space. + /// Maps vector columns to a feature space where the inner products approximate a user specified shift-invariant kernel. + /// The kernel is indicated by specifying a instance. The available implementations + /// are and . + /// This transformation is based on this paper by + /// Rahimi and Recht. /// public sealed class RandomFourierFeaturizingTransformer : OneToOneTransformerBase { @@ -43,8 +46,9 @@ internal sealed class Options [Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")] public int NewDim = RandomFourierFeaturizingEstimator.Defaults.NewDim; - [Argument(ArgumentType.Multiple, HelpText = "Which kernel to use?", ShortName = "kernel", SignatureType = typeof(SignatureFourierDistributionSampler))] - public IComponentFactory MatrixGenerator = new GaussianFourierSampler.Options(); + [Argument(ArgumentType.Multiple, HelpText = "Which kernel to use?", ShortName = "kernel", SignatureType = typeof(SignatureKernelBase))] + public IComponentFactory MatrixGenerator = new GaussianKernel.Options(); + [Argument(ArgumentType.AtMostOnce, HelpText = "Create two features for every random Fourier frequency? (one for cos and one for sin)")] public bool UseSin = RandomFourierFeaturizingEstimator.Defaults.UseSin; @@ -59,8 +63,8 @@ internal sealed class Column : OneToOneColumn [Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")] public int? NewDim; - [Argument(ArgumentType.Multiple, HelpText = "which kernel to use?", ShortName = "kernel", SignatureType = typeof(SignatureFourierDistributionSampler))] - public IComponentFactory MatrixGenerator; + [Argument(ArgumentType.Multiple, HelpText = "which kernel to use?", ShortName = "kernel", SignatureType = typeof(SignatureKernelBase))] + public IComponentFactory MatrixGenerator; [Argument(ArgumentType.AtMostOnce, HelpText = "create two features for every random Fourier frequency? (one for cos and one for sin)")] public bool? UseSin; @@ -100,7 +104,7 @@ private sealed class TransformInfo // the random rotations public readonly AlignedArray RotationTerms; - private readonly IFourierDistributionSampler _matrixGenerator; + private readonly FourierRandomNumberGeneratorBase _matrixGenerator; private readonly bool _useSin; private readonly TauswortheHybrid _rand; private readonly TauswortheHybrid.State _state; @@ -118,7 +122,7 @@ public TransformInfo(IHost host, RandomFourierFeaturizingEstimator.ColumnInfo co _state = _rand.GetState(); var generator = column.Generator; - _matrixGenerator = generator.CreateComponent(host, avgDist); + _matrixGenerator = generator.GetRandomNumberGenerator(avgDist); int roundedUpD = RoundUp(NewDim, _cfltAlign); int roundedUpNumFeatures = RoundUp(SrcDim, _cfltAlign); @@ -151,7 +155,7 @@ public TransformInfo(IHostEnvironment env, ModelLoadContext ctx, string director _rand = new TauswortheHybrid(_state); env.CheckDecode(ctx.Repository != null && - ctx.LoadModelOrNull(env, out _matrixGenerator, directoryName)); + ctx.LoadModelOrNull(env, out _matrixGenerator, directoryName)); // initialize the transform matrix int roundedUpD = RoundUp(NewDim, _cfltAlign); @@ -354,11 +358,7 @@ private float[] GetAvgDistances(RandomFourierFeaturizingEstimator.ColumnInfo[] c else { float[] distances; - // create a dummy generator in order to get its type. - // REVIEW this should be refactored. See https://github.com/dotnet/machinelearning/issues/699 - var matrixGenerator = columns[iinfo].Generator.CreateComponent(Host, 1); - bool gaussian = matrixGenerator is GaussianFourierSampler; - + var generator = columns[iinfo].Generator; // If the number of pairs is at most the maximum reservoir size / 2, go over all the pairs. if (resLength < reservoirSize) { @@ -367,10 +367,7 @@ private float[] GetAvgDistances(RandomFourierFeaturizingEstimator.ColumnInfo[] c for (int i = 0; i < instanceCount; i++) { for (int j = i + 1; j < instanceCount; j++) - { - distances[count++] = gaussian ? VectorUtils.L2DistSquared(in res[i], in res[j]) - : VectorUtils.L1Distance(in res[i], in res[j]); - } + distances[count++] = generator.Distance(in res[i], in res[j]); } Host.Assert(count == distances.Length); } @@ -378,12 +375,7 @@ private float[] GetAvgDistances(RandomFourierFeaturizingEstimator.ColumnInfo[] c { distances = new float[reservoirSize / 2]; for (int i = 0; i < reservoirSize - 1; i += 2) - { - // For Gaussian kernels, we scale by the L2 distance squared, since the kernel function is exp(-gamma ||x-y||^2). - // For Laplacian kernels, we scale by the L1 distance, since the kernel function is exp(-gamma ||x-y||_1). - distances[i / 2] = gaussian ? VectorUtils.L2DistSquared(in res[i], in res[i + 1]) : - VectorUtils.L1Distance(in res[i], in res[i + 1]); - } + distances[i / 2] = generator.Distance(in res[i], in res[i + 1]); } // If by chance, in the random permutation all the pairs are the same instance we return 1. @@ -424,6 +416,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat { Contracts.CheckValue(env, nameof(env)); env.CheckValue(options, nameof(options)); + env.CheckValue(options.MatrixGenerator, nameof(options.MatrixGenerator)); env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); @@ -439,7 +432,7 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat item.NewDim ?? options.NewDim, item.UseSin ?? options.UseSin, item.Source ?? item.Name, - item.MatrixGenerator ?? options.MatrixGenerator, + (item.MatrixGenerator ?? options.MatrixGenerator).CreateComponent(env), item.Seed ?? options.Seed); }; } @@ -638,7 +631,7 @@ public sealed class ColumnInfo /// /// Which fourier generator to use. /// - public readonly IComponentFactory Generator; + public readonly KernelBase Generator; /// /// The number of random Fourier features to create. /// @@ -661,12 +654,12 @@ public sealed class ColumnInfo /// Name of column to transform. /// Which fourier generator to use. /// The seed of the random number generator for generating the new features (if unspecified, the global random is used). - public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = null, IComponentFactory generator = null, int? seed = null) + public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = null, KernelBase generator = null, int? seed = null) { Contracts.CheckUserArg(newDim > 0, nameof(newDim), "must be positive."); InputColumnName = inputColumnName ?? name; Name = name; - Generator = generator ?? new GaussianFourierSampler.Options(); + Generator = generator ?? new GaussianKernel(); NewDim = newDim; UseSin = useSin; Seed = seed; diff --git a/test/BaselineOutput/Common/Rff/featurized.tsv b/test/BaselineOutput/Common/Rff/featurized.tsv new file mode 100644 index 0000000000..2e76fec682 --- /dev/null +++ b/test/BaselineOutput/Common/Rff/featurized.tsv @@ -0,0 +1,12 @@ +#@ TextLoader{ +#@ header+ +#@ sep=tab +#@ col=VectorFloat:R4:0-7 +#@ col=Label:R4:8 +#@ col=RffVectorFloat:R4:9-14 +#@ } +15 8:Label +5 1 1 1 2 1 3 1 0 0.127561361 0.5630821 0.5682155 -0.10229627 0.177215546 -0.5494797 +5 4 4 5 7 10 3 2 0 -0.563501656 0.125694782 0.532651365 -0.222746149 -0.243922859 0.5232924 +3 1 1 1 2 2 3 1 0 0.245731741 0.52244544 0.559933841 -0.140738815 0.197038963 -0.5426868 +6 8 8 1 3 4 3 7 0 0.316527039 0.482849836 0.06657861 -0.573498547 -0.5591914 0.143660337 diff --git a/test/BaselineOutput/SingleDebug/Rff/featurized.tsv b/test/BaselineOutput/SingleDebug/Rff/featurized.tsv deleted file mode 100644 index 6ce6b204cc..0000000000 --- a/test/BaselineOutput/SingleDebug/Rff/featurized.tsv +++ /dev/null @@ -1,12 +0,0 @@ -#@ TextLoader{ -#@ header+ -#@ sep=tab -#@ col=VectorFloat:R4:0-7 -#@ col=Label:R4:8 -#@ col=RffVectorFloat:R4:9-14 -#@ } -15 8:Label -5 1 1 1 2 1 3 1 0 0.3418174 -0.465289354 0.247575819 -0.5215741 0.576975167 -0.02080728 -5 4 4 5 7 10 3 2 0 0.566368 -0.112074189 0.169326738 -0.5519618 0.517083168 -0.2568235 -3 1 1 1 2 2 3 1 0 0.497458071 -0.293033749 0.419352561 -0.39683342 0.572824 -0.07215268 -6 8 8 1 3 4 3 7 0 -0.382303447 -0.432640046 0.289225727 -0.4996817 0.571184337 -0.08415316 diff --git a/test/BaselineOutput/SingleRelease/Rff/featurized.tsv b/test/BaselineOutput/SingleRelease/Rff/featurized.tsv deleted file mode 100644 index 6ce6b204cc..0000000000 --- a/test/BaselineOutput/SingleRelease/Rff/featurized.tsv +++ /dev/null @@ -1,12 +0,0 @@ -#@ TextLoader{ -#@ header+ -#@ sep=tab -#@ col=VectorFloat:R4:0-7 -#@ col=Label:R4:8 -#@ col=RffVectorFloat:R4:9-14 -#@ } -15 8:Label -5 1 1 1 2 1 3 1 0 0.3418174 -0.465289354 0.247575819 -0.5215741 0.576975167 -0.02080728 -5 4 4 5 7 10 3 2 0 0.566368 -0.112074189 0.169326738 -0.5519618 0.517083168 -0.2568235 -3 1 1 1 2 2 3 1 0 0.497458071 -0.293033749 0.419352561 -0.39683342 0.572824 -0.07215268 -6 8 8 1 3 4 3 7 0 -0.382303447 -0.432640046 0.289225727 -0.4996817 0.571184337 -0.08415316 diff --git a/test/Microsoft.ML.Tests/Transformers/RffTests.cs b/test/Microsoft.ML.Tests/Transformers/RffTests.cs index c58e56b4d7..dec7254102 100644 --- a/test/Microsoft.ML.Tests/Transformers/RffTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/RffTests.cs @@ -51,11 +51,10 @@ public void RffWorkout() var invalidData = ML.Data.ReadFromEnumerable(new[] { new TestClassInvalidSchema { A = 1 }, new TestClassInvalidSchema { A = 1 } }); var validFitInvalidData = ML.Data.ReadFromEnumerable(new[] { new TestClassBiggerSize { A = new float[200] }, new TestClassBiggerSize { A = new float[200] } }); var dataView = ML.Data.ReadFromEnumerable(data); - var generator = new GaussianFourierSampler.Options(); var pipe = ML.Transforms.Projection.CreateRandomFourierFeatures(new[]{ new RandomFourierFeaturizingEstimator.ColumnInfo("RffA", 5, false, "A"), - new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianFourierSampler.Options()) + new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianKernel()) }); TestEstimatorCore(pipe, dataView, invalidInput: invalidData, validForFitNotValidForTransformInput: validFitInvalidData); @@ -105,7 +104,7 @@ public void TestOldSavingAndLoading() var est = ML.Transforms.Projection.CreateRandomFourierFeatures(new[]{ new RandomFourierFeaturizingEstimator.ColumnInfo("RffA", 5, false, "A"), - new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianFourierSampler.Options()) + new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianKernel()) }); var result = est.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result);