Skip to content

Commit bc10d60

Browse files
throw exception when call AppendCacheCheckPoint on empty estimator chain (#5291)
1 parent 5d0edf2 commit bc10d60

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

src/Microsoft.ML.Data/DataLoadSave/EstimatorChain.cs

+7-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System;
56
using System.Linq;
67
using Microsoft.ML.Internal.Utilities;
78
using Microsoft.ML.Runtime;
@@ -99,7 +100,7 @@ public EstimatorChain<TNewTrans> Append<TNewTrans>(IEstimator<TNewTrans> estimat
99100
/// cached data. It is helpful to have a caching checkpoint before trainers or feature engineering that take multiple data passes.
100101
/// It is also helpful to have after a slow operation, for example after dataset loading from a slow source or after feature
101102
/// engineering that is slow on its apply phase, if downstream estimators will do multiple passes over the output of this operation.
102-
/// Adding a cache checkpoint at the end of an <see cref="EstimatorChain{TLastTransformer}"/> is meaningless and should be avoided.
103+
/// Adding a cache checkpoint at the begin or end of an <see cref="EstimatorChain{TLastTransformer}"/> is meaningless and should be avoided.
103104
/// Cache checkpoints should be removed if disk thrashing or OutOfMemory exceptions are seen, which can occur on when the featured
104105
/// dataset immediately prior to the checkpoint is larger than available RAM.
105106
/// </summary>
@@ -108,9 +109,12 @@ public EstimatorChain<TLastTransformer> AppendCacheCheckpoint(IHostEnvironment e
108109
{
109110
Contracts.CheckValue(env, nameof(env));
110111

111-
if (_estimators.Length == 0 || _needCacheAfter.Last())
112+
if(_estimators.Length == 0)
113+
throw new InvalidOperationException("Current estimator chain has no estimator, can't append cache checkpoint.");
114+
115+
if (_needCacheAfter.Last())
112116
{
113-
// If there are no estimators, or if we already need to cache after this, we don't need to do anything else.
117+
// If we already need to cache after this, we don't need to do anything else.
114118
return this;
115119
}
116120

test/Microsoft.ML.Tests/CachingTests.cs

+17
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System;
56
using System.Linq;
67
using System.Threading;
78
using Microsoft.ML.Data;
@@ -60,6 +61,22 @@ public void CacheCheckpointTest()
6061
Assert.True(trainData.All(x => x.AccessCount == 1));
6162
}
6263

64+
[Fact]
65+
public void CacheOnEmptyEstimatorChainTest()
66+
{
67+
var ex = Assert.Throws<InvalidOperationException>(() => CacheOnEmptyEstimatorChain());
68+
Assert.Contains("Current estimator chain has no estimator, can't append cache checkpoint.", ex.Message,
69+
StringComparison.InvariantCultureIgnoreCase);
70+
}
71+
72+
private void CacheOnEmptyEstimatorChain()
73+
{
74+
new EstimatorChain<ITransformer>().AppendCacheCheckpoint(ML)
75+
.Append(ML.Transforms.CopyColumns("F1", "Features"))
76+
.Append(ML.Transforms.NormalizeMinMax("Norm1", "F1"))
77+
.Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1"));
78+
}
79+
6380
[Fact]
6481
public void CacheTest()
6582
{

0 commit comments

Comments
 (0)