diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs index 45505d5ece..12a3f8c9ed 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs @@ -362,6 +362,9 @@ internal sealed class SrCnnEntireModeler private static readonly double _unitForZero = 0.3; private static readonly double _minimumScore = 0.0; private static readonly double _maximumScore = 1.0; + // Use this threshold to correct false anomalies + private static readonly double _zscoreThreshold = 1.5; + // If the score window is smaller than this value, the anomaly score is tend to be small. // Proof: For each point, the SR anomaly score is calculated as (w is average window size): // (mag - avg_mag) / avg_mag @@ -426,6 +429,8 @@ internal sealed class SrCnnEntireModeler //used in all modes private double _minimumOriginValue; private double _maximumOriginValue; + private double _std; + private double _mean; private readonly double[] _predictArray; private double[] _backAddArray; private double[] _fftRe; @@ -491,14 +496,23 @@ public void Train(double[] values, ref double[][] results) _minimumOriginValue = Double.MaxValue; _maximumOriginValue = Double.MinValue; + var sum = 0.0; + var squareSum = 0.0; + Array.Resize(ref _seriesToDetect, values.Length); for (int i = 0; i < values.Length; ++i) { - _seriesToDetect[i] = values[i]; - _minimumOriginValue = Math.Min(_minimumOriginValue, values[i]); - _maximumOriginValue = Math.Max(_maximumOriginValue, values[i]); + var value = values[i]; + _seriesToDetect[i] = value; + _minimumOriginValue = Math.Min(_minimumOriginValue, value); + _maximumOriginValue = Math.Max(_maximumOriginValue, value); + sum += value; + squareSum += value * value; } + _mean = sum / values.Length; + _std = Math.Sqrt(Math.Max(0.0, (squareSum - (sum * sum) / values.Length) / values.Length)); // Clamp at 0: rounding in this one-pass variance formula can give a tiny negative value, and Math.Sqrt would then return NaN. + if (_period > 0) { _deseasonalityFunction.Deseasonality(ref values, 
_period, ref _seriesToDetect); @@ -612,9 +626,22 @@ private void SpectralResidual(double[] values, double[][] results, double thresh var detres = score > threshold ? 1 : 0; + // Anomalies correction by zscore + if (detres > 0) + { + // Use zscore to filter out those false anomalies that lie within 1.5 sigma region. + var zscore = Math.Abs(values[i] - _mean) / _std; + if (_std < _eps || zscore < _zscoreThreshold) + { + detres = 0; + score = 0.0; + } + } + results[i][0] = detres; results[i][1] = score; results[i][2] = _ifftMagList[i]; + } } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 2877c3150f..1dcd2f52c3 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; -using System.Data; using System.IO; using System.Linq; using Microsoft.ML.Data; @@ -717,6 +716,62 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData( } } + // Checks that batch SR-CNN detection on big_spike_data.csv flags only the row-26 spike as an anomaly, for each detect mode. + [Theory, CombinatorialData] + public void TestSrCnnAnomalyDetectorBigSpike( + [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)] SrCnnDetectMode mode + ) + { + var ml = new MLContext(1); + IDataView dataView; + List<TimeSeriesDataDouble> data; + + var dataPath = GetDataPath("Timeseries", "big_spike_data.csv"); + + // Load data from file into the dataView + dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true); + data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList(); + + // Setup the detection arguments + string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction); + string inputColumnName = nameof(TimeSeriesDataDouble.Value); + + // Do batch anomaly detection + var options = new SrCnnEntireAnomalyDetectorOptions() + { + Threshold = 0.3, + BatchSize = -1, + Sensitivity = 80.0, + DetectMode = mode, + Period = 0, + DeseasonalityMode = 
SrCnnDeseasonalityMode.Stl + }; + + var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options); + + // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. + var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>( + outputDataView, reuseRowObject: false); + + var anomalyIndex = 26; // Zero-based data row of the 48232.24413 spike in big_spike_data.csv. + + int k = 0; + foreach (var prediction in predictionColumn) + { + if (anomalyIndex == k) + { + Assert.Equal(1, prediction.Prediction[0]); + } + else + { + Assert.Equal(0, prediction.Prediction[0]); + } + + ++k; + } + + } + [Theory, CombinatorialData] public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData( [CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode diff --git a/test/data/Timeseries/big_spike_data.csv b/test/data/Timeseries/big_spike_data.csv new file mode 100644 index 0000000000..43b1491ea0 --- /dev/null +++ b/test/data/Timeseries/big_spike_data.csv @@ -0,0 +1,136 @@ +Value +0.333061106 +2.198203303 +1.705836778 +1.861708215 +1.085050871 +0.548409541 +0.365537211 +0.433823922 +0.450379649 +0.485662867 +0.59162219 +0.678494031 +0.735315015 +0.780228908 +0.779309892 +0.71637311 +0.783369345 +0.829129842 +0.769519564 +0.74230352 +0.914116686 +0.970162226 +0.964537878 +0.983059421 +1.009637074 +1.054769667 +48232.24413 +4739.675242 +4963.982698 +8555.732913 +75.25537709 +11.2742621 +4.388301951 +2.584960796 +2.273629928 +1.972334276 +1.811987528 +1.854365004 +1.581860355 +1.478895939 +1.447799312 +1.406460886 +1.333295368 +1.282260475 +1.345933543 +1.264431234 +1.235222153 +1.204307109 +1.133533648 +1.110515351 +1.017397262 +1.103902775 +1.099039227 +1.061479438 +1.063725177 +1.072777829 +1.044107263 +0.981847451 +1.038324454 +1.033883341 +1.004416487 +1.017918007 +0.345233269 +1.092365812 +1.078005286 +1.033142227 +1.024832225 +1.098672969 +1.092767871 +1.095272293 +1.139357768 +1.0711793 +1.119012071 
+1.11906761 +1.131538563 +1.113967769 +1.141610905 +1.14317559 +1.108130866 +1.083645413 +1.147460394 +1.177086603 +1.153490106 +1.145660569 +1.132464809 +1.106364602 +1.003350151 +1.099011524 +1.109557478 +1.065336146 +1.081590334 +1.075768021 +0.986278889 +1.001219623 +1.080312553 +1.075076345 +1.057146027 +1.106862867 +1.084433852 +0.975639541 +0.944182773 +1.088712253 +1.067152572 +1.107507855 +1.069142173 +1.036247939 +0.995907308 +0.932153379 +1.074865283 +1.065780376 +1.05063751 +1.077263172 +1.033459106 +0.985960758 +0.981842413 +1.032862035 +1.005063722 +0.862145269 +0.491629016 +0.473904777 +0.777874357 +0.945595834 +1.020180047 +1.025171701 +1.031632464 +1.02571454 +0.950313827 +0.935412116 +0.991591559 +1.013279894 +0.991734823 +1.007466737 +1.019160801 +0.919227208 +0.977617794