Skip to content

Commit d95fe38

Browse files
[SrCnnEntireAnomalyDetector] Upgrade boundary calculation and expected value calculation (#5436)
* adjust expected value
* update boundary calculation
* fix boundary
* adjust default values
* fix percent case
* fix error in anomaly score calculation
Co-authored-by: [email protected] <[email protected]>
1 parent 82d4bb7 commit d95fe38

File tree

3 files changed

+230
-52
lines changed

3 files changed

+230
-52
lines changed

src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs

Lines changed: 95 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ internal static class Defaults
8484
{
8585
public const double Threshold = 0.3;
8686
public const int BatchSize = 2000;
87-
public const double Sensitivity = 55;
87+
public const double Sensitivity = 70;
8888
public const SrCnnDetectMode DetectMode = SrCnnDetectMode.AnomalyOnly;
8989
public const int Period = 0;
9090
public const SrCnnDeseasonalityMode DeseasonalityMode = SrCnnDeseasonalityMode.Stl;
@@ -349,36 +349,55 @@ internal sealed class SrCnnEntireModeler
349349
private static readonly int _judgementWindowSize = 40;
350350
private static readonly double _eps = 1e-8;
351351
private static readonly double _deanomalyThreshold = 0.35;
352-
private static readonly double _boundSensitivity = 70.0;
353-
354-
// A fixed lookup table which returns factor using sensitivity as index.
355-
// Since Margin = BoundaryUnit * factor, this factor is calculated to make sure Margin == Boundary when sensitivity is 50,
356-
// and increases/decreases exponentially as sensitivity increases/decreases.
357-
// The factor array is generated by formula:
358-
// f(x)=1, if x=50;
359-
// f(x)=f(x+1)*(1.25+0.001*x), if 0<=x<50;
360-
// f(x)=f(x+1)/(1.25+0.001*(x-50)), if 50<x<60;
361-
// f(x)=f(x+1)/(1.15+0.001*(x-50)),, if 60<=x<=100.
352+
private static readonly double _boundSensitivity = 93.0;
353+
private static readonly double _unitForZero = 0.3;
354+
355+
// pseudo-code to generate the factors.
356+
// factors = []
357+
// for i in range(0, 30):
358+
// sen = 0.8 * (i - 30) ** 2 + 32
359+
// factors.append(sen)
360+
// for i in range(30, 50):
361+
// sen = -1.25 * i + 67.5
362+
// factors.append(sen)
363+
// for i in range(50, 60):
364+
// sen = -0.4 * i + 25
365+
// factors.append(sen)
366+
// for i in range(60, 70):
367+
// sen = -0.04 * i + 3.4
368+
// factors.append(sen)
369+
// for i in range(70, 80):
370+
// sen = -0.03 * i + 2.7
371+
// factors.append(sen)
372+
// for i in range(80, 90):
373+
// sen = -0.015 * i + 1.4999999999999998
374+
// factors.append(sen)
375+
// for i in range(90, 98):
376+
// sen = -0.011818181818181818 * i + 1.2136363636363636
377+
// factors.append(sen)
378+
// factors.append(-0.011818181818181818 * 99 + 1.2136363636363636)
379+
// factors.append(0.01200000000000001)
380+
// for i in range(5):
381+
// sen = -0.001925 * i + 0.008
382+
// factors.append(sen)
383+
// factors.append(0)
384+
// factors = factors[5:]  (drop the first 5 entries, leaving 101 factors)
362385
private static readonly double[] _factors = new double[]{
363-
184331.62871148242, 141902.71648305038, 109324.12672037778, 84289.9974713784, 65038.57829581667, 50222.84038287002,
364-
38812.08684920403, 30017.081863266845, 23233.035497884553, 17996.15452973242, 13950.50738738947, 10822.736530170265,
365-
8402.745753237783, 6528.939979205737, 5076.93622022219, 3950.92312857758, 3077.042935029268, 2398.318733460069,
366-
1870.7634426365591, 1460.393007522685, 1140.9320371270976, 892.0500681212648, 698.0047481387048, 546.5972968979678,
367-
428.36778753759233, 335.97473532360186, 263.71643275007995, 207.16137686573444, 162.8627176617409, 128.13746472206208,
368-
100.8956415134347, 79.50799173635517, 62.70346351447568, 49.48971074544253, 39.09139869308257, 30.90229145698227,
369-
24.448015393182175, 19.35709849024717, 15.338429865489042, 12.163703303322, 9.653732780414286, 7.667778221139226,
370-
6.095213212352326, 4.8490160798347866, 3.8606815922251485, 3.076240312529999, 2.4531421949999994, 1.9578149999999996,
371-
1.5637499999999998, 1.25, 1.0, 0.8695652173913044, 0.7554867223208555, 0.655804446459076, 0.5687809596349316,
372-
0.4928777813127657, 0.4267340097946024, 0.36914706729636887, 0.3190553736355825, 0.27552277516026125, 0.23772456873189068,
373-
0.20493497304473338, 0.17651591132190647, 0.1519069804835684, 0.13061649224726435, 0.11221348131208278, 0.09632058481723846,
374-
0.08260770567516164, 0.0707863801843716, 0.06060477755511267, 0.051843265658779024, 0.0443104834690419, 0.03783986632710667,
375-
0.03228657536442549, 0.027524787181948417, 0.02344530424356765, 0.019953450420057577, 0.01696721974494692, 0.014415649740821513,
376-
0.012237393667929978, 0.010379468759906684, 0.008796159966022614, 0.0074480609365136455, 0.006301235986898177,
377-
0.00532648857725966, 0.004498723460523362, 0.0037963911059268884, 0.0032010043051660104, 0.002696718032995797,
378-
0.0022699646742388863, 0.0019091376570554135, 0.0011570531254881296, 0.000697019955113331, 0.00041737721863073713,
379-
0.000248438820613534, 0.00014700521929794912, 8.647365841055832e-05, 5.056939088336744e-05, 2.9400808653120604e-05,
380-
1.6994687082728674e-05, 9.767061541798089e-06
381-
};
386+
532.0, 492.8, 455.20000000000005, 419.20000000000005, 384.8, 352.0, 320.8, 291.2, 263.20000000000005,
387+
236.8, 212.0, 188.8, 167.20000000000002, 147.2, 128.8, 112.0, 96.8, 83.2, 71.2, 60.8, 52.0, 44.8, 39.2,
388+
35.2, 32.8, 30.0, 28.75, 27.5, 26.25, 25.0, 23.75, 22.5, 21.25, 20.0, 18.75, 17.5, 16.25, 15.0, 13.75,
389+
12.5, 11.25, 10.0, 8.75, 7.5, 6.25, 5.0, 4.599999999999998, 4.199999999999999, 3.799999999999997,
390+
3.3999999999999986, 3.0, 2.599999999999998, 2.1999999999999993, 1.7999999999999972, 1.3999999999999986,
391+
1.0, 0.96, 0.9199999999999999, 0.8799999999999999, 0.8399999999999999, 0.7999999999999998,
392+
0.7599999999999998, 0.7199999999999998, 0.6799999999999997, 0.6399999999999997, 0.6000000000000001,
393+
0.5700000000000003, 0.54, 0.5100000000000002, 0.4800000000000004, 0.4500000000000002, 0.4200000000000004,
394+
0.3900000000000001, 0.3600000000000003, 0.33000000000000007, 0.2999999999999998, 0.2849999999999999,
395+
0.2699999999999998, 0.2549999999999999, 0.23999999999999977, 0.22499999999999987, 0.20999999999999974,
396+
0.19499999999999984, 0.17999999999999994, 0.1649999999999998, 0.1499999999999999, 0.13818181818181818,
397+
0.12636363636363646, 0.1145454545454545, 0.10272727272727278, 0.09090909090909083, 0.0790909090909091,
398+
0.06727272727272737, 0.043636363636363695, 0.01200000000000001, 0.008, 0.0060750000000000005, 0.00415,
399+
0.0022249999999999995, 0.0002999999999999999, 0.0
400+
};
382401

383402
private readonly double _threshold;
384403
private readonly double _sensitivity;
@@ -387,6 +406,8 @@ internal sealed class SrCnnEntireModeler
387406
private readonly IDeseasonality _deseasonalityFunction;
388407

389408
//used in all modes
409+
private double _minimumOriginValue;
410+
private double _maximumOriginValue;
390411
private readonly double[] _predictArray;
391412
private double[] _backAddArray;
392413
private double[] _fftRe;
@@ -449,10 +470,15 @@ public void Train(double[] values, ref double[][] results)
449470
Array.Resize<double[]>(ref results, values.Length);
450471
}
451472

473+
_minimumOriginValue = Double.MaxValue;
474+
_maximumOriginValue = Double.MinValue;
475+
452476
Array.Resize(ref _seriesToDetect, values.Length);
453477
for (int i = 0; i < values.Length; ++i)
454478
{
455479
_seriesToDetect[i] = values[i];
480+
_minimumOriginValue = Math.Min(_minimumOriginValue, values[i]);
481+
_maximumOriginValue = Math.Max(_maximumOriginValue, values[i]);
456482
}
457483

458484
if (_period > 0)
@@ -641,7 +667,7 @@ private void GetExpectedValue(double[] values, double[][] results)
641667

642668
for (int i = 0; i < results.Length; ++i)
643669
{
644-
results[i][3] = _ifftRe[i];
670+
results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]);
645671
}
646672
}
647673

@@ -650,7 +676,7 @@ private void GetExpectedValuePeriod(double[] values, double[][] results, IReadOn
650676
//Step 8: Calculate Expected Value
651677
for (int i = 0; i < values.Length; ++i)
652678
{
653-
results[i][3] = values[i] - residual[i];
679+
results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(values[i] - residual[i]);
654680
}
655681
}
656682

@@ -762,7 +788,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity)
762788
{
763789
//Step 10: Calculate UpperBound and LowerBound
764790
var margin = CalculateMargin(_units[i], sensitivity);
765-
results[i][3] = _ifftRe[i];
791+
results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]);
792+
766793
results[i][4] = _units[i];
767794
results[i][5] = _ifftRe[i] + margin;
768795
results[i][6] = _ifftRe[i] - margin;
@@ -783,6 +810,21 @@ private void GetMargin(double[] values, double[][] results, double sensitivity)
783810
}
784811
}
785812

813+
// Clamp the expected value to 0 when its sign contradicts the original data range (all values non-negative or all values non-positive)
814+
private double AdjustExpectedValueBasedOnOriginalDataRange(double expectedValue)
815+
{
816+
if (_minimumOriginValue >= 0 && expectedValue < 0)
817+
{
818+
expectedValue = 0;
819+
}
820+
else if (_maximumOriginValue <= 0 && expectedValue > 0)
821+
{
822+
expectedValue = 0;
823+
}
824+
825+
return expectedValue;
826+
}
827+
786828
// Adjust the expected value so that it is within the bound margin of value
787829
private double AdjustExpectedValueBasedOnBound(double value, double expectedValue, double unit)
788830
{
@@ -880,18 +922,20 @@ private void CalculateExpectedValueByFft(double[] data)
880922
FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length);
881923
}
882924

883-
private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys)
925+
private void CalculateBoundaryUnit(double[] data, bool[] isAnomalies)
884926
{
885927
int window = Math.Min(data.Length / 3, 512);
886928
double trendFraction = 0.5; // mix trend and average of trend
887929
double trendSum = 0;
888930
int calculationSize = 0;
931+
bool closeToZero = true;
889932

890933
MedianFilter(data, window, true);
891934
for (int i = 0; i < _trends.Length; ++i)
892935
{
893-
if (!isAnomalys[i])
936+
if (!isAnomalies[i])
894937
{
938+
closeToZero = closeToZero && _trends[i] < _eps;
895939
trendSum += Math.Abs(_trends[i]);
896940
++calculationSize;
897941
}
@@ -910,10 +954,17 @@ private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys)
910954
Array.Resize(ref _units, _trends.Length);
911955
for (int i = 0; i < _units.Length; ++i)
912956
{
913-
_units[i] = Math.Max(1, averageTrendPart + Math.Abs(_trends[i]) * trendFraction);
914-
if (double.IsInfinity(_units[i]))
957+
if (closeToZero)
958+
{
959+
_units[i] = _unitForZero;
960+
}
961+
else
915962
{
916-
throw new ArithmeticException("Not finite unit value");
963+
_units[i] = averageTrendPart + Math.Abs(_trends[i]) * trendFraction;
964+
if (double.IsInfinity(_units[i]))
965+
{
966+
throw new ArithmeticException("Not finite unit value");
967+
}
917968
}
918969
}
919970
}
@@ -1031,19 +1082,14 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool
10311082
return anomalyScore;
10321083
}
10331084

1034-
double distance = Math.Abs(exp - value);
1035-
List<double> margins = new List<double>();
1036-
for (int i = 100; i >= 0; --i)
1037-
{
1038-
margins.Add(CalculateMargin(unit, i));
1039-
}
1085+
double distanceFactor = Math.Abs(exp - value) / unit;
10401086

10411087
int lb = 0;
10421088
int ub = 100;
10431089
while (lb < ub)
10441090
{
10451091
int mid = (lb + ub) / 2;
1046-
if (margins[mid] < distance)
1092+
if (_factors[100 - mid] < distanceFactor)
10471093
{
10481094
lb = mid + 1;
10491095
}
@@ -1053,15 +1099,15 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool
10531099
}
10541100
}
10551101

1056-
if (Math.Abs(margins[lb] - distance) < _eps || lb == 0)
1102+
if (_factors[100 - lb] == distanceFactor || lb == 0)
10571103
{
10581104
anomalyScore = lb;
10591105
}
10601106
else
10611107
{
1062-
double lowerMargin = margins[lb - 1];
1063-
double upperMargin = margins[lb];
1064-
anomalyScore = lb - 1 + (distance - lowerMargin) / (upperMargin - lowerMargin);
1108+
double lowerMargin = _factors[101 - lb];
1109+
double upperMargin = _factors[100 - lb];
1110+
anomalyScore = lb - 1 + (distanceFactor - lowerMargin) / (upperMargin - lowerMargin);
10651111
}
10661112

10671113
return anomalyScore / 100.0f;

test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ public void TestSrCnnBatchAnomalyDetector(
621621

622622
// Do batch anomaly detection
623623
var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName,
624-
threshold: 0.35, batchSize: batchSize, sensitivity: 90.0, mode);
624+
threshold: 0.35, batchSize: batchSize, sensitivity: 98.0, mode);
625625

626626
// Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
627627
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
@@ -694,7 +694,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData(
694694
{
695695
Threshold = 0.3,
696696
BatchSize = -1,
697-
Sensitivity = 53.0,
697+
Sensitivity = 64.0,
698698
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
699699
Period = 288,
700700
DeseasonalityMode = mode
@@ -741,7 +741,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
741741
{
742742
Threshold = 0.23,
743743
BatchSize = -1,
744-
Sensitivity = 53.0,
744+
Sensitivity = 63.0,
745745
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
746746
Period = 288,
747747
DeseasonalityMode = mode
@@ -776,6 +776,68 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
776776
}
777777
}
778778

779+
[Theory, CombinatorialData]
780+
public void TestSrcnnEntireDetectNonnegativeData(
781+
[CombinatorialValues(true, false)] bool isPositive)
782+
{
783+
var ml = new MLContext(1);
784+
IDataView dataView;
785+
List<TimeSeriesDataDouble> data;
786+
787+
// Resolve the path of the test data file
788+
var dataPath = GetDataPath("Timeseries", "non_negative_case.csv");
789+
790+
// Load data from file into the dataView
791+
dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);
792+
data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList();
793+
794+
if (!isPositive)
795+
{
796+
for (int i = 0; i < data.Count; ++i)
797+
{
798+
data[i].Value = - data[i].Value;
799+
}
800+
}
801+
802+
dataView = ml.Data.LoadFromEnumerable<TimeSeriesDataDouble>(data);
803+
804+
// Setup the detection arguments
805+
string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
806+
string inputColumnName = nameof(TimeSeriesDataDouble.Value);
807+
808+
// Do batch anomaly detection
809+
var options = new SrCnnEntireAnomalyDetectorOptions()
810+
{
811+
Threshold = 0.10,
812+
BatchSize = -1,
813+
Sensitivity = 99.0,
814+
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
815+
Period = 0,
816+
DeseasonalityMode = SrCnnDeseasonalityMode.Stl
817+
};
818+
819+
var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);
820+
821+
// Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
822+
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
823+
outputDataView, reuseRowObject: false);
824+
825+
if (isPositive)
826+
{
827+
foreach (var prediction in predictionColumn)
828+
{
829+
Assert.True(prediction.Prediction[3] >= 0);
830+
}
831+
}
832+
else
833+
{
834+
foreach (var prediction in predictionColumn)
835+
{
836+
Assert.True(prediction.Prediction[3] <= 0);
837+
}
838+
}
839+
}
840+
779841
[Fact]
780842
public void RootCauseLocalization()
781843
{

0 commit comments

Comments
 (0)