From b1bb4c6b6bf6b09a017b9f8fe944f10766eea1e6 Mon Sep 17 00:00:00 2001 From: turvoy Date: Tue, 27 Sep 2022 15:39:12 +0200 Subject: [PATCH 1/7] stronger classifier for detection test --- .vscode/settings.json | 11 +++++++++++ sdmetrics/single_table/__init__.py | 3 ++- sdmetrics/single_table/detection/__init__.py | 5 +++-- sdmetrics/single_table/detection/sklearn.py | 17 +++++++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..9780500e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "python.testing.unittestArgs": [ + "-v", + "-s", + "./tests", + "-p", + "*test.py" + ], + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true +} \ No newline at end of file diff --git a/sdmetrics/single_table/__init__.py b/sdmetrics/single_table/__init__.py index 35704626..4990660d 100644 --- a/sdmetrics/single_table/__init__.py +++ b/sdmetrics/single_table/__init__.py @@ -6,7 +6,7 @@ from sdmetrics.single_table.bayesian_network import BNLikelihood, BNLogLikelihood from sdmetrics.single_table.detection.base import DetectionMetric from sdmetrics.single_table.detection.sklearn import ( - LogisticDetection, ScikitLearnClassifierDetectionMetric, SVCDetection) + LogisticDetection, ScikitLearnClassifierDetectionMetric, SVCDetection, GradientBoostingDetection) from sdmetrics.single_table.efficacy.base import MLEfficacyMetric from sdmetrics.single_table.efficacy.binary import ( BinaryAdaBoostClassifier, BinaryDecisionTreeClassifier, BinaryEfficacyMetric, @@ -47,6 +47,7 @@ 'DetectionMetric', 'LogisticDetection', 'SVCDetection', + 'GradientBoostingDetection', 'ScikitLearnClassifierDetectionMetric', 'MLEfficacyMetric', 'BinaryEfficacyMetric', diff --git a/sdmetrics/single_table/detection/__init__.py b/sdmetrics/single_table/detection/__init__.py index b987a119..9f4340b9 100644 --- 
a/sdmetrics/single_table/detection/__init__.py +++ b/sdmetrics/single_table/detection/__init__.py @@ -1,8 +1,9 @@ """Machine Learning Detection metrics for single table datasets.""" -from sdmetrics.single_table.detection.sklearn import LogisticDetection, SVCDetection +from sdmetrics.single_table.detection.sklearn import LogisticDetection, SVCDetection, GradientBoostingDetection __all__ = [ 'LogisticDetection', - 'SVCDetection' + 'SVCDetection', + 'GradientBoostingDetection' ] diff --git a/sdmetrics/single_table/detection/sklearn.py b/sdmetrics/single_table/detection/sklearn.py index a33a33d9..be656c4c 100644 --- a/sdmetrics/single_table/detection/sklearn.py +++ b/sdmetrics/single_table/detection/sklearn.py @@ -2,6 +2,7 @@ from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import GradientBoostingClassifier from sklearn.pipeline import Pipeline from sklearn.preprocessing import RobustScaler from sklearn.svm import SVC @@ -67,3 +68,19 @@ class SVCDetection(ScikitLearnClassifierDetectionMetric): @staticmethod def _get_classifier(): return SVC(probability=True, gamma='scale') + + +class GradientBoostingDetection(ScikitLearnClassifierDetectionMetric): + """ScikitLearnClassifierDetectionMetric based on a GradientBoostingClassifier. + + This metric builds a GradientBoostingClassifier that learns to tell the synthetic + data apart from the real data, which later on is evaluated using Cross Validation. + + The output of the metric is one minus the average ROC AUC score obtained. 
+ """ + + name = 'SVC Detection' + + @staticmethod + def _get_classifier(): + return GradientBoostingClassifier() \ No newline at end of file From 263e43eca6aaf3b4ac608b04c5d401a797132204 Mon Sep 17 00:00:00 2001 From: turvoy Date: Tue, 27 Sep 2022 17:25:38 +0200 Subject: [PATCH 2/7] make all tests --- sdmetrics/single_table/__init__.py | 3 ++- sdmetrics/single_table/detection/__init__.py | 7 ++++--- sdmetrics/single_table/detection/sklearn.py | 4 ++-- tests/integration/single_table/test_single_table.py | 4 +++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sdmetrics/single_table/__init__.py b/sdmetrics/single_table/__init__.py index 4990660d..ab6c76c0 100644 --- a/sdmetrics/single_table/__init__.py +++ b/sdmetrics/single_table/__init__.py @@ -6,7 +6,8 @@ from sdmetrics.single_table.bayesian_network import BNLikelihood, BNLogLikelihood from sdmetrics.single_table.detection.base import DetectionMetric from sdmetrics.single_table.detection.sklearn import ( - LogisticDetection, ScikitLearnClassifierDetectionMetric, SVCDetection, GradientBoostingDetection) + GradientBoostingDetection, LogisticDetection, ScikitLearnClassifierDetectionMetric, + SVCDetection) from sdmetrics.single_table.efficacy.base import MLEfficacyMetric from sdmetrics.single_table.efficacy.binary import ( BinaryAdaBoostClassifier, BinaryDecisionTreeClassifier, BinaryEfficacyMetric, diff --git a/sdmetrics/single_table/detection/__init__.py b/sdmetrics/single_table/detection/__init__.py index 9f4340b9..8450948b 100644 --- a/sdmetrics/single_table/detection/__init__.py +++ b/sdmetrics/single_table/detection/__init__.py @@ -1,9 +1,10 @@ """Machine Learning Detection metrics for single table datasets.""" -from sdmetrics.single_table.detection.sklearn import LogisticDetection, SVCDetection, GradientBoostingDetection +from sdmetrics.single_table.detection.sklearn import ( + GradientBoostingDetection, LogisticDetection, SVCDetection) __all__ = [ + 'GradientBoostingDetection', 
'LogisticDetection', - 'SVCDetection', - 'GradientBoostingDetection' + 'SVCDetection' ] diff --git a/sdmetrics/single_table/detection/sklearn.py b/sdmetrics/single_table/detection/sklearn.py index be656c4c..49eecafc 100644 --- a/sdmetrics/single_table/detection/sklearn.py +++ b/sdmetrics/single_table/detection/sklearn.py @@ -1,8 +1,8 @@ """scikit-learn based DetectionMetrics for single table datasets.""" +from sklearn.ensemble import GradientBoostingClassifier from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression -from sklearn.ensemble import GradientBoostingClassifier from sklearn.pipeline import Pipeline from sklearn.preprocessing import RobustScaler from sklearn.svm import SVC @@ -83,4 +83,4 @@ class GradientBoostingDetection(ScikitLearnClassifierDetectionMetric): @staticmethod def _get_classifier(): - return GradientBoostingClassifier() \ No newline at end of file + return GradientBoostingClassifier() diff --git a/tests/integration/single_table/test_single_table.py b/tests/integration/single_table/test_single_table.py index 7ecd45b3..2f880887 100644 --- a/tests/integration/single_table/test_single_table.py +++ b/tests/integration/single_table/test_single_table.py @@ -7,7 +7,8 @@ from sdmetrics.goal import Goal from sdmetrics.single_table.base import SingleTableMetric from sdmetrics.single_table.bayesian_network import BNLikelihood, BNLogLikelihood -from sdmetrics.single_table.detection import LogisticDetection, SVCDetection +from sdmetrics.single_table.detection import ( + GradientBoostingDetection, LogisticDetection, SVCDetection) from sdmetrics.single_table.multi_column_pairs import ( ContingencySimilarity, ContinuousKLDivergence, DiscreteKLDivergence) from sdmetrics.single_table.multi_single_column import ( @@ -17,6 +18,7 @@ METRICS = [ CSTest, KSComplement, + GradientBoostingDetection, LogisticDetection, SVCDetection, ContinuousKLDivergence, From f8384198d89639d6428c4ec23fe36e13e6891046 Mon Sep 17 00:00:00 2001 From: 
turvoy Date: Tue, 27 Sep 2022 17:31:18 +0200 Subject: [PATCH 3/7] remove vscode dir --- .vscode/settings.json | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9780500e..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "python.testing.unittestArgs": [ - "-v", - "-s", - "./tests", - "-p", - "*test.py" - ], - "python.testing.pytestEnabled": false, - "python.testing.unittestEnabled": true -} \ No newline at end of file From 4c5eae2ad0609058ad2bb15373f74fd55feb37b4 Mon Sep 17 00:00:00 2001 From: Tanguy Urvoy Date: Tue, 27 Sep 2022 17:33:22 +0200 Subject: [PATCH 4/7] Delete settings.json oops --- .vscode/settings.json | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9780500e..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "python.testing.unittestArgs": [ - "-v", - "-s", - "./tests", - "-p", - "*test.py" - ], - "python.testing.pytestEnabled": false, - "python.testing.unittestEnabled": true -} \ No newline at end of file From 52115f31186c5e1809c1757a40c26e609bf2e132 Mon Sep 17 00:00:00 2001 From: turvoy Date: Tue, 27 Sep 2022 17:34:11 +0200 Subject: [PATCH 5/7] ignore vcode stuff --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 8c6107d1..b1ea75c2 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,5 @@ ENV/ # OS Files .DS_Store +# vscode stuff +.vscode/ From 818be31ba8442369bec331bb0caad7b9290c78b1 Mon Sep 17 00:00:00 2001 From: turvoy Date: Wed, 28 Sep 2022 17:55:28 +0200 Subject: [PATCH 6/7] fixed incorrect name for GB detection --- .vscode/settings.json | 11 +++++++++++ sdmetrics/single_table/detection/sklearn.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff 
--git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..9780500e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "python.testing.unittestArgs": [ + "-v", + "-s", + "./tests", + "-p", + "*test.py" + ], + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true +} \ No newline at end of file diff --git a/sdmetrics/single_table/detection/sklearn.py b/sdmetrics/single_table/detection/sklearn.py index 49eecafc..38f1aa7e 100644 --- a/sdmetrics/single_table/detection/sklearn.py +++ b/sdmetrics/single_table/detection/sklearn.py @@ -79,7 +79,7 @@ class GradientBoostingDetection(ScikitLearnClassifierDetectionMetric): The output of the metric is one minus the average ROC AUC score obtained. """ - name = 'SVC Detection' + name = 'GradientBoosting Detection' @staticmethod def _get_classifier(): From f9a33a2c075b997cf8abedf9ac86fbee07c1c482 Mon Sep 17 00:00:00 2001 From: Tanguy Urvoy Date: Wed, 28 Sep 2022 18:05:42 +0200 Subject: [PATCH 7/7] Delete settings.json --- .vscode/settings.json | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9780500e..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "python.testing.unittestArgs": [ - "-v", - "-s", - "./tests", - "-p", - "*test.py" - ], - "python.testing.pytestEnabled": false, - "python.testing.unittestEnabled": true -} \ No newline at end of file