Skip to content

Commit 9dfda50

Browse files
all tests pass
1 parent c116e0f commit 9dfda50

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+1015
-239
lines changed

.gitignore

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ __pycache__
33
*.py[cod]
44
.cache
55
.ipynb_checkpoints
6-
everywhereml/project
7-
serve.py
8-
test
9-
notebook.html
6+
./enumerate_packages.py
7+
./enumerate_templates.py
8+
./publish
9+
./setup_template.py
10+
./test

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ The package implements most of the tools you need to develop a fully functional
2323

2424
[X] Data loading and visualization
2525
[X] Preprocessing
26-
[X] Pipeline
26+
[] Pipeline
2727
[X] BoxCox (power transform)
2828
[X] CrossDiff
2929
[X] MinMaxScaler
@@ -32,7 +32,9 @@ The package implements most of the tools you need to develop a fully functional
3232
[X] RateLimit
3333
[X] StandardScaler
3434
[X] YeoJohnson (power transform)
35-
[] Feature selection
35+
[] Audio
36+
[] MelSpectrogram
37+
[X] Feature selection
3638
[X] RFE
3739
[X] SelectKBest
3840
[] Time series analysis
@@ -45,7 +47,7 @@ The package implements most of the tools you need to develop a fully functional
4547
[X] LogisticRegression
4648
[X] GaussianNB
4749
[] BernoulliNB
48-
[X] SVM
50+
[] SVM (not tested)
4951
[] LinearSVM
5052
[X] DecisionTree
5153
[X] XGBoost

enumerate_packages.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import os
2+
import json
3+
from os.path import isdir
4+
from glob import glob
5+
6+
7+
def package_name(folder):
    """
    Convert a folder path into a dotted package name
    :param folder: str folder path using the OS path separator, with or without trailing separators
    :return: str the same path with separators replaced by dots and no trailing dot
    """
    # strip EVERY trailing separator: removing only one would turn "a/b//" into "a.b."
    folder = folder.rstrip(os.path.sep)

    return folder.replace(os.path.sep, '.')
12+
13+
14+
if __name__ == '__main__':
    # walk everything below everywhereml/ and keep only real folders,
    # leaving out the bytecode cache directories
    packages = []

    for folder in glob('everywhereml/**', recursive=True):
        if not isdir(folder) or '__pycache__' in folder:
            continue

        packages.append(package_name(folder))

    # print the JSON list with slashes and double quotes backslash-escaped
    # NOTE(review): presumably so the output can be pasted into a shell/sed command - confirm
    encoded = json.dumps(packages)
    escaped = encoded.replace('/', '\\/').replace('"', '\\"')

    print(escaped)

enumerate_templates.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import json
2+
from glob import glob
3+
4+
5+
if __name__ == '__main__':
    # find every Jinja template below everywhereml/templates/
    # and drop the 'everywhereml/' part from each path
    templates = []

    for filename in glob('everywhereml/templates/**/*.jinja', recursive=True):
        templates.append(filename.replace('everywhereml/', ''))

    # print the JSON list with slashes and double quotes backslash-escaped
    # NOTE(review): presumably so the output can be pasted into a shell/sed command - confirm
    encoded = json.dumps(templates)
    escaped = encoded.replace('/', '\\/').replace('"', '\\"')

    print(escaped)

everywhereml/IsPortableMixin.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
from everywhereml.templates import Jinja
23

34

@@ -17,18 +18,27 @@ def port(self, language, data=None, **kwargs):
1718
template_data.update(self.get_template_data_for_language(language))
1819
template_data.update(data or {})
1920

20-
for k, v in self.get_default_template_data_for_language(language).items():
21+
# kwargs first
22+
for k, v in kwargs.items():
2123
template_data.setdefault(k, v)
2224

23-
for k, v in self.get_default_template_data().items():
25+
# then language-specific data
26+
for k, v in self.get_default_template_data_for_language(language).items():
2427
template_data.setdefault(k, v)
2528

26-
for k, v in kwargs.items():
29+
# then default data
30+
for k, v in self.get_default_template_data().items():
2731
template_data.setdefault(k, v)
2832

2933
# ALWAYS inject these values
3034
template_data.update(uuid='UUID%d' % id(self))
3135
template_data.update(source_class=self.__module__.__str__())
36+
template_data.update(language=language)
37+
38+
# replace NaNs with 0 and inf with a large number
39+
for k, v in template_data.items():
40+
if isinstance(v, np.ndarray):
41+
template_data.update(**{k: np.nan_to_num(v)})
3242

3343
ported = Jinja('', language=language, dialect=kwargs.get('dialect', None)).render(template_name, template_data)
3444

everywhereml/classification/BaseClassifier.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
from cached_property import cached_property
12
from sklearn.model_selection import KFold
23
from everywhereml.IsPortableMixin import IsPortableMixin
34
from everywhereml.classification.utils import to_Xy
4-
from everywhereml.templates import Jinja
55
from everywhereml.data.plot import plot_confusion_matrix
6+
from everywhereml.classification.MakesBinaryDecisionMixin import MakesBinaryDecisionMixin
7+
from everywhereml.classification.MakesBinaryComplementDecisionMixin import MakesBinaryComplementDecisionMixin
68

79

810
class BaseClassifier(IsPortableMixin):
@@ -29,17 +31,17 @@ def num_classes(self):
2931

3032
return len(set(self.y_train))
3133

32-
@property
34+
@cached_property
def package_name(self):
    """
    Get base package name
    :return: str first dotted segment of this object's module, once the
        'everywhereml.classification.' part is removed, with its first letter uppercased
    """
    module_path = self.__module__.__str__()
    relative_path = module_path.replace('everywhereml.classification.', '')
    base = relative_path.split('.')[0]

    # uppercase the first letter only, leave the rest untouched
    return base[0].upper() + base[1:]
4143

42-
@property
44+
@cached_property
4345
def classname(self):
4446
"""
4547
Get class name
@@ -49,7 +51,7 @@ def classname(self):
4951

5052
return classname[0].upper() + classname[1:]
5153

52-
@property
54+
@cached_property
5355
def packages(self):
5456
"""
5557
Get package names
@@ -59,6 +61,15 @@ def packages(self):
5961

6062
return [package[0].upper() + package[1:] for package in packages]
6163

64+
@property
def binary_complement(self):
    """
    Tell if the binary classification labels come out swapped
    Some implementations output swapped labels for binary classification:
    when this property is True, the templates fix the return value
    :return: bool always False here (the default)
    """
    swapped = False

    return swapped
72+
6273
def clone(self):
6374
"""
6475
Clone classifier
@@ -147,18 +158,20 @@ def port(self, language, classname=None, classmap=None, data=None, **kwargs):
147158

148159
def get_default_template_data(self):
    """
    Get default data for templates
    :return: dict number of inputs, number of classes, package name
        and the two flags telling which binary-decision mixins this object implements
    """
    defaults = dict(
        num_inputs=self.num_inputs,
        num_classes=self.num_classes,
        package_name=self.package_name,
    )

    # marker mixins are detected with isinstance, they carry no data
    defaults['makes_binary_decision'] = isinstance(self, MakesBinaryDecisionMixin)
    defaults['makes_binary_complement_decision'] = isinstance(self, MakesBinaryComplementDecisionMixin)

    return defaults
158171

159172
def get_default_template_data_php(self, **kwargs):
160173
"""
161-
174+
Get default data for PHP templates
162175
:param kwargs:
163176
:return:
164177
"""
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from everywhereml.classification.MakesBinaryDecisionMixin import MakesBinaryDecisionMixin
2+
3+
4+
class MakesBinaryComplementDecisionMixin(MakesBinaryDecisionMixin):
    """
    Marker mixin for classes that, for binary classification, produce a label
    based on `decision > threshold ? 0 : 1`
    It adds no attributes or methods of its own
    """
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
class MakesBinaryDecisionMixin:
    """
    Marker mixin for classes that, for binary classification, produce a label
    based on `decision > threshold ? 1 : 0`
    It adds no attributes or methods of its own
    """

everywhereml/classification/sklearn/linear_model/LogisticRegression.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
11
from sklearn.linear_model import LogisticRegression as SklearnClassifier
2+
from everywhereml.classification.MakesBinaryDecisionMixin import MakesBinaryDecisionMixin
23
from everywhereml.classification.sklearn.SklearnBaseClassifier import SklearnBaseClassifier
34

45

5-
class LogisticRegression(SklearnBaseClassifier, SklearnClassifier):
6+
class LogisticRegression(MakesBinaryDecisionMixin, SklearnBaseClassifier, SklearnClassifier):
67
"""
78
sklearn.linear_model.LogisticRegression wrapper
89
"""
10+
11+
@property
def binary_complement(self):
    """
    Tell if the binary classification labels come out swapped
    @see parent
    :return: bool always True for this classifier
    """
    # NOTE(review): the class mixes in MakesBinaryDecisionMixin (not the Complement variant)
    # while this flag is True - confirm the two are meant to coexist
    swapped = True

    return swapped
18+
919
def get_template_data(self):
1020
"""
1121
Get additional data for template

everywhereml/classification/utils.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
import numpy as np
22

33

4-
def to_Xy(X, y=None):
4+
def to_Xy(X, y=None, allow_y_none=False):
    """
    Convert X, y from different formats to X, y arrays
    :param X: array-like of samples, or an object exposing an `X` attribute (and optionally `y`)
    :param y: array-like of labels or None; when None and X exposes `y`, that one is used
    :param allow_y_none: bool if True, don't raise exception if y is None
    :return: tuple first element is X array, second element is y array (uint8),
        or None when y is missing and allow_y_none is True
    :raises AssertionError: if X is None, if y is None and not allowed, or if lengths differ
    """
    # X is a Dataset object
    if hasattr(X, "X"):
        # explicit None check: `y or ...` raises ValueError on multi-element numpy arrays
        # and would silently discard an explicitly passed empty y
        if y is None:
            y = getattr(X, "y", None)

        X = X.X

    # explicit raises (instead of assert statements) so the checks survive `python -O`
    if y is None and not allow_y_none:
        raise AssertionError("y CANNOT be None")

    if X is None:
        raise AssertionError("X CANNOT be None")

    if y is not None and len(X) != len(y):
        raise AssertionError("X and y MUST have the same length")

    # np.asarray(None, dtype=np.uint8) raises TypeError, so a missing y is passed through as None
    y_array = None if y is None else np.asarray(y, dtype=np.uint8)

    return np.asarray(X), y_array

0 commit comments

Comments
 (0)