Skip to content

Commit bcfdcdc

Browse files
loaders fully tested
1 parent a04402d commit bcfdcdc

File tree

19 files changed

+47
-490
lines changed

19 files changed

+47
-490
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
venv
22
__pycache__
33
*.py[cod]
4-
.cache
4+
.cache
5+
.ipynb_checkpoints
6+
everywhereml/project

everywhereml/data/loaders/FolderLoader.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ class FolderLoader(BaseLoader):
1313
"""
1414
Load data from each file inside a folder
1515
"""
16-
def __init__(self, folder, test=None, max_depth=0, classmap=None, target_column=None, **kwargs):
16+
def __init__(self, folder, test=r'\.(txt|csv|tsv|TXT|CSV|TSV)$', ignore=None, max_depth=0, classmap=None, target_column=None, **kwargs):
1717
"""
1818
Constructor
1919
:param folder: str path to the folder to load files from
20-
:param test: str|callable filter for filenames (regex or callable)
20+
:param test: str|callable only load files that pass the test (regex or callable)
21+
:param ignore: str|callable ignore files that pass the test (regex or callable)
2122
:param target_column: None|int|str if None, target is inferred from filename
2223
:param max_depth: int how many levels of the folder tree to traverse
2324
:param classmap: dict a mapping from class indices to class names
@@ -26,7 +27,7 @@ def __init__(self, folder, test=None, max_depth=0, classmap=None, target_column=
2627
assert os.path.isdir(folder), '%s MUST be a folder' % folder
2728

2829
self.root = os.path.abspath(folder)
29-
self.files = sorted(list(self.walk(self.root, max_depth, test=test)))
30+
self.files = sorted(list(self.walk(self.root, max_depth, test=test, ignore=ignore)))
3031

3132
assert len(self.files) > 0, 'no file to read'
3233

@@ -44,12 +45,13 @@ def __init__(self, folder, test=None, max_depth=0, classmap=None, target_column=
4445

4546
self.dataset = Dataset(X, y, columns=datasets[0].columns, classmap=classmap)
4647

47-
def walk(self, folder, max_depth=-1, test=None, root=None):
48+
def walk(self, folder, max_depth=-1, test=None, ignore=None, root=None):
4849
"""
4950
Recursively walk a directory
5051
:param folder: str
5152
:param max_depth: int
5253
:param test: callable|regex
54+
:param ignore: callable|regex
5355
:param root: str
5456
:return: Iterator
5557
"""
@@ -58,29 +60,42 @@ def walk(self, folder, max_depth=-1, test=None, root=None):
5860
if root is None:
5961
root = folder
6062

63+
# make test a function
6164
if isinstance(test, str):
6265
# filter is a regex
63-
regex = re.compile(test)
66+
test_regex = re.compile(test)
6467

6568
def test(filename):
66-
return regex.search(filename) is not None
69+
return test_regex.search(filename) is not None
6770
elif not callable(test):
6871
# dummy filter
6972
def test(filename):
7073
return True
7174

75+
# make ignore a function
76+
if isinstance(ignore, str):
77+
# ignore is a regex
78+
ignore_regex = re.compile(ignore)
79+
80+
def ignore(filename):
81+
return ignore_regex.search(filename) is not None
82+
elif not callable(ignore):
83+
# dummy ignore
84+
def ignore(filename):
    """
    Fallback ignore rule used when no `ignore` regex/callable is supplied
    :param filename: str
    :return: bool always False, so that no file is ever ignored
    """
    # NOTE: this must be bound to the name `ignore`, not `test`; naming it
    # `test` overwrites the include filter with a reject-everything function
    # and leaves `ignore` non-callable
    return False
86+
7287
for entry in scandir(folder):
7388
if entry.is_dir():
7489
queue.append(entry)
7590
else:
76-
if test(entry.path[len(root) + 1:]):
91+
if test(entry.path[len(root) + 1:]) and not ignore(entry.path[len(root) + 1:]):
7792
yield entry.path
7893

7994
if max_depth == 0:
8095
return
8196

8297
for q in queue:
83-
for file in self.walk(q, max_depth=max_depth-1, test=test, root=root):
98+
for file in self.walk(q, max_depth=max_depth-1, test=test, ignore=ignore, root=root):
8499
yield file
85100

86101
def to_class_name(self, filename):

everywhereml/project/Board.py

Lines changed: 0 additions & 17 deletions
This file was deleted.

everywhereml/project/Config.py

Lines changed: 0 additions & 114 deletions
This file was deleted.

everywhereml/project/Logger.py

Lines changed: 0 additions & 17 deletions
This file was deleted.

everywhereml/project/Project.py

Lines changed: 0 additions & 64 deletions
This file was deleted.

everywhereml/project/Serial.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

everywhereml/project/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

everywhereml/project/prompts/Prompt.py

Lines changed: 0 additions & 60 deletions
This file was deleted.

0 commit comments

Comments
 (0)