Skip to content

Commit 9387165

Browse files
committed
fix #578 : allowed to save and load all Axis and Group objects of a session in/from HDF, CSV and EXCEL files
1 parent ced40ac commit 9387165

File tree

13 files changed

+548
-182
lines changed

13 files changed

+548
-182
lines changed

doc/source/changes/version_0_29.rst.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ Miscellaneous improvements
4343
>>> s2
4444
Session(arr1, arr2, a, b, a01)
4545

46+
Note: all axes (groups) of a session are stored in the same CSV file/Excel sheet/HDF group
47+
named __axes__ (__groups__).
48+
4649
* added examples for `read_excel` and `read_hdf` functions (closes :issue:`617`).
4750

4851

larray/core/axis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray
1313
from larray.core.expr import ExprNode
1414
from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_tick, _to_ticks, _to_key, _seq_summary,
15-
_contain_group_ticks, _seq_group_to_name, _translate_group_key_hdf)
15+
_seq_group_to_name, _translate_group_key_hdf)
1616
from larray.util.oset import *
1717
from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
1818
renamed_to, common_type, LHDFStore)

larray/core/group.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,6 +1376,14 @@ def __getattr__(self, key):
13761376
else:
13771377
return getattr(self.eval(), key)
13781378

1379+
# needed to make *un*pickling work (because otherwise, __getattr__ is called before .key exists, which leads to
1380+
# an infinite recursion)
1381+
def __getstate__(self):
1382+
return self.__dict__
1383+
1384+
def __setstate__(self, d):
1385+
self.__dict__ = d
1386+
13791387
def __hash__(self):
13801388
# to_tick & to_key are partially opposite operations but this standardize on a single notation so that they can
13811389
# all target each other. eg, this removes spaces in "list strings", instead of hashing them directly

larray/core/session.py

Lines changed: 69 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
from larray.core.axis import Axis
1313
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence, aslarray
1414
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop
15-
from larray.inout.session import check_pattern, handler_classes, ext_default_engine
15+
from larray.inout.session import ext_default_engine, get_file_handler
16+
17+
18+
def check_pattern(k, pattern):
19+
return k.startswith(pattern)
1620

1721

1822
# XXX: inherit from OrderedDict or LArray?
@@ -253,8 +257,11 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
253257
_, ext = os.path.splitext(fname)
254258
ext = ext.strip('.') if '.' in ext else 'csv'
255259
engine = ext_default_engine[ext]
256-
handler_cls = handler_classes[engine]
257-
handler = handler_cls(fname)
260+
handler_cls = get_file_handler(engine)
261+
if engine == 'pandas_csv' and 'sep' in kwargs:
262+
handler = handler_cls(fname, kwargs['sep'])
263+
else:
264+
handler = handler_cls(fname)
258265
objects = handler.read_items(names, display=display, **kwargs)
259266
for k, v in objects.items():
260267
self[k] = v
@@ -275,11 +282,15 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
275282
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
276283
Dump using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
277284
overwrite: bool, optional
278-
Whether or not to overwrite an existing file, if any. Ignored for CSV files. If False, file is updated.
279-
Defaults to True.
285+
Whether or not to overwrite an existing file, if any. Ignored for CSV files and 'pandas_excel' engine.
286+
If False, file is updated. Defaults to True.
280287
display : bool, optional
281288
Whether or not to display which file is being worked on. Defaults to False.
282289
290+
Notes
291+
-----
292+
See Notes section from :py:meth:`~Session.to_csv` and :py:meth:`~Session.to_excel`.
293+
283294
Examples
284295
--------
285296
>>> # axes
@@ -309,12 +320,12 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
309320
_, ext = os.path.splitext(fname)
310321
ext = ext.strip('.') if '.' in ext else 'csv'
311322
engine = ext_default_engine[ext]
312-
handler_cls = handler_classes[engine]
313-
handler = handler_cls(fname, overwrite)
314-
if engine != 'pandas_hdf':
315-
items = self.filter(kind=LArray).items()
323+
handler_cls = get_file_handler(engine)
324+
if engine == 'pandas_csv' and 'sep' in kwargs:
325+
handler = handler_cls(fname, overwrite, kwargs['sep'])
316326
else:
317-
items = self.items()
327+
handler = handler_cls(fname, overwrite)
328+
items = self.items()
318329
if names is not None:
319330
names_set = set(names)
320331
items = [(k, v) for k, v in items if k in names_set]
@@ -387,7 +398,7 @@ def to_globals(self, names=None, depth=0, warn=True, inplace=False):
387398

388399
def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs):
389400
"""
390-
Dumps all array objects from the current session to a file using pickle.
401+
Dumps LArray, Axis and Group objects from the current session to a file using pickle.
391402
392403
WARNING: never load a pickle file (.pkl or .pickle) from an untrusted source, as it can lead to arbitrary code
393404
execution.
@@ -397,7 +408,8 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs):
397408
fname : str
398409
Path for the dump.
399410
names : list of str or None, optional
400-
List of names of objects to dump. Defaults to all objects present in the Session.
411+
Names of LArray/Axis/Group objects to dump.
412+
Defaults to all objects present in the Session.
401413
overwrite: bool, optional
402414
Whether or not to overwrite an existing file, if any.
403415
If False, file is updated. Defaults to True.
@@ -406,16 +418,21 @@ def to_pickle(self, fname, names=None, overwrite=True, display=False, **kwargs):
406418
407419
Examples
408420
--------
409-
>>> arr1, arr2, arr3 = ndtest((2, 2)), ndtest(4), ndtest((3, 2)) # doctest: +SKIP
410-
>>> s = Session([('arr1', arr1), ('arr2', arr2), ('arr3', arr3)]) # doctest: +SKIP
421+
>>> # axes
422+
>>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP
423+
>>> # groups
424+
>>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP
425+
>>> # arrays
426+
>>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP
427+
>>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP
411428
412429
Save all objects
413430
414431
>>> s.to_pickle('output.pkl') # doctest: +SKIP
415432
416-
Save only some arrays
433+
Save only some objects
417434
418-
>>> s.to_pickle('output.pkl', ['arr1', 'arr3']) # doctest: +SKIP
435+
>>> s.to_pickle('output.pkl', ['a', 'b', 'arr1']) # doctest: +SKIP
419436
"""
420437
self.save(fname, names, ext_default_engine['pkl'], overwrite, display, **kwargs)
421438

@@ -462,61 +479,85 @@ def to_hdf(self, fname, names=None, overwrite=True, display=False, **kwargs):
462479

463480
def to_excel(self, fname, names=None, overwrite=True, display=False, **kwargs):
464481
"""
465-
Dumps all array objects from the current session to an Excel file.
482+
Dumps LArray, Axis and Group objects from the current session to an Excel file.
466483
467484
Parameters
468485
----------
469486
fname : str
470-
Path for the dump.
487+
Path of the file for the dump.
471488
names : list of str or None, optional
472-
List of names of objects to dump. Defaults to all objects present in the Session.
489+
Names of LArray/Axis/Group objects to dump.
490+
Defaults to all objects present in the Session.
473491
overwrite: bool, optional
474492
Whether or not to overwrite an existing file, if any. If False, file is updated. Defaults to True.
475493
display : bool, optional
476494
Whether or not to display which file is being worked on. Defaults to False.
477495
496+
Notes
497+
-----
498+
- each array is saved in a separate sheet
499+
- all Axis objects are saved together in the same sheet named __axes__
500+
- all Group objects are saved together in the same sheet named __groups__
501+
478502
Examples
479503
--------
480-
>>> arr1, arr2, arr3 = ndtest((2, 2)), ndtest(4), ndtest((3, 2)) # doctest: +SKIP
481-
>>> s = Session([('arr1', arr1), ('arr2', arr2), ('arr3', arr3)]) # doctest: +SKIP
504+
>>> # axes
505+
>>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP
506+
>>> # groups
507+
>>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP
508+
>>> # arrays
509+
>>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP
510+
>>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP
482511
483512
Save all objects
484513
485514
>>> s.to_excel('output.xlsx') # doctest: +SKIP
486515
487-
Save only some arrays
516+
Save only some objects
488517
489-
>>> s.to_excel('output.xlsx', ['arr1', 'arr3']) # doctest: +SKIP
518+
>>> s.to_excel('output.xlsx', ['a', 'b', 'arr1']) # doctest: +SKIP
490519
"""
491520
self.save(fname, names, ext_default_engine['xlsx'], overwrite, display, **kwargs)
492521

493522
dump_excel = renamed_to(to_excel, 'dump_excel')
494523

495524
def to_csv(self, fname, names=None, display=False, **kwargs):
496525
"""
497-
Dumps all array objects from the current session to CSV files.
526+
Dumps LArray, Axis and Group objects from the current session to CSV files.
498527
499528
Parameters
500529
----------
501530
fname : str
502531
Path for the directory that will contain CSV files.
503532
names : list of str or None, optional
504-
List of names of objects to dump. Defaults to all objects present in the Session.
533+
Names of LArray/Axis/Group objects to dump.
534+
Defaults to all objects present in the Session.
505535
display : bool, optional
506536
Whether or not to display which file is being worked on. Defaults to False.
507537
538+
Notes
539+
-----
540+
- each array is saved in a separate file
541+
- all Axis objects are saved together in the same CSV file named __axes__.csv
542+
- all Group objects are saved together in the same CSV file named __groups__.csv
543+
508544
Examples
509545
--------
510-
>>> arr1, arr2, arr3 = ndtest((2, 2)), ndtest(4), ndtest((3, 2)) # doctest: +SKIP
511-
>>> s = Session([('arr1', arr1), ('arr2', arr2), ('arr3', arr3)]) # doctest: +SKIP
546+
>>> # axes
547+
>>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2") # doctest: +SKIP
548+
>>> # groups
549+
>>> a01 = a['a0,a1'] >> 'a01' # doctest: +SKIP
550+
>>> # arrays
551+
>>> arr1, arr2 = ndtest((a, b)), ndtest(a) # doctest: +SKIP
552+
>>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)]) # doctest: +SKIP
512553
513554
Save all objects
514555
515556
>>> s.to_csv('./Output') # doctest: +SKIP
516557
517558
Save only some objects
518559
519-
>>> s.to_csv('./Output', ['arr1', 'arr3']) # doctest: +SKIP
560+
>>> s.to_csv('./Output', ['a', 'b', 'arr1']) # doctest: +SKIP
520561
"""
521562
self.save(fname, names, ext_default_engine['csv'], display=display, **kwargs)
522563

larray/inout/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@
66
from larray.inout.excel import *
77
from larray.inout.hdf import *
88
from larray.inout.sas import *
9+
from larray.inout.pickle import *
910
from larray.inout.xw_excel import *

larray/inout/common.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import os
44
from collections import OrderedDict
55

6-
from larray.core.axis import Axis
7-
from larray.core.group import Group
86
from larray.core.array import LArray
97

108

@@ -23,9 +21,6 @@ def _get_index_col(nb_axes=None, index_col=None, wide=True):
2321
return index_col
2422

2523

26-
_allowed_types = (LArray, Axis, Group)
27-
28-
2924
class FileHandler(object):
3025
"""
3126
Abstract class defining the methods for "file handler" subclasses.
@@ -51,16 +46,18 @@ def _open_for_read(self):
5146
def _open_for_write(self):
5247
raise NotImplementedError()
5348

54-
def list(self):
49+
def list_items(self):
5550
"""
56-
Returns the list of objects' names.
51+
Return list containing pairs (name, type) for all stored objects
5752
"""
5853
raise NotImplementedError()
5954

60-
def _read_item(self, key, *args, **kwargs):
55+
def _read_item(self, key, type, *args, **kwargs):
56+
"""Read item"""
6157
raise NotImplementedError()
6258

63-
def _dump(self, key, value, *args, **kwargs):
59+
def _dump_item(self, key, value, *args, **kwargs):
60+
"""Dump item. Raises a TypeError if the type of the value is not handled by the FileHandler subclass."""
6461
raise NotImplementedError()
6562

6663
def save(self):
@@ -111,14 +108,15 @@ def read_items(self, keys, *args, **kwargs):
111108
display = kwargs.pop('display', False)
112109
ignore_exceptions = kwargs.pop('ignore_exceptions', False)
113110
self._open_for_read()
111+
key_types = self.list_items()
112+
if keys is not None:
113+
key_types = [(key, type) for key, type in key_types if key in keys]
114114
res = OrderedDict()
115-
if keys is None:
116-
keys = self.list()
117-
for key in keys:
115+
for key, type in key_types:
118116
if display:
119-
print("loading", key, "...", end=' ')
117+
print("loading", type, "object", key, "...", end=' ')
120118
try:
121-
key, item = self._read_item(key, *args, **kwargs)
119+
key, item = self._read_item(key, type, *args, **kwargs)
122120
res[key] = item
123121
except Exception:
124122
if not ignore_exceptions:
@@ -142,17 +140,20 @@ def dump_items(self, key_values, *args, **kwargs):
142140
display = kwargs.pop('display', False)
143141
self._get_original_file_name()
144142
self._open_for_write()
145-
key_values = [(k, v) for k, v in key_values if isinstance(v, _allowed_types)]
146143
for key, value in key_values:
147144
if isinstance(value, LArray) and value.ndim == 0:
148145
if display:
149146
print('Cannot dump {}. Dumping 0D arrays is currently not supported.'.format(key))
150147
continue
151-
if display:
152-
print("dumping", key, "...", end=' ')
153-
self._dump(key, value, *args, **kwargs)
154-
if display:
155-
print("done")
148+
try:
149+
if display:
150+
print("dumping", key, "...", end=' ')
151+
self._dump_item(key, value, *args, **kwargs)
152+
if display:
153+
print("done")
154+
except TypeError:
155+
if display:
156+
print("Cannot dump {}. {} is not a supported type".format(key, type(value).__name__))
156157
self.save()
157158
self.close()
158-
self._update_original_file()
159+
self._update_original_file()

0 commit comments

Comments
 (0)