Skip to content

Commit baa1cb9

Browse files
committed
allowed to save and load Axis and Group objects in HDF files (#578) :
- added to_hdf method to Axis and Group - updated read_hdf (inout/hdf.py) - updated documentation of Session's methods - updated doctests of Session.load and Session.save - added context manager LHDFStore (utils/misc.py) refactored package inout: created one module per file extension or external object type like in pandas/io/: new modules: - common.py - pandas.py - csv.py - excel.py - hdf.py - sas.py - misc.py - pickle.py renamed modules: - excel.py --> xw_excel.py deleted modules: - array.py
1 parent 3def83b commit baa1cb9

24 files changed

+2537
-1926
lines changed

doc/source/api.rst

+10
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ Testing
7777
Axis.iscompatible
7878
Axis.equals
7979

80+
Save
81+
----
82+
83+
.. autosummary::
84+
:toctree: _generated/
85+
86+
Axis.to_hdf
87+
8088
.. _api-group:
8189

8290
Group
@@ -104,6 +112,7 @@ IGroup
104112
IGroup.startingwith
105113
IGroup.endingwith
106114
IGroup.matching
115+
IGroup.to_hdf
107116

108117
LGroup
109118
------
@@ -127,6 +136,7 @@ LGroup
127136
LGroup.startingwith
128137
LGroup.endingwith
129138
LGroup.matching
139+
LGroup.to_hdf
130140

131141
.. _api-set:
132142

doc/source/changes/version_0_29.rst.inc

+19-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,25 @@ New features
2323
Miscellaneous improvements
2424
--------------------------
2525

26-
* improved something.
26+
* saving or loading a session from a file now includes `Axis` and `Group` objects in addition to arrays
27+
(closes :issue:`578`):
28+
29+
Create a session containing axes, groups and arrays
30+
31+
>>> a, b = Axis("a=a0..a2"), Axis("b=b0..b2")
32+
>>> a01 = a['a0,a1'] >> 'a01'
33+
>>> arr1, arr2 = ndtest((a, b)), ndtest(a)
34+
>>> s = Session([('a', a), ('b', b), ('a01', a01), ('arr1', arr1), ('arr2', arr2)])
35+
36+
Saving a session will save axes, groups and arrays
37+
38+
>>> s.save('session.h5')
39+
40+
Loading a session will load axes, groups and arrays
41+
42+
>>> s2 = s.load('session.h5')
43+
>>> s2
44+
Session(arr1, arr2, a, b, a01)
2745

2846

2947
Fixes

larray/core/array.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@
6464
from larray.core.abstractbases import ABCLArray
6565
from larray.core.expr import ExprNode
6666
from larray.core.group import (Group, IGroup, LGroup, remove_nested_groups, _to_key, _to_keys,
67-
_range_to_slice, _translate_sheet_name, _translate_key_hdf)
67+
_range_to_slice, _translate_sheet_name, _translate_group_key_hdf)
6868
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
6969
from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
7070
float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
71-
renamed_to, deprecate_kwarg)
71+
renamed_to, deprecate_kwarg, LHDFStore)
7272

7373

7474
nan = np.nan
@@ -5997,7 +5997,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr
59975997
series = self.to_series(value_name, dropna is not None)
59985998
series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs)
59995999

6000-
def to_hdf(self, filepath, key, *args, **kwargs):
6000+
def to_hdf(self, filepath, key):
60016001
"""
60026002
Writes array to a HDF file.
60036003
@@ -6009,17 +6009,31 @@ def to_hdf(self, filepath, key, *args, **kwargs):
60096009
filepath : str
60106010
Path where the hdf file has to be written.
60116011
key : str or Group
6012-
Name of the array within the HDF file.
6013-
*args
6014-
**kargs
6012+
Key (path) of the array within the HDF file (see Notes below).
6013+
6014+
Notes
6015+
-----
6016+
Objects stored in a HDF file can be grouped together in `HDF groups`.
6017+
If an object 'my_obj' is stored in a HDF group 'my_group',
6018+
the key associated with this object is then 'my_group/my_obj'.
6019+
Be aware that a HDF group can have subgroups.
60156020
60166021
Examples
60176022
--------
60186023
>>> a = ndtest((2, 3))
6019-
>>> a.to_hdf('test.h5', 'a') # doctest: +SKIP
6024+
6025+
Save an array
6026+
6027+
>>> a.to_hdf('test.h5', 'a') # doctest: +SKIP
6028+
6029+
Save an array in a specific HDF group
6030+
6031+
>>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP
60206032
"""
6021-
key = _translate_key_hdf(key)
6022-
self.to_frame().to_hdf(filepath, key, *args, **kwargs)
6033+
key = _translate_group_key_hdf(key)
6034+
with LHDFStore(filepath) as store:
6035+
store.put(key, self.to_frame())
6036+
store.get_storer(key).attrs.type = 'Array'
60236037

60246038
@deprecate_kwarg('sheet_name', 'sheet')
60256039
def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=False, clear_sheet=False,
@@ -6085,7 +6099,7 @@ def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=Fals
60856099
engine = 'xlwings' if xw is not None else None
60866100

60876101
if engine == 'xlwings':
6088-
from larray.inout.excel import open_excel
6102+
from larray.inout.xw_excel import open_excel
60896103

60906104
close = False
60916105
new_workbook = False
@@ -7022,7 +7036,7 @@ def aslarray(a):
70227036
elif hasattr(a, '__larray__'):
70237037
return a.__larray__()
70247038
elif isinstance(a, pd.DataFrame):
7025-
from larray.inout.array import from_frame
7039+
from larray.inout.pandas import from_frame
70267040
return from_frame(a)
70277041
else:
70287042
return LArray(a)

larray/core/axis.py

+54-2
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
from itertools import product
88

99
import numpy as np
10+
import pandas as pd
1011

1112
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray
1213
from larray.core.expr import ExprNode
1314
from larray.core.group import (Group, LGroup, IGroup, IGroupMaker, _to_tick, _to_ticks, _to_key, _seq_summary,
14-
_contain_group_ticks, _seq_group_to_name)
15+
_contain_group_ticks, _seq_group_to_name, _translate_group_key_hdf)
1516
from larray.util.oset import *
1617
from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
17-
renamed_to, common_type)
18+
renamed_to, common_type, LHDFStore)
1819

1920
__all__ = ['Axis', 'AxisCollection', 'X', 'x']
2021

@@ -1194,6 +1195,57 @@ def align(self, other, join='outer'):
11941195
other = Axis(other)
11951196
return other
11961197

1198+
def to_hdf(self, filepath, key=None):
    """
    Writes axis to a HDF file.

    The labels of the axis are stored as a pandas Series named after the axis.
    Several axes can live in the same HDF file, each one identified by its own 'key'.

    Parameters
    ----------
    filepath : str
        Path where the hdf file has to be written.
    key : str or Group, optional
        Key (path) of the axis within the HDF file (see Notes below).
        If None, the name of the axis is used.
        Defaults to None.

    Raises
    ------
    ValueError
        If no key is given and the axis is anonymous (its name is None).

    Notes
    -----
    Objects stored in a HDF file can be grouped together in `HDF groups`.
    If an object 'my_obj' is stored in a HDF group 'my_group',
    the key associated with this object is then 'my_group/my_obj'.
    Be aware that a HDF group can have subgroups.

    Examples
    --------
    >>> a = Axis("a=a0..a2")

    Save axis

    >>> # by default, the key is the name of the axis
    >>> a.to_hdf('test.h5')             # doctest: +SKIP

    Save axis with a specific key

    >>> a.to_hdf('test.h5', 'a')        # doctest: +SKIP

    Save axis in a specific HDF group

    >>> a.to_hdf('test.h5', 'axes/a')   # doctest: +SKIP
    """
    # an anonymous axis has no name to fall back on, so the caller must supply the key
    if key is None and self.name is None:
        raise ValueError("Argument key must be provided explicitly in case of anonymous axis")
    hdf_key = _translate_group_key_hdf(self.name if key is None else key)
    labels = pd.Series(data=self.labels, name=self.name)
    with LHDFStore(filepath) as store:
        store.put(hdf_key, labels)
        # tag the stored node with its kind and with the wildcard flag of the axis
        storer = store.get_storer(hdf_key)
        storer.attrs.type = 'Axis'
        storer.attrs.wildcard = self.iswildcard
1248+
11971249

11981250
def _make_axis(obj):
11991251
if isinstance(obj, Axis):

larray/core/group.py

+77-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111

1212
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray
1313
from larray.util.oset import *
14-
from larray.util.misc import basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary, renamed_to
14+
from larray.util.misc import (basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary,
15+
renamed_to, LHDFStore)
1516

1617
__all__ = ['Group', 'LGroup', 'LSet', 'IGroup', 'union']
1718

@@ -652,7 +653,7 @@ def _translate_sheet_name(sheet_name):
652653
_key_hdf_pattern = re.compile('[\\\/]')
653654

654655

655-
def _translate_key_hdf(key):
656+
def _translate_group_key_hdf(key):
    """
    Converts a Group used as HDF key into a string.

    A Group is turned into its tick (via `_to_tick`), converted to str, and every
    backslash or forward slash in it is replaced by an underscore.
    Any other kind of key is returned untouched.
    """
    if not isinstance(key, Group):
        return key
    tick = str(_to_tick(key))
    return _key_hdf_pattern.sub('_', tick)
@@ -1275,6 +1276,80 @@ def containing(self, substring):
12751276
substring = substring.eval()
12761277
return LGroup([v for v in self.eval() if substring in v], axis=self.axis)
12771278

1279+
def to_hdf(self, filepath, key=None, axis_key=None):
    """
    Writes group to a HDF file.

    A HDF file can contain multiple groups.
    The 'key' parameter is a unique identifier for the group.
    The 'axis_key' parameter is the unique identifier for the associated axis.
    The associated axis will be saved if not already present in the HDF file.

    Parameters
    ----------
    filepath : str
        Path where the hdf file has to be written.
    key : str or Group, optional
        Key (path) of the group within the HDF file (see Notes below).
        If None, the name of the group is used.
        Defaults to None.
    axis_key : str, optional
        Key (path) of the associated axis in the HDF file (see Notes below).
        If None, the name of the axis associated with the group is used.
        Defaults to None.

    Raises
    ------
    ValueError
        If `key` is None and the group is anonymous, or if `axis_key` is None
        and the associated axis is anonymous.

    Notes
    -----
    Objects stored in a HDF file can be grouped together in `HDF groups`.
    If an object 'my_obj' is stored in a HDF group 'my_group',
    the key associated with this object is then 'my_group/my_obj'.
    Be aware that a HDF group can have subgroups.

    Examples
    --------
    >>> from larray import Axis
    >>> a = Axis("a=a0..a2")
    >>> a.to_hdf('test.h5')                    # doctest: +SKIP
    >>> a01 = a['a0,a1'] >> 'a01'

    Save group

    >>> # by default, the key is the name of the group
    >>> # and axis_key the name of the associated axis
    >>> a01.to_hdf('test.h5')                  # doctest: +SKIP

    Save group with a specific key

    >>> a01.to_hdf('test.h5', 'a_01')          # doctest: +SKIP

    Save group in a specific HDF group

    >>> a01.to_hdf('test.h5', 'groups/a01')    # doctest: +SKIP

    The associated axis is saved with the group if not already present in the HDF file

    >>> b = Axis("b=b0..b2")
    >>> b01 = b['b0,b1'] >> 'b01'
    >>> # save both the group 'b01' and the associated axis 'b'
    >>> b01.to_hdf('test.h5')                  # doctest: +SKIP
    """
    # resolve both keys (and fail) before touching the file
    if key is None:
        if self.name is None:
            raise ValueError("Argument key must be provided explicitly in case of anonymous group")
        key = self.name
    key = _translate_group_key_hdf(key)
    if axis_key is None:
        # NOTE(review): assumes self.axis is an Axis object (not None) -- confirm against callers
        if self.axis.name is None:
            raise ValueError("Argument axis_key must be provided explicitly if the associated axis is anonymous")
        axis_key = self.axis.name
    s = pd.Series(data=self.eval(), name=self.name)
    with LHDFStore(filepath) as store:
        store.put(key, s)
        store.get_storer(key).attrs.type = 'Group'
        # the axis is written only once: an axis already stored under axis_key is left as is
        if axis_key not in store:
            # the open store is passed in place of a file path
            # (presumably LHDFStore accepts an already opened store -- TODO confirm)
            self.axis.to_hdf(store, key=axis_key)
        # remember under which key the associated axis can be found
        store.get_storer(key).attrs.axis_key = axis_key
1352+
12781353
# this makes range(LGroup(int)) possible
12791354
def __index__(self):
12801355
return self.eval().__index__()

0 commit comments

Comments
 (0)