Skip to content

Commit 436fed2

Browse files
committed
If you want commas I give you commas #18
1 parent ac96209 commit 436fed2

File tree

4 files changed

+86
-13
lines changed

4 files changed

+86
-13
lines changed

pytsa/commentedfile.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
class CommentedFile(file):
22
""" this class skips comment lines. comment lines start with any of the symbols in commentstring """
3-
def __init__(self, f, commentstring=None, low_limit=-float('inf'), high_limit=float('inf'), every=None):
3+
def __init__(self, f, commentstring=None, low_limit=-float('inf'), high_limit=float('inf'), \
4+
every=None, convert_comma=None):
45
self.f = f
56
if commentstring is None:
67
self.comments = '\n'
@@ -9,6 +10,7 @@ def __init__(self, f, commentstring=None, low_limit=-float('inf'), high_limit=fl
910
self.l_limit = low_limit
1011
self.h_limit = high_limit
1112
self.numrows = self.line(f)
13+
self.convert_comma = convert_comma
1214
self.readnumber = 0.0
1315
if every is None:
1416
self.every = 1.0
@@ -33,7 +35,10 @@ def next(self):
3335
while line[0] in self.comments or float(line.split()[0]) < self.l_limit:
3436
line = self.f.next()
3537

36-
if float(line.split()[0]) < self.h_limit:
38+
splitted = line.split()
39+
if float(splitted[0]) < self.h_limit:
40+
if self.convert_comma:
41+
return ','.join(splitted)
3742
return line
3843
else:
3944
self.close()

pytsa/dataobject.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,11 @@ def dataset(path, commentstring=None, colnames=None, delimiter='[\s\t]+', start=
6767

6868

6969
# check if delimiter is ok
70+
convert_comma = None
7071
if colid and delimiter != ',':
71-
print('column selection work only with delimiter = \',\' (yet)')
72-
raise ValueError
72+
convert_comma = True
73+
delimiter = ','
74+
#raise ValueError('column selection work only with delimiter = \',\' (yet)')
7375

7476
# if hdf5 create a HDFStore object in 'w' mode
7577
if hdf5 is None:
@@ -127,7 +129,7 @@ def dataset(path, commentstring=None, colnames=None, delimiter='[\s\t]+', start=
127129

128130
for _ in range(process):
129131
looper = ImportLooper(path, queueIN, queueOUT, r, every, start, stop, \
130-
commentstring, delimiter, colnames, colid, col_pref)
132+
commentstring, delimiter, colnames, colid, col_pref, convert_comma)
131133
looper.start()
132134
proc.append(looper)
133135

@@ -753,7 +755,6 @@ def msdplot(self, start=None, stop=None, columns=None, step=1, merge=None, \
753755
columns = self.__columns
754756
start = float(start)
755757
stop = float(stop)
756-
step = float(step)
757758
if len(columns) == 1:
758759
merge = True
759760

@@ -867,7 +868,7 @@ def internalPdf(time, binsize, numbins, normed, fit, range):
867868
if self.__isSet:
868869
if merge:
869870
plt.figure()
870-
name = 'pdf'
871+
name = '_'.join(('pdf', str(time)))
871872
minrange = None
872873
maxrange = None
873874
for col in columns:
@@ -902,7 +903,7 @@ def internalPdf(time, binsize, numbins, normed, fit, range):
902903
else:
903904
fig, axes = plt.subplots(nrows=len(columns), ncols=1)
904905
for i, col in enumerate(columns):
905-
name = '_'.join(('item_freq', col))
906+
name = '_'.join(('item_freq', str(time), col))
906907
thisrow = '_'.join((str(value), str(col)))
907908
if thisrow not in self.__row:
908909
self.getarow(value, col)

pytsa/importLooper.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,24 @@
33
from multiprocessing import Process
44
from commentedfile import *
55
from Queue import Empty
6+
import StringIO
67

78
class ImportLooper(Process):
8-
def __init__(self, path, queueIN, queueOUT, re, every, tmin, tmax, commentstring, \
9-
delimiter, colnames, colid, col_pref):
9+
def __init__(self,
10+
path,
11+
queueIN,
12+
queueOUT,
13+
re,
14+
every,
15+
tmin,
16+
tmax,
17+
commentstring,
18+
delimiter,
19+
colnames,
20+
colid,
21+
col_pref,
22+
convert_comma):
23+
1024
self.path = path
1125
self.queueIN = queueIN
1226
self.queueOUT = queueOUT
@@ -19,6 +33,7 @@ def __init__(self, path, queueIN, queueOUT, re, every, tmin, tmax, commentstring
1933
self.colnames = colnames
2034
self.colid = colid
2135
self.col_pref = col_pref
36+
self.convert_comma = convert_comma
2237
self.killReceived = False
2338
super(ImportLooper, self).__init__()
2439

@@ -38,9 +53,17 @@ def run(self):
3853
# create a fake file and pd.read_csv!
3954
try:
4055
source = CommentedFile(open(actualfile, 'rb'), every=self.every, \
41-
commentstring=self.commentstring, low_limit=self.tmin, high_limit=self.tmax)
42-
toReturn = pd.read_csv(source, sep=self.delimiter, index_col=0, \
43-
header=None, names=self.colnames, usecols=self.colid, prefix=self.col_pref)
56+
commentstring=self.commentstring, low_limit=self.tmin, high_limit=self.tmax, \
57+
convert_comma=self.convert_comma)
58+
if self.convert_comma:
59+
temp_string = ""
60+
for r in source:
61+
temp_string = temp_string + r + '\n'
62+
toReturn = pd.read_csv(StringIO.StringIO(temp_string), sep=self.delimiter, index_col=0, \
63+
header=None, names=self.colnames, usecols=self.colid, prefix=self.col_pref)
64+
else:
65+
toReturn = pd.read_csv(source, sep=self.delimiter, index_col=0, \
66+
header=None, names=self.colnames, usecols=self.colid, prefix=self.col_pref)
4467
source.close()
4568

4669
# hmm, something's wrong here

scripts/dario.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# File for the transient analysis
2+
3+
# import pyTSA
4+
import pytsa as tsa
5+
6+
# Define the path to the dataset folder, and the extension of each file which contains a time-series
7+
FOLDER = './bio_tmp/'
8+
9+
# Define the time instants at which you want to evaluate the probability density function
10+
PDFTIMES = [5, 10, 25, 50, 75, 100]
11+
12+
# Define the time interval (start and stop) over which you want to evaluate the master equation
13+
MEQTIME_FROM = 0
14+
MEQTIME_TO = 100
15+
16+
# NAMES = ['A', 'B', 'C', 'D', 'E', 'F', 'X', 'Y']
17+
NAMES = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8']
18+
19+
####### Load the dataset
20+
t = tsa.dataset(FOLDER, commentstring='#')
21+
22+
####### Set up the output terminal
23+
t.deloutput('view')
24+
t.addoutput('eps')
25+
t.addoutput('png')
26+
t.addoutput('txt')
27+
28+
####### Plot all the time-series, divided by columns, and plot each column in a different panel
29+
#print('splot: plot all the time-series, divided by columns, and plot each column in a different panel.')
30+
#t.splot()
31+
32+
####### Plot the average and standard deviation (bar plot) of all the time-series, divided by columns, and plot each column in a different panel
33+
print('msdplot: Plot the average and standard deviation (bar plot) of all the time-series, divided by columns, and plot each column in a different panel')
34+
t.msdplot(columns=NAMES, errorbar=True)
35+
36+
####### Plot the probability density function (normalized, with Gaussian fit) of each column at the times defined
37+
print('pdf: Plot the probability density function of each column at the timed defined')
38+
for times in PDFTIMES:
39+
t.pdf(times, columns=NAMES, normed=True, fit=True)
40+
41+
####### Plot the master equation (2D normalized) of each column in the time interval defined
42+
print('meq2dPlot the master equation (2D normalized) of each column in the time interval defined')
43+
t.meq2d(columns=NAMES, start=MEQTIME_FROM, stop=MEQTIME_TO, normed=True)
44+

0 commit comments

Comments
 (0)