If you want commas I give you commas #18

luca-dex · luca-dex · commit 436fed2ce203 · 2013-12-30T01:54:06.000+01:00
diff --git a/pytsa/commentedfile.py b/pytsa/commentedfile.py
@@ -1,6 +1,7 @@
 class CommentedFile(file):
     """ this class skips comment lines. comment lines start with any of the symbols in commentstring """
-    def __init__(self, f, commentstring=None, low_limit=-float('inf'), high_limit=float('inf'), every=None):
+    def __init__(self, f, commentstring=None, low_limit=-float('inf'), high_limit=float('inf'), \
+        every=None, convert_comma=None):
         self.f = f
         if commentstring is None:
             self.comments = '\n'
@@ -9,6 +10,7 @@ def __init__(self, f, commentstring=None, low_limit=-float('inf'), high_limit=fl
         self.l_limit = low_limit
         self.h_limit = high_limit
         self.numrows = self.line(f)
+        self.convert_comma = convert_comma
         self.readnumber = 0.0
         if every is None:
             self.every = 1.0
@@ -33,7 +35,10 @@ def next(self):
             while line[0] in self.comments or float(line.split()[0]) < self.l_limit:
                 line = self.f.next()
 
-            if  float(line.split()[0]) < self.h_limit:
+            splitted = line.split()
+            if  float(splitted[0]) < self.h_limit:
+                if self.convert_comma:
+                    return ','.join(splitted)
                 return line
             else:
                 self.close()
diff --git a/pytsa/dataobject.py b/pytsa/dataobject.py
@@ -67,9 +67,11 @@ def dataset(path, commentstring=None, colnames=None, delimiter='[\s\t]+', start=
 
 
     # check if delimiter is ok
+    convert_comma = None
     if colid and delimiter != ',':
-        print('column selection work only with delimiter = \',\' (yet)')
-        raise ValueError
+        convert_comma = True
+        delimiter = ','
+        # column selection only works with ',' as delimiter: convert rows to commas instead of raising ValueError
 
     # if hdf5 create a HDFStore object in 'w' mode
     if hdf5 is None:
@@ -127,7 +129,7 @@ def dataset(path, commentstring=None, colnames=None, delimiter='[\s\t]+', start=
 
     for _ in range(process):
         looper = ImportLooper(path, queueIN, queueOUT, r, every, start, stop, \
-            commentstring, delimiter, colnames, colid, col_pref)
+            commentstring, delimiter, colnames, colid, col_pref, convert_comma)
         looper.start()
         proc.append(looper)
    
@@ -753,7 +755,6 @@ def msdplot(self, start=None, stop=None, columns=None, step=1, merge=None, \
             columns = self.__columns
         start = float(start)
         stop = float(stop)
-        step = float(step)
         if len(columns) == 1:
             merge = True
 
@@ -867,7 +868,7 @@ def internalPdf(time, binsize, numbins, normed, fit, range):
             if self.__isSet:
                 if merge:
                     plt.figure()
-                    name = 'pdf'
+                    name = '_'.join(('pdf', str(time)))
                     minrange = None
                     maxrange = None
                     for col in columns:
@@ -902,7 +903,7 @@ def internalPdf(time, binsize, numbins, normed, fit, range):
                 else:
                     fig, axes = plt.subplots(nrows=len(columns), ncols=1)
                     for i, col in enumerate(columns):
-                        name = '_'.join(('item_freq', col))
+                        name = '_'.join(('item_freq', str(time), col))
                         thisrow = '_'.join((str(value), str(col)))
                         if thisrow not in self.__row:
                             self.getarow(value, col)
diff --git a/pytsa/importLooper.py b/pytsa/importLooper.py
@@ -3,10 +3,24 @@
 from multiprocessing import Process
 from commentedfile import *
 from Queue import Empty
+import StringIO
 
 class ImportLooper(Process):
-    def __init__(self, path, queueIN, queueOUT, re, every, tmin, tmax, commentstring, \
-        delimiter, colnames, colid, col_pref):
+    def __init__(self, 
+                 path, 
+                 queueIN, 
+                 queueOUT, 
+                 re, 
+                 every, 
+                 tmin, 
+                 tmax, 
+                 commentstring,
+                 delimiter, 
+                 colnames, 
+                 colid, 
+                 col_pref, 
+                 convert_comma):
+
         self.path = path
         self.queueIN = queueIN
         self.queueOUT = queueOUT
@@ -19,6 +33,7 @@ def __init__(self, path, queueIN, queueOUT, re, every, tmin, tmax, commentstring
         self.colnames = colnames
         self.colid = colid
         self.col_pref = col_pref
+        self.convert_comma = convert_comma
         self.killReceived = False
         super(ImportLooper, self).__init__()
 
@@ -38,9 +53,17 @@ def run(self):
             # create a fake file and pd.read_csv!
             try:
                 source = CommentedFile(open(actualfile, 'rb'), every=self.every, \
-                    commentstring=self.commentstring, low_limit=self.tmin, high_limit=self.tmax)
-                toReturn = pd.read_csv(source, sep=self.delimiter, index_col=0, \
-                    header=None, names=self.colnames, usecols=self.colid, prefix=self.col_pref)
+                    commentstring=self.commentstring, low_limit=self.tmin, high_limit=self.tmax, \
+                    convert_comma=self.convert_comma)
+                if self.convert_comma:
+                    temp_string = ""
+                    for r in source:
+                        temp_string = temp_string + r + '\n'
+                    toReturn = pd.read_csv(StringIO.StringIO(temp_string), sep=self.delimiter, index_col=0, \
+                        header=None, names=self.colnames, usecols=self.colid, prefix=self.col_pref)
+                else:
+                    toReturn = pd.read_csv(source, sep=self.delimiter, index_col=0, \
+                        header=None, names=self.colnames, usecols=self.colid, prefix=self.col_pref)
                 source.close()
 
             # mmm somethings wrong here
diff --git a/scripts/dario.py b/scripts/dario.py
@@ -0,0 +1,44 @@
+# Script for the transient analysis
+
+# import pyTSA
+import pytsa as tsa
+
+# Define the path to the dataset folder whose files each contain a time-series
+FOLDER = './bio_tmp/'
+
+# Define the time instants at which you want to evaluate the probability density function
+PDFTIMES = [5, 10, 25, 50, 75, 100]
+
+# Define the time interval (start, stop) over which you want to evaluate the master equation
+MEQTIME_FROM = 0
+MEQTIME_TO = 100
+
+# NAMES = ['A', 'B', 'C', 'D', 'E', 'F', 'X', 'Y']
+NAMES = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8']
+
+####### Load the dataset
+t = tsa.dataset(FOLDER, commentstring='#')
+
+####### Set up the output terminal
+t.deloutput('view')
+t.addoutput('eps')
+t.addoutput('png')
+t.addoutput('txt')
+
+####### Plot all the time-series, divided by columns, and plot each column in a different panel
+#print('splot: plot all the time-series, divided by columns, and plot each column in a different panel.')
+#t.splot()
+
+####### Plot the average and standard deviation (bar plot) of all the time-series, divided by columns, and plot each column in a different panel
+print('msdplot: Plot the average and standard deviation (bar plot) of all the time-series, divided by columns, and plot each column in a different panel')
+t.msdplot(columns=NAMES, errorbar=True)
+
+####### Plot the probability density function (normalized, with Gaussian fit) of each column at each of the times defined in PDFTIMES
+print('pdf: Plot the probability density function of each column at the timed defined')
+for times in PDFTIMES:
+    t.pdf(times, columns=NAMES, normed=True, fit=True)
+
+####### Plot the master equation (2D normalized) of each column in the time interval defined
+print('meq2dPlot the master equation (2D normalized) of each column in the time interval defined')
+t.meq2d(columns=NAMES, start=MEQTIME_FROM, stop=MEQTIME_TO, normed=True)
+