diff --git a/.travis.yml b/.travis.yml index 98e0d1a7..c195db4a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ install: - conda update conda # Install dependencies - - conda create -n test-streamz python=$TRAVIS_PYTHON_VERSION pytest tornado toolz flake8 coverage codecov networkx graphviz python-graphviz dask distributed pandas bokeh ipython ipykernel ipywidgets -c conda-forge -c bokeh + - conda create -n test-streamz python=$TRAVIS_PYTHON_VERSION pytest tornado toolz flake8 coverage codecov networkx graphviz python-graphviz dask distributed pandas bokeh holoviews ipython ipykernel ipywidgets -c conda-forge -c bokeh - source activate test-streamz - pip install git+https://github.com/dask/distributed.git --upgrade --no-deps diff --git a/docs/source/index.rst b/docs/source/index.rst index c3c0b7fa..ec907625 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -91,3 +91,4 @@ data streaming systems like `Apache Flink `_, collections-api.rst dataframe-aggregations.rst async.rst + plotting.rst diff --git a/docs/source/plotting.rst b/docs/source/plotting.rst new file mode 100644 index 00000000..8ba33dee --- /dev/null +++ b/docs/source/plotting.rst @@ -0,0 +1,426 @@ +Plotting +======== + +The plotting interface on streamz DataFrame and Series objects +attempts to mirror the pandas plotting API, but instead of plotting +with matplotlib_ uses HoloViews_ to generate dynamically streaming +bokeh_ plots. To support plotting streaming data you can use this +interface either in a Jupyter notebook or deploy it as a :ref:`bokeh +server app <plotting.apps>`. + +HoloViews provides several constructs which make it well suited to +streaming visualizations. All plotting methods will return so-called +``DynamicMap`` objects, which update the plot whenever streamz +triggers an event. 
For additional information about working and +plotting with HoloViews_ see the `User Guide +<https://holoviews.org/user_guide/index.html>`__, as we will focus on +using the high-level plotting API in this overview and skip most of +the mechanics going on behind the scenes. + +*All plots generated by the streamz plotting interface dynamically +stream data; since the documentation cannot easily embed streaming +plots, all plots shown here are static screenshots.* + +Basic plotting +-------------- + +Throughout this section we will be using the ``Random`` construct, +which provides an easy way of generating a DataFrame of random +streaming data. + +.. code-block:: python + + from streamz.dataframe import Random + df = Random() + +.. image:: images/random_df.png + :alt: example Random streaming dataframe output + :width: 300 px + +The plot method on Series and DataFrame is a simple wrapper around a +line plot, which will plot all columns: + +.. code-block:: python + + df.plot() + +.. image:: images/simple_plot.png + :alt: a line plot of the Random dataframe + +The plot method can also be called on a Series, plotting a specific +column: + +.. code-block:: python + + df.z.cumsum().plot() + +Another more general way to express the same thing is to explicitly +define ``x`` and ``y`` in the DataFrame plot method: + +.. code-block:: python + + df.cumsum().plot(x='index', y='z') + +.. image:: images/series_plot.png + :alt: a line plot of the Random Series + +Other plots +----------- + +Plotting methods allow for a handful of plot styles other than the +default Line plot. These methods can be provided as the ``kind`` +keyword argument to :meth:`~DataFrame.plot`. +These include: + +* :ref:`'bar' <plotting.barplot>` or :ref:`'barh' <plotting.barplot>` for bar plots +* :ref:`'hist' <plotting.hist>` for histogram +* :ref:`'box' <plotting.box>` for boxplot +* :ref:`'kde' <plotting.kde>` or ``'density'`` for density plots +* :ref:`'area' <plotting.area_plot>` for area plots +* :ref:`'scatter' <plotting.scatter>` for scatter plots +* :ref:`'table' <plotting.table>` for tables + +For example, a bar plot can be created the following way: + +.. 
code-block:: python + + df.groupby('y').x.sum().plot(kind='bar') + +.. image:: images/bar_plot.png + :alt: a bar plot of the summed x values grouped by y + +You can also create these other plots using the methods +``DataFrame.plot.<kind>`` instead of providing the ``kind`` keyword +argument. This makes it easier to discover plot methods and the +specific arguments they use: + +.. code-block:: python + + In [14]: df = Random() + + In [15]: df.plot.<TAB> + df.plot.area df.plot.barh df.plot.density df.plot.kde df.plot.scatter + df.plot.bar df.plot.box df.plot.hist df.plot.line df.plot.table + + +.. _plotting.barplot: + +Bar plots +~~~~~~~~~ + +For labeled, non-time series data, you may wish to produce a bar plot. +In addition to the simple bar plot shown above we can also produce +grouped bars: + +.. code-block:: python + + df.groupby('y').sum().plot.bar(x='y') + +.. image:: images/bar_grouped.png + :alt: a grouped bar plot of the summed values grouped by y + + +Alternatively you may also stack the bars: + +.. code-block:: python + + df.groupby('y').sum().plot.bar(x='y', stacked=True) + +.. image:: images/bar_stacked.png + :alt: a stacked bar plot of the summed values grouped by y + + +.. _plotting.hist: + +Histograms +~~~~~~~~~~ + +Histograms can be drawn by using the DataFrame.plot.hist() and +Series.plot.hist() methods. The number of bins can be declared using +the ``bins`` keyword and normalization can be controlled with the ``normed`` +keyword. + +.. code-block:: python + + df.z.plot.hist(bins=50, backlog=5000, normed=False) + +.. image:: images/series_hist.png + :alt: a histogram of a series + +Calling ``DataFrame.plot.hist`` will plot all columns; to make them easier +to compare you can lower the ``alpha`` and define a ``bin_range``: + +.. code-block:: python + + df.plot.hist(bin_range=(-3, 3), bins=50, backlog=5000, alpha=0.3) + +.. image:: images/df_hist.png + :alt: a histogram of a dataframe + + +.. 
_plotting.box: + +Box Plots +~~~~~~~~~ + +Boxplots can be drawn by calling Series.plot.box() and +DataFrame.plot.box() to visualize the distribution of values within +each column. + +For example, here we plot each column: + +.. code-block:: python + + df.plot.box() + +.. image:: images/df_boxplot.png + :alt: a box plot of a dataframe + +Or we can generate a boxplot of a Series: + +.. code-block:: python + + df.x.plot.box(width=300) + +.. image:: images/series_boxplot.png + :alt: a box plot of a series + :width: 350 px + +It is also possible to group a box plot by a secondary variable: + +.. code-block:: python + + df.plot.box(by='y', height=400) + +.. image:: images/grouped_boxplot.png + :alt: a box plot of a dataframe grouped by the 'y' column + + +.. _plotting.kde: + +KDE plots +~~~~~~~~~ + +You can create density plots using the Series.plot.kde() and +DataFrame.plot.kde() methods. + +.. code-block:: python + + df.x.plot.kde() + +.. image:: images/series_kde.png + :alt: a KDE plot of a series + + +.. _plotting.area_plot: + +Area plots +~~~~~~~~~~ + +You can create area plots with Series.plot.area() and +DataFrame.plot.area(). To produce a stacked area plot, each column must +contain either all positive or all negative values. + +.. code-block:: python + + df.x.plot.area() + +.. image:: images/series_area.png + :alt: an area plot of a series + +When plotting multiple columns on a DataFrame the areas may be stacked: + +.. code-block:: python + + df[['x', 'y']].plot.area(stacked=True) + +.. image:: images/stacked_area.png + :alt: a stacked area plot of a dataframe + + +.. _plotting.scatter: + +Scatter plots +~~~~~~~~~~~~~ + +Scatter plots can be drawn by using the DataFrame.plot.scatter() +method. Scatter plots require numeric or datetime columns for the x and y +axes. These can be specified with the ``x`` and ``y`` keywords respectively. + +.. code-block:: python + + df.plot.scatter(x='x', y='z') + +.. 
image:: images/df_scatter.png + :alt: a scatter plot of the 'x' and 'z' columns of the dataframe + :width: 350 px + +The scatter points can also be colored by a certain column using the ``c`` +keyword. Additionally we will enable a colorbar and adjust the x-axis +by setting its limits using ``xlim``: + +.. code-block:: python + + df.plot.scatter(x='y', y='z', c='x', cmap='viridis', + width=400, colorbar=True, xlim=(-1, 6)) + +.. image:: images/colored_scatter.png + :alt: a scatter plot colored by the 'x' column + :width: 450 px + + +.. _plotting.table: + +Tables +~~~~~~ + +We can also stream a table view of the data: + + +.. code-block:: python + + df.plot.table(width=600) + +.. image:: images/df_table.png + :alt: a table view of the data + :width: 600 px + + +.. _plotting.composition: + +Composing Plots +--------------- + +One of the core strengths of HoloViews is the ease of composing +different plots. Individual plots can be composed using the ``*`` and +``+`` operators, which overlay and compose plots into layouts +respectively. For more information on composing objects see the +HoloViews `User Guide +<https://holoviews.org/user_guide/Composing_Elements.html>`__. + +By using these operators we can combine multiple plots into composite +Overlay and Layout objects, and lay them out in two columns using the +Layout.cols method: + +.. code-block:: python + + (df.plot.line(width=400) * df.plot.scatter(width=400) + + df.groupby('y').sum().plot.bar('y', 'x', width=400) + + df.plot.box(width=400) + df.x.plot.kde(width=400)).cols(2) + +.. image:: images/composite_plot.png + :alt: a composite layout of line, scatter, bar, box and KDE plots + + +.. 
_plotting.customization: + +Customizing the visualization +----------------------------- + +In addition to specific options for different plot types the plotting +API exposes a number of general options including: + + - ``backlog`` (default=1000): Number of rows of streamed data to + accumulate in a buffer and plot at the same time + - ``grid`` (default=False): Whether to show a grid + - ``hover`` (default=False): Whether to show hover tooltips + - ``legend`` (default=True): Whether to show a legend + - ``logx``/``logy`` (default=False): Enables logarithmic x- and + y-axis respectively + - ``shared_axes`` (default=False): Whether to link axes between plots + - ``title`` (default=None): Title for the plot + - ``xlim``/``ylim`` (default=None): Plot limits of the x- and y-axis + - ``xticks``/``yticks`` (default=None): Ticks along x- and y-axis + specified as an integer, list of tick positions, or list of + tuples of the tick positions and labels + - ``width`` (default=800)/``height`` (default=300): The width and + height of the plot in pixels + +In addition, options can be passed directly to HoloViews providing +greater control over the plots. The options can be provided as +dictionaries via the ``plot_opts`` and ``style_opts`` keyword +arguments. You can also apply options using the HoloViews API (for +more information see the HoloViews `User Guide +<https://holoviews.org/user_guide/Customizing_Plots.html>`__). + + +.. _plotting.apps: + +Deployment as bokeh apps +------------------------ + +In the Jupyter notebook HoloViews objects will automatically be +rendered, but when deploying a plot as a bokeh app it has to be +rendered explicitly. + +The following example describes how to set up a streaming DataFrame, +declare some plots, compose them, set up a callback to update the plot +and finally convert the composite plot to a bokeh Document, which can +be served from a script using ``bokeh serve`` on the command line. + +.. 
code-block:: python + + import numpy as np + import pandas as pd + import holoviews as hv + from streamz import Stream + from streamz.dataframe import DataFrame + import streamz.dataframe.holoviews + + renderer = hv.renderer('bokeh').instance(mode='server') + + # Set up streaming DataFrame + stream = Stream() + index = pd.DatetimeIndex([]) + example = pd.DataFrame({'x': [], 'y': [], 'z': []}, + columns=['x', 'y', 'z'], index=index) + df = DataFrame(stream, example=example) + cumulative = df.cumsum()[['x', 'z']] + + # Declare plots + line = cumulative.plot.line(width=400) + scatter = cumulative.plot.scatter(width=400) + bars = df.groupby('y').sum().plot.bar(width=400) + box = df.plot.box(width=400) + kde = df.x.plot.kde(width=400) + + # Compose plots + layout = (line * scatter + bars + box + kde).cols(2) + + # Set up callback with streaming data + def emit(): + now = pd.datetime.now() + delta = np.timedelta64(500, 'ms') + index = pd.date_range(np.datetime64(now)-delta, now, freq='100ms') + df = pd.DataFrame({'x': np.random.randn(len(index)), + 'y': np.random.randint(0, 10, len(index)), + 'z': np.random.randn(len(index))}, + columns=['x', 'y', 'z'], index=index) + stream.emit(df) + + # Render layout to bokeh server Document and attach callback + doc = renderer.server_doc(layout) + doc.title = 'Streamz HoloViews based Plotting API Bokeh App Demo' + doc.add_periodic_callback(emit, 500) + + +.. image:: images/server_plot.png + :alt: a bokeh server app demo + :width: 600 px + +For more details on deploying bokeh apps see the HoloViews `User Guide <https://holoviews.org/user_guide/Deploying_Bokeh_Apps.html>`__. + +.. _plotting.holoviews: + +Using HoloViews directly +------------------------ + +HoloViews includes first-class support for streamz DataFrame and +Series; for more details see the `Streaming Data section +<https://holoviews.org/user_guide/Streaming_Data.html>`_ in the +HoloViews documentation. + +.. _matplotlib: https://matplotlib.org/ +.. _HoloViews: https://holoviews.org/ +.. 
_bokeh: https://bokeh.pydata.org/en/latest/ + diff --git a/streamz/dataframe/core.py b/streamz/dataframe/core.py index 42182871..214feff6 100644 --- a/streamz/dataframe/core.py +++ b/streamz/dataframe/core.py @@ -141,55 +141,6 @@ def window(self, n=None, value=None): """ return Window(self, n=n, value=value) - def plot(self, backlog=1000, width=800, height=300, **kwargs): - """ Plot streaming dataframe as Bokeh plot - - This is fragile. It only works in the classic Jupyter Notebook. It - only works on numeric data. It assumes that the index is a datetime - index - """ - from bokeh.palettes import Category10 - from bokeh.io import output_notebook, push_notebook, show - from bokeh.models import value - from bokeh.plotting import figure, ColumnDataSource - output_notebook() - - sdf = self.to_frame() - - colors = Category10[max(3, min(10, len(sdf.columns)))] - data = {c: [] for c in sdf.columns} - data['index'] = [] - cds = ColumnDataSource(data) - - if ('x_axis_type' not in kwargs and - np.issubdtype(self.index.dtype, np.datetime64)): - kwargs['x_axis_type'] = 'datetime' - - fig = figure(width=width, height=height, **kwargs) - - for i, column in enumerate(sdf.columns): - color = colors[i % len(colors)] - fig.line(source=cds, x='index', y=column, color=color, legend=value(column)) - - fig.legend.click_policy = 'hide' - fig.min_border_left = 30 - fig.min_border_bottom = 30 - - result = show(fig, notebook_handle=True) - - loop = IOLoop.current() - - def push_data(df): - df = df.reset_index() - d = {c: df[c] for c in df.columns} - - def _(): - cds.stream(d, backlog) - push_notebook(handle=result) - loop.add_callback(_) - - return {'figure': fig, 'cds': cds, 'stream': sdf.stream.gather().map(push_data)} - def _cumulative_aggregation(self, op): return self.accumulate_partitions(_cumulative_accumulator, returns_state=True, diff --git a/streamz/dataframe/holoviews.py b/streamz/dataframe/holoviews.py new file mode 100644 index 00000000..9c1fede7 --- /dev/null +++ 
b/streamz/dataframe/holoviews.py @@ -0,0 +1,684 @@ +from __future__ import absolute_import + +import holoviews as hv +import pandas as pd + +from holoviews.core.spaces import DynamicMap +from holoviews.core.overlay import NdOverlay +from holoviews.element import ( + Curve, Scatter, Area, Bars, BoxWhisker, Dataset, Distribution, + Table +) + +from holoviews.operation import histogram +from holoviews.streams import Buffer, Pipe + +from streamz.dataframe import DataFrame, DataFrames, Series, Seriess + + +class HoloViewsConverter(object): + + def __init__(self, data, kind=None, by=None, width=800, + height=300, backlog=1000, shared_axes=False, + grid=False, legend=True, rot=None, title=None, + xlim=None, ylim=None, xticks=None, yticks=None, + fontsize=None, colormap=None, stacked=False, + logx=False, logy=False, loglog=False, hover=False, + style_opts={}, plot_opts={}, use_index=True, + value_label='value', group_label='Group', **kwds): + + # Set up HoloViews stream + if isinstance(data, (Series, Seriess)): + data = data.to_frame() + self.data = data + self.stream_type = data._stream_type + if data._stream_type == 'updating': + self.stream = Pipe(data=data.example) + else: + self.stream = Buffer(data.example, length=backlog) + data.stream.gather().sink(self.stream.send) + + # High-level options + self.by = by + self.stacked = stacked + self.use_index = use_index + self.kwds = kwds + self.value_label = value_label + self.group_label = group_label + + # Process style options + if 'cmap' in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif 'cmap' in kwds: + cmap = kwds.pop('cmap') + else: + cmap = colormap + self._style_opts = dict(fontsize=fontsize, cmap=cmap, **style_opts) + + # Process plot options + plot_options = dict(plot_opts) + plot_options['logx'] = logx or loglog + plot_options['logy'] = logy or loglog + plot_options['show_grid'] = grid + plot_options['shared_axes'] = shared_axes + plot_options['show_legend'] = legend + if 
xticks: + plot_options['xticks'] = xticks + if yticks: + plot_options['yticks'] = yticks + if width: + plot_options['width'] = width + if height: + plot_options['height'] = height + if rot: + if (kind == 'barh' or kwds.get('orientation') == 'horizontal' + or kwds.get('vert')): + axis = 'yrotation' + else: + axis = 'xrotation' + plot_options[axis] = rot + if hover: + plot_options['tools'] = ['hover'] + self._plot_opts = plot_options + + self._relabel = {'label': title} + self._dim_ranges = {'x': xlim or (None, None), + 'y': ylim or (None, None)} + self._norm_opts = {'framewise': True} + + def reset_index(self, data): + if self.stream_type == 'updating' and self.use_index: + return data.reset_index() + else: + return data + + def table(self, x=None, y=None): + allowed = ['width', 'height'] + + def table(data): + if len(data.columns) == 1: + data = data.reset_index() + return Table(data).opts(plot=opts) + opts = {k: v for k, v in self._plot_opts.items() if k in allowed} + return DynamicMap(table, streams=[self.stream]) + + +class HoloViewsFrameConverter(HoloViewsConverter): + """ + HoloViewsFrameConverter handles the conversion of + streamz.dataframe.DataFrame and streamz.dataframe.DataFrames into + displayable HoloViews objects. + """ + + def __call__(self, kind, x, y): + return getattr(self, kind)(x, y) + + def single_chart(self, chart, x, y): + opts = dict(plot=self._plot_opts, norm=self._norm_opts, + style=self._style_opts) + ranges = {x: self._dim_ranges['x'], y: self._dim_ranges['y']} + + def single_chart(data): + ys = [y] + [c for c in data.columns if c not in (x, y)] + return (chart(self.reset_index(data), x, ys).redim.range(**ranges) + .relabel(**self._relabel).opts(**opts)) + + return DynamicMap(single_chart, streams=[self.stream]) + + def chart(self, element, x, y): + "Helper method for simple x vs. 
y charts" + + if x and y: + return self.single_chart(element, x, y) + + x = self.data.example.index.name or 'index' + opts = dict(plot=dict(self._plot_opts, labelled=['x']), + norm=self._norm_opts) + + def multi_chart(data): + charts = {} + for c in data.columns[1:]: + chart = element(data, x, c).redim(**{c: self.value_label}) + ranges = {x: self._dim_ranges['x'], c: self._dim_ranges['y']} + charts[c] = (chart.relabel(**self._relabel) + .redim.range(**ranges).opts(**opts)) + return NdOverlay(charts) + return DynamicMap(multi_chart, streams=[self.stream]) + + def line(self, x, y): + return self.chart(Curve, x, y) + + def scatter(self, x, y): + scatter = self.chart(Scatter, x, y) + if 'c' in self.kwds: + color_opts = {'Scatter': {'colorbar': self.kwds.get('colorbar', False), + 'color_index': self.kwds['c']}} + return scatter.opts(plot=color_opts) + return scatter + + def area(self, x, y): + areas = self.chart(Area, x, y) + if self.stacked: + areas = areas.map(Area.stack, NdOverlay) + return areas + + def bar(self, x, y): + if x and y: + return self.single_chart(Bars, x, y) + + index = self.data.example.index.name or 'index' + stack_index = 1 if self.stacked else None + opts = {'plot': dict(self._plot_opts, labelled=['x'], + stack_index=stack_index), + 'norm': self._norm_opts} + ranges = {self.value_label: self._dim_ranges['y']} + + def bars(data): + data = self.reset_index(data) + df = pd.melt(data, id_vars=[index], var_name=self.group_label, value_name=self.value_label) + return (Bars(df, [index, self.group_label], self.value_label).redim.range(**ranges) + .relabel(**self._relabel).opts(**opts)) + return DynamicMap(bars, streams=[self.stream]) + + def barh(self, x, y): + return self.bar(x, y).opts(plot={'Bars': dict(invert_axes=True)}) + + def box(self, x, y): + if x and y: + return self.single_chart(BoxWhisker, x, y) + + index = self.data.example.index.name or 'index' + if self.by: + id_vars = [index, self.by] + kdims = [self.group_label, self.by] + else: + kdims = 
[self.group_label] + id_vars = [index] + invert = not self.kwds.get('vert', True) + opts = {'plot': dict(self._plot_opts, labelled=[], invert_axes=invert), + 'norm': self._norm_opts} + ranges = {self.value_label: self._dim_ranges['y']} + + def box(data): + data = self.reset_index(data) + df = pd.melt(data, id_vars=id_vars, var_name=self.group_label, value_name=self.value_label) + return (BoxWhisker(df, kdims, self.value_label).redim.range(**ranges) + .relabel(**self._relabel).opts(**opts)) + return DynamicMap(box, streams=[self.stream]) + + def hist(self, x, y): + plot_opts = dict(self._plot_opts) + invert = self.kwds.get('orientation', False) == 'horizontal' + opts = dict(plot=dict(plot_opts, labelled=['x'], invert_axes=invert), + style=dict(alpha=self.kwds.get('alpha', 1)), + norm=self._norm_opts) + hist_opts = {'num_bins': self.kwds.get('bins', 10), + 'bin_range': self.kwds.get('bin_range', None), + 'normed': self.kwds.get('normed', False)} + + def hist(data): + ds = Dataset(self.reset_index(data)) + hists = {} + for col in data.columns[1:]: + hist = histogram(ds, dimension=col, **hist_opts) + ranges = {hist.vdims[0].name: self._dim_ranges['y']} + hists[col] = (hist.redim.range(**ranges) + .relabel(**self._relabel).opts(**opts)) + return NdOverlay(hists) + return DynamicMap(hist, streams=[self.stream]) + + def kde(self, x, y): + index = self.data.example.index.name or 'index' + plot_opts = dict(self._plot_opts) + invert = self.kwds.get('orientation', False) == 'horizontal' + opts = dict(plot=dict(plot_opts, invert_axes=invert), + style=dict(alpha=self.kwds.get('alpha', 0.5)), + norm=self._norm_opts) + opts = {'Distribution': opts, 'Area': opts, + 'NdOverlay': {'plot': dict(legend_limit=0)}} + + def kde(data): + data = self.reset_index(data) + df = pd.melt(data, id_vars=[index], var_name=self.group_label, value_name=self.value_label) + ds = Dataset(df) + if len(df): + overlay = ds.to(Distribution, self.value_label).overlay() + else: + vdim = self.value_label + ' 
Density' + overlay = NdOverlay({0: Area([], self.value_label, vdim)}, + [self.group_label]) + return overlay.relabel(**self._relabel).opts(**opts) + return DynamicMap(kde, streams=[self.stream]) + + +class HoloViewsSeriesConverter(HoloViewsConverter): + """ + HoloViewsFrameConverter handles the conversion of + streamz.dataframe.Series and streamz.dataframe.Seriess into + displayable HoloViews objects. + """ + + def __call__(self, kind): + return getattr(self, kind)() + + def chart(self, chart): + opts = dict(plot=self._plot_opts, norm=self._norm_opts) + + def chartfn(data): + if len(data.columns) == 1: + data = data.reset_index() + return chart(data).relabel(**self._relabel).opts(**opts) + + return DynamicMap(chartfn, streams=[self.stream]) + + def line(self): + return self.chart(Curve) + + def scatter(self): + return self.chart(Scatter) + + def area(self): + return self.chart(Area) + + def bar(self): + return self.chart(Bars) + + def barh(self): + return self.bar().opts(plot={'Bars': dict(invert_axes=True)}) + + def box(self): + opts = dict(plot=self._plot_opts, norm=self._norm_opts) + + def boxfn(data): + ranges = {data.columns[-1]: self._dim_ranges['y']} + return (BoxWhisker(data, [], data.columns[-1]) + .redim.range(**ranges).relabel(**self._relabel).opts(**opts)) + + return DynamicMap(boxfn, streams=[self.stream]) + + def hist(self): + hist_opts = {'num_bins': self.kwds.get('bins', 10), + 'bin_range': self.kwds.get('bin_range', None), + 'normed': self.kwds.get('normed', False)} + + invert = self.kwds.get('orientation', False) == 'horizontal' + opts = dict(plot=dict(self._plot_opts, invert_axes=invert), + style=dict(alpha=self.kwds.get('alpha', 1)), + norm=self._norm_opts) + + def hist(data): + ds = Dataset(data) + hist = histogram(ds, dimension=data.columns[-1], **hist_opts) + ranges = {hist.vdims[0].name: self._dim_ranges['y']} + return hist.redim.range(**ranges).relabel(**self._relabel).opts(**opts) + return DynamicMap(hist, streams=[self.stream]) + + def 
kde(self): + invert = self.kwds.get('orientation', False) == 'horizontal' + opts = dict(plot=dict(self._plot_opts, invert_axes=invert), + style=dict(alpha=self.kwds.get('alpha', 1)), + norm=self._norm_opts) + + def distfn(data): + ranges = {data.columns[-1]: self._dim_ranges['y']} + return (Distribution(data, data.columns[-1]) + .redim.range(**ranges).relabel(**self._relabel).opts(**opts)) + + return DynamicMap(distfn, streams=[self.stream]) + + +class HoloViewsSeriesPlot(object): + """Series plotting accessor and methods + Examples + -------- + >>> s.plot.line() + >>> s.plot.bar() + >>> s.plot.hist() + Plotting methods can also be accessed by calling the accessor as a method + with the ``kind`` argument: + ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` + """ + + def __init__(self, data): + self._data = data + + def __call__(self, kind='line', width=800, height=300, backlog=1000, + title=None, grid=False, legend=True, logx=False, logy=False, + loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, hover=False, **kwds): + converter = HoloViewsSeriesConverter( + self._data, kind=kind, width=width, height=height, + backlog=backlog, title=title, grid=grid, legend=legend, + logx=logx, logy=logy, hover=hover, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, **kwds + ) + return converter(kind) + + def line(self, **kwds): + """ + Line plot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. + Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='line', **kwds) + + def scatter(self, **kwds): + """ + Scatter plot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. 
+ Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='scatter', **kwds) + + def bar(self, **kwds): + """ + Vertical bar plot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. + Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='bar', **kwds) + + def barh(self, **kwds): + """ + Horizontal bar plot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. + Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='barh', **kwds) + + def box(self, **kwds): + """ + Boxplot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. + Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='box', **kwds) + + def hist(self, bins=10, **kwds): + """ + Histogram + + Parameters + ---------- + bins: integer, default 10 + Number of histogram bins to be used + bin_range: tuple + Specifies the range within which to compute the bins, + defaults to data range + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. + Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='hist', bins=bins, **kwds) + + def kde(self, **kwds): + """ + Kernel Density Estimate plot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. + Returns + ------- + Element : Element or NdOverlay of Elements + """ + return self(kind='kde', **kwds) + + density = kde + + def area(self, **kwds): + """ + Area plot + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to + :py:meth:`streamz.dataframe.Series.plot`. 
class HoloViewsFramePlot(object):
    """Plotting accessor attached as ``.plot`` to streaming DataFrames.

    The instance can be called directly (``df.plot(kind=...)``) or through
    one of the per-kind helpers (``df.plot.line(...)``). Either way the
    request is delegated to a ``HoloViewsFrameConverter`` built around the
    wrapped data.
    """

    def __init__(self, data):
        # Object this accessor was obtained from; handed to the converter.
        self._data = data

    def __call__(self, x=None, y=None, kind='line', backlog=1000,
                 width=800, height=300, title=None, grid=False,
                 legend=True, logx=False, logy=False, loglog=False,
                 xticks=None, yticks=None, xlim=None, ylim=None, rot=None,
                 fontsize=None, colormap=None, hover=False, **kwds):
        """
        Plot the wrapped data with HoloViews.

        Parameters
        ----------
        x, y : label or position, optional
            Coordinates for each point; passed to the converter call.
        kind : str, default 'line'
            Name of the plot type. The helper methods of this class use
            'line', 'scatter', 'area', 'bar', 'barh', 'box', 'hist',
            'kde' and 'table'.
        backlog, width, height, title, grid, legend, logx, logy, loglog,
        xticks, yticks, xlim, ylim, rot, fontsize, colormap, hover : optional
            Plot options, forwarded unchanged as keyword arguments to
            ``HoloViewsFrameConverter``.
        **kwds : optional
            Any further keyword arguments, also forwarded to the converter.

        Returns
        -------
        The result of calling the converter with ``(kind, x, y)``.
        """
        converter = HoloViewsFrameConverter(
            self._data, width=width, height=height, backlog=backlog,
            title=title, grid=grid, legend=legend, logx=logx,
            logy=logy, loglog=loglog, xticks=xticks, yticks=yticks,
            xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize,
            colormap=colormap, hover=hover, **kwds
        )
        return converter(kind, x, y)

    def line(self, x=None, y=None, **kwds):
        """
        Line plot

        Parameters
        ----------
        x, y : label or position, optional
            Coordinates for each point.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='line'``.
        """
        return self(x, y, kind='line', **kwds)

    def scatter(self, x=None, y=None, **kwds):
        """
        Scatter plot

        Parameters
        ----------
        x, y : label or position, optional
            Coordinates for each point.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='scatter'``.
        """
        return self(x, y, kind='scatter', **kwds)

    def area(self, x=None, y=None, **kwds):
        """
        Area plot

        Parameters
        ----------
        x, y : label or position, optional
            Coordinates for each point.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='area'``.
        """
        return self(x, y, kind='area', **kwds)

    def bar(self, x=None, y=None, **kwds):
        """
        Bars plot

        Parameters
        ----------
        x, y : label or position, optional
            Coordinates for each point.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='bar'``.
        """
        return self(x, y, kind='bar', **kwds)

    def barh(self, x=None, y=None, **kwds):
        """
        Horizontal bar plot

        Parameters
        ----------
        x, y : label or position, optional
            Coordinates for each point. Accepted explicitly so that the
            signature matches ``bar`` and the other x/y chart helpers;
            passing them by keyword behaves exactly as before.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='barh'``.
        """
        return self(x, y, kind='barh', **kwds)

    def box(self, by=None, **kwds):
        """
        Boxplot

        Parameters
        ----------
        by : string or sequence, optional
            Column in the DataFrame to group by.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='box'``.
        """
        return self(kind='box', by=by, **kwds)

    def hist(self, by=None, **kwds):
        """
        Histogram

        Parameters
        ----------
        by : string or sequence, optional
            Column in the DataFrame to group by.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='hist'``.
        """
        return self(kind='hist', by=by, **kwds)

    def kde(self, by=None, **kwds):
        """
        KDE

        Parameters
        ----------
        by : string or sequence, optional
            Column in the DataFrame to group by.
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='kde'``.
        """
        return self(kind='kde', by=by, **kwds)

    def table(self, **kwds):
        """
        Table

        Parameters
        ----------
        **kwds : optional
            Keyword arguments to pass on to
            :py:meth:`streamz.dataframe.DataFrame.plot`.

        Returns
        -------
        Same object as calling the accessor with ``kind='table'``.
        """
        return self(kind='table', **kwds)


# Register plotting interfaces
def df_plot(self):
    """Return a ``HoloViewsFramePlot`` accessor wrapping ``self``."""
    return HoloViewsFramePlot(self)


def series_plot(self):
    """Return a ``HoloViewsSeriesPlot`` accessor wrapping ``self``."""
    return HoloViewsSeriesPlot(self)


# Installed as properties so that ``obj.plot`` builds an accessor, which can
# then be called (``obj.plot(...)``) or used via a helper
# (``obj.plot.line(...)``).
DataFrame.plot = property(df_plot)
DataFrames.plot = property(df_plot)
Series.plot = property(series_plot)
Seriess.plot = property(series_plot)

# Load the bokeh plotting extension for HoloViews when this module is imported.
hv.extension('bokeh')
from holoviews.core import NdOverlay, Dimension
from holoviews.element import Curve, Scatter, Area, Bars
from holoviews.streams import Buffer, Pipe
import pytest
import streamz.dataframe as sd
# Imported only for its side effect: the module installs the ``.plot``
# property on the streaming DataFrame/Series classes. ``# noqa: F401``
# silences the unused-import warning for this line only; the file-level
# ``# flake8: noqa`` directive is not a per-line suppression.
import streamz.dataframe.holoviews  # noqa: F401

# Expected HoloViews element class for each plot ``kind`` exercised below.
# NOTE(review): the accessor's ``bar()`` helper passes kind='bar' while these
# tests request kind='bars' -- confirm the converter accepts both spellings.
ELEMENT_TYPES = {'line': Curve, 'scatter': Scatter, 'area': Area,
                 'bars': Bars, 'barh': Bars}


def test_sdf_stream_setup():
    """Plotting a streaming DataFrame yields a plot driven by a Buffer."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.plot()
    assert isinstance(dmap.streams[0], Buffer)


def test_series_stream_setup():
    """Plotting a streaming Series yields a plot driven by a Buffer."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.x.plot()
    assert isinstance(dmap.streams[0], Buffer)


def test_ssdf_stream_setup():
    """Plotting a groupby-sum DataFrame yields a plot driven by a Pipe."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.groupby('y').sum().plot()
    # Exact type comparison, not isinstance: presumably so that a Pipe
    # subclass (such as Buffer) would not satisfy the check -- confirm.
    assert type(dmap.streams[0]) is Pipe


def test_sseries_stream_setup():
    """Plotting a groupby-sum Series yields a plot driven by a Pipe."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.groupby('y').sum().x.plot()
    # Exact type comparison for the same reason as in the DataFrame case.
    assert type(dmap.streams[0]) is Pipe


@pytest.mark.parametrize('kind', ['line', 'scatter', 'area'])
def test_plot_multi_column_chart_from_sdf(kind):
    """Without x/y, every column is overlaid as its own element."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.plot(kind=kind)
    element = dmap[()]
    assert isinstance(element, NdOverlay)
    assert element.keys() == ['x', 'y', 'z']
    assert isinstance(element.last, ELEMENT_TYPES[kind])
    assert element.last.kdims == [Dimension('index')]


@pytest.mark.parametrize('kind', ['line', 'scatter', 'area', 'bars', 'barh'])
def test_plot_xy_chart_from_sdf(kind):
    """Explicit x and y select a single element with those dimensions."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.plot(x='index', y='y', kind=kind)
    element = dmap[()]
    assert isinstance(element, ELEMENT_TYPES[kind])
    assert element.kdims == [Dimension('index')]
    assert element.vdims == [Dimension('y')]


@pytest.mark.parametrize('kind', ['line', 'scatter', 'area', 'bars', 'barh'])
def test_plot_xy_chart_from_series(kind):
    """A Series plots as one element keyed on the index."""
    source = sd.Random(freq='10ms', interval='100ms')
    dmap = source.y.plot(kind=kind)
    element = dmap[()]
    assert isinstance(element, ELEMENT_TYPES[kind])
    assert element.kdims == [Dimension('index')]
    assert element.vdims == [Dimension('y')]