Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 9e83747

Browse files
committed Aug 2, 2017
aggregate transform
1 parent 95aa244 commit 9e83747

File tree

6 files changed

+571
-2
lines changed

6 files changed

+571
-2
lines changed
 

‎lib/aggregate.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/**
2+
* Copyright 2012-2017, Plotly, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the MIT license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
'use strict';
10+
11+
module.exports = require('../src/transforms/aggregate');

‎lib/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Plotly.register([
5656
// https://github.com/plotly/plotly.js/pull/978#pullrequestreview-2403353
5757
//
5858
Plotly.register([
59+
require('./aggregate'),
5960
require('./filter'),
6061
require('./groupby'),
6162
require('./sort')

‎src/transforms/aggregate.js

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/**
2+
* Copyright 2012-2017, Plotly, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the MIT license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
'use strict';
10+
11+
var Axes = require('../plots/cartesian/axes');
12+
var Lib = require('../lib');
13+
var PlotSchema = require('../plot_api/plot_schema');
14+
var BADNUM = require('../constants/numerical').BADNUM;
15+
16+
// Registration metadata: marks this module as a transform and sets the name
// that a `transforms[i].type` value must match to select it.
exports.moduleType = 'transform';

exports.name = 'aggregate';
19+
20+
// Attribute schema for the aggregate transform. supplyDefaults coerces the
// user's transform object against these definitions.
var attrs = exports.attributes = {
    enabled: {
        valType: 'boolean',
        dflt: true,
        description: [
            'Determines whether this aggregate transform is enabled or disabled.'
        ].join(' ')
    },
    groups: {
        // TODO: groupby should support string or array grouping this way too
        // currently groupby only allows a grouping array
        valType: 'string',
        strict: true,
        noBlank: true,
        arrayOk: true,
        dflt: 'x',
        description: [
            'Sets the grouping target to which the aggregation is applied.',
            'Data points with matching group values will be coalesced into',
            'one point, using the supplied aggregation functions to reduce data',
            'in other data arrays.',
            'If a string, *groups* is assumed to be a reference to a data array',
            'in the parent trace object.',
            'To aggregate by nested variables, use *.* to access them.',
            'For example, set `groups` to *marker.color* to aggregate',
            'about the marker color array.',
            'If an array, *groups* is itself the data array by which we aggregate.'
        ].join(' ')
    },
    aggregations: {
        _isLinkedToArray: 'style',
        array: {
            valType: 'string',
            role: 'info',
            // the example below names `array` (this attribute), not `groups`
            description: [
                'A reference to the data array in the parent trace to aggregate.',
                'To aggregate by nested variables, use *.* to access them.',
                'For example, set `array` to *marker.color* to aggregate',
                'about the marker color array.',
                'The referenced array must already exist, unless `func` is *count*,',
                'and each array may only be referenced once.'
            ].join(' ')
        },
        func: {
            valType: 'enumerated',
            values: ['count', 'sum', 'avg', 'min', 'max', 'first', 'last'],
            dflt: 'first',
            role: 'info',
            description: [
                'Sets the aggregation function.',
                'All values from the linked `array`, corresponding to the same value',
                'in the `groups` array, are collected and reduced by this function.',
                '*count* is simply the number of values in the `groups` array, so does',
                'not even require the linked array to exist. *first* (*last*) is just',
                'the first (last) linked value.'
            ].join(' ')
        }
    }
};
79+
80+
/**
81+
* Supply transform attributes defaults
82+
*
83+
* @param {object} transformIn
84+
* object linked to trace.transforms[i] with 'func' set to exports.name
85+
* @param {object} traceOut
86+
* the _fullData trace this transform applies to
87+
* @param {object} layout
88+
* the plot's (not-so-full) layout
89+
* @param {object} traceIn
90+
* the input data trace this transform applies to
91+
*
92+
* @return {object} transformOut
93+
* copy of transformIn that contains attribute defaults
94+
*/
95+
exports.supplyDefaults = function(transformIn, traceOut) {
96+
var transformOut = {};
97+
var i;
98+
99+
function coerce(attr, dflt) {
100+
return Lib.coerce(transformIn, transformOut, attrs, attr, dflt);
101+
}
102+
103+
var enabled = coerce('enabled');
104+
105+
if(!enabled) return transformOut;
106+
107+
/*
108+
* Normally _arrayAttrs is calculated during doCalc, but that comes later.
109+
* Anyway this can change due to *count* aggregations (see below) so it's not
110+
* necessarily the same set.
111+
*
112+
* For performance we turn it into an object of truthy values
113+
* we'll use 1 for arrays we haven't aggregated yet, 0 for finished arrays,
114+
* as distinct from undefined which means this array isn't present in the input
115+
* missing arrays can still be aggregate outputs for *count* aggregations.
116+
*/
117+
var arrayAttrArray = PlotSchema.findArrayAttributes(traceOut);
118+
var arrayAttrs = {};
119+
for(i = 0; i < arrayAttrArray.length; i++) arrayAttrs[arrayAttrArray[i]] = 1;
120+
121+
var groups = coerce('groups');
122+
123+
if(!Array.isArray(groups)) {
124+
if(!arrayAttrs[groups]) {
125+
transformOut.enabled = false;
126+
return;
127+
}
128+
arrayAttrs[groups] = 0;
129+
}
130+
131+
var aggregationsIn = transformIn.aggregations;
132+
var aggregationsOut = transformOut.aggregations = [];
133+
134+
if(aggregationsIn) {
135+
for(i = 0; i < aggregationsIn.length; i++) {
136+
var aggregationOut = {};
137+
var array = Lib.coerce(aggregationsIn[i], aggregationOut, attrs.aggregations, 'array');
138+
var func = Lib.coerce(aggregationsIn[i], aggregationOut, attrs.aggregations, 'func');
139+
140+
// add this aggregation to the output only if it's the first instance
141+
// of a valid array attribute - or an unused array attribute with "count"
142+
if(array && (arrayAttrs[array] || (func === 'count' && arrayAttrs[array] === undefined))) {
143+
arrayAttrs[array] = 0;
144+
aggregationsOut.push(aggregationOut);
145+
}
146+
}
147+
}
148+
149+
// any array attributes we haven't yet covered, fill them with the default aggregation
150+
for(i = 0; i < arrayAttrArray.length; i++) {
151+
if(arrayAttrs[arrayAttrArray[i]]) {
152+
aggregationsOut.push({
153+
array: arrayAttrArray[i],
154+
func: attrs.aggregations.func.dflt
155+
});
156+
}
157+
}
158+
159+
return transformOut;
160+
};
161+
162+
163+
exports.calcTransform = function(gd, trace, opts) {
164+
if(!opts.enabled) return;
165+
166+
var groups = opts.groups;
167+
168+
var groupArray = Lib.getTargetArray(trace, {target: groups});
169+
if(!groupArray) return;
170+
171+
var i, vi, groupIndex;
172+
173+
var groupIndices = {};
174+
var groupings = [];
175+
for(i = 0; i < groupArray.length; i++) {
176+
vi = groupArray[i];
177+
groupIndex = groupIndices[vi];
178+
if(groupIndex === undefined) {
179+
groupIndices[vi] = groupings.length;
180+
groupings.push([i]);
181+
}
182+
else groupings[groupIndex].push(i);
183+
}
184+
185+
var aggregations = opts.aggregations;
186+
187+
for(i = 0; i < aggregations.length; i++) {
188+
aggregateOneArray(gd, trace, groupings, aggregations[i]);
189+
}
190+
191+
if(typeof groups === 'string') {
192+
aggregateOneArray(gd, trace, groupings, {array: groups, func: 'first'});
193+
}
194+
};
195+
196+
/**
 * Reduce one data array of a trace to one value per group and write the
 * result back to the same attribute.
 *
 * @param {object} gd
 *  the graph div, used to look up data conversions for the attribute
 * @param {object} trace
 *  the trace holding the array (modified in place)
 * @param {Array<Array<number>>} groupings
 *  for each group, the indices of the points that belong to it
 * @param {object} aggregation
 *  {array, func}: attribute string of the array to reduce and the name of
 *  the aggregation function to apply
 */
function aggregateOneArray(gd, trace, groupings, aggregation) {
    var attr = aggregation.array;
    var targetNP = Lib.nestedProperty(trace, attr);
    // may be undefined when the array does not exist yet (allowed for *count*,
    // which never reads it)
    var arrayIn = targetNP.get();
    var conversions = Axes.getDataConversions(gd, trace, attr, arrayIn);
    var func = getAggregateFunction(aggregation.func, conversions);

    var arrayOut = new Array(groupings.length);
    for(var i = 0; i < groupings.length; i++) {
        arrayOut[i] = func(arrayIn, groupings[i]);
    }
    targetNP.set(arrayOut);
}
209+
210+
/**
 * Build the reducer for one aggregation function name.
 *
 * @param {string} func
 *  one of 'count', 'sum', 'avg', 'min', 'max', 'first', 'last'
 * @param {object} conversions
 *  {d2c, c2d}: converters applied to each input value before the arithmetic
 *  (d2c) and to the numeric result afterwards (c2d)
 *
 * @return {function|undefined}
 *  function(array, indices) returning the aggregate of the entries of `array`
 *  at `indices`; undefined if `func` is not a recognized name
 */
function getAggregateFunction(func, conversions) {
    var d2c = conversions.d2c;
    var c2d = conversions.c2d;

    // Convert the selected entries with d2c and keep only usable results
    // (anything but BADNUM), coerced to numbers, in their original order.
    function validValues(array, indices) {
        var vals = [];
        for(var i = 0; i < indices.length; i++) {
            var vi = d2c(array[indices[i]]);
            if(vi !== BADNUM) vals.push(+vi);
        }
        return vals;
    }

    // Left-to-right sum; 0 for an empty list.
    function sumOf(vals) {
        var total = 0;
        for(var i = 0; i < vals.length; i++) total += vals[i];
        return total;
    }

    switch(func) {
        // count, first, and last don't depend on anything about the data
        // point back to pure functions for performance
        case 'count':
            return count;
        case 'first':
            return first;
        case 'last':
            return last;

        case 'sum':
            // This will produce output in all cases even though it's nonsensical
            // for date or category data.
            return function(array, indices) {
                return c2d(sumOf(validValues(array, indices)));
            };

        case 'avg':
            // Generally meaningless for category data but it still does something.
            return function(array, indices) {
                var vals = validValues(array, indices);
                return vals.length ? c2d(sumOf(vals) / vals.length) : BADNUM;
            };

        case 'min':
            return function(array, indices) {
                var vals = validValues(array, indices);
                var out = Infinity;
                for(var i = 0; i < vals.length; i++) out = Math.min(out, vals[i]);
                // still at the seed value: no usable data in this group
                return (out === Infinity) ? BADNUM : c2d(out);
            };

        case 'max':
            return function(array, indices) {
                var vals = validValues(array, indices);
                var out = -Infinity;
                for(var i = 0; i < vals.length; i++) out = Math.max(out, vals[i]);
                // still at the seed value: no usable data in this group
                return (out === -Infinity) ? BADNUM : c2d(out);
            };
    }
}
272+
273+
/**
 * *count* aggregation: the number of points in the group.
 *
 * @param {Array} array
 *  the data array (ignored, so it may be missing)
 * @param {Array<number>} indices
 *  indices of the points in this group
 * @return {number} the size of the group
 */
function count(array, indices) {
    return indices.length;
}
276+
277+
/**
 * *first* aggregation: the value of the group's first point, unconverted.
 *
 * @param {Array} array
 *  the data array
 * @param {Array<number>} indices
 *  indices of the points in this group
 * @return {*} array entry at the first index (undefined for an empty group)
 */
function first(array, indices) {
    return array[indices[0]];
}
280+
281+
/**
 * *last* aggregation: the value of the group's last point, unconverted.
 *
 * @param {Array} array
 *  the data array
 * @param {Array<number>} indices
 *  indices of the points in this group
 * @return {*} array entry at the last index (undefined for an empty group)
 */
function last(array, indices) {
    return array[indices[indices.length - 1]];
}

‎src/transforms/groupby.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@ exports.attributes = {
6363
*
6464
* @param {object} transformIn
6565
* object linked to trace.transforms[i] with 'type' set to exports.name
66-
* @param {object} fullData
67-
* the plot's full data
66+
* @param {object} traceOut
67+
* the _fullData trace this transform applies to
6868
* @param {object} layout
6969
* the plot's (not-so-full) layout
70+
* @param {object} traceIn
71+
* the input data trace this transform applies to
7072
*
7173
* @return {object} transformOut
7274
* copy of transformIn that contains attribute defaults
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
var Plotly = require('@lib/index');
2+
3+
var createGraphDiv = require('../assets/create_graph_div');
4+
var destroyGraphDiv = require('../assets/destroy_graph_div');
5+
var customMatchers = require('../assets/custom_matchers');
6+
7+
// Test suite for the aggregate transform: each case plots one trace with an
// aggregate transform and checks the aggregated arrays in gd._fullData.
describe('aggregate', function() {
    var gd;

    beforeAll(function() { jasmine.addMatchers(customMatchers); });

    beforeEach(function() { gd = createGraphDiv(); });

    afterEach(destroyGraphDiv);

    it('handles all funcs for numeric data', function() {
        // throw in some non-numbers, they should get discarded except first/last
        Plotly.newPlot(gd, [{
            x: [1, 2, 3, 4, 'fail'],
            y: [1.1, 2.2, 3.3, 'nope', 5.5],
            marker: {
                size: ['2001-01-01', 0.2, 0.1, 0.4, 0.5],
                color: [2, 4, '', 10, 8],
                opacity: [0.6, 'boo', 0.2, 0.8, 1.0],
                line: {
                    color: [2.2, 3.3, 4.4, 5.5, 'the end']
                }
            },
            transforms: [{
                type: 'aggregate',
                groups: ['a', 'b', 'a', 'a', 'a'],
                aggregations: [
                    // missing array - the entry is ignored
                    {array: '', func: 'avg'},
                    {array: 'x', func: 'sum'},
                    // non-numerics will not count toward numerator or denominator for avg
                    {array: 'y', func: 'avg'},
                    {array: 'marker.size', func: 'min'},
                    {array: 'marker.color', func: 'max'},
                    // marker.opacity doesn't have an entry, but it will default to first
                    // as if it were {array: 'marker.opacity', func: 'first'},
                    {array: 'marker.line.color', func: 'last'},
                    // not present in data, but that's OK for count
                    {array: 'marker.line.width', func: 'count'},
                    // duplicate entry - discarded
                    {array: 'x', func: 'min'}
                ]
            }]
        }], {
            // log axis doesn't change how sum (or avg but not tested) works
            xaxis: {type: 'log'}
        });

        var traceOut = gd._fullData[0];

        expect(traceOut.x).toEqual([8, 2]);
        expect(traceOut.y).toBeCloseToArray([3.3, 2.2], 5);
        expect(traceOut.marker.size).toEqual([0.1, 0.2]);
        expect(traceOut.marker.color).toEqual([10, 4]);
        expect(traceOut.marker.opacity).toEqual([0.6, 'boo']);
        expect(traceOut.marker.line.color).toEqual(['the end', 3.3]);
        expect(traceOut.marker.line.width).toEqual([4, 1]);
    });

    it('handles all funcs except sum for date data', function() {
        // weird cases handled in another test
        Plotly.newPlot(gd, [{
            x: ['2001-01-01', '', '2001-01-03', '2001-01-05', '2001-01-07'],
            y: ['1995-01-15', '2005-03-15', '1990-12-23', '2001-01-01', 'not a date'],
            text: ['2001-01-01 12:34', '2001-01-01 12:35', '2001-01-01 12:36', '2001-01-01 12:37', ''],
            hovertext: ['a', '2001-01-02', '2001-01-03', '2001-01-04', '2001-01-05'],
            customdata: ['2001-01', 'b', '2001-03', '2001-04', '2001-05'],
            transforms: [{
                type: 'aggregate',
                // groups can be any type, but until we implement binning they
                // will always compare as strings = so 1 === '1' === 1.0 !== '1.0'
                groups: [1, 2, '1', 1.0, 1],
                aggregations: [
                    {array: 'x', func: 'avg'},
                    {array: 'y', func: 'min'},
                    {array: 'text', func: 'max'},
                    // hovertext doesn't have a func, default to first
                    {array: 'hovertext'},
                    {array: 'customdata', func: 'last'},
                    // not present in data, but that's OK for count
                    {array: 'marker.line.width', func: 'count'},
                    // duplicate entry - discarded
                    {array: 'x', func: 'min'}
                ]
            }]
        }]);

        var traceOut = gd._fullData[0];

        expect(traceOut.x).toEqual(['2001-01-04', undefined]);
        expect(traceOut.y).toEqual(['1990-12-23', '2005-03-15']);
        expect(traceOut.text).toEqual(['2001-01-01 12:37', '2001-01-01 12:35']);
        expect(traceOut.hovertext).toEqual(['a', '2001-01-02']);
        expect(traceOut.customdata).toEqual(['2001-05', 'b']);
        expect(traceOut.marker.line.width).toEqual([4, 1]);
    });

    it('handles all funcs except sum and avg for category data', function() {
        // weird cases handled in another test
        Plotly.newPlot(gd, [{
            x: ['a', 'b', 'c', 'aa', 'd'],
            y: ['q', 'w', 'e', 'r', 't'],
            text: ['b', 'b', 'a', 'b', 'a'],
            hovertext: ['c', 'b', 'a', 'b', 'a'],
            transforms: [{
                type: 'aggregate',
                groups: [1, 2, 1, 1, 1],
                aggregations: [
                    {array: 'x', func: 'min'},
                    {array: 'y', func: 'max'},
                    {array: 'text', func: 'last'},
                    // hovertext doesn't have an entry, but it will default to first

                    // not present in data, but that's OK for count
                    {array: 'marker.line.width', func: 'count'},
                    // duplicate entry - discarded
                    {array: 'x', func: 'max'}
                ]
            }]
        }], {
            xaxis: {categoryarray: ['aaa', 'aa', 'a', 'b', 'c']}
        });

        var traceOut = gd._fullData[0];

        // explicit order (only possible for axis data)
        expect(traceOut.x).toEqual(['aa', 'b']);
        // implied order from data
        expect(traceOut.y).toEqual(['t', 'w']);
        expect(traceOut.text).toEqual(['a', 'b']);
        expect(traceOut.hovertext).toEqual(['c', 'b']);
        expect(traceOut.marker.line.width).toEqual([4, 1]);
    });

    it('allows date and category sums, and category avg, with weird output', function() {
        // this test is more of an FYI than anything else - it doesn't break but
        // these results are usually meaningless.

        Plotly.newPlot(gd, [{
            x: ['2001-01-01', '2001-01-02', '2001-01-03', '2001-01-04'],
            y: ['a', 'b', 'b', 'c'],
            text: ['a', 'b', 'a', 'c'],
            transforms: [{
                type: 'aggregate',
                groups: [1, 1, 2, 2],
                aggregations: [
                    {array: 'x', func: 'sum'},
                    {array: 'y', func: 'sum'},
                    {array: 'text', func: 'avg'}
                ]
            }]
        }]);

        var traceOut = gd._fullData[0];

        // date sums: 1970-01-01 is "zero", there are shifts due to # of leap years
        // without that shift these would be 2032-01-02 and 2032-01-06
        expect(traceOut.x).toEqual(['2032-01-03', '2032-01-07']);
        // category sums: can go off the end of the category array -> gives undefined
        expect(traceOut.y).toEqual(['b', undefined]);
        // category average: can result in fractional categories -> rounds (0.5 rounds to 1)
        expect(traceOut.text).toEqual(['b', 'b']);
    });

    it('can aggregate on an existing data array', function() {
        Plotly.newPlot(gd, [{
            x: [1, 2, 3, 4, 5],
            y: [2, 4, 6, 8, 10],
            marker: {size: [10, 10, 20, 20, 10]},
            transforms: [{
                type: 'aggregate',
                groups: 'marker.size',
                aggregations: [
                    {array: 'x', func: 'sum'},
                    {array: 'y', func: 'avg'}
                ]
            }]
        }]);

        var traceOut = gd._fullData[0];

        expect(traceOut.x).toEqual([8, 7]);
        expect(traceOut.y).toBeCloseToArray([16 / 3, 7], 5);
        // the groups array itself is collapsed to one entry per group
        expect(traceOut.marker.size).toEqual([10, 20]);
    });
});

‎test/jasmine/tests/transform_multi_test.js

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,88 @@ describe('multiple transforms:', function() {
439439
});
440440
});
441441

442+
it('executes filter and aggregate in the order given', function() {
    // filter and aggregate do not commute!

    // trace1: aggregate first, then filter the aggregated x values
    var trace1 = {
        x: [0, -5, 7, 4, 5],
        y: [2, 4, 6, 8, 10],
        transforms: [{
            type: 'aggregate',
            groups: [1, 2, 2, 1, 1],
            aggregations: [
                {array: 'x', func: 'sum'},
                {array: 'y', func: 'avg'}
            ]
        }, {
            type: 'filter',
            target: 'x',
            operation: '<',
            value: 5
        }]
    };

    // trace2: same data with the transform order reversed (filter, then aggregate)
    var trace2 = Lib.extendDeep({}, trace1);
    trace2.transforms.reverse();

    Plotly.newPlot(gd, [trace1, trace2]);

    var trace1Out = gd._fullData[0];
    expect(trace1Out.x).toEqual([2]);
    expect(trace1Out.y).toEqual([5]);

    var trace2Out = gd._fullData[1];
    expect(trace2Out.x).toEqual([4, -5]);
    expect(trace2Out.y).toEqual([5, 4]);
});
476+
477+
it('always executes groupby before aggregate', function() {
    // aggregate and groupby wouldn't commute, but groupby always happens first
    // because it has a `transform`, and aggregate has a `calcTransform`

    var trace1 = {
        x: [1, 2, 3, 4, 5],
        y: [2, 4, 6, 8, 10],
        transforms: [{
            type: 'groupby',
            groups: [1, 1, 2, 2, 2]
        }, {
            type: 'aggregate',
            groups: [1, 2, 2, 1, 1],
            aggregations: [
                {array: 'x', func: 'sum'},
                {array: 'y', func: 'avg'}
            ]
        }]
    };

    // same trace with the transforms listed in the opposite order
    var trace2 = Lib.extendDeep({}, trace1);
    trace2.transforms.reverse();

    Plotly.newPlot(gd, [trace1, trace2]);

    var t1g1 = gd._fullData[0];
    var t1g2 = gd._fullData[1];
    var t2g1 = gd._fullData[2];
    var t2g2 = gd._fullData[3];

    expect(t1g1.x).toEqual([1, 2]);
    expect(t1g1.y).toEqual([2, 4]);
    // group 2 has its aggregations switched, since group 2 comes first
    expect(t1g2.x).toEqual([3, 9]);
    expect(t1g2.y).toEqual([6, 9]);

    // if we had done aggregation first, we'd implicitly get the first val
    // for each of the groupby groups, which is [1, 1]
    // so we'd only make 1 output trace, and it would look like:
    //  {x: [10, 5], y: [20/3, 5]}
    // (and if we got some other groupby groups values, the most it could do
    // is break ^^ into two separate traces)
    expect(t2g1.x).toEqual(t1g1.x);
    expect(t2g1.y).toEqual(t1g1.y);
    expect(t2g2.x).toEqual(t1g2.x);
    expect(t2g2.y).toEqual(t1g2.y);
});
442524
});
443525

444526
describe('invalid transforms', function() {

0 commit comments

Comments
 (0)
Please sign in to comment.