-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
feat(discover2) Add more aggregate field support #14321
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c5489f5
bf09008
89be14f
f52a75d
d4dc8a8
c1fdee0
a897c35
89a7ee8
ef18130
0fb0235
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,6 @@ | |
from sentry import features | ||
from sentry.models.project import Project | ||
|
||
ALLOWED_GROUPINGS = frozenset(('issue.id', 'project.id', 'transaction')) | ||
logger = logging.getLogger(__name__) | ||
|
||
|
||
|
@@ -31,16 +30,8 @@ def get(self, request, organization): | |
try: | ||
params = self.get_filter_params(request, organization) | ||
snuba_args = self.get_snuba_query_args(request, organization, params) | ||
fields = snuba_args.get('selected_columns') | ||
groupby = snuba_args.get('groupby', []) | ||
|
||
if not fields and not groupby: | ||
return Response({'detail': 'No fields or groupings provided'}, status=400) | ||
|
||
if any(field for field in groupby if field not in ALLOWED_GROUPINGS): | ||
message = ('Invalid groupby value requested. Allowed values are ' + | ||
', '.join(ALLOWED_GROUPINGS)) | ||
return Response({'detail': message}, status=400) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was removed because the endpoint no longer allows the end user to define groupby. Instead groupings are implicit based on the fields requested. |
||
if not snuba_args.get('selected_columns') and not snuba_args.get('aggregations'): | ||
return Response({'detail': 'No fields provided'}, status=400) | ||
|
||
except OrganizationEventsError as exc: | ||
return Response({'detail': exc.message}, status=400) | ||
|
@@ -130,16 +121,22 @@ def get_legacy(self, request, organization): | |
) | ||
|
||
def handle_results(self, request, organization, project_ids, results): | ||
if not results: | ||
return results | ||
|
||
first_row = results[0] | ||
if not ('project.id' in first_row or 'projectid' in first_row): | ||
return results | ||
|
||
fields = request.GET.getlist('field') | ||
projects = {p['id']: p['slug'] for p in Project.objects.filter( | ||
organization=organization, | ||
id__in=project_ids).values('id', 'slug')} | ||
|
||
fields = request.GET.getlist('field') | ||
|
||
if 'project.name' in fields: | ||
for result in results: | ||
result['project.name'] = projects[result['project.id']] | ||
if 'project.id' not in fields: | ||
del result['project.id'] | ||
for result in results: | ||
for key in ('projectid', 'project.id'): | ||
if key in result: | ||
result['project.name'] = projects[result[key]] | ||
if key not in fields: | ||
del result[key] | ||
|
||
return results |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
import re | ||
from collections import namedtuple, defaultdict | ||
from copy import deepcopy | ||
from datetime import datetime | ||
|
||
import six | ||
|
@@ -142,9 +143,8 @@ def translate(pat): | |
'first_seen': 'first_seen', | ||
'last_seen': 'last_seen', | ||
'times_seen': 'times_seen', | ||
# OrganizationEvents aggregations | ||
'event_count': 'event_count', | ||
'user_count': 'user_count', | ||
# TODO(mark) figure out how to safelist aggregate functions/field aliases | ||
# so they can be used in conditions | ||
}, **SENTRY_SNUBA_MAP) | ||
no_conversion = set(['project_id', 'start', 'end']) | ||
|
||
|
@@ -215,9 +215,8 @@ class SearchVisitor(NodeVisitor): | |
'device.battery_level', 'device.charging', 'device.online', | ||
'device.simulator', 'error.handled', 'issue.id', 'stack.colno', | ||
'stack.in_app', 'stack.lineno', 'stack.stack_level', | ||
# OrganizationEvents aggregations | ||
'event_count', 'user_count', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing these as they aren't a thing anymore. |
||
|
||
# TODO(mark) figure out how to safelist aggregate functions/field aliases | ||
# so they can be used in conditions | ||
]) | ||
date_keys = set([ | ||
'start', 'end', 'first_seen', 'last_seen', 'time', 'timestamp', | ||
|
@@ -662,3 +661,158 @@ def get_snuba_query_args(query=None, params=None): | |
kwargs['has_boolean_terms'] = True | ||
kwargs['conditions'].append(convert_search_boolean_to_snuba_query(term)) | ||
return kwargs | ||
|
||
|
||
FIELD_ALIASES = { | ||
'issue_title': { | ||
'aggregations': [['anyHeavy', 'title', 'issue_title']], | ||
}, | ||
'last_seen': { | ||
'aggregations': [['max', 'timestamp', 'last_seen']], | ||
}, | ||
'latest_event': { | ||
'aggregations': [ | ||
# TODO(mark) This is a hack to work around jsonschema limitations | ||
# in snuba. | ||
['argMax(event_id, timestamp)', '', 'latest_event'], | ||
], | ||
}, | ||
'project': { | ||
'fields': ['project.id'], | ||
}, | ||
'user': { | ||
'fields': ['user.id', 'user.name', 'user.username', 'user.email', 'user.ip'], | ||
} | ||
# TODO(mark) Add rpm alias. | ||
} | ||
|
||
VALID_AGGREGATES = { | ||
'count_unique': { | ||
'snuba_name': 'uniq', | ||
'fields': '*', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can imagine also wanting to do all fields excluding some small list There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lynnagara im wondering, is there an aggregate function where we would want to exclude certain columns? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally would prefer to use safe-lists rather than ban-lists for the acceptable aggregate columns. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The situation where I was thinking we might need a ban list is where we want to support the aggregate on custom user-defined tag columns (can't use a safe list) but might need to exclude some internal stuff |
||
}, | ||
'count': { | ||
'snuba_name': 'count', | ||
'fields': '*' | ||
}, | ||
'avg': { | ||
'snuba_name': 'avg', | ||
'fields': ['duration'], | ||
}, | ||
'min': { | ||
'snuba_name': 'min', | ||
'fields': ['timestamp', 'duration'], | ||
}, | ||
'max': { | ||
'snuba_name': 'max', | ||
'fields': ['timestamp', 'duration'], | ||
}, | ||
'sum': { | ||
'snuba_name': 'sum', | ||
'fields': ['duration'], | ||
}, | ||
# This doesn't work yet, but is an illustration of how it could work | ||
'p75': { | ||
'snuba_name': 'quantileTiming(0.75)', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we wanted to stop passing function calls like this explicitly, and try something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm fine with an array form as well. The quantile functions are a bit funny as they need to end up in clickhouse like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We might need snuba to translate that for us, not sure. I feel like we had something similar to this before, can't remember exactly what it was though. Could potentially do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah the implementation of the quantile functions is still imcomplete, but we can figure that out once storage for transactions is sorted. |
||
'fields': ['duration'], | ||
}, | ||
} | ||
|
||
AGGREGATE_PATTERN = re.compile(r'^(?P<function>[^\(]+)\((?P<column>[a-z\._]*)\)$') | ||
|
||
|
||
def validate_aggregate(field, match): | ||
function_name = match.group('function') | ||
if function_name not in VALID_AGGREGATES: | ||
raise InvalidSearchQuery("Unknown aggregate function '%s'" % field) | ||
|
||
function_data = VALID_AGGREGATES[function_name] | ||
column = match.group('column') | ||
if column not in function_data['fields'] and function_data['fields'] != '*': | ||
raise InvalidSearchQuery( | ||
"Invalid column '%s' in aggregate function '%s'" % (column, function_name)) | ||
|
||
|
||
def validate_orderby(orderby, fields): | ||
orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby] | ||
for column in orderby: | ||
column = column.lstrip('-') | ||
if column not in fields: | ||
raise InvalidSearchQuery('Cannot order by an field that is not selected.') | ||
|
||
|
||
def resolve_field_list(fields, snuba_args): | ||
""" | ||
Expand a list of fields based on aliases and aggregate functions. | ||
|
||
Returns a dist of aggregations, selected_columns, and | ||
groupby that can be merged into the result of get_snuba_query_args() | ||
to build a more complete snuba query based on event search conventions. | ||
""" | ||
# If project.name is requested, get the project.id from Snuba so we | ||
# can use this to look up the name in Sentry | ||
if 'project.name' in fields: | ||
fields.remove('project.name') | ||
if 'project.id' not in fields: | ||
fields.append('project.id') | ||
|
||
aggregations = [] | ||
groupby = [] | ||
columns = [] | ||
for field in fields: | ||
if not isinstance(field, six.string_types): | ||
raise InvalidSearchQuery('Field names must be strings') | ||
|
||
if field in FIELD_ALIASES: | ||
special_field = deepcopy(FIELD_ALIASES[field]) | ||
columns.extend(special_field.get('fields', [])) | ||
aggregations.extend(special_field.get('aggregations', [])) | ||
continue | ||
|
||
# Basic fields don't require additional validation. They could be tag | ||
# names which we have no way of validating at this point. | ||
match = AGGREGATE_PATTERN.search(field) | ||
if not match: | ||
columns.append(field) | ||
continue | ||
|
||
validate_aggregate(field, match) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It could be nice if all the validations were done in one place up front, but i'm guessing this is not as easy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function ( |
||
aggregations.append([ | ||
VALID_AGGREGATES[match.group('function')]['snuba_name'], | ||
match.group('column'), | ||
u'{}_{}'.format(match.group('function'), match.group('column')).rstrip('_') | ||
]) | ||
|
||
rollup = snuba_args.get('rollup') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What will rollup be used for? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rollup gets used by timeseries endpoints, which I'm planning to update soon. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. |
||
if not rollup: | ||
# Ensure fields we require to build a functioning interface | ||
# are present. We don't add fields when using a rollup as the additional fields | ||
# would be aggregated away. When there are aggregations | ||
# we use argMax to get the latest event/projectid so we can create links. | ||
# The `projectid` output name is not a typo, using `project_id` triggers | ||
# generates invalid queries. | ||
if not aggregations and 'id' not in columns: | ||
columns.append('id') | ||
columns.append('project.id') | ||
if aggregations and 'latest_event' not in fields: | ||
aggregations.extend(deepcopy(FIELD_ALIASES['latest_event']['aggregations'])) | ||
if aggregations and 'project.id' not in columns: | ||
aggregations.append(['argMax(project_id, timestamp)', '', 'projectid']) | ||
|
||
if rollup and columns and not aggregations: | ||
raise InvalidSearchQuery('You cannot use rollup without an aggregate field.') | ||
|
||
orderby = snuba_args.get('orderby') | ||
if orderby: | ||
validate_orderby(orderby, fields) | ||
|
||
# If aggregations are present all columns | ||
# need to be added to the group by so that the query is valid. | ||
if aggregations: | ||
groupby.extend(columns) | ||
|
||
return { | ||
'selected_columns': columns, | ||
'aggregations': aggregations, | ||
'groupby': groupby, | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was removed as we won't be supporting filtering on aggregate columns in the short term, and with fully dynamic aggregate columns we would need expand the query parser to accomodate aggregate filtering.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we go ahead and remove the function completely?sorry, just saw you did this