getsentry · markstory · Aug 13, 2019 · Aug 8, 2019 · Aug 8, 2019 · Aug 8, 2019
@@ -1,38 +1,19 @@
 from __future__ import absolute_import
 
-from copy import deepcopy
-from rest_framework.exceptions import PermissionDenied
 import six
+from rest_framework.exceptions import PermissionDenied
 from enum import Enum
 
 from sentry import features
 from sentry.api.bases import OrganizationEndpoint, OrganizationEventsError
-from sentry.api.event_search import get_snuba_query_args, InvalidSearchQuery
+from sentry.api.event_search import (
+    get_snuba_query_args,
+    resolve_field_list,
+    InvalidSearchQuery
+)
 from sentry.models.project import Project
 from sentry.utils import snuba
 
-# We support 4 "special fields" on the v2 events API which perform some
-# additional calculations over aggregated event data
-SPECIAL_FIELDS = {
-    'issue_title': {
-        'aggregations': [['anyHeavy', 'title', 'issue_title']],
-    },
-    'last_seen': {
-        'aggregations': [['max', 'timestamp', 'last_seen']],
-    },
-    'event_count': {
-        'aggregations': [['uniq', 'id', 'event_count']],
-    },
-    'user_count': {
-        'aggregations': [['uniq', 'user', 'user_count']],
-    },
-    'latest_event': {
-        'fields': [
-            ['argMax', ['id', 'timestamp'], 'latest_event'],
-        ],
-    },
-}
-
 
 class Direction(Enum):
     NEXT = 0
@@ -67,47 +48,29 @@ def get_snuba_query_args(self, request, organization, params):
         except InvalidSearchQuery as exc:
             raise OrganizationEventsError(exc.message)
 
-        fields = request.GET.getlist('field')[:]
-        aggregations = []
-        groupby = request.GET.getlist('groupby')
-        special_fields = set()
-
-        if fields:
-            # If project.name is requested, get the project.id from Snuba so we
-            # can use this to look up the name in Sentry
-            if 'project.name' in fields:
-                fields.remove('project.name')
-                if 'project.id' not in fields:
-                    fields.append('project.id')
-
-            for field in fields[:]:
-                if field in SPECIAL_FIELDS:
-                    special_fields.add(field)
-                    special_field = deepcopy(SPECIAL_FIELDS[field])
-                    fields.remove(field)
-                    fields.extend(special_field.get('fields', []))
-                    aggregations.extend(special_field.get('aggregations', []))
-                    groupby.extend(special_field.get('groupby', []))
-
-            snuba_args['selected_columns'] = fields
-
-        self._filter_unspecified_special_fields_in_conditions(snuba_args, special_fields)
-        if aggregations:
-            snuba_args['aggregations'] = aggregations
-
-        if groupby:
-            snuba_args['groupby'] = groupby
-
         sort = request.GET.getlist('sort')
-        if sort and snuba.valid_orderby(sort, SPECIAL_FIELDS):
+        if sort:
             snuba_args['orderby'] = sort
 
         # Deprecated. `sort` should be used as it is supported by
         # more endpoints.
         orderby = request.GET.getlist('orderby')
-        if orderby and snuba.valid_orderby(orderby, SPECIAL_FIELDS) and 'orderby' not in snuba_args:
+        if orderby and 'orderby' not in snuba_args:
             snuba_args['orderby'] = orderby
 
+        if request.GET.get('rollup'):
+            try:
+                snuba_args['rollup'] = int(request.GET.get('rollup'))
+            except ValueError:
+                raise OrganizationEventsError('rollup must be an integer.')
+
+        fields = request.GET.getlist('field')[:]
+        if fields:
+            try:
+                snuba_args.update(resolve_field_list(fields, snuba_args))
+            except InvalidSearchQuery as exc:
+                raise OrganizationEventsError(exc.message)
+
         # TODO(lb): remove once boolean search is fully functional
         has_boolean_op_flag = features.has(
             'organizations:boolean-search',
@@ -147,9 +110,6 @@ def get_snuba_query_args_legacy(self, request, organization):
         except InvalidSearchQuery as exc:
             raise OrganizationEventsError(exc.message)
 
-        # Filter out special aggregates.
-        self._filter_unspecified_special_fields_in_conditions(snuba_args, set())
-
         # TODO(lb): remove once boolean search is fully functional
         has_boolean_op_flag = features.has(
             'organizations:boolean-search',
@@ -212,17 +172,3 @@ def _get_next_or_prev_id(self, direction, request, organization, snuba_args, eve
             return None
 
         return six.text_type(result['data'][0]['event_id'])
-
-    def _filter_unspecified_special_fields_in_conditions(self, snuba_args, special_fields):
-        conditions = []
-        for condition in snuba_args['conditions']:
-            field = condition[0]
-            if (
-                not isinstance(field, (list, tuple))
-                and field in SPECIAL_FIELDS
-                and field not in special_fields
-            ):
-                # skip over special field.
-                continue
-            conditions.append(condition)
-        snuba_args['conditions'] = conditions
@@ -18,7 +18,6 @@
 from sentry import features
 from sentry.models.project import Project
 
-ALLOWED_GROUPINGS = frozenset(('issue.id', 'project.id', 'transaction'))
 logger = logging.getLogger(__name__)
 
 
@@ -31,16 +30,8 @@ def get(self, request, organization):
         try:
             params = self.get_filter_params(request, organization)
             snuba_args = self.get_snuba_query_args(request, organization, params)
-            fields = snuba_args.get('selected_columns')
-            groupby = snuba_args.get('groupby', [])
-
-            if not fields and not groupby:
-                return Response({'detail': 'No fields or groupings provided'}, status=400)
-
-            if any(field for field in groupby if field not in ALLOWED_GROUPINGS):
-                message = ('Invalid groupby value requested. Allowed values are ' +
-                           ', '.join(ALLOWED_GROUPINGS))
-                return Response({'detail': message}, status=400)
+            if not snuba_args.get('selected_columns') and not snuba_args.get('aggregations'):
+                return Response({'detail': 'No fields provided'}, status=400)
 
         except OrganizationEventsError as exc:
             return Response({'detail': exc.message}, status=400)
@@ -130,16 +121,22 @@ def get_legacy(self, request, organization):
         )
 
     def handle_results(self, request, organization, project_ids, results):
+        """
+        Add a `project.name` entry (the project slug) to each result row.
+
+        Results are returned untouched when they are empty or when the first
+        row carries neither a `project.id` nor a `projectid` key. Rows are
+        updated in place; the project id key is dropped from a row unless it
+        was requested through the `field` query parameter.
+        """
+        if not results:
+            return results
+
+        first_row = results[0]
+        if not ('project.id' in first_row or 'projectid' in first_row):
+            return results
+
+        fields = request.GET.getlist('field')
         projects = {p['id']: p['slug'] for p in Project.objects.filter(
             organization=organization,
             id__in=project_ids).values('id', 'slug')}
-
-        fields = request.GET.getlist('field')
-
-        if 'project.name' in fields:
-            for result in results:
-                result['project.name'] = projects[result['project.id']]
-                if 'project.id' not in fields:
-                    del result['project.id']
+        for result in results:
+            for key in ('projectid', 'project.id'):
+                if key in result:
+                    result['project.name'] = projects[result[key]]
+                    if key not in fields:
+                        del result[key]
 
         return results
@@ -2,6 +2,7 @@
 
 import re
 from collections import namedtuple, defaultdict
+from copy import deepcopy
 from datetime import datetime
 
 import six
@@ -142,9 +143,8 @@ def translate(pat):
     'first_seen': 'first_seen',
     'last_seen': 'last_seen',
     'times_seen': 'times_seen',
-    # OrganizationEvents aggregations
-    'event_count': 'event_count',
-    'user_count': 'user_count',
+    # TODO(mark) figure out how to safelist aggregate functions/field aliases
+    # so they can be used in conditions
 }, **SENTRY_SNUBA_MAP)
 no_conversion = set(['project_id', 'start', 'end'])
 
@@ -215,9 +215,8 @@ class SearchVisitor(NodeVisitor):
         'device.battery_level', 'device.charging', 'device.online',
         'device.simulator', 'error.handled', 'issue.id', 'stack.colno',
         'stack.in_app', 'stack.lineno', 'stack.stack_level',
-        # OrganizationEvents aggregations
-        'event_count', 'user_count',
-
+        # TODO(mark) figure out how to safelist aggregate functions/field aliases
+        # so they can be used in conditions
     ])
     date_keys = set([
         'start', 'end', 'first_seen', 'last_seen', 'time', 'timestamp',
@@ -662,3 +661,158 @@ def get_snuba_query_args(query=None, params=None):
             kwargs['has_boolean_terms'] = True
             kwargs['conditions'].append(convert_search_boolean_to_snuba_query(term))
     return kwargs
+
+
+# Aliases that resolve_field_list() expands in place of a requested field name.
+# Each entry may provide 'fields' (added to the selected columns) and/or
+# 'aggregations' (added to the aggregations list).
+FIELD_ALIASES = {
+    'issue_title': {
+        'aggregations': [['anyHeavy', 'title', 'issue_title']],
+    },
+    'last_seen': {
+        'aggregations': [['max', 'timestamp', 'last_seen']],
+    },
+    'latest_event': {
+        'aggregations': [
+            # TODO(mark) This is a hack to work around jsonschema limitations
+            # in snuba.
+            ['argMax(event_id, timestamp)', '', 'latest_event'],
+        ],
+    },
+    'project': {
+        'fields': ['project.id'],
+    },
+    'user': {
+        'fields': ['user.id', 'user.name', 'user.username', 'user.email', 'user.ip'],
+    }
+    # TODO(mark) Add rpm alias.
+}
+
+# Aggregate functions accepted in `function(column)` field expressions, keyed
+# by the name used in the request. 'snuba_name' is the function name placed in
+# the generated aggregation and 'fields' lists the columns the function may be
+# applied to ('*' permits any column).
+VALID_AGGREGATES = {
+    'count_unique': {
+        'snuba_name': 'uniq',
+        'fields': '*',
+    },
+    'count': {
+        'snuba_name': 'count',
+        'fields': '*'
+    },
+    'avg': {
+        'snuba_name': 'avg',
+        'fields': ['duration'],
+    },
+    'min': {
+        'snuba_name': 'min',
+        'fields': ['timestamp', 'duration'],
+    },
+    'max': {
+        'snuba_name': 'max',
+        'fields': ['timestamp', 'duration'],
+    },
+    'sum': {
+        'snuba_name': 'sum',
+        'fields': ['duration'],
+    },
+    # This doesn't work yet, but is an illustration of how it could work
+    'p75': {
+        'snuba_name': 'quantileTiming(0.75)',
+        'fields': ['duration'],
+    },
+}
+
+# Matches `function(column)`. The column may be empty (e.g. `count()`) and is
+# limited to lowercase letters, dots and underscores.
+AGGREGATE_PATTERN = re.compile(r'^(?P<function>[^\(]+)\((?P<column>[a-z\._]*)\)$')
+
+
+def validate_aggregate(field, match):
+    """
+    Ensure an aggregate expression uses a known function on a permitted column.
+
+    `field` is the raw expression (only used in the error message) and
+    `match` is the AGGREGATE_PATTERN match for it. Raises InvalidSearchQuery
+    when the function is not in VALID_AGGREGATES or when the column is not
+    allowed for that function.
+    """
+    name = match.group('function')
+    spec = VALID_AGGREGATES.get(name)
+    if spec is None:
+        raise InvalidSearchQuery("Unknown aggregate function '%s'" % field)
+
+    column = match.group('column')
+    allowed = spec['fields']
+    # '*' means the function accepts any column.
+    if allowed == '*' or column in allowed:
+        return
+    raise InvalidSearchQuery(
+        "Invalid column '%s' in aggregate function '%s'" % (column, name))
+
+
+def validate_orderby(orderby, fields):
+    """
+    Check that every sort column is one of the requested fields.
+
+    `orderby` may be a single column or a list/tuple of columns; leading
+    '-' characters (descending markers) are ignored for the comparison.
+    Raises InvalidSearchQuery when a column is not present in `fields`.
+    """
+    if not isinstance(orderby, (list, tuple)):
+        orderby = [orderby]
+    if any(entry.lstrip('-') not in fields for entry in orderby):
+        raise InvalidSearchQuery('Cannot order by an field that is not selected.')
+
+
+def resolve_field_list(fields, snuba_args):
+    """
+    Expand a list of fields based on aliases and aggregate functions.
+
+    Returns a dict of aggregations, selected_columns, and
+    groupby that can be merged into the result of get_snuba_query_args()
+    to build a more complete snuba query based on event search conventions.
+
+    `fields` is the list of requested field names; it is not modified.
+    `snuba_args` is read for its optional 'rollup' and 'orderby' values.
+
+    Raises InvalidSearchQuery when a field is not a string, an aggregate
+    expression is invalid, rollup is used without an aggregate field, or
+    the orderby refers to a field that was not requested.
+    """
+    # Work on a copy so the caller's list is left untouched.
+    fields = list(fields)
+
+    # If project.name is requested, get the project.id from Snuba so we
+    # can use this to look up the name in Sentry
+    if 'project.name' in fields:
+        fields.remove('project.name')
+        if 'project.id' not in fields:
+            fields.append('project.id')
+
+    aggregations = []
+    groupby = []
+    columns = []
+    for field in fields:
+        if not isinstance(field, six.string_types):
+            raise InvalidSearchQuery('Field names must be strings')
+
+        if field in FIELD_ALIASES:
+            # Copy the alias definition so the shared constant is never
+            # mutated through the lists we hand back.
+            special_field = deepcopy(FIELD_ALIASES[field])
+            columns.extend(special_field.get('fields', []))
+            aggregations.extend(special_field.get('aggregations', []))
+            continue
+
+        # Basic fields don't require additional validation. They could be tag
+        # names which we have no way of validating at this point.
+        match = AGGREGATE_PATTERN.search(field)
+        if not match:
+            columns.append(field)
+            continue
+
+        validate_aggregate(field, match)
+        aggregations.append([
+            VALID_AGGREGATES[match.group('function')]['snuba_name'],
+            match.group('column'),
+            # Alias is `function_column`; the trailing underscore is dropped
+            # when the column is empty.
+            u'{}_{}'.format(match.group('function'), match.group('column')).rstrip('_')
+        ])
+
+    rollup = snuba_args.get('rollup')
+    if not rollup:
+        # Ensure fields we require to build a functioning interface
+        # are present. We don't add fields when using a rollup as the additional fields
+        # would be aggregated away. When there are aggregations
+        # we use argMax to get the latest event/projectid so we can create links.
+        # The `projectid` output name is not a typo, using `project_id`
+        # generates invalid queries.
+        if not aggregations and 'id' not in columns:
+            columns.append('id')
+            # project.id may already be selected (e.g. when project.name was
+            # requested); don't select it twice.
+            if 'project.id' not in columns:
+                columns.append('project.id')
+        if aggregations and 'latest_event' not in fields:
+            aggregations.extend(deepcopy(FIELD_ALIASES['latest_event']['aggregations']))
+        if aggregations and 'project.id' not in columns:
+            aggregations.append(['argMax(project_id, timestamp)', '', 'projectid'])
+
+    if rollup and columns and not aggregations:
+        raise InvalidSearchQuery('You cannot use rollup without an aggregate field.')
+
+    orderby = snuba_args.get('orderby')
+    if orderby:
+        validate_orderby(orderby, fields)
+
+    # If aggregations are present all columns
+    # need to be added to the group by so that the query is valid.
+    if aggregations:
+        groupby.extend(columns)
+
+    return {
+        'selected_columns': columns,
+        'aggregations': aggregations,
+        'groupby': groupby,
+    }