24
24
import ibis .backends .bigquery as ibis_bigquery
25
25
import ibis .common .deferred # type: ignore
26
26
import ibis .expr .datatypes as ibis_dtypes
27
+ import ibis .expr .operations as ibis_ops
27
28
import ibis .expr .types as ibis_types
28
29
import pandas
29
30
@@ -71,19 +72,16 @@ def __init__(
71
72
# Allow creating a DataFrame directly from an Ibis table expression.
72
73
# TODO(swast): Validate that each column references the same table (or
73
74
# no table for literal values).
74
- self ._columns = tuple (columns )
75
+ self ._columns = tuple (
76
+ column .resolve (table )
77
+ # TODO(https://github.com/ibis-project/ibis/issues/7613): use
78
+ # public API to refer to Deferred type.
79
+ if isinstance (column , ibis .common .deferred .Deferred ) else column
80
+ for column in columns
81
+ )
75
82
# To allow for more efficient lookup by column name, create a
76
83
# dictionary mapping names to column values.
77
- self ._column_names = {
78
- (
79
- column .resolve (table )
80
- # TODO(https://github.com/ibis-project/ibis/issues/7613): use
81
- # public API to refer to Deferred type.
82
- if isinstance (column , ibis .common .deferred .Deferred )
83
- else column
84
- ).get_name (): column
85
- for column in self ._columns
86
- }
84
+ self ._column_names = {column .get_name (): column for column in self ._columns }
87
85
88
86
@property
89
87
def columns (self ) -> typing .Tuple [ibis_types .Value , ...]:
@@ -139,10 +137,6 @@ def projection(
139
137
for expression , id in expression_id_pairs
140
138
]
141
139
result = self ._select (tuple (values )) # type: ignore
142
-
143
- # Need to reproject to convert ibis Scalar to ibis Column object
144
- if any (exp_id [0 ].is_const for exp_id in expression_id_pairs ):
145
- result = result ._reproject_to_table ()
146
140
return result
147
141
148
142
@abc .abstractmethod
@@ -166,6 +160,13 @@ def _get_ibis_column(self, key: str) -> ibis_types.Value:
166
160
),
167
161
)
168
162
163
+ def is_scalar_expr (self , key : str ) -> bool :
164
+ # sometimes need to determine if column expression is a scalar expression.
165
+ # For instance, cannot filter on an analytic expression, or nest analytic expressions.
166
+ # Literals are excluded because ibis itself doesn't work well with them, not because of sql limitations.
167
+ ibis_expr = self ._get_ibis_column (key )
168
+ return not is_literal (ibis_expr ) and not is_window (ibis_expr )
169
+
169
170
def get_column_type (self , key : str ) -> bigframes .dtypes .Dtype :
170
171
ibis_type = typing .cast (
171
172
bigframes .core .compile .ibis_types .IbisDtype ,
@@ -355,6 +356,9 @@ def _to_ibis_expr(
355
356
return table
356
357
357
358
def filter (self , predicate : ex .Expression ) -> UnorderedIR :
359
+ if any (map (is_window , map (self ._get_ibis_column , predicate .unbound_variables ))):
360
+ # ibis doesn't support qualify syntax
361
+ return self ._reproject_to_table ().filter (predicate )
358
362
bindings = {col : self ._get_ibis_column (col ) for col in self .column_ids }
359
363
condition = op_compiler .compile_expression (predicate , bindings )
360
364
return self ._filter (condition )
@@ -806,7 +810,6 @@ def project_window_op(
806
810
output_name = None ,
807
811
* ,
808
812
never_skip_nulls = False ,
809
- skip_reproject_unsafe : bool = False ,
810
813
) -> OrderedIR :
811
814
"""
812
815
Creates a new expression based on this expression with unary operation applied to one column.
@@ -815,8 +818,18 @@ def project_window_op(
815
818
window_spec: a specification of the window over which to apply the operator
816
819
output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided
817
820
never_skip_nulls: will disable null skipping for operators that would otherwise do so
818
- skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection
819
821
"""
822
+ used_vars = [column_name , * window_spec .all_referenced_columns ]
823
+ # Cannot nest analytic expressions, so reproject to cte first if needed.
824
+ if not all (map (self .is_scalar_expr , used_vars )):
825
+ return self ._reproject_to_table ().project_window_op (
826
+ column_name ,
827
+ op ,
828
+ window_spec ,
829
+ output_name ,
830
+ never_skip_nulls = never_skip_nulls ,
831
+ )
832
+
820
833
column = typing .cast (ibis_types .Column , self ._get_ibis_column (column_name ))
821
834
window = self ._ibis_window_from_spec (
822
835
window_spec , require_total_order = op .uses_total_row_ordering
@@ -861,8 +874,7 @@ def project_window_op(
861
874
window_op = case_statement
862
875
863
876
result = self ._set_or_replace_by_id (output_name or column_name , window_op )
864
- # TODO(tbergeron): Automatically track analytic expression usage and defer reprojection until required for valid query generation.
865
- return result ._reproject_to_table () if not skip_reproject_unsafe else result
877
+ return result
866
878
867
879
def _reproject_to_table (self ) -> OrderedIR :
868
880
table = self ._to_ibis_expr (
@@ -1034,6 +1046,9 @@ def _to_ibis_expr(
1034
1046
return table
1035
1047
1036
1048
def filter (self , predicate : ex .Expression ) -> OrderedIR :
1049
+ if any (map (is_window , map (self ._get_ibis_column , predicate .unbound_variables ))):
1050
+ # ibis doesn't support qualify syntax
1051
+ return self ._reproject_to_table ().filter (predicate )
1037
1052
bindings = {col : self ._get_ibis_column (col ) for col in self .column_ids }
1038
1053
condition = op_compiler .compile_expression (predicate , bindings )
1039
1054
return self ._filter (condition )
@@ -1328,6 +1343,22 @@ def build(self) -> OrderedIR:
1328
1343
)
1329
1344
1330
1345
1346
+ def is_literal (column : ibis_types .Value ) -> bool :
1347
+ # Unfortunately, Literals in ibis are not "Columns"s and therefore can't be aggregated.
1348
+ return not isinstance (column , ibis_types .Column )
1349
+
1350
+
1351
+ def is_window (column : ibis_types .Value ) -> bool :
1352
+ matches = (
1353
+ (column )
1354
+ .op ()
1355
+ .find_topmost (
1356
+ lambda x : isinstance (x , (ibis_ops .WindowFunction , ibis_ops .Relation ))
1357
+ )
1358
+ )
1359
+ return any (isinstance (op , ibis_ops .WindowFunction ) for op in matches )
1360
+
1361
+
1331
1362
def _reduce_predicate_list (
1332
1363
predicate_list : typing .Collection [ibis_types .BooleanValue ],
1333
1364
) -> ibis_types .BooleanValue :
0 commit comments