Skip to content

Commit 4eab4d9

Browse files
authored
UNPIVOT command (#24)
* UNPIVOT command implementation * UNPIVOT command * Add slt tests * Taplo and cargo fmt * Resolve comments
1 parent 7d38be8 commit 4eab4d9

File tree

2 files changed

+373
-3
lines changed

2 files changed

+373
-3
lines changed

datafusion/sql/src/relation/mod.rs

Lines changed: 104 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
2121

2222
use datafusion_common::tree_node::{Transformed, TreeNode};
2323
use datafusion_common::{
24-
not_impl_err, plan_err, Column, DFSchema, Diagnostic, Result, Span, Spans,
25-
TableReference,
24+
not_impl_err, plan_err, Column, DFSchema, Diagnostic, Result, ScalarValue, Span,
25+
Spans, TableReference,
2626
};
27+
use datafusion_expr::binary::comparison_coercion;
2728
use datafusion_expr::builder::subquery_alias;
2829
use datafusion_expr::{expr::Unnest, Expr, LogicalPlan, LogicalPlanBuilder};
2930
use datafusion_expr::{Subquery, SubqueryAlias};
30-
use sqlparser::ast::{FunctionArg, FunctionArgExpr, Spanned, TableFactor};
31+
use sqlparser::ast::{FunctionArg, FunctionArgExpr, NullInclusion, Spanned, TableFactor};
3132

3233
mod join;
3334

@@ -299,6 +300,106 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
299300
}
300301
}
301302
}
303+
TableFactor::Unpivot {
304+
table,
305+
null_inclusion,
306+
value,
307+
name,
308+
columns,
309+
alias,
310+
} => {
311+
let base_plan = self.create_relation(*table, planner_context)?;
312+
let base_schema = base_plan.schema();
313+
314+
let value_column = value.value.clone();
315+
let name_column = name.value.clone();
316+
317+
let mut unpivot_column_indices = Vec::new();
318+
let mut unpivot_column_names = Vec::new();
319+
320+
let mut common_type = None;
321+
322+
for column_ident in &columns {
323+
let column_name = column_ident.value.clone();
324+
325+
let idx = if let Some(i) =
326+
base_schema.index_of_column_by_name(None, &column_name)
327+
{
328+
i
329+
} else {
330+
return plan_err!("Column '{}' not found in input", column_name);
331+
};
332+
333+
let field = base_schema.field(idx);
334+
let field_type = field.data_type();
335+
336+
// Verify all unpivot columns have compatible types
337+
if let Some(current_type) = &common_type {
338+
if comparison_coercion(current_type, field_type).is_none() {
339+
return plan_err!(
340+
"The type of column '{}' conflicts with the type of other columns in the UNPIVOT list.",
341+
column_name.to_uppercase()
342+
);
343+
}
344+
} else {
345+
common_type = Some(field_type.clone());
346+
}
347+
348+
unpivot_column_indices.push(idx);
349+
unpivot_column_names.push(column_name);
350+
}
351+
352+
if unpivot_column_names.is_empty() {
353+
return plan_err!("UNPIVOT requires at least one column to unpivot");
354+
}
355+
356+
let non_pivot_exprs: Vec<Expr> = base_schema
357+
.fields()
358+
.iter()
359+
.enumerate()
360+
.filter(|(i, _)| !unpivot_column_indices.contains(i))
361+
.map(|(_, f)| Expr::Column(Column::new(None::<&str>, f.name())))
362+
.collect();
363+
364+
let mut union_inputs = Vec::with_capacity(unpivot_column_names.len());
365+
366+
for col_name in &unpivot_column_names {
367+
let mut projection_exprs = non_pivot_exprs.clone();
368+
369+
let name_expr =
370+
Expr::Literal(ScalarValue::Utf8(Some(col_name.clone())))
371+
.alias(name_column.clone());
372+
373+
let value_expr =
374+
Expr::Column(Column::new(None::<&str>, col_name.clone()))
375+
.alias(value_column.clone());
376+
377+
projection_exprs.push(name_expr);
378+
projection_exprs.push(value_expr);
379+
380+
let mut builder = LogicalPlanBuilder::from(base_plan.clone())
381+
.project(projection_exprs)?;
382+
383+
if let Some(NullInclusion::ExcludeNulls) | None = null_inclusion {
384+
let col = Column::new(None::<&str>, value_column.clone());
385+
builder = builder
386+
.filter(Expr::IsNotNull(Box::new(Expr::Column(col))))?;
387+
}
388+
389+
union_inputs.push(builder.build()?);
390+
}
391+
392+
let first = union_inputs.remove(0);
393+
let mut union_builder = LogicalPlanBuilder::from(first);
394+
395+
for plan in union_inputs {
396+
union_builder = union_builder.union(plan)?;
397+
}
398+
399+
let unpivot_plan = union_builder.build()?;
400+
401+
(unpivot_plan, alias)
402+
}
302403
// @todo: Support TableFactory::TableFunction
303404
_ => {
304405
return not_impl_err!(
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
#######
19+
# Setup test data table
20+
#######
21+
statement ok
22+
CREATE TABLE monthly_sales(
23+
empid INT,
24+
dept TEXT,
25+
jan INT,
26+
feb INT,
27+
mar INT,
28+
apr INT)
29+
AS SELECT * FROM VALUES
30+
(1, 'electronics', 100, 200, 300, 100),
31+
(2, 'clothes', 100, 300, 150, 200),
32+
(3, 'cars', 200, 400, 100, 50),
33+
(4, 'appliances', 100, NULL, 100, 50);
34+
35+
# Basic UNPIVOT excluding nulls (default behavior)
36+
query ITTI
37+
SELECT *
38+
FROM monthly_sales
39+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
40+
ORDER BY empid;
41+
----
42+
1 electronics jan 100
43+
1 electronics feb 200
44+
1 electronics mar 300
45+
1 electronics apr 100
46+
2 clothes jan 100
47+
2 clothes feb 300
48+
2 clothes mar 150
49+
2 clothes apr 200
50+
3 cars jan 200
51+
3 cars feb 400
52+
3 cars mar 100
53+
3 cars apr 50
54+
4 appliances jan 100
55+
4 appliances mar 100
56+
4 appliances apr 50
57+
58+
# UNPIVOT with INCLUDE NULLS option
59+
query ITTI
60+
SELECT *
61+
FROM monthly_sales
62+
UNPIVOT INCLUDE NULLS (sales FOR month IN (jan, feb, mar, apr))
63+
ORDER BY empid;
64+
----
65+
1 electronics jan 100
66+
1 electronics feb 200
67+
1 electronics mar 300
68+
1 electronics apr 100
69+
2 clothes jan 100
70+
2 clothes feb 300
71+
2 clothes mar 150
72+
2 clothes apr 200
73+
3 cars jan 200
74+
3 cars feb 400
75+
3 cars mar 100
76+
3 cars apr 50
77+
4 appliances jan 100
78+
4 appliances feb NULL
79+
4 appliances mar 100
80+
4 appliances apr 50
81+
82+
query TTI
83+
SELECT dept, month, sales
84+
FROM monthly_sales
85+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
86+
ORDER BY dept;
87+
----
88+
appliances jan 100
89+
appliances mar 100
90+
appliances apr 50
91+
cars jan 200
92+
cars feb 400
93+
cars mar 100
94+
cars apr 50
95+
clothes jan 100
96+
clothes feb 300
97+
clothes mar 150
98+
clothes apr 200
99+
electronics jan 100
100+
electronics feb 200
101+
electronics mar 300
102+
electronics apr 100
103+
104+
# UNPIVOT with filtering
105+
query ITTI
106+
SELECT *
107+
FROM monthly_sales
108+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
109+
WHERE sales > 100
110+
ORDER BY empid;
111+
----
112+
1 electronics feb 200
113+
1 electronics mar 300
114+
2 clothes feb 300
115+
2 clothes mar 150
116+
2 clothes apr 200
117+
3 cars jan 200
118+
3 cars feb 400
119+
120+
# UNPIVOT with aggregation
121+
query TI
122+
SELECT month, SUM(sales) as total_sales
123+
FROM monthly_sales
124+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
125+
GROUP BY month
126+
ORDER BY month;
127+
----
128+
apr 400
129+
feb 900
130+
jan 500
131+
mar 650
132+
133+
# UNPIVOT with JOIN
134+
query ITTI
135+
SELECT e.empid, e.dept, u.month, u.sales
136+
FROM monthly_sales e
137+
JOIN (
138+
SELECT empid, month, sales
139+
FROM monthly_sales
140+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
141+
) u ON e.empid = u.empid
142+
WHERE u.sales > 200
143+
ORDER BY e.empid, u.month;
144+
----
145+
1 electronics mar 300
146+
2 clothes feb 300
147+
3 cars feb 400
148+
149+
query ITIITI
150+
SELECT *
151+
FROM monthly_sales
152+
UNPIVOT (sales FOR month IN (jan, mar))
153+
ORDER BY empid;
154+
----
155+
1 electronics 200 100 jan 100
156+
1 electronics 200 100 mar 300
157+
2 clothes 300 200 jan 100
158+
2 clothes 300 200 mar 150
159+
3 cars 400 50 jan 200
160+
3 cars 400 50 mar 100
161+
4 appliances NULL 50 jan 100
162+
4 appliances NULL 50 mar 100
163+
164+
# UNPIVOT with HAVING clause
165+
query TI
166+
SELECT month, SUM(sales) as total_sales
167+
FROM monthly_sales
168+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
169+
GROUP BY month
170+
HAVING SUM(sales) > 400
171+
ORDER BY month;
172+
----
173+
feb 900
174+
jan 500
175+
mar 650
176+
177+
# UNPIVOT with subquery
178+
query ITTI
179+
SELECT *
180+
FROM (
181+
SELECT empid, dept, jan, feb, mar
182+
FROM monthly_sales
183+
WHERE dept IN ('electronics', 'clothes')
184+
)
185+
UNPIVOT (sales FOR month IN (jan, feb, mar))
186+
ORDER BY empid;
187+
----
188+
1 electronics jan 100
189+
1 electronics feb 200
190+
1 electronics mar 300
191+
2 clothes jan 100
192+
2 clothes feb 300
193+
2 clothes mar 150
194+
195+
# Non-existent column in the column list
196+
query error DataFusion error: Error during planning: Column 'non_existent' not found in input
197+
SELECT *
198+
FROM monthly_sales
199+
UNPIVOT (sales FOR month IN (non_existent, feb, mar))
200+
ORDER BY empid;
201+
202+
statement ok
203+
CREATE TABLE mixed_types(
204+
id INT,
205+
col1 INT,
206+
col2 TEXT,
207+
col3 FLOAT)
208+
AS SELECT * FROM VALUES
209+
(1, 100, 'abc', 10.5),
210+
(2, 200, 'def', 20.5);
211+
212+
query ITT
213+
SELECT *
214+
FROM mixed_types
215+
UNPIVOT (val FOR col_name IN (col1, col2, col3))
216+
ORDER BY id;
217+
----
218+
1 col1 100
219+
1 col2 abc
220+
1 col3 10.5
221+
2 col1 200
222+
2 col2 def
223+
2 col3 20.5
224+
225+
# UNPIVOT with CTE
226+
query ITTI
227+
WITH sales_data AS (
228+
SELECT * FROM monthly_sales WHERE empid < 3
229+
)
230+
SELECT *
231+
FROM sales_data
232+
UNPIVOT (sales FOR month IN (jan, feb, mar, apr))
233+
ORDER BY empid;
234+
----
235+
1 electronics jan 100
236+
1 electronics feb 200
237+
1 electronics mar 300
238+
1 electronics apr 100
239+
2 clothes jan 100
240+
2 clothes feb 300
241+
2 clothes mar 150
242+
2 clothes apr 200
243+
244+
# UNPIVOT with UNION
245+
query ITIITI
246+
SELECT *
247+
FROM monthly_sales
248+
UNPIVOT (sales FOR month IN (jan, feb))
249+
UNION ALL
250+
SELECT *
251+
FROM monthly_sales
252+
UNPIVOT (sales FOR month IN (mar, apr))
253+
ORDER BY empid, month;
254+
----
255+
1 electronics 100 200 apr 100
256+
1 electronics 300 100 feb 200
257+
1 electronics 300 100 jan 100
258+
1 electronics 100 200 mar 300
259+
2 clothes 100 300 apr 200
260+
2 clothes 150 200 feb 300
261+
2 clothes 150 200 jan 100
262+
2 clothes 100 300 mar 150
263+
3 cars 200 400 apr 50
264+
3 cars 100 50 feb 400
265+
3 cars 100 50 jan 200
266+
3 cars 200 400 mar 100
267+
4 appliances 100 NULL apr 50
268+
4 appliances 100 50 jan 100
269+
4 appliances 100 NULL mar 100

0 commit comments

Comments
 (0)