chore: fix typos of expr, functions, optimizer, physical-expr-common,… #11538

Merged 1 commit on Jul 18, 2024
4 changes: 2 additions & 2 deletions datafusion/expr/src/aggregate_function.rs
@@ -152,8 +152,8 @@ mod tests {
use strum::IntoEnumIterator;

#[test]
- // Test for AggregateFuncion's Display and from_str() implementations.
- // For each variant in AggregateFuncion, it converts the variant to a string
+ // Test for AggregateFunction's Display and from_str() implementations.
+ // For each variant in AggregateFunction, it converts the variant to a string
// and then back to a variant. The test asserts that the original variant and
// the reconstructed variant are the same. This assertion is also necessary for
// function suggestion. See https://github.com/apache/datafusion/issues/8082
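For context, the round-trip property this test enforces can be sketched independently of DataFusion. A minimal sketch, assuming the `strum`/`strum_macros` crates the test already imports; the `AggFunc` enum is hypothetical:

```rust
use std::str::FromStr;

use strum::IntoEnumIterator;
use strum_macros::{Display, EnumIter, EnumString};

// Hypothetical stand-in for the AggregateFunction enum.
#[derive(Debug, PartialEq, Display, EnumIter, EnumString)]
enum AggFunc {
    Min,
    Max,
}

#[test]
fn display_and_from_str_round_trip() {
    for func in AggFunc::iter() {
        // Convert each variant to a string and back; the reconstructed
        // variant must equal the original, as the test above asserts.
        let round_tripped = AggFunc::from_str(&func.to_string()).unwrap();
        assert_eq!(func, round_tripped);
    }
}
```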
6 changes: 3 additions & 3 deletions datafusion/expr/src/expr.rs
@@ -109,7 +109,7 @@ use sqlparser::ast::NullTreatment;
/// ## Binary Expressions
///
/// Exprs implement traits that allow easy to understand construction of more
- /// complex expresions. For example, to create `c1 + c2` to add columns "c1" and
+ /// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
/// "c2" together
///
/// ```
@@ -1398,7 +1398,7 @@ impl Expr {
}
Ok(TreeNodeRecursion::Continue)
})
.expect("traversal is infallable");
.expect("traversal is infallible");
}

/// Return all references to columns and their occurrence counts in the expression.
@@ -1433,7 +1433,7 @@ impl Expr {
}
Ok(TreeNodeRecursion::Continue)
})
.expect("traversal is infallable");
.expect("traversal is infallible");
}

/// Returns true if there are any column references in this Expr
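To illustrate the "Binary Expressions" doc comment fixed earlier in this file, a minimal sketch assuming the `datafusion_expr` crate's `col` helper:

```rust
use datafusion_expr::col;

fn main() {
    // Combine two column expressions with `+` to build `c1 + c2`,
    // as the doc comment describes.
    let expr = col("c1") + col("c2");
    println!("{expr}"); // Expr implements Display
}
```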
4 changes: 2 additions & 2 deletions datafusion/expr/src/expr_rewriter/mod.rs
@@ -155,7 +155,7 @@ pub fn unnormalize_col(expr: Expr) -> Expr {
})
})
.data()
.expect("Unnormalize is infallable")
.expect("Unnormalize is infallible")
}

/// Create a Column from the Scalar Expr
@@ -201,7 +201,7 @@ pub fn strip_outer_reference(expr: Expr) -> Expr {
})
})
.data()
.expect("strip_outer_reference is infallable")
.expect("strip_outer_reference is infallible")
}

/// Returns plan with expressions coerced to types compatible with
6 changes: 3 additions & 3 deletions datafusion/expr/src/logical_plan/builder.rs
@@ -412,14 +412,14 @@ impl LogicalPlanBuilder {

/// Add missing sort columns to all downstream projection
///
- /// Thus, if you have a LogialPlan that selects A and B and have
+ /// Thus, if you have a LogicalPlan that selects A and B and have
/// not requested a sort by C, this code will add C recursively to
/// all input projections.
///
/// Adding a new column is not correct if there is a `Distinct`
/// node, which produces only distinct values of its
/// inputs. Adding a new column to its input will result in
- /// potententially different results than with the original column.
+ /// potentially different results than with the original column.
///
/// For example, if the input is like:
///
@@ -1763,7 +1763,7 @@ mod tests {
.unwrap();
assert_eq!(&expected, plan.schema().as_ref());

- // Note scan of "EMPLOYEE_CSV" is treated as a SQL identifer
+ // Note scan of "EMPLOYEE_CSV" is treated as a SQL identifier
// (and thus normalized to "employee_csv") as well
let projection = None;
let plan =
4 changes: 2 additions & 2 deletions datafusion/expr/src/logical_plan/display.rs
@@ -338,9 +338,9 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> {
.collect::<Vec<_>>()
.join(", ");

- let elipse = if values.len() > 5 { "..." } else { "" };
+ let eclipse = if values.len() > 5 { "..." } else { "" };

- let values_str = format!("{}{}", str_values, elipse);
+ let values_str = format!("{}{}", str_values, eclipse);
json!({
"Node Type": "Values",
"Values": values_str
6 changes: 3 additions & 3 deletions datafusion/expr/src/logical_plan/plan.rs
@@ -263,7 +263,7 @@ pub enum LogicalPlan {
/// Prepare a statement and find any bind parameters
/// (e.g. `?`). This is used to implement SQL-prepared statements.
Prepare(Prepare),
- /// Data Manipulaton Language (DML): Insert / Update / Delete
+ /// Data Manipulation Language (DML): Insert / Update / Delete
Dml(DmlStatement),
/// Data Definition Language (DDL): CREATE / DROP TABLES / VIEWS / SCHEMAS
Ddl(DdlStatement),
@@ -1598,8 +1598,8 @@ impl LogicalPlan {
})
.collect();

- let elipse = if values.len() > 5 { "..." } else { "" };
- write!(f, "Values: {}{}", str_values.join(", "), elipse)
+ let eclipse = if values.len() > 5 { "..." } else { "" };
+ write!(f, "Values: {}{}", str_values.join(", "), eclipse)
}

LogicalPlan::TableScan(TableScan {
2 changes: 1 addition & 1 deletion datafusion/expr/src/partition_evaluator.rs
@@ -135,7 +135,7 @@ pub trait PartitionEvaluator: Debug + Send {
/// must produce an output column with one output row for every
/// input row.
///
- /// `num_rows` is requied to correctly compute the output in case
+ /// `num_rows` is required to correctly compute the output in case
/// `values.len() == 0`
///
/// Implementing this function is an optimization: certain window
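A minimal sketch of why `num_rows` matters, assuming the `datafusion_expr` and `arrow` crates; `ConstEvaluator` is hypothetical, and the `evaluate_all` signature is an assumption based on the trait's documentation:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use datafusion_common::Result;
use datafusion_expr::PartitionEvaluator;

#[derive(Debug)]
struct ConstEvaluator;

impl PartitionEvaluator for ConstEvaluator {
    fn evaluate_all(&mut self, _values: &[ArrayRef], num_rows: usize) -> Result<ArrayRef> {
        // Size the output from `num_rows`, not from `_values`, so the
        // result stays correct even when `values.len() == 0`.
        Ok(Arc::new(Int64Array::from(vec![42; num_rows])) as ArrayRef)
    }
}
```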
2 changes: 1 addition & 1 deletion datafusion/expr/src/signature.rs
@@ -65,7 +65,7 @@ pub enum Volatility {
/// automatically coerces (add casts to) function arguments so they match the type signature.
///
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
- /// that calles `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
+ /// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
///
/// # Data Types
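The coercion described above can be sketched outside the planner; everything below is illustrative rather than DataFusion's internal code:

```rust
// Stand-in for a function implemented only for Float64, like `cos`.
fn cos_f64(x: f64) -> f64 {
    x.cos()
}

fn main() {
    let int_column: i64 = 1;
    // Type coercion effectively inserts `CAST(int_column AS DOUBLE)`
    // during planning so the Float64 implementation can be called.
    let coerced = int_column as f64;
    println!("cos = {}", cos_f64(coerced));
}
```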
4 changes: 2 additions & 2 deletions datafusion/expr/src/type_coercion/binary.rs
@@ -370,7 +370,7 @@ impl From<&DataType> for TypeCategory {
/// The rules in the document provide a clue, but adhering strictly to them doesn't precisely
/// align with the behavior of Postgres. Therefore, we've made slight adjustments to the rules
/// to better match the behavior of both Postgres and DuckDB. For example, we expect adjusted
- /// decimal percision and scale when coercing decimal types.
+ /// decimal precision and scale when coercing decimal types.
pub fn type_union_resolution(data_types: &[DataType]) -> Option<DataType> {
if data_types.is_empty() {
return None;
@@ -718,7 +718,7 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result<DataType> {
(Int16 | Int32 | Int64, Int8) | (Int32 | Int64, Int16) | (Int64, Int32) |
// Left Float is larger than right Float.
(Float32 | Float64, Float16) | (Float64, Float32) |
- // Left String is larget than right String.
+ // Left String is larger than right String.
(LargeUtf8, Utf8) |
// Any left type is wider than a right hand side Null.
(_, Null) => lhs.clone(),
2 changes: 1 addition & 1 deletion datafusion/expr/src/type_coercion/functions.rs
@@ -646,7 +646,7 @@ mod tests {
vec![DataType::UInt8, DataType::UInt16],
Some(vec![DataType::UInt8, DataType::UInt16]),
),
- // 2 entries, can coerse values
+ // 2 entries, can coerce values
(
vec![DataType::UInt16, DataType::UInt16],
vec![DataType::UInt8, DataType::UInt16],
2 changes: 1 addition & 1 deletion datafusion/expr/src/type_coercion/mod.rs
@@ -19,7 +19,7 @@
//!
//! Coercion is performed automatically by DataFusion when the types
//! of arguments passed to a function or needed by operators do not
- //! exacty match the types required by that function / operator. In
+ //! exactly match the types required by that function / operator. In
//! this case, DataFusion will attempt to *coerce* the arguments to
//! types accepted by the function by inserting CAST operations.
//!
2 changes: 1 addition & 1 deletion datafusion/expr/src/utils.rs
@@ -1212,7 +1212,7 @@ pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema {
}
}

- /// Build state name. State is the intermidiate state of the aggregate function.
+ /// Build state name. State is the intermediate state of the aggregate function.
pub fn format_state_name(name: &str, state_name: &str) -> String {
format!("{name}[{state_name}]")
}
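Given the `format!("{name}[{state_name}]")` body shown above, a usage sketch:

```rust
// Mirror of the helper shown in the diff above.
fn format_state_name(name: &str, state_name: &str) -> String {
    format!("{name}[{state_name}]")
}

fn main() {
    // e.g. the intermediate `count` state of an `avg` aggregate
    assert_eq!(format_state_name("avg", "count"), "avg[count]");
}
```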
@@ -43,7 +43,7 @@ make_udaf_expr_and_func!(
approx_percentile_cont_with_weight_udaf
);

- /// APPROX_PERCENTILE_CONT_WITH_WEIGTH aggregate expression
+ /// APPROX_PERCENTILE_CONT_WITH_WEIGHT aggregate expression
pub struct ApproxPercentileContWithWeight {
signature: Signature,
approx_percentile_cont: ApproxPercentileCont,
2 changes: 1 addition & 1 deletion datafusion/functions-array/src/remove.rs
@@ -228,7 +228,7 @@ fn array_remove_internal(
}
}

- /// For each element of `list_array[i]`, removed up to `arr_n[i]` occurences
+ /// For each element of `list_array[i]`, removed up to `arr_n[i]` occurrences
/// of `element_array[i]`.
///
/// The type of each **element** in `list_array` must be the same as the type of
2 changes: 1 addition & 1 deletion datafusion/functions/src/core/arrow_cast.rs
@@ -444,7 +444,7 @@ fn is_separator(c: char) -> bool {
}

#[derive(Debug)]
- /// Splits a strings like Dictionary(Int32, Int64) into tokens sutable for parsing
+ /// Splits a strings like Dictionary(Int32, Int64) into tokens suitable for parsing
///
/// For example the string "Timestamp(Nanosecond, None)" would be parsed into:
///
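The token list in that doc comment is truncated here; a simplified, hypothetical version of the split it describes (the real tokenizer handles more cases):

```rust
// Split a type string like "Timestamp(Nanosecond, None)" into
// word and punctuation tokens, dropping whitespace.
fn tokenize(s: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut word = String::new();
    for c in s.chars() {
        if matches!(c, '(' | ')' | ',') || c.is_whitespace() {
            if !word.is_empty() {
                tokens.push(std::mem::take(&mut word));
            }
            if !c.is_whitespace() {
                tokens.push(c.to_string());
            }
        } else {
            word.push(c);
        }
    }
    if !word.is_empty() {
        tokens.push(word);
    }
    tokens
}

fn main() {
    assert_eq!(
        tokenize("Timestamp(Nanosecond, None)"),
        vec!["Timestamp", "(", "Nanosecond", ",", "None", ")"]
    );
}
```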
4 changes: 2 additions & 2 deletions datafusion/functions/src/datetime/to_local_time.rs
@@ -84,7 +84,7 @@ impl ToLocalTimeFunc {
let arg_type = time_value.data_type();
match arg_type {
DataType::Timestamp(_, None) => {
- // if no timezone specificed, just return the input
+ // if no timezone specified, just return the input
Ok(time_value.clone())
}
// If has timezone, adjust the underlying time value. The current time value
@@ -165,7 +165,7 @@ impl ToLocalTimeFunc {

match array.data_type() {
Timestamp(_, None) => {
- // if no timezone specificed, just return the input
+ // if no timezone specified, just return the input
Ok(time_value.clone())
}
Timestamp(Nanosecond, Some(_)) => {
2 changes: 1 addition & 1 deletion datafusion/functions/src/regex/regexpreplace.rs
@@ -562,7 +562,7 @@ mod tests {
#[test]
fn test_static_pattern_regexp_replace_pattern_error() {
let values = StringArray::from(vec!["abc"; 5]);
- // Delibaretely using an invalid pattern to see how the single pattern
+ // Deliberately using an invalid pattern to see how the single pattern
// error is propagated on regexp_replace.
let patterns = StringArray::from(vec!["["; 5]);
let replacements = StringArray::from(vec!["foo"; 5]);
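The test relies on `[` being rejected by the `regex` crate; a minimal check of that assumption:

```rust
fn main() {
    // "[" is an unterminated character class, so compilation fails;
    // regexp_replace propagates this error for the whole column.
    assert!(regex::Regex::new("[").is_err());
}
```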
8 changes: 4 additions & 4 deletions datafusion/functions/src/unicode/substrindex.rs
@@ -122,15 +122,15 @@ pub fn substr_index<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {

let occurrences = usize::try_from(n.unsigned_abs()).unwrap_or(usize::MAX);
let length = if n > 0 {
- let splitted = string.split(delimiter);
- splitted
+ let split = string.split(delimiter);
+ split
.take(occurrences)
.map(|s| s.len() + delimiter.len())
.sum::<usize>()
- delimiter.len()
} else {
- let splitted = string.rsplit(delimiter);
- splitted
+ let split = string.rsplit(delimiter);
+ split
.take(occurrences)
.map(|s| s.len() + delimiter.len())
.sum::<usize>()
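For context, the split/rsplit logic above implements MySQL-style `SUBSTRING_INDEX`. A self-contained sketch of those semantics, using a hypothetical helper rather than the Arrow-based kernel:

```rust
// Return the part of `s` before `n` occurrences of `delim`, counting
// from the front (n > 0) or from the back (n < 0).
fn substr_index(s: &str, delim: &str, n: i64) -> String {
    let parts: Vec<&str> = s.split(delim).collect();
    let occurrences = n.unsigned_abs() as usize;
    if n > 0 {
        parts.iter().take(occurrences).copied().collect::<Vec<_>>().join(delim)
    } else {
        let skip = parts.len().saturating_sub(occurrences);
        parts[skip..].join(delim)
    }
}

fn main() {
    assert_eq!(substr_index("www.apache.org", ".", 2), "www.apache");
    assert_eq!(substr_index("www.apache.org", ".", -2), "apache.org");
}
```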
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/analyzer/subquery.rs
@@ -159,11 +159,11 @@ fn check_inner_plan(
let (correlated, _): (Vec<_>, Vec<_>) = split_conjunction(predicate)
.into_iter()
.partition(|e| e.contains_outer());
- let maybe_unsupport = correlated
+ let maybe_unsupported = correlated
.into_iter()
.filter(|expr| !can_pullup_over_aggregation(expr))
.collect::<Vec<_>>();
if is_aggregate && is_scalar && !maybe_unsupport.is_empty() {
if is_aggregate && is_scalar && !maybe_unsupported.is_empty() {
return plan_err!(
"Correlated column is not allowed in predicate: {predicate}"
);
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -248,7 +248,7 @@ impl CommonSubexprEliminate {
}

/// Rewrites the expression in `exprs_list` with common sub-expressions
- /// replaced with a new colum and adds a ProjectionExec on top of `input`
+ /// replaced with a new column and adds a ProjectionExec on top of `input`
/// which computes any replaced common sub-expressions.
///
/// Returns a tuple of:
@@ -636,7 +636,7 @@ impl CommonSubexprEliminate {
/// Returns the window expressions, and the input to the deepest child
/// LogicalPlan.
///
- /// For example, if the input widnow looks like
+ /// For example, if the input window looks like
///
/// ```text
/// LogicalPlan::Window(exprs=[a, b, c])
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/decorrelate_predicate_subquery.rs
@@ -1232,7 +1232,7 @@ mod tests {
}

#[test]
- fn in_subquery_muti_project_subquery_cols() -> Result<()> {
+ fn in_subquery_multi_project_subquery_cols() -> Result<()> {
let table_scan = test_table_scan()?;
let subquery_scan = test_table_scan_with_name("sq")?;

@@ -160,7 +160,7 @@ impl RequiredIndicies {
(l, r.map_indices(|idx| idx - n))
}

- /// Partitions the indicies in this instance into two groups based on the
+ /// Partitions the indices in this instance into two groups based on the
/// given predicate function `f`.
fn partition<F>(&self, f: F) -> (Self, Self)
where
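The two-group split described above matches the standard iterator pattern; a small illustration with the standard library, not DataFusion's code:

```rust
fn main() {
    // Split indices into two groups based on a predicate, as
    // `RequiredIndicies::partition` does with its predicate `f`.
    let (even, odd): (Vec<usize>, Vec<usize>) =
        (0..6).partition(|idx| idx % 2 == 0);
    assert_eq!(even, vec![0, 2, 4]);
    assert_eq!(odd, vec![1, 3, 5]);
}
```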
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/push_down_filter.rs
@@ -1913,7 +1913,7 @@ mod tests {
assert_optimized_plan_eq(plan, expected)
}

- /// post-join predicates with columns from both sides are converted to join filterss
+ /// post-join predicates with columns from both sides are converted to join filters
#[test]
fn filter_join_on_common_dependent() -> Result<()> {
let table_scan = test_table_scan()?;
@@ -478,7 +478,7 @@ struct ConstEvaluator<'a> {
#[allow(dead_code)]
/// The simplify result of ConstEvaluator
enum ConstSimplifyResult {
- // Expr was simplifed and contains the new expression
+ // Expr was simplified and contains the new expression
Simplified(ScalarValue),
// Expr was not simplified and original value is returned
NotSimplified(ScalarValue),
@@ -519,7 +519,7 @@ impl<'a> TreeNodeRewriter for ConstEvaluator<'a> {
fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
match self.can_evaluate.pop() {
// Certain expressions such as `CASE` and `COALESCE` are short circuiting
- // and may not evalute all their sub expressions. Thus if
+ // and may not evaluate all their sub expressions. Thus if
// if any error is countered during simplification, return the original
// so that normal evaluation can occur
Some(true) => {
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/unwrap_cast_in_comparison.rs
@@ -893,7 +893,7 @@ mod tests {
DataType::Timestamp(TimeUnit::Nanosecond, utc)
}

- // a dictonary type for storing string tags
+ // a dictionary type for storing string tags
fn dictionary_tag_type() -> DataType {
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
}
@@ -410,7 +410,7 @@ pub fn accumulate_indices<F>(
},
);

- // handle any remaining bits (after the intial 64)
+ // handle any remaining bits (after the initial 64)
let remainder_bits = bit_chunks.remainder_bits();
group_indices_remainder
.iter()
@@ -835,7 +835,7 @@ mod test {
}
}

- /// Parallel implementaiton of NullState to check expected values
+ /// Parallel implementation of NullState to check expected values
#[derive(Debug, Default)]
struct MockNullState {
/// group indices that had values that passed the filter
4 changes: 2 additions & 2 deletions datafusion/physical-expr-common/src/aggregate/mod.rs
@@ -346,7 +346,7 @@ impl AggregateExpr for AggregateFunctionExpr {
let accumulator = self.fun.create_sliding_accumulator(args)?;

// Accumulators that have window frame startings different
- // than `UNBOUNDED PRECEDING`, such as `1 PRECEEDING`, need to
+ // than `UNBOUNDED PRECEDING`, such as `1 PRECEDING`, need to
// implement retract_batch method in order to run correctly
// currently in DataFusion.
//
@@ -377,7 +377,7 @@
// 3. Third sum we add to the state sum value between `[2, 3)`
// (`[0, 2)` is already in the state sum). Also we need to
// retract values between `[0, 1)` by this way we can obtain sum
- // between [1, 3) which is indeed the apropriate range.
+ // between [1, 3) which is indeed the appropriate range.
//
// When we use `UNBOUNDED PRECEDING` in the query starting
// index will always be 0 for the desired range, and hence the
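The retract logic those comments describe reduces to a simple invariant; a sketch with plain integers (the real accumulator operates on Arrow batches):

```rust
fn main() {
    let values = [10i64, 20, 30, 40];

    // Running sum over the window [0, 2).
    let mut sum: i64 = values[0..2].iter().sum();

    // Slide to [1, 3): add the entering value (index 2) and retract
    // the leaving value (index 0) instead of recomputing the sum.
    sum += values[2];
    sum -= values[0];

    assert_eq!(sum, values[1..3].iter().sum::<i64>());
}
```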
4 changes: 2 additions & 2 deletions datafusion/physical-expr-common/src/binary_map.rs
@@ -355,7 +355,7 @@ where
assert_eq!(values.len(), batch_hashes.len());

for (value, &hash) in values.iter().zip(batch_hashes.iter()) {
- // hande null value
+ // handle null value
let Some(value) = value else {
let payload = if let Some(&(payload, _offset)) = self.null.as_ref() {
payload
@@ -439,7 +439,7 @@ where
// Put the small values into buffer and offsets so it
// appears the output array, and store that offset
// so the bytes can be compared if needed
- let offset = self.buffer.len(); // offset of start fof data
+ let offset = self.buffer.len(); // offset of start for data
self.buffer.append_slice(value);
self.offsets.push(O::usize_as(self.buffer.len()));

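A toy version of the buffer-and-offsets layout described above, with plain `Vec`s standing in for the Arrow builders:

```rust
fn main() {
    let mut buffer: Vec<u8> = Vec::new();
    let mut offsets: Vec<usize> = vec![0];

    // Append each value's bytes and record where it ends, so stored
    // bytes can be sliced back out and compared later.
    for value in ["foo", "barbaz"] {
        buffer.extend_from_slice(value.as_bytes());
        offsets.push(buffer.len());
    }

    // Recover the second value from its offset range.
    let bytes = &buffer[offsets[1]..offsets[2]];
    assert_eq!(bytes, "barbaz".as_bytes());
}
```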
2 changes: 1 addition & 1 deletion datafusion/physical-expr-common/src/expressions/column.rs
@@ -80,7 +80,7 @@ impl PhysicalExpr for Column {
Ok(input_schema.field(self.index).data_type().clone())
}

- /// Decide whehter this expression is nullable, given the schema of the input
+ /// Decide whether this expression is nullable, given the schema of the input
fn nullable(&self, input_schema: &Schema) -> Result<bool> {
self.bounds_check(input_schema)?;
Ok(input_schema.field(self.index).is_nullable())