From 4f9199054d1741a8b736d13123b3d694516255b2 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 10:45:16 +0200 Subject: [PATCH 1/8] fix: watch StatefulSets instead of Deployments --- rust/operator-binary/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index bc3910f2..038f01bf 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -9,7 +9,7 @@ use stackable_operator::{ YamlSchema, cli::{Command, ProductOperatorRun}, k8s_openapi::api::{ - apps::v1::{Deployment, StatefulSet}, + apps::v1::StatefulSet, core::v1::{ConfigMap, Pod, Service}, }, kube::{ @@ -273,7 +273,7 @@ async fn main() -> anyhow::Result<()> { watcher::Config::default(), ) .owns( - watch_namespace.get_api::>(&client), + watch_namespace.get_api::>(&client), watcher::Config::default(), ) .owns( From fa0df11b716ebffaa149b951ce4e42824cf03708 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 11:05:35 +0200 Subject: [PATCH 2/8] use label app.kubernetes.io/name=spark-connect instead of app.kubernetes.io/name=spark-k8s --- rust/operator-binary/src/connect/common.rs | 7 ++++--- rust/operator-binary/src/connect/controller.rs | 8 ++++---- rust/operator-binary/src/connect/crd.rs | 5 +++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/rust/operator-binary/src/connect/common.rs b/rust/operator-binary/src/connect/common.rs index 5f5facfb..ef6f3608 100644 --- a/rust/operator-binary/src/connect/common.rs +++ b/rust/operator-binary/src/connect/common.rs @@ -11,9 +11,10 @@ use strum::Display; use super::crd::CONNECT_EXECUTOR_ROLE_NAME; use crate::{ connect::crd::{ - CONNECT_CONTROLLER_NAME, CONNECT_SERVER_ROLE_NAME, DUMMY_SPARK_CONNECT_GROUP_NAME, + CONNECT_APP_NAME, CONNECT_CONTROLLER_NAME, CONNECT_SERVER_ROLE_NAME, + DUMMY_SPARK_CONNECT_GROUP_NAME, }, - crd::constants::{APP_NAME, OPERATOR_NAME}, + crd::constants::OPERATOR_NAME, }; #[derive(Snafu, Debug)] @@ -42,7 +43,7 @@ pub(crate) fn labels<'a, T>( ) -> ObjectLabels<'a, T> { ObjectLabels { owner: scs, - app_name: APP_NAME, + app_name: CONNECT_APP_NAME, app_version: app_version_label, operator_name: OPERATOR_NAME, controller_name: CONNECT_CONTROLLER_NAME, diff --git a/rust/operator-binary/src/connect/controller.rs b/rust/operator-binary/src/connect/controller.rs index eff55179..e25735aa 100644 --- a/rust/operator-binary/src/connect/controller.rs +++ b/rust/operator-binary/src/connect/controller.rs @@ -18,11 +18,11 @@ use stackable_operator::{ }; use strum::{EnumDiscriminants, IntoStaticStr}; -use super::crd::{CONNECT_CONTROLLER_NAME, v1alpha1}; +use super::crd::{CONNECT_APP_NAME, CONNECT_CONTROLLER_NAME, v1alpha1}; use crate::{ Ctx, connect::{common, crd::SparkConnectServerStatus, executor, server}, - crd::constants::{APP_NAME, OPERATOR_NAME, SPARK_IMAGE_BASE_NAME}, + crd::constants::{OPERATOR_NAME, SPARK_IMAGE_BASE_NAME}, }; #[derive(Snafu, Debug, EnumDiscriminants)] @@ -168,7 +168,7 @@ pub async fn reconcile( let client = &ctx.client; let mut cluster_resources = ClusterResources::new( - APP_NAME, + CONNECT_APP_NAME, OPERATOR_NAME, CONNECT_CONTROLLER_NAME, &scs.object_ref(&()), @@ -184,7 +184,7 @@ pub async fn reconcile( // Use a dedicated service account for connect server pods. let (service_account, role_binding) = build_rbac_resources( scs, - APP_NAME, + CONNECT_APP_NAME, cluster_resources .get_required_labels() .context(GetRequiredLabelsSnafu)?, diff --git a/rust/operator-binary/src/connect/crd.rs b/rust/operator-binary/src/connect/crd.rs index e478f1c1..0009f332 100644 --- a/rust/operator-binary/src/connect/crd.rs +++ b/rust/operator-binary/src/connect/crd.rs @@ -33,7 +33,6 @@ use stackable_operator::{ use strum::{Display, EnumIter}; use super::common::SparkConnectRole; -use crate::crd::constants::APP_NAME; pub const CONNECT_CONTROLLER_NAME: &str = "connect"; pub const CONNECT_FULL_CONTROLLER_NAME: &str = concatcp!( @@ -48,6 +47,8 @@ pub const CONNECT_UI_PORT: i32 = 4040; pub const DUMMY_SPARK_CONNECT_GROUP_NAME: &str = "default"; +pub const CONNECT_APP_NAME: &str = "spark-connect"; + #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("fragment validation failure"))] @@ -346,7 +347,7 @@ impl v1alpha1::ExecutorConfig { fn affinity(cluster_name: &str) -> StackableAffinityFragment { let affinity_between_role_pods = affinity_between_role_pods( - APP_NAME, + CONNECT_APP_NAME, cluster_name, &SparkConnectRole::Executor.to_string(), 70, From 910e465360ce88b6013ce69d71e831eb3ede2e24 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 11:14:34 +0200 Subject: [PATCH 3/8] use label app.kubernetes.io/name=spark-history instead of app.kubernetes.io/name=spark-k8s --- rust/operator-binary/src/crd/constants.rs | 2 +- .../src/history/history_controller.rs | 22 +++++++++---------- .../src/history/operations/pdb.rs | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/rust/operator-binary/src/crd/constants.rs b/rust/operator-binary/src/crd/constants.rs index 3da61f98..85f4d769 100644 --- a/rust/operator-binary/src/crd/constants.rs +++ b/rust/operator-binary/src/crd/constants.rs @@ -77,7 +77,7 @@ pub const POD_DRIVER_FULL_CONTROLLER_NAME: &str = pub const HISTORY_CONTROLLER_NAME: &str = "history"; pub const HISTORY_FULL_CONTROLLER_NAME: &str = concatcp!(HISTORY_CONTROLLER_NAME, '.', OPERATOR_NAME); - +pub const HISTORY_APP_NAME: &str = "spark-history"; pub const HISTORY_ROLE_NAME: &str = "node"; pub const SPARK_IMAGE_BASE_NAME: &str = "spark-k8s"; diff --git a/rust/operator-binary/src/history/history_controller.rs b/rust/operator-binary/src/history/history_controller.rs index 7240157a..737a8e96 100644 --- a/rust/operator-binary/src/history/history_controller.rs +++ b/rust/operator-binary/src/history/history_controller.rs @@ -54,13 +54,13 @@ use crate::{ Ctx, crd::{ constants::{ - ACCESS_KEY_ID, APP_NAME, HISTORY_CONTROLLER_NAME, HISTORY_ROLE_NAME, HISTORY_UI_PORT, - JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, - MAX_SPARK_LOG_FILES_SIZE, METRICS_PORT, OPERATOR_NAME, SECRET_ACCESS_KEY, - SPARK_DEFAULTS_FILE_NAME, SPARK_ENV_SH_FILE_NAME, SPARK_IMAGE_BASE_NAME, SPARK_UID, - STACKABLE_TRUST_STORE, VOLUME_MOUNT_NAME_CONFIG, VOLUME_MOUNT_NAME_LOG, - VOLUME_MOUNT_NAME_LOG_CONFIG, VOLUME_MOUNT_PATH_CONFIG, VOLUME_MOUNT_PATH_LOG, - VOLUME_MOUNT_PATH_LOG_CONFIG, + ACCESS_KEY_ID, HISTORY_APP_NAME, HISTORY_CONTROLLER_NAME, HISTORY_ROLE_NAME, + HISTORY_UI_PORT, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, + LISTENER_VOLUME_NAME, MAX_SPARK_LOG_FILES_SIZE, METRICS_PORT, OPERATOR_NAME, + SECRET_ACCESS_KEY, SPARK_DEFAULTS_FILE_NAME, SPARK_ENV_SH_FILE_NAME, + SPARK_IMAGE_BASE_NAME, SPARK_UID, STACKABLE_TRUST_STORE, VOLUME_MOUNT_NAME_CONFIG, + VOLUME_MOUNT_NAME_LOG, VOLUME_MOUNT_NAME_LOG_CONFIG, VOLUME_MOUNT_PATH_CONFIG, + VOLUME_MOUNT_PATH_LOG, VOLUME_MOUNT_PATH_LOG_CONFIG, }, history::{self, HistoryConfig, SparkHistoryServerContainer, v1alpha1}, listener_ext, @@ -248,7 +248,7 @@ pub async fn reconcile( let client = &ctx.client; let mut cluster_resources = ClusterResources::new( - APP_NAME, + HISTORY_APP_NAME, OPERATOR_NAME, HISTORY_CONTROLLER_NAME, &shs.object_ref(&()), @@ -271,7 +271,7 @@ pub async fn reconcile( // Use a dedicated service account for history server pods. let (service_account, role_binding) = build_rbac_resources( shs, - APP_NAME, + HISTORY_APP_NAME, cluster_resources .get_required_labels() .context(GetRequiredLabelsSnafu)?, @@ -659,7 +659,7 @@ fn build_stateful_set( match_labels: Some( Labels::role_group_selector( shs, - APP_NAME, + HISTORY_APP_NAME, &rolegroupref.role, &rolegroupref.role_group, ) @@ -726,7 +726,7 @@ fn labels<'a, T>( ) -> ObjectLabels<'a, T> { ObjectLabels { owner: shs, - app_name: APP_NAME, + app_name: HISTORY_APP_NAME, app_version: app_version_label, operator_name: OPERATOR_NAME, controller_name: HISTORY_CONTROLLER_NAME, diff --git a/rust/operator-binary/src/history/operations/pdb.rs b/rust/operator-binary/src/history/operations/pdb.rs index f89b4978..a816ca15 100644 --- a/rust/operator-binary/src/history/operations/pdb.rs +++ b/rust/operator-binary/src/history/operations/pdb.rs @@ -5,7 +5,7 @@ use stackable_operator::{ }; use crate::crd::{ - constants::{APP_NAME, HISTORY_CONTROLLER_NAME, HISTORY_ROLE_NAME, OPERATOR_NAME}, + constants::{HISTORY_APP_NAME, HISTORY_CONTROLLER_NAME, HISTORY_ROLE_NAME, OPERATOR_NAME}, history::v1alpha1, }; @@ -37,7 +37,7 @@ pub async fn add_pdbs( .unwrap_or(max_unavailable_history_servers()); let pdb = PodDisruptionBudgetBuilder::new_with_role( history, - APP_NAME, + HISTORY_APP_NAME, HISTORY_ROLE_NAME, OPERATOR_NAME, HISTORY_CONTROLLER_NAME, From 63858faa363d27feff04a64e805275b71767d279 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 11:21:50 +0200 Subject: [PATCH 4/8] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65cf9e1e..881b11b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,10 +26,12 @@ All notable changes to this project will be documented in this file. - Document that Spark Connect doesn't integrate with the history server ([#559]) - test: Bump to Vector `0.46.1` ([#565]). - Use versioned common structs ([#572]). +- Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-hhistory` and `spark-connect` instead of `spark-k8s` ([#573]). ### Fixed - Use `json` file extension for log files ([#553]). +- The Spark connect controller now watches StatefulSets instead of Deplyments (again) ([#573]). ### Removed @@ -46,6 +48,7 @@ All notable changes to this project will be documented in this file. [#565]: https://github.com/stackabletech/spark-k8s-operator/pull/565 [#570]: https://github.com/stackabletech/spark-k8s-operator/pull/570 [#572]: https://github.com/stackabletech/spark-k8s-operator/pull/572 +[#573]: https://github.com/stackabletech/spark-k8s-operator/pull/573 ## [25.3.0] - 2025-03-21 From ab22dc42d109eb90b35ab3319ebb29823159e7b8 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 17:00:48 +0200 Subject: [PATCH 5/8] add cluster roles for connect and history apps and update more labels --- .../spark-k8s-operator/templates/roles.yaml | 7 ++++ .../templates/spark-connect-clusterrole.yaml | 42 +++++++++++++++++++ .../templates/spark-history-clusterrole.yaml | 42 +++++++++++++++++++ rust/operator-binary/src/connect/server.rs | 12 +++--- 4 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml create mode 100644 deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index cd64b380..3625c680 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -18,7 +18,14 @@ rules: resources: - persistentvolumeclaims verbs: + - create + - delete + - deletecollection + - get - list + - patch + - update + - watch - apiGroups: - "" resources: diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml new file mode 100644 index 00000000..eafc5e73 --- /dev/null +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: spark-connect-clusterrole + labels: + {{- include "operator.labels" . | nindent 4 }} +rules: + - apiGroups: + - "" + resources: + - configmaps + - persistentvolumeclaims + - pods + - secrets + - serviceaccounts + - services + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch + - apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create +{{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} + - apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + resourceNames: + - nonroot-v2 + verbs: + - use +{{ end }} diff --git a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml new file mode 100644 index 00000000..4b9013c6 --- /dev/null +++ b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: spark-history-clusterrole + labels: + {{- include "operator.labels" . | nindent 4 }} +rules: + - apiGroups: + - "" + resources: + - configmaps + - persistentvolumeclaims + - pods + - secrets + - serviceaccounts + - services + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch + - apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create +{{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} + - apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + resourceNames: + - nonroot-v2 + verbs: + - use +{{ end }} diff --git a/rust/operator-binary/src/connect/server.rs b/rust/operator-binary/src/connect/server.rs index b199a235..328b4abe 100644 --- a/rust/operator-binary/src/connect/server.rs +++ b/rust/operator-binary/src/connect/server.rs @@ -35,6 +35,7 @@ use stackable_operator::{ role_utils::RoleGroupRef, }; +use super::crd::CONNECT_APP_NAME; use crate::{ connect::{ common::{self, SparkConnectRole, object_name}, @@ -45,7 +46,7 @@ use crate::{ }, crd::{ constants::{ - APP_NAME, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, + JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, LOG4J2_CONFIG_FILE, MAX_SPARK_LOG_FILES_SIZE, METRICS_PROPERTIES_FILE, POD_TEMPLATE_FILE, SPARK_DEFAULTS_FILE_NAME, SPARK_UID, VOLUME_MOUNT_NAME_CONFIG, VOLUME_MOUNT_NAME_LOG, VOLUME_MOUNT_NAME_LOG_CONFIG, VOLUME_MOUNT_PATH_CONFIG, @@ -370,7 +371,7 @@ pub(crate) fn build_stateful_set( match_labels: Some( Labels::role_group_selector( scs, - APP_NAME, + CONNECT_APP_NAME, &SparkConnectRole::Server.to_string(), DUMMY_SPARK_CONNECT_GROUP_NAME, ) @@ -393,9 +394,10 @@ pub(crate) fn build_internal_service( ) -> Result { let service_name = object_name(&scs.name_any(), SparkConnectRole::Server); - let selector = Labels::role_selector(scs, APP_NAME, &SparkConnectRole::Server.to_string()) - .context(LabelBuildSnafu)? - .into(); + let selector = + Labels::role_selector(scs, CONNECT_APP_NAME, &SparkConnectRole::Server.to_string()) + .context(LabelBuildSnafu)? + .into(); Ok(Service { metadata: ObjectMetaBuilder::new() From 8c2e10b71d470686b64052e2422a83c2f8630edd Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 17:02:35 +0200 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Sebastian Bernauer --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 881b11b4..725c95db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,12 +26,12 @@ All notable changes to this project will be documented in this file. - Document that Spark Connect doesn't integrate with the history server ([#559]) - test: Bump to Vector `0.46.1` ([#565]). - Use versioned common structs ([#572]). -- Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-hhistory` and `spark-connect` instead of `spark-k8s` ([#573]). +- Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-history` and `spark-connect` instead of `spark-k8s` ([#573]). ### Fixed - Use `json` file extension for log files ([#553]). -- The Spark connect controller now watches StatefulSets instead of Deplyments (again) ([#573]). +- The Spark connect controller now watches StatefulSets instead of Deployments (again) ([#573]). ### Removed From db8ed80baf4a8d0bb800b86c1cadc3bfe6f3d613 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 26 May 2025 18:05:22 +0200 Subject: [PATCH 7/8] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 725c95db..f63edaa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,8 @@ All notable changes to this project will be documented in this file. - Document that Spark Connect doesn't integrate with the history server ([#559]) - test: Bump to Vector `0.46.1` ([#565]). - Use versioned common structs ([#572]). -- Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-history` and `spark-connect` instead of `spark-k8s` ([#573]). +- BREAKING: Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-history` and `spark-connect` instead of `spark-k8s` ([#573]). +- BREAKING: The history pods now have their own `ClusterRole` named `spark-history-clusterrole` ([#573]). ### Fixed From 6129b4e60a27ba00e74b1e2414bb84323b6ad6aa Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 27 May 2025 08:57:11 +0200 Subject: [PATCH 8/8] Update CHANGELOG.md Co-authored-by: Sebastian Bernauer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f63edaa9..6348f044 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ All notable changes to this project will be documented in this file. - test: Bump to Vector `0.46.1` ([#565]). - Use versioned common structs ([#572]). - BREAKING: Change the label `app.kubernetes.io/name` for Spark history and connect objects to use `spark-history` and `spark-connect` instead of `spark-k8s` ([#573]). -- BREAKING: The history pods now have their own `ClusterRole` named `spark-history-clusterrole` ([#573]). +- BREAKING: The history Pods now have their own ClusterRole named `spark-history-clusterrole` ([#573]). ### Fixed