From aedb105a49e8e63758457d59131e97adc6e0f4e9 Mon Sep 17 00:00:00 2001 From: Alexander Greene Date: Wed, 21 Jul 2021 17:22:39 -0400 Subject: [PATCH 1/3] Update getMetricsFromPort to infer port number Problem: The getMetricsFromPod function assumes that metrics are exposed on port 8080. This function fails to retrieve metrics from the olm or catalog operator when the port is changed. Solution: Name the port in each of the deployments and update the getMetricsFromPod function to infer the port number from the deployments. Signed-off-by: Alexander Greene Upstream-repository: operator-lifecycle-manager Upstream-commit: 08a1d4b2f06fda1a6d6b4cadf779e1ce2259af3e --- .../test/e2e/metrics_e2e_test.go | 127 ++++++++++++++++-- 1 file changed, 116 insertions(+), 11 deletions(-) diff --git a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go index c730c22c54..06e4eb23c5 100644 --- a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go +++ b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go @@ -1,3 +1,4 @@ +//go:build !bare // +build !bare package e2e @@ -6,6 +7,7 @@ import ( "bytes" "context" "regexp" + "strconv" "strings" . "github.com/onsi/ginkgo" @@ -72,7 +74,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("generates csv_abnormal metric for OLM pod", func() { - Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).To(And( + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(And( ContainElement(LikeMetric( WithFamily("csv_abnormal"), WithName(failingCSV.Name), @@ -100,7 +102,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes its associated CSV metrics", func() { // Verify that when the csv has been deleted, it deletes the corresponding CSV metrics - Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).ToNot(And( + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).ToNot(And( ContainElement(LikeMetric(WithFamily("csv_abnormal"), WithName(failingCSV.Name))), ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(failingCSV.Name))), )) @@ -130,7 +132,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { // Verify metrics have been emitted for subscription Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric( WithFamily("subscription_sync_total"), WithName("metric-subscription-for-create"), @@ -145,7 +147,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { // Verify metrics have been emitted for dependency resolution Eventually(func() bool { return Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric( WithFamily("olm_resolution_duration_seconds"), WithLabel("outcome", "failed"), @@ -160,7 +162,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { BeforeEach(func() { subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-update", testPackageName, stableChannel, v1alpha1.ApprovalManual) Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-update")))) Eventually(func() error { s, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(context.TODO(), subscription.GetName(), metav1.GetOptions{}) @@ -181,7 +183,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes the old Subscription metric and emits the new metric", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( Not(ContainElement(LikeMetric( WithFamily("subscription_sync_total"), @@ -215,7 +217,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes the old subscription metric and emits the new metric(there is only one metric for the subscription)", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( Not(ContainElement(LikeMetric( WithFamily("subscription_sync_total"), @@ -245,7 +247,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { BeforeEach(func() { subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-delete", testPackageName, stableChannel, v1alpha1.ApprovalManual) Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-delete")))) if subscriptionCleanup != nil { subscriptionCleanup() @@ -261,11 +263,115 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes the Subscription metric", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).ShouldNot(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithName("metric-subscription-for-delete")))) }) }) }) + + Context("Metrics emitted by CatalogSources", func() { + When("A valid CatalogSource object is created", func() { + var ( + name = "metrics-catsrc-valid" + cleanup func() + ) + BeforeEach(func() { + mainPackageName := genName("nginx-") + + mainPackageStable := fmt.Sprintf("%s-stable", mainPackageName) + + stableChannel := "stable" + + mainCRD := newCRD(genName("ins-")) + mainCSV := newCSV(mainPackageStable, testNamespace, "", semver.MustParse("0.1.0"), []apiextensions.CustomResourceDefinition{mainCRD}, nil, nil) + + mainManifests := []registry.PackageManifest{ + { + PackageName: mainPackageName, + Channels: []registry.PackageChannel{ + {Name: stableChannel, CurrentCSVName: mainPackageStable}, + }, + DefaultChannelName: stableChannel, + }, + } + _, cleanupAll := createInternalCatalogSource(c, crc, name, testNamespace, mainManifests, []apiextensions.CustomResourceDefinition{mainCRD}, []v1alpha1.ClusterServiceVersion{mainCSV}) + + var once sync.Once + cleanup = func() { + once.Do(cleanupAll) + } + }) + AfterEach(func() { + cleanup() + }) + It("emits metrics for the catalogSource", func() { + Eventually(func() []Metric { + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) + }).Should(And( + ContainElement(LikeMetric( + WithFamily("catalog_source_count"), + WithValueGreaterThan(0), + )), + ContainElement(LikeMetric( + WithFamily("catalogsource_ready"), + WithName(name), + WithNamespace(testNamespace), + WithValue(1), + )), + )) + }) + When("The CatalogSource object is deleted", func() { + BeforeEach(func() { + cleanup() + }) + It("deletes the metrics for the CatalogSource", func() { + Eventually(func() []Metric { + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) + }).Should(And( + Not(ContainElement(LikeMetric( + WithFamily("catalogsource_ready"), + WithName(name), + WithNamespace(testNamespace), + ))))) + }) + }) + }) + + When("A CatalogSource object is in an invalid state", func() { + var ( + name = "metrics-catsrc-invalid" + cleanup func() + ) + BeforeEach(func() { + _, cleanup = createInvalidGRPCCatalogSource(crc, name, testNamespace) + }) + AfterEach(func() { + cleanup() + }) + It("emits metrics for the CatlogSource with a Value greater than 0", func() { + Eventually(func() []Metric { + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) + }).Should(And( + ContainElement(LikeMetric( + WithFamily("catalogsource_ready"), + WithName(name), + WithNamespace(testNamespace), + WithValue(0), + )), + )) + Consistently(func() []Metric { + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) + }, "3m").Should(And( + ContainElement(LikeMetric( + WithFamily("catalogsource_ready"), + WithName(name), + WithNamespace(testNamespace), + WithValue(0), + )), + )) + }) + }) + }) }) func getPodWithLabel(client operatorclient.ClientInterface, label string) *corev1.Pod { @@ -305,14 +411,13 @@ func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, p scheme = "http" } ctx.Ctx().Logf("Retrieving metrics using scheme %v\n", scheme) - mfs := make(map[string]*io_prometheus_client.MetricFamily) EventuallyWithOffset(1, func() error { raw, err := client.KubernetesInterface().CoreV1().RESTClient().Get(). Namespace(pod.GetNamespace()). Resource("pods"). SubResource("proxy"). - Name(net.JoinSchemeNamePort(scheme, pod.GetName(), port)). + Name(net.JoinSchemeNamePort(scheme, pod.GetName(), extractMetricPortFromPod(pod))). Suffix("metrics"). Do(context.Background()).Raw() if err != nil { From 308560d443f7b09da9cb664aa57d2011d02ae3c0 Mon Sep 17 00:00:00 2001 From: Josef Karasek Date: Fri, 3 Sep 2021 17:20:55 +0200 Subject: [PATCH 2/3] Emit CSV metric on startup (#2216) Signed-off-by: Josef Karasek Upstream-repository: operator-lifecycle-manager Upstream-commit: de4bebe06ba076f804d28c594d7dc52dfd95ef20 --- .../cmd/olm/main.go | 5 + .../pkg/controller/operators/olm/operator.go | 17 ++++ .../test/e2e/metrics_e2e_test.go | 97 ++++++++++++++++++- .../pkg/controller/operators/olm/operator.go | 17 ++++ 4 files changed, 135 insertions(+), 1 deletion(-) diff --git a/staging/operator-lifecycle-manager/cmd/olm/main.go b/staging/operator-lifecycle-manager/cmd/olm/main.go index 9c22fc429f..75c4351d20 100644 --- a/staging/operator-lifecycle-manager/cmd/olm/main.go +++ b/staging/operator-lifecycle-manager/cmd/olm/main.go @@ -222,6 +222,11 @@ func main() { op.Run(ctx) <-op.Ready() + // Emit CSV metric + if err = op.EnsureCSVMetric(); err != nil { + logger.WithError(err).Fatalf("error emitting metrics for existing CSV") + } + if *writeStatusName != "" { operatorstatus.MonitorClusterStatus(*writeStatusName, op.AtLevel(), ctx.Done(), opClient, configClient, crClient) } diff --git a/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go b/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go index e9ca3061c5..e72fda2ae5 100644 --- a/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go +++ b/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go @@ -600,6 +600,23 @@ func (a *Operator) RegisterCSVWatchNotification(csvNotification csvutility.Watch a.csvNotification = csvNotification } +func (a *Operator) EnsureCSVMetric() error { + csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().List(labels.Everything()) + if err != nil { + return err + } + for _, csv := range csvs { + logger := a.logger.WithFields(logrus.Fields{ + "name": csv.GetName(), + "namespace": csv.GetNamespace(), + "self": csv.GetSelfLink(), + }) + logger.Debug("emitting metrics for existing CSV") + metrics.EmitCSVMetric(csv, csv) + } + return nil +} + func (a *Operator) syncGCObject(obj interface{}) (syncError error) { metaObj, ok := obj.(metav1.Object) if !ok { diff --git a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go index 06e4eb23c5..082f20552a 100644 --- a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go +++ b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go @@ -14,6 +14,7 @@ import ( . "github.com/onsi/gomega" io_prometheus_client "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/net" @@ -109,6 +110,44 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { }) }) }) + + When("a CSV is created", func() { + var ( + cleanupCSV cleanupFunc + csv v1alpha1.ClusterServiceVersion + ) + BeforeEach(func() { + packageName := genName("csv-test-") + packageStable := fmt.Sprintf("%s-stable", packageName) + csv = newCSV(packageStable, testNamespace, "", semver.MustParse("0.1.0"), nil, nil, nil) + + var err error + _, err = createCSV(c, crc, csv, testNamespace, false, false) + Expect(err).ToNot(HaveOccurred()) + _, err = fetchCSV(crc, csv.Name, testNamespace, csvSucceededChecker) + Expect(err).ToNot(HaveOccurred()) + }) + AfterEach(func() { + if cleanupCSV != nil { + cleanupCSV() + } + }) + It("emits a CSV metrics", func() { + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To( + ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))), + ) + }) + When("the OLM pod restarts", func() { + BeforeEach(func() { + restartDeploymentWithLabel(c, "app=olm-operator") + }) + It("CSV metric is preserved", func() { + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To( + ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))), + ) + }) + }) + }) }) Context("Metrics emitted by objects during operator installation", func() { @@ -389,7 +428,63 @@ func getPodWithLabel(client operatorclient.ClientInterface, label string) *corev return &podList.Items[0] } -func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, port string) []Metric { +func getDeploymentWithLabel(client operatorclient.ClientInterface, label string) *appsv1.Deployment { + listOptions := metav1.ListOptions{LabelSelector: label} + var deploymentList *appsv1.DeploymentList + EventuallyWithOffset(1, func() (numDeps int, err error) { + deploymentList, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).List(context.TODO(), listOptions) + if deploymentList != nil { + numDeps = len(deploymentList.Items) + } + + return + }).Should(Equal(1), "expected exactly one Deployment") + + return &deploymentList.Items[0] +} + +func restartDeploymentWithLabel(client operatorclient.ClientInterface, l string) { + d := getDeploymentWithLabel(client, l) + z := int32(0) + oldZ := *d.Spec.Replicas + d.Spec.Replicas = &z + _, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), d, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + EventuallyWithOffset(1, func() (replicas int32, err error) { + deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{}) + if deployment != nil { + replicas = deployment.Status.Replicas + } + return + }).Should(Equal(int32(0)), "expected exactly 0 Deployments") + + updated := getDeploymentWithLabel(client, l) + updated.Spec.Replicas = &oldZ + _, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), updated, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + EventuallyWithOffset(1, func() (replicas int32, err error) { + deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{}) + if deployment != nil { + replicas = deployment.Status.Replicas + } + return + }).Should(Equal(oldZ), "expected exactly 1 Deployment") +} + +func extractMetricPortFromPod(pod *corev1.Pod) string { + for _, container := range pod.Spec.Containers { + for _, port := range container.Ports { + if port.Name == "metrics" { + return strconv.Itoa(int(port.ContainerPort)) + } + } + } + return "-1" +} + +func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod) []Metric { ctx.Ctx().Logf("querying pod %s/%s\n", pod.GetNamespace(), pod.GetName()) // assuming -tls-cert and -tls-key aren't used anywhere else as a parameter value diff --git a/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go b/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go index e9ca3061c5..e72fda2ae5 100644 --- a/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go +++ b/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go @@ -600,6 +600,23 @@ func (a *Operator) RegisterCSVWatchNotification(csvNotification csvutility.Watch a.csvNotification = csvNotification } +func (a *Operator) EnsureCSVMetric() error { + csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().List(labels.Everything()) + if err != nil { + return err + } + for _, csv := range csvs { + logger := a.logger.WithFields(logrus.Fields{ + "name": csv.GetName(), + "namespace": csv.GetNamespace(), + "self": csv.GetSelfLink(), + }) + logger.Debug("emitting metrics for existing CSV") + metrics.EmitCSVMetric(csv, csv) + } + return nil +} + func (a *Operator) syncGCObject(obj interface{}) (syncError error) { metaObj, ok := obj.(metav1.Object) if !ok { From 1244df86de7ea724da4ac46d61c08e6544c78b32 Mon Sep 17 00:00:00 2001 From: "Akihiko (Aki) Kuroda" <16141898+akihikokuroda@users.noreply.github.com> Date: Mon, 20 Dec 2021 11:52:30 -0500 Subject: [PATCH 3/3] fix e2e CSV metric is preserved failure (#2530) Signed-off-by: akihikokuroda Upstream-repository: operator-lifecycle-manager Upstream-commit: 33b081aca79dcfc33895a5c114976c4f284f027f --- .../test/e2e/metrics_e2e_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go index 082f20552a..bc9b82d60f 100644 --- a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go +++ b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go @@ -142,9 +142,13 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { restartDeploymentWithLabel(c, "app=olm-operator") }) It("CSV metric is preserved", func() { - Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To( - ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))), - ) + Eventually(func() []Metric { + return getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator")) + }).Should(ContainElement(LikeMetric( + WithFamily("csv_succeeded"), + WithName(csv.Name), + WithValue(1), + ))) }) }) })