-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Add metrics for managed resources count #4031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.22.8 | ||
1.22.11 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,81 @@ | ||
package lbc | ||
|
||
import ( | ||
"context" | ||
awssdk "github.com/aws/aws-sdk-go-v2/aws" | ||
rgtsdk "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" | ||
rgttypes "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types" | ||
"github.com/prometheus/client_golang/prometheus" | ||
corev1 "k8s.io/api/core/v1" | ||
networkingv1 "k8s.io/api/networking/v1" | ||
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services" | ||
"strings" | ||
|
||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
"time" | ||
) | ||
|
||
const ( | ||
// networkLoadBalancerStr is the substring looked for in a Service's spec.loadBalancerClass when counting managed services. | ||
networkLoadBalancerStr = "nlb" | ||
// resourceTypeALB is the resource type filter passed to the Resource Groups Tagging API lookup for ALBs. | ||
resourceTypeALB = "elasticloadbalancing:loadbalancer/app" | ||
// resourceTypeNLB is the resource type filter passed to the Resource Groups Tagging API lookup for NLBs. | ||
resourceTypeNLB = "elasticloadbalancing:loadbalancer/net" | ||
) | ||
|
||
// MetricCollector records controller metrics: pod readiness-gate latency and counts of managed resources. | ||
type MetricCollector interface { | ||
// ObservePodReadinessGateReady records how long a pod took to become ready in the load balancer. | ||
// This metric is useful to determine how fast pods are becoming ready. | ||
// Due to some architectural constraints, we can only emit this metric for pods that are using readiness gates. | ||
ObservePodReadinessGateReady(namespace string, tgbName string, duration time.Duration) | ||
|
||
// UpdateManagedK8sResourceMetrics fetches and updates the managed k8s resource metrics. | ||
// It returns an error when the resources cannot be fetched. | ||
UpdateManagedK8sResourceMetrics(ctx context.Context) error | ||
|
||
// UpdateManagedALBMetrics updates the managed ALB count metric. | ||
UpdateManagedALBMetrics(ctx context.Context) error | ||
|
||
// UpdateManagedNLBMetrics updates the managed NLB count metric. | ||
UpdateManagedNLBMetrics(ctx context.Context) error | ||
} | ||
|
||
type collector struct { | ||
instruments *instruments | ||
instruments *instruments | ||
runtimeClient client.Client | ||
rgt services.RGT | ||
finalizerKeyWord string | ||
clusterTagKey string | ||
clusterTagVal string | ||
} | ||
|
||
// noOpCollector is a MetricCollector whose methods do nothing; NewCollector returns it when the registerer or runtime client is nil. | ||
type noOpCollector struct{} | ||
|
||
// ObservePodReadinessGateReady ignores its arguments and records nothing. | ||
func (n *noOpCollector) ObservePodReadinessGateReady(_ string, _ string, _ time.Duration) { | ||
} | ||
|
||
func NewCollector(registerer prometheus.Registerer) MetricCollector { | ||
if registerer == nil { | ||
// UpdateManagedK8sResourceMetrics does nothing and always returns nil. | ||
func (n *noOpCollector) UpdateManagedK8sResourceMetrics(_ context.Context) error { | ||
return nil | ||
} | ||
|
||
// UpdateManagedALBMetrics does nothing and always returns nil. | ||
func (n *noOpCollector) UpdateManagedALBMetrics(_ context.Context) error { | ||
return nil | ||
} | ||
|
||
// UpdateManagedNLBMetrics does nothing and always returns nil. | ||
func (n *noOpCollector) UpdateManagedNLBMetrics(_ context.Context) error { | ||
return nil | ||
} | ||
|
||
func NewCollector(registerer prometheus.Registerer, runtimeClient client.Client, rgt services.RGT, finalizerKeyWord string, clusterTagKey string, clusterTagVal string) MetricCollector { | ||
if registerer == nil || runtimeClient == nil { | ||
return &noOpCollector{} | ||
} | ||
|
||
instruments := newInstruments(registerer) | ||
return &collector{ | ||
instruments: instruments, | ||
instruments: instruments, | ||
runtimeClient: runtimeClient, | ||
rgt: rgt, | ||
finalizerKeyWord: finalizerKeyWord, | ||
clusterTagKey: clusterTagKey, | ||
clusterTagVal: clusterTagVal, | ||
} | ||
} | ||
|
||
|
@@ -37,3 +85,99 @@ func (c *collector) ObservePodReadinessGateReady(namespace string, tgbName strin | |
labelName: tgbName, | ||
}).Observe(duration.Seconds()) | ||
} | ||
|
||
func (c *collector) UpdateManagedK8sResourceMetrics(ctx context.Context) error { | ||
listOpts := &client.ListOptions{ | ||
Namespace: "", | ||
} | ||
ingressCount, serviceCount, tgbCount := 0, 0, 0 | ||
// Fetch ingress count | ||
ingressList := &networkingv1.IngressList{} | ||
err := c.runtimeClient.List(ctx, ingressList, listOpts) | ||
if err != nil { | ||
return err | ||
} | ||
for _, ingress := range ingressList.Items { | ||
for _, finalizer := range ingress.Finalizers { | ||
if strings.Contains(finalizer, c.finalizerKeyWord) { | ||
ingressCount++ | ||
break | ||
} | ||
} | ||
} | ||
c.instruments.managedIngressCount.Set(float64(ingressCount)) | ||
|
||
// Fetch service count | ||
serviceList := &corev1.ServiceList{} | ||
err = c.runtimeClient.List(ctx, serviceList, listOpts) | ||
if err != nil { | ||
return err | ||
} | ||
for _, service := range serviceList.Items { | ||
hasMatchingFinalizer := false | ||
for _, finalizer := range service.Finalizers { | ||
if strings.Contains(finalizer, c.finalizerKeyWord) { | ||
hasMatchingFinalizer = true | ||
break | ||
} | ||
} | ||
|
||
if hasMatchingFinalizer && service.Spec.LoadBalancerClass != nil && strings.Contains(*service.Spec.LoadBalancerClass, networkLoadBalancerStr) { | ||
serviceCount++ | ||
} | ||
} | ||
c.instruments.managedServiceCount.Set(float64(serviceCount)) | ||
|
||
// Fetch TargetGroupBinding count | ||
tgbList := &elbv2api.TargetGroupBindingList{} | ||
err = c.runtimeClient.List(ctx, tgbList, listOpts) | ||
if err != nil { | ||
return err | ||
} | ||
for _, tgb := range tgbList.Items { | ||
for _, finalizer := range tgb.Finalizers { | ||
if strings.Contains(finalizer, c.finalizerKeyWord) { | ||
tgbCount++ | ||
break | ||
} | ||
} | ||
} | ||
c.instruments.managedTGBCount.Set(float64(tgbCount)) | ||
|
||
return nil | ||
} | ||
|
||
func (c *collector) UpdateManagedALBMetrics(ctx context.Context) error { | ||
count, err := c.getManagedAWSResourceMetrics(ctx, resourceTypeALB) | ||
if err != nil { | ||
return err | ||
} | ||
c.instruments.managedALBCount.Set(float64(count)) | ||
return nil | ||
} | ||
|
||
func (c *collector) UpdateManagedNLBMetrics(ctx context.Context) error { | ||
count, err := c.getManagedAWSResourceMetrics(ctx, resourceTypeNLB) | ||
if err != nil { | ||
return err | ||
} | ||
c.instruments.managedNLBCount.Set(float64(count)) | ||
return nil | ||
} | ||
|
||
func (c *collector) getManagedAWSResourceMetrics(ctx context.Context, resourceType string) (count int, err error) { | ||
req := &rgtsdk.GetResourcesInput{ | ||
ResourceTypeFilters: []string{resourceType}, | ||
TagFilters: []rgttypes.TagFilter{ | ||
{ | ||
Key: awssdk.String(c.clusterTagKey), | ||
Values: []string{c.clusterTagVal}, | ||
}, | ||
}, | ||
} | ||
resources, err := c.rgt.GetResourcesAsList(ctx, req) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should not call AWS APIs to get those counter metrics. This can cause significant performance impact when there are large amount of LBs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! I used RGT API call, so it's just 1 api call every 2min. But I do agree it's better to update the metrics per CRUD event. let me double check and get back. |
||
if err != nil { | ||
return 0, err | ||
} | ||
return len(resources), nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not flexible, and we won't be able to get real-time metrics.
I think we should trigger this when service/ingress/ingressGroup events happen.
for example, trigger a function in metricsCollector from within ingressGroupController.
e.g. to track the number of ALBs:
(note: just a naive thought, there could be edge cases like manually removed finalizer that shall be handled properly)
inject a
lbMetricsCollector
within ingressGroupController, when […]. Then lbMetricsCollector shall have a correct view of the number of currently managed ingressGroups in real time.