From 39f72363a2e7ae43e098233adae1dd19fdc77f2c Mon Sep 17 00:00:00 2001 From: Choong Wei Tjeng Date: Sat, 22 Mar 2025 16:55:32 +0100 Subject: [PATCH 1/3] #141 add compatibility with alb ip target type --- src/clustercron/alb.py | 91 +++++++++++++++++++++++++++++++++--------- src/clustercron/lb.py | 56 ++++++++++++++++++++++---- 2 files changed, 122 insertions(+), 25 deletions(-) diff --git a/src/clustercron/alb.py b/src/clustercron/alb.py index 8e9b776..6c054f9 100644 --- a/src/clustercron/alb.py +++ b/src/clustercron/alb.py @@ -22,15 +22,15 @@ class Alb(Lb): - def _get_target_health(self): - target_health = [] - logger.debug("Get instance health states") + def _get_target_group_info(self): + """Get target group information including ARN and target type""" + target_group_info = {} try: client = boto3.client("elbv2") except NoRegionError as error: if self.region_name is None: logger.error("%s", error) - return target_health + return target_group_info else: client = boto3.client( "elbv2", @@ -46,40 +46,95 @@ def _get_target_health(self): ) else: try: - targetgroup_arn = targetgroups.get("TargetGroups")[0][ - "TargetGroupArn" - ] + target_group = targetgroups.get("TargetGroups")[0] + target_group_info["arn"] = target_group["TargetGroupArn"] + target_group_info["type"] = target_group["TargetType"] + logger.info("Target group type: %s", target_group_info["type"]) except Exception as error: logger.error( - "Could not get TargetGroupArn for `%s`: %s", + "Could not get TargetGroup info for `%s`: %s", self.name, error, ) + return target_group_info + + def _get_target_health(self): + target_health = [] + logger.debug("Get target health states") + target_group_info = self._get_target_group_info() + + if not target_group_info: + return target_health + + try: + client = boto3.client("elbv2") + except NoRegionError as error: + if self.region_name is None: + logger.error("%s", error) + return target_health else: - logger.debug("targetgroup_arn: %s" % targetgroup_arn) - try: - target_health = client.describe_target_health( - TargetGroupArn=targetgroup_arn - ) - except Exception as error: - logger.error("Could not get target health: %s", error) + client = boto3.client( + "elbv2", + region_name=self.region_name, + ) + + targetgroup_arn = target_group_info.get("arn") + if not targetgroup_arn: + return target_health + + logger.debug("targetgroup_arn: %s" % targetgroup_arn) + try: + target_health_response = client.describe_target_health( + TargetGroupArn=targetgroup_arn + ) + # Store target type with the health response + target_health = { + "TargetHealthDescriptions": target_health_response.get("TargetHealthDescriptions", []), + "TargetType": target_group_info.get("type") + } + except Exception as error: + logger.error("Could not get target health: %s", error) + return target_health def get_healty_instances(self): healty_instances = [] target_health = self._get_target_health() if target_health: - logger.debug("Instance health states: %s", target_health) + logger.debug("Target health states: %s", target_health) try: healty_instances = sorted( x["Target"]["Id"] - for x in target_health.get("TargetHealthDescriptions") + for x in target_health.get("TargetHealthDescriptions", []) if x["TargetHealth"]["State"] == "healthy" ) except Exception as error: logger.error("Could not parse healty_instances: %s", error) else: logger.info( - "Healty instances: %s", ", ".join(healty_instances) + "Healty instances/IPs: %s", ", ".join(healty_instances) ) return healty_instances + + def _is_master(self, healty_instances): + """Determine if this instance is the master based on target type""" + if not healty_instances: + return False + + target_health = self._get_target_health() + target_type = target_health.get("TargetType") + + logger.debug("Determining master with target type: %s", target_type) + + if target_type == "instance": + # For instance-type targets, use instance ID + if self.instance_id: + return self.instance_id == healty_instances[0] + elif target_type == "ip": + # For IP-type targets, use eth0 IP + if self.eth0_ip: + return self.eth0_ip == healty_instances[0] + else: + logger.warning("Unknown target type: %s", target_type) + + return False diff --git a/src/clustercron/lb.py b/src/clustercron/lb.py index 300b40a..4667217 100644 --- a/src/clustercron/lb.py +++ b/src/clustercron/lb.py @@ -34,19 +34,61 @@ def _get_instance_meta_data(self): data = {"document": {}} self.region_name = data["document"].get("region") self.instance_id = data["document"].get("instanceId") + + # Get the eth0 IP address + self.eth0_ip = self._get_eth0_ip() logger.info("self.region_name: %s", self.region_name) logger.info("self.instance_id: %s", self.instance_id) + logger.info("self.eth0_ip: %s", self.eth0_ip) + + def _get_eth0_ip(self): + """Get the IPv4 address of the eth0 interface""" + try: + # Try to get the IP using the 'ip' command + result = subprocess.check_output( + ["ip", "-4", "-o", "addr", "show", "dev", "eth0", "scope", "global"], + universal_newlines=True + ) + # Parse the output to extract the IP address + if result: + ip_parts = result.strip().split() + for i, part in enumerate(ip_parts): + if part == "inet": + # Format is typically: inet 172.31.8.112/20 ... + return ip_parts[i+1].split("/")[0] + except (subprocess.CalledProcessError, IndexError, FileNotFoundError) as error: + logger.warning("Could not get eth0 IP using ip command: %s", error) + + # Fallback method using socket + try: + # Get all network interfaces + hostname = socket.gethostname() + ip_address = socket.gethostbyname(hostname) + return ip_address + except Exception as error: + logger.error("Could not get eth0 IP: %s", error) + + return None def get_healty_instances(self): raise NotImplementedError def master(self): logger.debug("Check if instance is master") - if self.instance_id is None: - logger.error("No Instanced Id") - else: - healty_instances = self.get_healty_instances() - if healty_instances: - return self.instance_id == healty_instances[0] - return False + if self.instance_id is None and self.eth0_ip is None: + logger.error("No Instance Id or IP address available") + return False + + healty_instances = self.get_healty_instances() + if not healty_instances: + return False + + # The actual comparison logic is implemented in the subclasses + # that know whether to use instance_id or eth0_ip + return self._is_master(healty_instances) + + def _is_master(self, healty_instances): + """Determine if this instance is the master based on healthy instances""" + # Default implementation uses instance_id + return self.instance_id == healty_instances[0] From 29759c3028215ac92a70af6f9564557cba84f871 Mon Sep 17 00:00:00 2001 From: Choong Wei Tjeng Date: Sun, 11 May 2025 09:59:10 +0200 Subject: [PATCH 2/3] #14 add missing imports --- src/clustercron/lb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/clustercron/lb.py b/src/clustercron/lb.py index 4667217..7482c1b 100644 --- a/src/clustercron/lb.py +++ b/src/clustercron/lb.py @@ -12,6 +12,8 @@ from __future__ import unicode_literals import logging +import socket +import subprocess import boto.utils From 24c5d64dffc94f5b3b26638555858d95c2bac7a8 Mon Sep 17 00:00:00 2001 From: Choong Wei Tjeng Date: Tue, 24 Jun 2025 16:14:26 +0200 Subject: [PATCH 3/3] #141 improve error handling, find ip command --- src/clustercron/lb.py | 68 +++++++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/src/clustercron/lb.py b/src/clustercron/lb.py index 7482c1b..15830c7 100644 --- a/src/clustercron/lb.py +++ b/src/clustercron/lb.py @@ -46,28 +46,52 @@ def _get_instance_meta_data(self): def _get_eth0_ip(self): """Get the IPv4 address of the eth0 interface""" - try: - # Try to get the IP using the 'ip' command - result = subprocess.check_output( - ["ip", "-4", "-o", "addr", "show", "dev", "eth0", "scope", "global"], - universal_newlines=True - ) - # Parse the output to extract the IP address - if result: - ip_parts = result.strip().split() - for i, part in enumerate(ip_parts): - if part == "inet": - # Format is typically: inet 172.31.8.112/20 ... - return ip_parts[i+1].split("/")[0] - except (subprocess.CalledProcessError, IndexError, FileNotFoundError) as error: - logger.warning("Could not get eth0 IP using ip command: %s", error) + import shutil + import os + + # Try to find the ip command in the PATH or at common locations + ip_cmd = shutil.which('ip') + + # If not found in PATH, try common locations + if not ip_cmd: + common_paths = ['/usr/sbin/ip', '/sbin/ip'] + for path in common_paths: + if os.path.isfile(path) and os.access(path, os.X_OK): + ip_cmd = path + break + + if ip_cmd: + try: + # Try to get the IP using the located ip command + result = subprocess.check_output( + [ip_cmd, "-4", "-o", "addr", "show", "dev", "eth0", "scope", "global"], + universal_newlines=True + ) + # Parse the output to extract the IP address + if result: + ip_parts = result.strip().split() + for i, part in enumerate(ip_parts): + if part == "inet": + # Format is typically: inet 172.31.8.112/20 ... + return ip_parts[i+1].split("/")[0] + except (subprocess.CalledProcessError, IndexError) as error: + logger.warning("Could not get eth0 IP using %s command: %s", ip_cmd, error) + except FileNotFoundError: + logger.warning("IP command not found at %s", ip_cmd) + else: + logger.error("Could not find 'ip' command in PATH or common locations. This is required for proper IP detection.") + # Fallback method using socket try: # Get all network interfaces hostname = socket.gethostname() ip_address = socket.gethostbyname(hostname) - return ip_address + # Verify this is not a loopback address before returning + if ip_address != "127.0.0.1": + return ip_address + else: + logger.warning("Socket returned loopback address 127.0.0.1, which is not a valid eth0 IP") except Exception as error: logger.error("Could not get eth0 IP: %s", error) @@ -81,6 +105,18 @@ def master(self): if self.instance_id is None and self.eth0_ip is None: logger.error("No Instance Id or IP address available") return False + + # For IP-based target groups, a valid eth0 IP is critical + if self.eth0_ip is None: + logger.error("Failed to determine eth0 IP address. This is required for proper operation.") + # We return False here instead of raising an exception to maintain compatibility + # The main module will handle the exit code + return False + + # Reject loopback addresses as they're not valid for AWS target groups + if self.eth0_ip == "127.0.0.1": + logger.error("Invalid eth0 IP detected: 127.0.0.1. This will not work with AWS target groups.") + return False healty_instances = self.get_healty_instances() if not healty_instances: