Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 73 additions & 18 deletions src/clustercron/alb.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@


class Alb(Lb):
def _get_target_health(self):
target_health = []
logger.debug("Get instance health states")
def _get_target_group_info(self):
"""Get target group information including ARN and target type"""
target_group_info = {}
try:
client = boto3.client("elbv2")
except NoRegionError as error:
if self.region_name is None:
logger.error("%s", error)
return target_health
return target_group_info
else:
client = boto3.client(
"elbv2",
Expand All @@ -46,40 +46,95 @@ def _get_target_health(self):
)
else:
try:
targetgroup_arn = targetgroups.get("TargetGroups")[0][
"TargetGroupArn"
]
target_group = targetgroups.get("TargetGroups")[0]
target_group_info["arn"] = target_group["TargetGroupArn"]
target_group_info["type"] = target_group["TargetType"]
logger.info("Target group type: %s", target_group_info["type"])
except Exception as error:
logger.error(
"Could not get TargetGroupArn for `%s`: %s",
"Could not get TargetGroup info for `%s`: %s",
self.name,
error,
)
return target_group_info

def _get_target_health(self):
target_health = []
logger.debug("Get target health states")
target_group_info = self._get_target_group_info()

if not target_group_info:
return target_health

try:
client = boto3.client("elbv2")
except NoRegionError as error:
if self.region_name is None:
logger.error("%s", error)
return target_health
else:
logger.debug("targetgroup_arn: %s" % targetgroup_arn)
try:
target_health = client.describe_target_health(
TargetGroupArn=targetgroup_arn
)
except Exception as error:
logger.error("Could not get target health: %s", error)
client = boto3.client(
"elbv2",
region_name=self.region_name,
)

targetgroup_arn = target_group_info.get("arn")
if not targetgroup_arn:
return target_health

logger.debug("targetgroup_arn: %s" % targetgroup_arn)
try:
target_health_response = client.describe_target_health(
TargetGroupArn=targetgroup_arn
)
# Store target type with the health response
target_health = {
"TargetHealthDescriptions": target_health_response.get("TargetHealthDescriptions", []),
"TargetType": target_group_info.get("type")
}
except Exception as error:
logger.error("Could not get target health: %s", error)

return target_health

def get_healty_instances(self):
healty_instances = []
target_health = self._get_target_health()
if target_health:
logger.debug("Instance health states: %s", target_health)
logger.debug("Target health states: %s", target_health)
try:
healty_instances = sorted(
x["Target"]["Id"]
for x in target_health.get("TargetHealthDescriptions")
for x in target_health.get("TargetHealthDescriptions", [])
if x["TargetHealth"]["State"] == "healthy"
)
except Exception as error:
logger.error("Could not parse healty_instances: %s", error)
else:
logger.info(
"Healty instances: %s", ", ".join(healty_instances)
"Healty instances/IPs: %s", ", ".join(healty_instances)
)
return healty_instances

def _is_master(self, healty_instances):
"""Determine if this instance is the master based on target type"""
if not healty_instances:
return False

target_health = self._get_target_health()
target_type = target_health.get("TargetType")

logger.debug("Determining master with target type: %s", target_type)

if target_type == "instance":
# For instance-type targets, use instance ID
if self.instance_id:
return self.instance_id == healty_instances[0]
elif target_type == "ip":
# For IP-type targets, use eth0 IP
if self.eth0_ip:
return self.eth0_ip == healty_instances[0]
else:
logger.warning("Unknown target type: %s", target_type)

return False
94 changes: 87 additions & 7 deletions src/clustercron/lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from __future__ import unicode_literals

import logging
import socket
import subprocess

import boto.utils

Expand All @@ -34,19 +36,97 @@ def _get_instance_meta_data(self):
data = {"document": {}}
self.region_name = data["document"].get("region")
self.instance_id = data["document"].get("instanceId")

# Get the eth0 IP address
self.eth0_ip = self._get_eth0_ip()

logger.info("self.region_name: %s", self.region_name)
logger.info("self.instance_id: %s", self.instance_id)
logger.info("self.eth0_ip: %s", self.eth0_ip)

def _get_eth0_ip(self):
"""Get the IPv4 address of the eth0 interface"""
import shutil
import os

# Try to find the ip command in the PATH or at common locations
ip_cmd = shutil.which('ip')

# If not found in PATH, try common locations
if not ip_cmd:
common_paths = ['/usr/sbin/ip', '/sbin/ip']
for path in common_paths:
if os.path.isfile(path) and os.access(path, os.X_OK):
ip_cmd = path
break

if ip_cmd:
try:
# Try to get the IP using the located ip command
result = subprocess.check_output(
[ip_cmd, "-4", "-o", "addr", "show", "dev", "eth0", "scope", "global"],
universal_newlines=True
)
# Parse the output to extract the IP address
if result:
ip_parts = result.strip().split()
for i, part in enumerate(ip_parts):
if part == "inet":
# Format is typically: inet 172.31.8.112/20 ...
return ip_parts[i+1].split("/")[0]
except (subprocess.CalledProcessError, IndexError) as error:
logger.warning("Could not get eth0 IP using %s command: %s", ip_cmd, error)
except FileNotFoundError:
logger.warning("IP command not found at %s", ip_cmd)
else:
logger.error("Could not find 'ip' command in PATH or common locations. This is required for proper IP detection.")


# Fallback method using socket
try:
# Get all network interfaces
hostname = socket.gethostname()
ip_address = socket.gethostbyname(hostname)
# Verify this is not a loopback address before returning
if ip_address != "127.0.0.1":
return ip_address
else:
logger.warning("Socket returned loopback address 127.0.0.1, which is not a valid eth0 IP")
except Exception as error:
logger.error("Could not get eth0 IP: %s", error)

return None

def get_healty_instances(self):
raise NotImplementedError

def master(self):
logger.debug("Check if instance is master")
if self.instance_id is None:
logger.error("No Instanced Id")
else:
healty_instances = self.get_healty_instances()
if healty_instances:
return self.instance_id == healty_instances[0]
return False
if self.instance_id is None and self.eth0_ip is None:
logger.error("No Instance Id or IP address available")
return False

# For IP-based target groups, a valid eth0 IP is critical
if self.eth0_ip is None:
logger.error("Failed to determine eth0 IP address. This is required for proper operation.")
# We return False here instead of raising an exception to maintain compatibility
# The main module will handle the exit code
return False

# Reject loopback addresses as they're not valid for AWS target groups
if self.eth0_ip == "127.0.0.1":
logger.error("Invalid eth0 IP detected: 127.0.0.1. This will not work with AWS target groups.")
return False

healty_instances = self.get_healty_instances()
if not healty_instances:
return False

# The actual comparison logic is implemented in the subclasses
# that know whether to use instance_id or eth0_ip
return self._is_master(healty_instances)

def _is_master(self, healty_instances):
"""Determine if this instance is the master based on healthy instances"""
# Default implementation uses instance_id
return self.instance_id == healty_instances[0]