Skip to content

Commit 3a3f663

Browse files
committed
Working, prompting in place, not active
Signed-off-by: Brett Tofel <[email protected]>
1 parent d54170a commit 3a3f663

File tree

1 file changed

+209
-34
lines changed

1 file changed

+209
-34
lines changed

hack/tools/ownership_tree.py

+209-34
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,39 @@
1-
#!/usr/bin/env python3
1+
#!/usr/bin/env python3.11
22
import json
3+
import os
4+
import openai
35
import subprocess
46
import sys
57
import argparse
68
from collections import defaultdict
79

8-
# Command-line interface: one positional namespace plus boolean switches that
# control event display and the optional cluster-extension state dump.
parser = argparse.ArgumentParser(
    description="Print a tree of ownership for all resources in a namespace, optionally gather cluster extension state."
)
parser.add_argument("namespace", help="The namespace to inspect")
parser.add_argument(
    "--no-events",
    action="store_true",
    help="Do not show Events kind grouping",
)
parser.add_argument(
    "--with-event-info",
    action="store_true",
    help="Show additional info (message) for Events",
)
parser.add_argument(
    "--gather-cluster-extension-state",
    action="store_true",
    help="Gather and save a compressed fingerprint of the cluster extension state to a file.",
)
parser.add_argument(
    "--no-tree",
    action="store_true",
    help="Do not print the tree output (only used if gather-cluster-extension-state is set).",
)
args = parser.parse_args()

NAMESPACE = args.namespace

# Gathering cluster extension state needs full event data, so that flag
# overrides --no-events and implies --with-event-info.
SHOW_EVENTS = args.gather_cluster_extension_state or not args.no_events
WITH_EVENT_INFO = args.gather_cluster_extension_state or args.with_event_info
1728

1829
def parse_api_resources_line(line):
    """Parse one data row of ``kubectl api-resources`` output.

    Fields are read positionally: NAME is the first column, and NAMESPACED
    and KIND are the last two. Whatever sits in between (SHORTNAMES,
    APIVERSION) is not needed for the tree logic and is ignored.

    Args:
        line: A single row of whitespace-separated columns.

    Returns:
        A ``(name, namespaced, kind)`` tuple where ``namespaced`` is a bool,
        or ``None`` when the row has fewer than three fields.
    """
    # split() with no argument splits on any run of whitespace, so tabs and
    # a trailing "\r"/"\n" never end up glued to the NAME or KIND field.
    parts = line.split()
    if len(parts) < 3:
        return None

    name = parts[0]
    namespaced = parts[-2].lower() == "true"
    kind = parts[-1]
    return name, namespaced, kind
3138

3239
kind_to_plural = {}
@@ -39,18 +46,13 @@ def parse_api_resources_line(line):
3946
for line in lines[1:]:
4047
if not line.strip():
4148
continue
42-
parts = [p for p in line.split(' ') if p]
43-
if len(parts) < 3:
49+
parsed = parse_api_resources_line(line)
50+
if not parsed:
4451
continue
45-
# Parse from right: kind=last, namespaced=second-last, name=first
46-
kind = parts[-1]
47-
namespaced_str = parts[-2].lower()
48-
namespaced = (namespaced_str == "true")
49-
name = parts[0]
50-
52+
name, is_namespaced, kind = parsed
5153
if kind not in kind_to_plural:
5254
kind_to_plural[kind] = name
53-
resource_info.append((kind, name, namespaced))
55+
resource_info.append((kind, name, is_namespaced))
5456
except subprocess.CalledProcessError:
5557
pass
5658

@@ -79,7 +81,7 @@ def get_resources_for_type(resource_name, namespaced):
7981
if namespaced:
8082
return items
8183

82-
# Cluster-scoped: filter by namespace reference
84+
# cluster-scoped: filter by namespace reference
8385
filtered = []
8486
for item in items:
8587
meta_ns = item.get("metadata", {}).get("namespace")
@@ -90,7 +92,7 @@ def get_resources_for_type(resource_name, namespaced):
9092
if filtered:
9193
return filtered
9294

93-
# Fallback: try get by name if no filtered items
95+
# fallback by name
9496
try:
9597
single_json = subprocess.check_output(
9698
["kubectl", "get", resource_name, NAMESPACE, "-o", "json", "--ignore-not-found"],
@@ -110,18 +112,16 @@ def get_resources_for_type(resource_name, namespaced):
110112

111113
# Collect resources
112114
for (kind, plural_name, is_namespaced) in resource_info:
113-
# Skip events if we don't show them at all
115+
# Skip Event kinds unless SHOW_EVENTS is set (it is always set when gathering cluster extension state)
114116
if kind == "Event" and not SHOW_EVENTS:
115117
continue
116-
117118
items = get_resources_for_type(plural_name, is_namespaced)
118119
for item in items:
119120
uid = item["metadata"]["uid"]
120121
k = item["kind"]
121122
nm = item["metadata"]["name"]
122123
owners = [(o["kind"], o["name"], o["uid"]) for o in item["metadata"].get("ownerReferences", [])]
123124

124-
# If it's an Event and we don't show events, skip
125125
if k == "Event" and not SHOW_EVENTS:
126126
continue
127127

@@ -133,7 +133,6 @@ def get_resources_for_type(resource_name, namespaced):
133133
"owners": owners
134134
}
135135

136-
# If it's an Event and we want event info, store the message
137136
if k == "Event" and WITH_EVENT_INFO:
138137
res_entry["message"] = item.get("message", "")
139138

@@ -142,7 +141,7 @@ def get_resources_for_type(resource_name, namespaced):
142141

143142
owner_to_children = defaultdict(list)
144143
for uid, res in uid_to_resource.items():
145-
for (o_kind, o_name, o_uid) in res["owners"]:
144+
for (_, _, o_uid) in res["owners"]:
146145
owner_to_children[o_uid].append(uid)
147146

148147
# Identify top-level
@@ -167,7 +166,7 @@ def get_resources_for_type(resource_name, namespaced):
167166
kind_groups[r["kind"]].append(uid)
168167

169168
pseudo_nodes = {}
170-
for kind, uids in kind_groups.items():
169+
for kind, uids_ in kind_groups.items():
171170
if kind == "Event" and not SHOW_EVENTS:
172171
continue
173172
plural = kind_to_plural.get(kind, kind.lower() + "s")
@@ -180,7 +179,7 @@ def get_resources_for_type(resource_name, namespaced):
180179
"uid": pseudo_uid,
181180
"owners": []
182181
}
183-
for child_uid in uids:
182+
for child_uid in uids_:
184183
owner_to_children[pseudo_uid].append(child_uid)
185184

186185
top_level_kinds = list(pseudo_nodes.values())
@@ -198,20 +197,196 @@ def resource_sort_key(uid):
198197
def print_tree(uid, prefix="", is_last=True):
    """Recursively print the resource identified by ``uid`` and its children.

    Each node is printed as ``Kind/name`` (or just ``Kind`` when the entry
    has an empty name) behind a box-drawing branch glyph. When
    ``WITH_EVENT_INFO`` is set, an Event that carries a ``"message"`` entry
    gets one extra child line showing that message.

    Args:
        uid: Key into ``uid_to_resource`` for the node to print.
        prefix: Text already accumulated for the ancestors' columns.
        is_last: Whether this node is the last among its siblings; selects
            the branch glyph and whether a vertical rule continues below it.
    """
    r = uid_to_resource[uid]
    branch = "└── " if is_last else "├── "
    label = f"{r['kind']}/{r['name']}" if r['name'] else f"{r['kind']}"
    print(prefix + branch + label)

    # The continuation prefix must be exactly as wide as the branch glyph
    # (four columns); otherwise deeper levels drift out of alignment with
    # the vertical rules drawn by their ancestors.
    child_prefix = prefix + ("    " if is_last else "│   ")

    if WITH_EVENT_INFO and r['kind'] == "Event" and "message" in r:
        print(child_prefix + "└── message: " + r["message"])

    children = owner_to_children.get(uid, [])
    # Sorted in place, so the stored child list stays ordered afterwards.
    children.sort(key=resource_sort_key)
    last_index = len(children) - 1
    for i, c_uid in enumerate(children):
        print_tree(c_uid, prefix=child_prefix, is_last=(i == last_index))

216-
for i, uid in enumerate(top_level_kinds):
217-
print_tree(uid, prefix="", is_last=(i == len(top_level_kinds)-1))
213+
214+
###############################
215+
# Code for gather fingerprint
216+
###############################
217+
def extract_resource_summary(kind, name, namespace):
    """Fetch one resource via ``kubectl get`` and reduce it to a small dict.

    The summary always carries ``kind``, ``name`` and ``namespace``. It also
    carries the following when present on the object: status ``conditions``
    (type/status/reason/message only), ``containers`` (name and image, for
    Pods and Deployments), ``reason`` and ``message`` (for Events),
    ``labels`` and ``annotations``.

    Args:
        kind: Resource kind; lower-cased to form the ``kind/name`` argument.
        name: Resource name.
        namespace: Namespace to query, or ``None``/``""`` for a
            cluster-scoped lookup (no ``-n`` flag is passed).

    Returns:
        The summary dict, or ``{}`` when kind or name is missing, kubectl
        exits non-zero, prints nothing, or prints something that is not JSON.
    """
    # Without both a kind and a name there is no kind/name argument to build;
    # bail out before spawning a kubectl process that could only fail.
    if not kind or not name:
        return {}

    cmd = ["kubectl", "get", f"{kind.lower()}/{name}"]
    if namespace:
        cmd.extend(["-n", namespace])
    cmd.extend(["-o", "json", "--ignore-not-found"])

    try:
        out = subprocess.check_output(cmd, text=True, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        return {}
    # --ignore-not-found yields empty output for a missing object.
    if not out.strip():
        return {}
    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        return {}

    # "or {}" also covers fields that are present but explicitly null.
    metadata = data.get("metadata") or {}
    status = data.get("status") or {}
    spec = data.get("spec") or {}
    data_kind = data.get("kind")

    summary = {
        "kind": data.get("kind", kind),
        "name": metadata.get("name", name),
        "namespace": metadata.get("namespace", namespace),
    }

    conditions = status.get("conditions") or []
    if conditions:
        summary["conditions"] = [
            {
                "type": c.get("type"),
                "status": c.get("status"),
                "reason": c.get("reason"),
                "message": c.get("message"),
            }
            for c in conditions
        ]

    # Pods list containers directly under spec; Deployments nest them in the
    # pod template. Other kinds contribute no container information.
    if data_kind == "Pod":
        containers = spec.get("containers") or []
    elif data_kind == "Deployment":
        template_spec = (spec.get("template") or {}).get("spec") or {}
        containers = template_spec.get("containers") or []
    else:
        containers = []
    images = [{"name": c.get("name"), "image": c.get("image")} for c in containers]
    if images:
        summary["containers"] = images

    if data_kind == "Event":
        summary["reason"] = data.get("reason")
        summary["message"] = data.get("message")

    if metadata.get("labels"):
        summary["labels"] = metadata["labels"]
    if metadata.get("annotations"):
        summary["annotations"] = metadata["annotations"]

    return summary
275+
276+
def load_fingerprint(file_path):
    """Load the JSON fingerprint file from the specified path.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default,
    # which differs between systems and can mis-decode non-ASCII text.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
280+
281+
def generate_prompt(fingerprint):
    """Build the diagnostic request text around a JSON dump of the fingerprint.

    Args:
        fingerprint: JSON-serializable snapshot to embed.

    Returns:
        The full prompt string: fixed instructions, the fingerprint rendered
        with two-space indentation between BEGIN/END markers, and a closing
        request line.
    """
    rendered_state = json.dumps(fingerprint, indent=2)

    instructions = """
You are an expert in Kubernetes operations and diagnostics. I will provide you with a JSON file that represents a snapshot ("fingerprint") of the entire state of a Kubernetes namespace focusing on a particular ClusterExtension and all related resources. This fingerprint includes:

- The ClusterExtension itself.
- All resources in the namespace that are either owned by or possibly needed by the ClusterExtension.
- Key details such as resource conditions, event messages, container images (with references), and minimal metadata.

Your task is:
1. Analyze the provided fingerprint to determine if there are any issues with the ClusterExtension, its related resources, or its configuration.
2. If issues are found, provide a diagnosis of what might be wrong and suggest steps to fix them.
3. If no issues appear, acknowledge that the ClusterExtension and its resources seem healthy.
4. Keep your answer concise and action-focused, as the output will be used by a human operator to troubleshoot or confirm the health of their cluster.

**Important Details:**
- The fingerprint might contain events that show what happened in the cluster recently.
- Check conditions of deployments, pods, and other resources to see if they indicate errors or warnings.
- Look at event messages for hints about failures, restarts, or other anomalies.
- Consider if all necessary resources (like ServiceAccounts, ConfigMaps, or other dependencies) are present and seemingly functional.

**BEGIN FINGERPRINT**
"""
    closing = """
**END FINGERPRINT**

Please provide a summarized diagnosis and suggested fixes below:
"""
    # Plain concatenation: the dump is dropped between the two markers as-is.
    return instructions + rendered_state + closing
309+
310+
def send_to_openai(prompt, model="gpt-4o"):
    """Send the prompt to OpenAI's chat completions API and return the reply.

    The prompt is sent as a single user message. The API key is read from
    the ``OPENAI_API_KEY`` environment variable.

    Args:
        prompt: Text to send as the user message.
        model: Name of the chat model to query.

    Returns:
        The assistant's message content. On any failure (missing key,
        network or API error) a string starting with
        ``"Error communicating with OpenAI API: "`` is returned instead of
        raising.
    """
    try:
        # Validate the key before touching the client library so a missing
        # key never leaves a half-configured global behind.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable is not set.")

        messages = [{"role": "user", "content": prompt}]

        if hasattr(openai, "OpenAI"):
            # openai>=1.0: the module-level ChatCompletion helper was removed
            # in favour of an explicit client object.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model=model, messages=messages)
            return response.choices[0].message.content

        # Legacy (<1.0) library: module-level key and dict-style response.
        openai.api_key = api_key
        response = openai.ChatCompletion.create(model=model, messages=messages)
        return response['choices'][0]['message']['content']

    except Exception as e:
        # Deliberately broad: callers get a printable message, never a raise.
        return f"Error communicating with OpenAI API: {e}"
328+
329+
def gather_fingerprint(namespace):
    """Write a JSON state file for each ClusterExtension found for ``namespace``.

    Every entry in ``uid_to_resource`` is summarized with
    ``extract_resource_summary`` and stored under a ``"Kind/name"`` key.
    Container image strings are replaced by short ``image_ref_N`` references,
    and the reference-to-image table is stored once under ``"_image_map"``.
    The same fingerprint is written to ``<ce-name>-state.json`` in the
    current directory for each matching ClusterExtension.

    Args:
        namespace: Namespace value a ClusterExtension entry must carry to be
            selected.

    Returns:
        List of file names written; empty when no ClusterExtension matches.
    """
    # .get() because the entry schema is built elsewhere in this file;
    # an entry without a "namespace" key simply does not match.
    ce_uids = [
        uid for uid, res in uid_to_resource.items()
        if res["kind"] == "ClusterExtension" and res.get("namespace") == namespace
    ]
    if not ce_uids:
        return []

    # image string -> "image_ref_N", numbered in order of first appearance.
    all_images = {}

    def process_resource(res):
        """Summarize one entry and swap its image strings for references."""
        summary = extract_resource_summary(res["kind"], res["name"], res.get("namespace"))
        for c in summary.get("containers", []):
            img = c.pop("image")
            if img not in all_images:
                all_images[img] = f"image_ref_{len(all_images) + 1}"
            c["imageRef"] = all_images[img]
        return summary

    # The fingerprint covers all discovered resources and does not depend on
    # which ClusterExtension it is saved for, so build it once instead of
    # re-querying every resource per ClusterExtension.
    fingerprint = {}
    for res in uid_to_resource.values():
        fingerprint[f"{res['kind']}/{res['name']}"] = process_resource(res)
    if all_images:
        fingerprint["_image_map"] = {ref: img for img, ref in all_images.items()}

    results = []
    for ce_uid in ce_uids:
        fname = f"{uid_to_resource[ce_uid]['name']}-state.json"
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(fingerprint, f, indent=2)
        results.append(fname)
    return results
377+
378+
# Generate the state file(s) first, when requested, before anything is printed.
gathering_state = args.gather_cluster_extension_state
state_files = gather_fingerprint(NAMESPACE) if gathering_state else []

# --no-tree only suppresses the tree in gather-cluster-extension-state mode;
# on its own it has no effect.
suppress_tree = gathering_state and args.no_tree
if not suppress_tree:
    last_index = len(top_level_kinds) - 1
    for i, uid in enumerate(top_level_kinds):
        print_tree(uid, prefix="", is_last=(i == last_index))

# Report what the gather step produced: file names on stdout, or a notice on
# stderr when no ClusterExtension was found.
if gathering_state:
    if state_files:
        print("Created state file(s):", ", ".join(state_files))
    else:
        print("No ClusterExtension found in the namespace, no state file created.", file=sys.stderr)

0 commit comments

Comments
 (0)