1
- #!/usr/bin/env python3
1
+ #!/usr/bin/env python3.11
2
2
import json
3
+ import os
4
+ import openai
3
5
import subprocess
4
6
import sys
5
7
import argparse
6
8
from collections import defaultdict
7
9
8
# Command-line interface: one positional namespace plus output/fingerprint switches.
parser = argparse.ArgumentParser(
    description="Print a tree of ownership for all resources in a namespace, optionally gather cluster extension state."
)
parser.add_argument("namespace", help="The namespace to inspect")
parser.add_argument("--no-events", action="store_true", help="Do not show Events kind grouping")
parser.add_argument("--with-event-info", action="store_true", help="Show additional info (message) for Events")
parser.add_argument(
    "--gather-cluster-extension-state",
    action="store_true",
    help="Gather and save a compressed fingerprint of the cluster extension state to a file.",
)
parser.add_argument(
    "--no-tree",
    action="store_true",
    help="Do not print the tree output (only used if gather-cluster-extension-state is set).",
)
args = parser.parse_args()

NAMESPACE = args.namespace

# Fingerprint mode always collects events together with their messages,
# overriding --no-events / --with-event-info; otherwise the flags decide.
SHOW_EVENTS = args.gather_cluster_extension_state or not args.no_events
WITH_EVENT_INFO = args.gather_cluster_extension_state or args.with_event_info
17
28
18
29
def parse_api_resources_line(line):
    """Parse one data row of ``kubectl api-resources`` output.

    The row is read positionally: the first column is the resource NAME, the
    last column is KIND and the second-to-last is NAMESPACED. Any columns in
    between are ignored.

    Args:
        line: A single whitespace-separated row (not the header).

    Returns:
        A ``(name, namespaced, kind)`` tuple where ``namespaced`` is a bool,
        or ``None`` when the row has fewer than three columns.
    """
    # split() with no argument splits on any run of whitespace, so tab-separated
    # columns and a trailing newline cannot leak into the parsed fields.
    parts = line.split()
    if len(parts) < 3:
        return None
    name = parts[0]
    namespaced = parts[-2].lower() == "true"
    kind = parts[-1]
    return name, namespaced, kind
31
38
32
39
kind_to_plural = {}
@@ -39,18 +46,13 @@ def parse_api_resources_line(line):
39
46
for line in lines [1 :]:
40
47
if not line .strip ():
41
48
continue
42
- parts = [ p for p in line . split ( ' ' ) if p ]
43
- if len ( parts ) < 3 :
49
+ parsed = parse_api_resources_line ( line )
50
+ if not parsed :
44
51
continue
45
- # Parse from right: kind=last, namespaced=second-last, name=first
46
- kind = parts [- 1 ]
47
- namespaced_str = parts [- 2 ].lower ()
48
- namespaced = (namespaced_str == "true" )
49
- name = parts [0 ]
50
-
52
+ name , is_namespaced , kind = parsed
51
53
if kind not in kind_to_plural :
52
54
kind_to_plural [kind ] = name
53
- resource_info .append ((kind , name , namespaced ))
55
+ resource_info .append ((kind , name , is_namespaced ))
54
56
except subprocess .CalledProcessError :
55
57
pass
56
58
@@ -79,7 +81,7 @@ def get_resources_for_type(resource_name, namespaced):
79
81
if namespaced :
80
82
return items
81
83
82
- # Cluster -scoped: filter by namespace reference
84
+ # cluster -scoped: filter by namespace reference
83
85
filtered = []
84
86
for item in items :
85
87
meta_ns = item .get ("metadata" , {}).get ("namespace" )
@@ -90,7 +92,7 @@ def get_resources_for_type(resource_name, namespaced):
90
92
if filtered :
91
93
return filtered
92
94
93
- # Fallback: try get by name if no filtered items
95
+ # fallback by name
94
96
try :
95
97
single_json = subprocess .check_output (
96
98
["kubectl" , "get" , resource_name , NAMESPACE , "-o" , "json" , "--ignore-not-found" ],
@@ -110,18 +112,16 @@ def get_resources_for_type(resource_name, namespaced):
110
112
111
113
# Collect resources
112
114
for (kind , plural_name , is_namespaced ) in resource_info :
113
- # Skip events if we don't show them at all
115
+ # If we are gathering CE state or SHOW_EVENTS is True, we process events, else skip if no events
114
116
if kind == "Event" and not SHOW_EVENTS :
115
117
continue
116
-
117
118
items = get_resources_for_type (plural_name , is_namespaced )
118
119
for item in items :
119
120
uid = item ["metadata" ]["uid" ]
120
121
k = item ["kind" ]
121
122
nm = item ["metadata" ]["name" ]
122
123
owners = [(o ["kind" ], o ["name" ], o ["uid" ]) for o in item ["metadata" ].get ("ownerReferences" , [])]
123
124
124
- # If it's an Event and we don't show events, skip
125
125
if k == "Event" and not SHOW_EVENTS :
126
126
continue
127
127
@@ -133,7 +133,6 @@ def get_resources_for_type(resource_name, namespaced):
133
133
"owners" : owners
134
134
}
135
135
136
- # If it's an Event and we want event info, store the message
137
136
if k == "Event" and WITH_EVENT_INFO :
138
137
res_entry ["message" ] = item .get ("message" , "" )
139
138
@@ -142,7 +141,7 @@ def get_resources_for_type(resource_name, namespaced):
142
141
143
142
owner_to_children = defaultdict (list )
144
143
for uid , res in uid_to_resource .items ():
145
- for (o_kind , o_name , o_uid ) in res ["owners" ]:
144
+ for (_ , _ , o_uid ) in res ["owners" ]:
146
145
owner_to_children [o_uid ].append (uid )
147
146
148
147
# Identify top-level
@@ -167,7 +166,7 @@ def get_resources_for_type(resource_name, namespaced):
167
166
kind_groups [r ["kind" ]].append (uid )
168
167
169
168
pseudo_nodes = {}
170
- for kind , uids in kind_groups .items ():
169
+ for kind , uids_ in kind_groups .items ():
171
170
if kind == "Event" and not SHOW_EVENTS :
172
171
continue
173
172
plural = kind_to_plural .get (kind , kind .lower () + "s" )
@@ -180,7 +179,7 @@ def get_resources_for_type(resource_name, namespaced):
180
179
"uid" : pseudo_uid ,
181
180
"owners" : []
182
181
}
183
- for child_uid in uids :
182
+ for child_uid in uids_ :
184
183
owner_to_children [pseudo_uid ].append (child_uid )
185
184
186
185
top_level_kinds = list (pseudo_nodes .values ())
@@ -198,20 +197,196 @@ def resource_sort_key(uid):
198
197
def print_tree(uid, prefix="", is_last=True):
    """Recursively print the node identified by *uid* followed by its children.

    Args:
        uid: Key into ``uid_to_resource`` for the node to print.
        prefix: Text already accumulated for the ancestors' columns.
        is_last: Whether this node is the final child of its parent; selects
            the corner glyph and whether a vertical rule continues below it.
    """
    r = uid_to_resource[uid]
    branch = "└── " if is_last else "├── "
    label = f"{r['kind']}/{r['name']}" if r["name"] else f"{r['kind']}"
    print(prefix + branch + label)

    # Continuation columns are exactly as wide as the 4-character branch glyphs
    # so that every nesting level lines up under its parent.
    child_prefix = prefix + ("    " if is_last else "│   ")

    # Event messages are rendered as a pseudo-child line under the event.
    if WITH_EVENT_INFO and r["kind"] == "Event" and "message" in r:
        print(child_prefix + "└── message: " + r["message"])

    # sorted() leaves the shared owner_to_children lists untouched.
    children = sorted(owner_to_children.get(uid, ()), key=resource_sort_key)
    last_index = len(children) - 1
    for i, c_uid in enumerate(children):
        print_tree(c_uid, prefix=child_prefix, is_last=(i == last_index))
215
212
216
- for i , uid in enumerate (top_level_kinds ):
217
- print_tree (uid , prefix = "" , is_last = (i == len (top_level_kinds )- 1 ))
213
+
214
+ ###############################
215
+ # Code for gather fingerprint
216
+ ###############################
217
def extract_resource_summary(kind, name, namespace):
    """Fetch one resource with kubectl and reduce it to a compact summary.

    Args:
        kind: Resource kind; it is lower-cased and used as the kubectl
            resource type in ``kind/name`` form.
        name: Resource name. An empty name yields ``{}`` without calling
            kubectl, since ``kind/`` is not a valid kubectl argument.
        namespace: Namespace to query, or ``None``/``""`` to omit ``-n``.

    Returns:
        A dict with ``kind``, ``name`` and ``namespace`` plus, when present,
        ``conditions``, ``containers`` (Pod/Deployment only), ``reason`` and
        ``message`` (Event only), ``labels`` and ``annotations``. Returns
        ``{}`` when kubectl fails, prints nothing, or prints output that is
        not a JSON object.
    """
    if not name:
        return {}

    # NOTE(review): the lower-cased kind is assumed to be accepted by kubectl
    # as a resource type; kinds that kubectl cannot resolve this way end up in
    # the failure path below and are summarised as {}.
    cmd = ["kubectl", "get", f"{kind.lower()}/{name}"]
    if namespace:
        cmd.extend(["-n", namespace])
    cmd.extend(["-o", "json", "--ignore-not-found"])

    try:
        out = subprocess.check_output(cmd, text=True, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        return {}
    if not out.strip():
        return {}
    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        # Treat unparsable output like any other failed lookup.
        return {}
    if not isinstance(data, dict):
        return {}

    # "or {}" also covers fields that are present but explicitly null.
    metadata = data.get("metadata") or {}
    summary = {
        "kind": data.get("kind", kind),
        "name": metadata.get("name", name),
        "namespace": metadata.get("namespace", namespace),
    }

    conditions = (data.get("status") or {}).get("conditions") or []
    if conditions:
        summary["conditions"] = [
            {
                "type": c.get("type"),
                "status": c.get("status"),
                "reason": c.get("reason"),
                "message": c.get("message"),
            }
            for c in conditions
        ]

    # Pods keep their containers under spec; Deployments under spec.template.spec.
    actual_kind = data.get("kind")
    if actual_kind == "Pod":
        pod_spec = data.get("spec") or {}
    elif actual_kind == "Deployment":
        pod_spec = ((data.get("spec") or {}).get("template") or {}).get("spec") or {}
    else:
        pod_spec = {}
    images = [
        {"name": cont.get("name"), "image": cont.get("image")}
        for cont in pod_spec.get("containers") or []
    ]
    if images:
        summary["containers"] = images

    if actual_kind == "Event":
        summary["reason"] = data.get("reason")
        summary["message"] = data.get("message")

    if metadata.get("labels"):
        summary["labels"] = metadata["labels"]
    if metadata.get("annotations"):
        summary["annotations"] = metadata["annotations"]

    return summary
275
+
276
def load_fingerprint(file_path):
    """Load and return the JSON fingerprint stored at *file_path*.

    The file is decoded as UTF-8 rather than with the locale's default
    encoding, so non-ASCII text in messages or annotations reads the same on
    every platform.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
280
+
281
def generate_prompt(fingerprint):
    """Build the diagnostic prompt text with *fingerprint* embedded as JSON.

    The fingerprint is serialised with two-space indentation and placed
    between the BEGIN/END FINGERPRINT markers of a fixed instruction text.
    """
    rendered = json.dumps(fingerprint, indent=2)

    # Everything up to and including the opening marker line.
    preamble = """
You are an expert in Kubernetes operations and diagnostics. I will provide you with a JSON file that represents a snapshot ("fingerprint") of the entire state of a Kubernetes namespace focusing on a particular ClusterExtension and all related resources. This fingerprint includes:

- The ClusterExtension itself.
- All resources in the namespace that are either owned by or possibly needed by the ClusterExtension.
- Key details such as resource conditions, event messages, container images (with references), and minimal metadata.

Your task is:
1. Analyze the provided fingerprint to determine if there are any issues with the ClusterExtension, its related resources, or its configuration.
2. If issues are found, provide a diagnosis of what might be wrong and suggest steps to fix them.
3. If no issues appear, acknowledge that the ClusterExtension and its resources seem healthy.
4. Keep your answer concise and action-focused, as the output will be used by a human operator to troubleshoot or confirm the health of their cluster.

**Important Details:**
- The fingerprint might contain events that show what happened in the cluster recently.
- Check conditions of deployments, pods, and other resources to see if they indicate errors or warnings.
- Look at event messages for hints about failures, restarts, or other anomalies.
- Consider if all necessary resources (like ServiceAccounts, ConfigMaps, or other dependencies) are present and seemingly functional.

**BEGIN FINGERPRINT**
"""

    # Closing marker and the final request, starting on a fresh line.
    closing = """
**END FINGERPRINT**

Please provide a summarized diagnosis and suggested fixes below:
"""
    return preamble + rendered + closing
309
+
310
def send_to_openai(prompt, model="gpt-4o"):
    """Send *prompt* as a single user message to OpenAI's chat completions API.

    Works with both generations of the ``openai`` package: the client-object
    interface (``openai.OpenAI``, version 1.0 and later) and the legacy
    module-level ``openai.ChatCompletion`` interface.

    Args:
        prompt: Text sent as the content of the only (user) message.
        model: Name of the chat model to query.

    Returns:
        The assistant's reply text. On any failure, including a missing
        ``OPENAI_API_KEY`` environment variable, a string starting with
        ``"Error communicating with OpenAI API: "`` is returned instead of
        raising.
    """
    try:
        # Validate the key before touching the library so a missing key is
        # reported as such and openai.api_key is never set to None.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable is not set.")

        messages = [{"role": "user", "content": prompt}]

        if hasattr(openai, "OpenAI"):
            # openai>=1.0: module-level ChatCompletion was removed in favour
            # of an explicit client object.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model=model, messages=messages)
            return response.choices[0].message.content

        # Legacy openai<1.0 interface.
        openai.api_key = api_key
        response = openai.ChatCompletion.create(model=model, messages=messages)
        return response["choices"][0]["message"]["content"]

    except Exception as e:
        # Deliberately broad: this function's contract is to report every
        # failure as text rather than propagate it.
        return f"Error communicating with OpenAI API: {e}"
328
+
329
def gather_fingerprint(namespace):
    """Write a JSON state file for every ClusterExtension recorded for *namespace*.

    The fingerprint maps ``"<kind>/<name>"`` to the summary returned by
    ``extract_resource_summary`` for every entry of ``uid_to_resource``.
    Container images are replaced by short ``imageRef`` tokens, and the
    token-to-image table is stored under the ``"_image_map"`` key. Every
    ClusterExtension gets its own ``<name>-state.json`` file in the current
    directory; all files carry the same fingerprint.

    Args:
        namespace: Namespace value a ClusterExtension entry must carry to be
            selected.

    Returns:
        The list of file names written, or ``[]`` when no ClusterExtension
        entry matches.
    """
    # NOTE(review): selection compares the entry's stored namespace with
    # `namespace`. If ClusterExtension objects carry no namespace this never
    # matches — confirm how uid_to_resource entries are populated.
    ce_names = [
        res["name"]
        for res in uid_to_resource.values()
        if res["kind"] == "ClusterExtension" and res.get("namespace") == namespace
    ]
    if not ce_names:
        return []

    image_refs = {}  # full image string -> "image_ref_<n>" token

    def summarize(res):
        """Summarise one entry, swapping each container image for its token."""
        summary = extract_resource_summary(res["kind"], res["name"], res.get("namespace"))
        for container in summary.get("containers", []):
            image = container.pop("image", None)
            # Tokens are numbered in order of first appearance.
            container["imageRef"] = image_refs.setdefault(
                image, f"image_ref_{len(image_refs) + 1}"
            )
        return summary

    # Built once: the content does not depend on which ClusterExtension the
    # file is named after, so there is no reason to re-query every resource
    # per ClusterExtension.
    fingerprint = {
        f"{res['kind']}/{res['name']}": summarize(res)
        for res in uid_to_resource.values()
    }
    if image_refs:
        fingerprint["_image_map"] = {ref: image for image, ref in image_refs.items()}

    written = []
    for ce_name in ce_names:
        fname = f"{ce_name}-state.json"
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(fingerprint, f, indent=2)
        written.append(fname)
    return written
377
+
378
# In fingerprint mode the state file(s) are written before any tree output.
gathering = args.gather_cluster_extension_state
state_files = gather_fingerprint(NAMESPACE) if gathering else []

# --no-tree suppresses the tree only while fingerprint mode is active.
if not gathering or not args.no_tree:
    last_index = len(top_level_kinds) - 1
    for i, uid in enumerate(top_level_kinds):
        print_tree(uid, prefix="", is_last=(i == last_index))

# Report the outcome of the fingerprint run.
if gathering:
    if state_files:
        print("Created state file(s):", ", ".join(state_files))
    else:
        print("No ClusterExtension found in the namespace, no state file created.", file=sys.stderr)
0 commit comments