From 1c56b3c71c0b2f3c839d88e0cd532aecdb15ee3a Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Wed, 30 Jul 2025 11:27:52 +0200
Subject: [PATCH 1/3] span data should always be text

---
 sentry_sdk/ai/utils.py                        |  7 ++++++-
 tests/integrations/cohere/test_cohere.py      | 20 +++++++++++++++----
 .../integrations/langchain/test_langchain.py  |  6 ++----
 tests/integrations/openai/test_openai.py      | 19 ++++++------------
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index ed3494f679..a2e7db93d1 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -23,10 +23,15 @@ def _normalize_data(data):
         return list(_normalize_data(x) for x in data)
     if isinstance(data, dict):
         return {k: _normalize_data(v) for (k, v) in data.items()}
+
     return data
 
 
 def set_data_normalized(span, key, value):
     # type: (Span, str, Any) -> None
     normalized = _normalize_data(value)
-    span.set_data(key, normalized)
+    if type(normalized) in (list, dict, tuple):
+        # only primitive types allowed
+        span.set_data(key, str(normalized))
+    else:
+        span.set_data(key, normalized)
diff --git a/tests/integrations/cohere/test_cohere.py b/tests/integrations/cohere/test_cohere.py
index f13a77ae90..b8b6067625 100644
--- a/tests/integrations/cohere/test_cohere.py
+++ b/tests/integrations/cohere/test_cohere.py
@@ -57,8 +57,14 @@ def test_nonstreaming_chat(
     assert span["data"][SPANDATA.AI_MODEL_ID] == "some-model"
 
     if send_default_pii and include_prompts:
-        assert "some context" in span["data"][SPANDATA.AI_INPUT_MESSAGES][0]["content"]
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES][1]["content"]
+        assert (
+            "{'role': 'system', 'content': 'some context'}"
+            in span["data"][SPANDATA.AI_INPUT_MESSAGES]
+        )
+        assert (
+            "{'role': 'user', 'content': 'hello'}"
+            in span["data"][SPANDATA.AI_INPUT_MESSAGES]
+        )
         assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]
     else:
         assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
@@ -128,8 +134,14 @@ def test_streaming_chat(sentry_init, capture_events, send_default_pii, include_p
     assert span["data"][SPANDATA.AI_MODEL_ID] == "some-model"
 
     if send_default_pii and include_prompts:
-        assert "some context" in span["data"][SPANDATA.AI_INPUT_MESSAGES][0]["content"]
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES][1]["content"]
+        assert (
+            "{'role': 'system', 'content': 'some context'}"
+            in span["data"][SPANDATA.AI_INPUT_MESSAGES]
+        )
+        assert (
+            "{'role': 'user', 'content': 'hello'}"
+            in span["data"][SPANDATA.AI_INPUT_MESSAGES]
+        )
         assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]
     else:
         assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index ee9fb241b1..9d55a49f82 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -196,15 +196,13 @@ def test_langchain_agent(
 
     if send_default_pii and include_prompts:
         assert (
-            "You are very powerful"
-            in chat_spans[0]["data"][SPANDATA.AI_INPUT_MESSAGES][0]["content"]
+            "You are very powerful" in chat_spans[0]["data"][SPANDATA.AI_INPUT_MESSAGES]
         )
         assert "5" in chat_spans[0]["data"][SPANDATA.AI_RESPONSES]
         assert "word" in tool_exec_span["data"][SPANDATA.AI_INPUT_MESSAGES]
         assert 5 == int(tool_exec_span["data"][SPANDATA.AI_RESPONSES])
         assert (
-            "You are very powerful"
-            in chat_spans[1]["data"][SPANDATA.AI_INPUT_MESSAGES][0]["content"]
+            "You are very powerful" in chat_spans[1]["data"][SPANDATA.AI_INPUT_MESSAGES]
         )
         assert "5" in chat_spans[1]["data"][SPANDATA.AI_RESPONSES]
     else:
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index dfac08d762..5767f84d04 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1,4 +1,3 @@
-import json
 import pytest
 from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError
 from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
@@ -144,11 +143,8 @@ def test_nonstreaming_chat_completion(
     assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
-        assert (
-            "the model response"
-            in json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])[0]["content"]
-        )
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+        assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
     else:
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
@@ -189,11 +185,8 @@ async def test_nonstreaming_chat_completion_async(
     assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
-        assert (
-            "the model response"
-            in json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])[0]["content"]
-        )
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+        assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
     else:
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
@@ -285,7 +278,7 @@ def test_streaming_chat_completion(
     assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
         assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
     else:
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
@@ -381,7 +374,7 @@ async def test_streaming_chat_completion_async(
     assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
         assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
     else:
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]

From 89a0f4650d9ed9324fc89a2c532cc5831177286e Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Wed, 30 Jul 2025 11:34:05 +0200
Subject: [PATCH 2/3] Use isinstance() instead of type() membership check

---
 sentry_sdk/ai/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index a2e7db93d1..642db5f26f 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -30,7 +30,7 @@ def _normalize_data(data):
 def set_data_normalized(span, key, value):
     # type: (Span, str, Any) -> None
     normalized = _normalize_data(value)
-    if type(normalized) in (list, dict, tuple):
+    if isinstance(normalized, (list, dict, tuple)):
         # only primitive types allowed
         span.set_data(key, str(normalized))
     else:

From f66892189a5b11bdbca0131ab10135e23846926a Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Wed, 30 Jul 2025 11:35:43 +0200
Subject: [PATCH 3/3] Pass primitive span data through, stringify the rest

---
 sentry_sdk/ai/utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 642db5f26f..a3c62600c0 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -30,8 +30,7 @@ def _normalize_data(data):
 def set_data_normalized(span, key, value):
     # type: (Span, str, Any) -> None
     normalized = _normalize_data(value)
-    if isinstance(normalized, (list, dict, tuple)):
-        # only primitive types allowed
-        span.set_data(key, str(normalized))
-    else:
+    if isinstance(normalized, (int, float, bool, str)):
         span.set_data(key, normalized)
+    else:
+        span.set_data(key, str(normalized))