Commit 7b98c51

Add support for openai v1 completions (#1006)
* Add chat completion & header instrumentation
* Add message id tests & fix bug
* Add error tracing tests & impl for v1
* Use latest to test instead of <1.0
* Ignore v1 embedding error tests
* Capture _usage_based headers in v0
* Verify all error attrs are asserted
* Use body instead of content
* Handle body being None
1 parent 140c7bc commit 7b98c51

File tree: 9 files changed (+1232 −92 lines)

newrelic/config.py

Lines changed: 5 additions & 0 deletions
@@ -2058,6 +2058,11 @@ def _process_module_builtin_defaults():
         "newrelic.hooks.mlmodel_openai",
         "instrument_openai_util",
     )
+    _process_module_definition(
+        "openai.resources.chat.completions",
+        "newrelic.hooks.mlmodel_openai",
+        "instrument_openai_resources_chat_completions",
+    )
     _process_module_definition(
         "openai._base_client",
         "newrelic.hooks.mlmodel_openai",

newrelic/hooks/mlmodel_openai.py

Lines changed: 124 additions & 46 deletions
Large diffs are not rendered by default.
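This collapsed diff is where most of the commit message lands: chat-completion and header instrumentation for the v1 client, error tracing, reading the error payload from `body` instead of `content`, and guarding against `body` being `None`. A minimal sketch of that last guard, assuming a v1-style exception that carries the parsed error payload on a `body` attribute (names here are illustrative):

# Hedged sketch: assumes exc.body holds the parsed error dict in openai v1
# and may be None, per the commit message "Handle body being None".
def extract_error_attributes(exc):
    body = getattr(exc, "body", None) or {}
    error = body.get("error", {}) if isinstance(body, dict) else {}
    # e.g. ("Incorrect API key provided: ...", "invalid_api_key", "invalid_request_error")
    return error.get("message"), error.get("code"), error.get("type")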

tests/mlmodel_openai/_mock_external_openai_server.py

Lines changed: 64 additions & 2 deletions
@@ -35,7 +35,7 @@
     {
         "content-type": "application/json",
         "openai-model": "gpt-3.5-turbo-0613",
-        "openai-organization": "foobar-jtbczk",
+        "openai-organization": "new-relic-nkmd8b",
         "openai-processing-ms": "6326",
         "openai-version": "2020-10-01",
         "x-ratelimit-limit-requests": "200",
@@ -60,7 +60,45 @@
                 "index": 0,
                 "message": {
                     "role": "assistant",
-                    "content": "To convert 212 degrees Fahrenheit to Celsius, you can use the formula:\n\n\u00b0C = (\u00b0F - 32) x 5/9\n\nSubstituting the value, we get:\n\n\u00b0C = (212 - 32) x 5/9\n = 180 x 5/9\n = 100\n\nTherefore, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+                    "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+                },
+                "finish_reason": "stop",
+            }
+        ],
+        "usage": {"prompt_tokens": 26, "completion_tokens": 82, "total_tokens": 108},
+        "system_fingerprint": None,
+    },
+    ],
+    "You are a mathematician.": [
+        {
+            "content-type": "application/json",
+            "openai-model": "gpt-3.5-turbo-0613",
+            "openai-organization": "new-relic-nkmd8b",
+            "openai-processing-ms": "6326",
+            "openai-version": "2020-10-01",
+            "x-ratelimit-limit-requests": "200",
+            "x-ratelimit-limit-tokens": "40000",
+            "x-ratelimit-limit-tokens_usage_based": "40000",
+            "x-ratelimit-remaining-requests": "198",
+            "x-ratelimit-remaining-tokens": "39880",
+            "x-ratelimit-remaining-tokens_usage_based": "39880",
+            "x-ratelimit-reset-requests": "11m32.334s",
+            "x-ratelimit-reset-tokens": "180ms",
+            "x-ratelimit-reset-tokens_usage_based": "180ms",
+            "x-request-id": "f8d0f53b6881c5c0a3698e55f8f410cd",
+        },
+        200,
+        {
+            "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTeat",
+            "object": "chat.completion",
+            "created": 1701995833,
+            "model": "gpt-3.5-turbo-0613",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "1 plus 2 is 3.",
                 },
                 "finish_reason": "stop",
             }
@@ -69,6 +107,30 @@
         "system_fingerprint": None,
     },
     ],
+    "Invalid API key.": [
+        {"content-type": "application/json; charset=utf-8", "x-request-id": "a51821b9fd83d8e0e04542bedc174310"},
+        401,
+        {
+            "error": {
+                "message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.",
+                "type": "invalid_request_error",
+                "param": None,
+                "code": "invalid_api_key",
+            }
+        },
+    ],
+    "Model does not exist.": [
+        {"content-type": "application/json; charset=utf-8", "x-request-id": "3b0f8e510ee8a67c08a227a98eadbbe6"},
+        404,
+        {
+            "error": {
+                "message": "The model `does-not-exist` does not exist",
+                "type": "invalid_request_error",
+                "param": None,
+                "code": "model_not_found",
+            }
+        },
+    ],
     "This is an embedding test.": [
         {
             "content-type": "application/json",

tests/mlmodel_openai/conftest.py

Lines changed: 42 additions & 40 deletions
@@ -52,7 +52,9 @@
 if get_openai_version() < (1, 0):
     collect_ignore = [
         "test_chat_completion_v1.py",
+        "test_chat_completion_error_v1.py",
         "test_embeddings_v1.py",
+        "test_get_llm_message_ids_v1.py",
         "test_chat_completion_error_v1.py",
         "test_embeddings_error_v1.py",
     ]
@@ -63,6 +65,7 @@
         "test_chat_completion.py",
         "test_get_llm_message_ids.py",
         "test_chat_completion_error.py",
+        "test_embeddings_error_v1.py",
     ]


@@ -143,9 +146,9 @@ def set_info():
 def openai_server(
     openai_version,  # noqa: F811
     openai_clients,
-    wrap_openai_base_client_process_response,
     wrap_openai_api_requestor_request,
     wrap_openai_api_requestor_interpret_response,
+    wrap_httpx_client_send,
 ):
     """
     This fixture will either create a mocked backend for testing purposes, or will
@@ -165,9 +168,7 @@ def openai_server(
         yield  # Run tests
     else:
         # Apply function wrappers to record data
-        wrap_function_wrapper(
-            "openai._base_client", "BaseClient._process_response", wrap_openai_base_client_process_response
-        )
+        wrap_function_wrapper("httpx._client", "Client.send", wrap_httpx_client_send)
         yield  # Run tests
         # Write responses to audit log
         with open(OPENAI_AUDIT_LOG_FILE, "w") as audit_log_fp:
@@ -177,6 +178,43 @@
     yield


+def bind_send_params(request, *, stream=False, **kwargs):
+    return request
+
+
+@pytest.fixture(scope="session")
+def wrap_httpx_client_send(extract_shortened_prompt):  # noqa: F811
+    def _wrap_httpx_client_send(wrapped, instance, args, kwargs):
+        request = bind_send_params(*args, **kwargs)
+        if not request:
+            return wrapped(*args, **kwargs)
+
+        params = json.loads(request.content.decode("utf-8"))
+        prompt = extract_shortened_prompt(params)
+
+        # Send request
+        response = wrapped(*args, **kwargs)
+
+        if response.status_code >= 400 or response.status_code < 200:
+            prompt = "error"
+
+        rheaders = getattr(response, "headers")
+
+        headers = dict(
+            filter(
+                lambda k: k[0].lower() in RECORDED_HEADERS
+                or k[0].lower().startswith("openai")
+                or k[0].lower().startswith("x-ratelimit"),
+                rheaders.items(),
+            )
+        )
+        body = json.loads(response.content.decode("utf-8"))
+        OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body  # Append response data to log
+        return response
+
+    return _wrap_httpx_client_send
+
+
 @pytest.fixture(scope="session")
 def wrap_openai_api_requestor_interpret_response():
     def _wrap_openai_api_requestor_interpret_response(wrapped, instance, args, kwargs):
@@ -235,39 +273,3 @@ def bind_request_params(method, url, params=None, *args, **kwargs):

 def bind_request_interpret_response_params(result, stream):
     return result.content.decode("utf-8"), result.status_code, result.headers
-
-
-def bind_base_client_process_response(
-    cast_to,
-    options,
-    response,
-    stream,
-    stream_cls,
-):
-    return options, response
-
-
-@pytest.fixture(scope="session")
-def wrap_openai_base_client_process_response(extract_shortened_prompt):  # noqa: F811
-    def _wrap_openai_base_client_process_response(wrapped, instance, args, kwargs):
-        options, response = bind_base_client_process_response(*args, **kwargs)
-        if not options:
-            return wrapped(*args, **kwargs)
-
-        data = getattr(options, "json_data", {})
-        prompt = extract_shortened_prompt(data)
-        rheaders = getattr(response, "headers")
-
-        headers = dict(
-            filter(
-                lambda k: k[0].lower() in RECORDED_HEADERS
-                or k[0].lower().startswith("openai")
-                or k[0].lower().startswith("x-ratelimit"),
-                rheaders.items(),
-            )
-        )
-        body = json.loads(response.content.decode("utf-8"))
-        OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body  # Append response data to audit log
-        return wrapped(*args, **kwargs)
-
-    return _wrap_openai_base_client_process_response
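Net effect of this conftest change: response recording moves from openai's private `BaseClient._process_response` to the underlying `httpx.Client.send`, the transport the v1 client actually uses, which is less likely to shift between openai releases. The header filter itself is unchanged; as a standalone restatement (assuming `RECORDED_HEADERS` is a set of lowercase header names, as the suite's usage suggests):

# Standalone restatement of the filter in wrap_httpx_client_send above.
# RECORDED_HEADERS is assumed to be a set of lowercase header names.
RECORDED_HEADERS = {"content-type", "x-request-id"}


def filter_recorded_headers(rheaders):
    # Keep explicitly recorded headers plus all openai/x-ratelimit headers.
    return {
        name: value
        for name, value in rheaders.items()
        if name.lower() in RECORDED_HEADERS
        or name.lower().startswith("openai")
        or name.lower().startswith("x-ratelimit")
    }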

tests/mlmodel_openai/test_chat_completion.py

Lines changed: 6 additions & 0 deletions
@@ -63,6 +63,9 @@
         "response.headers.ratelimitResetRequests": "7m12s",
         "response.headers.ratelimitRemainingTokens": 39940,
         "response.headers.ratelimitRemainingRequests": 199,
+        "response.headers.ratelimitLimitTokensUsageBased": "",
+        "response.headers.ratelimitResetTokensUsageBased": "",
+        "response.headers.ratelimitRemainingTokensUsageBased": "",
         "vendor": "openAI",
         "ingest_source": "Python",
         "response.number_of_messages": 3,
@@ -179,6 +182,9 @@ def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
         "response.headers.ratelimitResetRequests": "7m12s",
         "response.headers.ratelimitRemainingTokens": 39940,
         "response.headers.ratelimitRemainingRequests": 199,
+        "response.headers.ratelimitLimitTokensUsageBased": "",
+        "response.headers.ratelimitResetTokensUsageBased": "",
+        "response.headers.ratelimitRemainingTokensUsageBased": "",
         "vendor": "openAI",
         "ingest_source": "Python",
         "response.number_of_messages": 3,
