12
12
from cleanlab_tlm .utils .rag import Eval , TrustworthyRAG
13
13
from tests .conftest import make_text_unique
14
14
from tests .constants import (
15
- CHARACTERS_PER_TOKEN ,
16
15
MAX_COMBINED_LENGTH_TOKENS ,
17
16
MAX_PROMPT_LENGTH_TOKENS ,
18
17
MAX_RESPONSE_LENGTH_TOKENS ,
19
18
TEST_PROMPT ,
20
19
TEST_PROMPT_BATCH ,
21
20
TEST_RESPONSE ,
21
+ WORD_THAT_EQUALS_ONE_TOKEN ,
22
22
)
23
23
from tests .test_get_trustworthiness_score import is_tlm_score_response_with_error
24
24
from tests .test_prompt import is_tlm_response_with_error
@@ -208,7 +208,7 @@ def test_prompt_too_long_exception_single_prompt(tlm: TLM) -> None:
208
208
"""Tests that bad request error is raised when prompt is too long when calling tlm.prompt with a single prompt."""
209
209
with pytest .raises (TlmBadRequestError ) as exc_info :
210
210
tlm .prompt (
211
- "a" * (MAX_PROMPT_LENGTH_TOKENS + 1 ) * CHARACTERS_PER_TOKEN ,
211
+ WORD_THAT_EQUALS_ONE_TOKEN * (MAX_PROMPT_LENGTH_TOKENS + 1 ),
212
212
)
213
213
214
214
assert exc_info .value .message .startswith ("Prompt length exceeds" )
@@ -221,7 +221,7 @@ def test_prompt_too_long_exception_prompt(tlm: TLM, num_prompts: int) -> None:
221
221
# create batch of prompts with one prompt that is too long
222
222
prompts = [test_prompt ] * num_prompts
223
223
prompt_too_long_index = np .random .randint (0 , num_prompts )
224
- prompts [prompt_too_long_index ] = "a" * (MAX_PROMPT_LENGTH_TOKENS + 1 ) * CHARACTERS_PER_TOKEN
224
+ prompts [prompt_too_long_index ] = WORD_THAT_EQUALS_ONE_TOKEN * (MAX_PROMPT_LENGTH_TOKENS + 1 )
225
225
226
226
tlm_responses = cast (list [TLMResponse ], tlm .prompt (prompts ))
227
227
@@ -232,8 +232,8 @@ def test_response_too_long_exception_single_score(tlm: TLM) -> None:
232
232
"""Tests that bad request error is raised when response is too long when calling tlm.get_trustworthiness_score with a single prompt."""
233
233
with pytest .raises (TlmBadRequestError ) as exc_info :
234
234
tlm .get_trustworthiness_score (
235
- "a" ,
236
- "a" * (MAX_RESPONSE_LENGTH_TOKENS + 1 ) * CHARACTERS_PER_TOKEN ,
235
+ WORD_THAT_EQUALS_ONE_TOKEN ,
236
+ WORD_THAT_EQUALS_ONE_TOKEN * (MAX_RESPONSE_LENGTH_TOKENS + 1 ),
237
237
)
238
238
239
239
assert exc_info .value .message .startswith ("Response length exceeds" )
@@ -247,7 +247,7 @@ def test_response_too_long_exception_score(tlm: TLM, num_prompts: int) -> None:
247
247
prompts = [test_prompt ] * num_prompts
248
248
responses = [TEST_RESPONSE ] * num_prompts
249
249
response_too_long_index = np .random .randint (0 , num_prompts )
250
- responses [response_too_long_index ] = "a" * (MAX_RESPONSE_LENGTH_TOKENS + 1 ) * CHARACTERS_PER_TOKEN
250
+ responses [response_too_long_index ] = WORD_THAT_EQUALS_ONE_TOKEN * (MAX_RESPONSE_LENGTH_TOKENS + 1 )
251
251
252
252
tlm_responses = cast (list [TLMScore ], tlm .get_trustworthiness_score (prompts , responses ))
253
253
@@ -258,8 +258,8 @@ def test_prompt_too_long_exception_single_score(tlm: TLM) -> None:
258
258
"""Tests that bad request error is raised when prompt is too long when calling tlm.get_trustworthiness_score with a single prompt."""
259
259
with pytest .raises (TlmBadRequestError ) as exc_info :
260
260
tlm .get_trustworthiness_score (
261
- "a" * (MAX_PROMPT_LENGTH_TOKENS + 1 ) * CHARACTERS_PER_TOKEN ,
262
- "a" ,
261
+ WORD_THAT_EQUALS_ONE_TOKEN * (MAX_PROMPT_LENGTH_TOKENS + 1 ),
262
+ WORD_THAT_EQUALS_ONE_TOKEN ,
263
263
)
264
264
265
265
assert exc_info .value .message .startswith ("Prompt length exceeds" )
@@ -273,7 +273,7 @@ def test_prompt_too_long_exception_score(tlm: TLM, num_prompts: int) -> None:
273
273
prompts = [test_prompt ] * num_prompts
274
274
responses = [TEST_RESPONSE ] * num_prompts
275
275
prompt_too_long_index = np .random .randint (0 , num_prompts )
276
- prompts [prompt_too_long_index ] = "a" * (MAX_PROMPT_LENGTH_TOKENS + 1 ) * CHARACTERS_PER_TOKEN
276
+ prompts [prompt_too_long_index ] = WORD_THAT_EQUALS_ONE_TOKEN * (MAX_PROMPT_LENGTH_TOKENS + 1 )
277
277
278
278
tlm_responses = cast (list [TLMScore ], tlm .get_trustworthiness_score (prompts , responses ))
279
279
@@ -286,8 +286,8 @@ def test_combined_too_long_exception_single_score(tlm: TLM) -> None:
286
286
287
287
with pytest .raises (TlmBadRequestError ) as exc_info :
288
288
tlm .get_trustworthiness_score (
289
- "a" * max_prompt_length * CHARACTERS_PER_TOKEN ,
290
- "a" * MAX_RESPONSE_LENGTH_TOKENS * CHARACTERS_PER_TOKEN ,
289
+ WORD_THAT_EQUALS_ONE_TOKEN * max_prompt_length ,
290
+ WORD_THAT_EQUALS_ONE_TOKEN * MAX_RESPONSE_LENGTH_TOKENS ,
291
291
)
292
292
293
293
assert exc_info .value .message .startswith ("Prompt and response combined length exceeds" )
@@ -306,8 +306,8 @@ def test_prompt_and_response_combined_too_long_exception_batch_score(tlm: TLM, n
306
306
combined_too_long_index = np .random .randint (0 , num_prompts )
307
307
308
308
max_prompt_length = MAX_COMBINED_LENGTH_TOKENS - MAX_RESPONSE_LENGTH_TOKENS + 1
309
- prompts [combined_too_long_index ] = "a" * max_prompt_length * CHARACTERS_PER_TOKEN
310
- responses [combined_too_long_index ] = "a" * MAX_RESPONSE_LENGTH_TOKENS * CHARACTERS_PER_TOKEN
309
+ prompts [combined_too_long_index ] = WORD_THAT_EQUALS_ONE_TOKEN * max_prompt_length
310
+ responses [combined_too_long_index ] = WORD_THAT_EQUALS_ONE_TOKEN * MAX_RESPONSE_LENGTH_TOKENS
311
311
312
312
tlm_responses = cast (list [TLMScore ], tlm .get_trustworthiness_score (prompts , responses ))
313
313
0 commit comments