5 changes: 2 additions & 3 deletions examples/server/tests/features/embeddings.feature
@@ -9,7 +9,7 @@ Feature: llama.cpp server
     And 42 as server seed
     And 2 slots
     And 1024 as batch size
-    And 1024 KV cache size
+    And 2048 KV cache size
     And embeddings extraction
     Then the server is starting
     Then the server is healthy
@@ -87,9 +87,8 @@ Feature: llama.cpp server
     Then the server is idle
     Then all embeddings are generated
 
-  @wip
   Scenario: All embeddings should be the same
-    Given 20 fixed prompts
+    Given 10 fixed prompts
     And a model bert-bge-small
     Given concurrent OAI embedding requests
     Then the server is busy
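Taken together with the steps.py change below, the Background now gives the two slots enough KV cache to hold batch-sized prompts, and the previously @wip scenario is enabled with half as many fixed prompts. A rough back-of-the-envelope check of the numbers (my reading of the configuration, not something the test itself asserts):

n_slots = 2
n_batch = 1024
n_ctx = 2048                       # raised from 1024 in this change
# Assumed interpretation: the llama.cpp server splits its KV cache evenly
# across slots, so each slot must be able to hold one batch-sized prompt.
n_ctx_per_slot = n_ctx // n_slots  # 1024 tokens of context per slot
assert n_ctx_per_slot >= n_batch, "a batch-sized prompt would overflow its slot"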
6 changes: 4 additions & 2 deletions examples/server/tests/features/steps/steps.py
@@ -292,9 +292,10 @@ def step_impl(context, n_ga_w):
 def step_prompt_passkey(context):
     context.prompt_passkey = context.text
 
+
 @step(u'{n_prompts:d} fixed prompts')
 def step_fixed_prompts(context, n_prompts):
-    context.prompts.extend([str(0)*1024 for i in range(n_prompts)])
+    context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)])
     context.n_prompts = n_prompts
 
 
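The rewritten step ties each fixed prompt's length to the batch size configured in the scenario instead of a hard-coded 1024 characters, falling back to 512 when no batch size was set (presumably mirroring the server default). A minimal sketch of the resulting behaviour, using a hypothetical stand-in for behave's context object rather than the real test harness:

class FakeContext:
    """Hypothetical stand-in for behave's context, for illustration only."""
    def __init__(self, n_batch=None):
        self.prompts = []
        self.n_batch = n_batch

def fixed_prompts(context, n_prompts):
    # Same expression as the step above: one '0' character per position in a batch.
    size = context.n_batch if context.n_batch is not None else 512
    context.prompts.extend([str(0) * size for _ in range(n_prompts)])

ctx = FakeContext(n_batch=1024)
fixed_prompts(ctx, 10)
assert len(ctx.prompts) == 10 and all(len(p) == 1024 for p in ctx.prompts)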
@@ -818,7 +819,8 @@ async def request_oai_embeddings(input,
                     "input": input,
                     "model": model,
                 },
-                headers=headers) as response:
+                headers=headers,
+                timeout=3600) as response:
             assert response.status == 200, f"received status code not expected: {response.status}"
             assert response.headers['Access-Control-Allow-Origin'] == origin
             assert response.headers['Content-Type'] == "application/json; charset=utf-8"
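The explicit timeout=3600 presumably keeps long queues of concurrent embedding requests from tripping aiohttp's default five-minute total timeout on the client side. A rough standalone sketch of the same kind of call (the base URL and model name here are placeholders, not values taken from the test suite):

import asyncio
import aiohttp

async def embed(text, base_url="http://localhost:8080"):
    async with aiohttp.ClientSession() as session:
        # aiohttp also accepts a bare number for `timeout`, wrapping it in
        # ClientTimeout(total=...); the explicit form is used here for clarity.
        async with session.post(f"{base_url}/v1/embeddings",
                                json={"input": text, "model": "bert-bge-small"},
                                timeout=aiohttp.ClientTimeout(total=3600)) as response:
            assert response.status == 200
            return (await response.json())["data"][0]["embedding"]

# asyncio.run(embed("0" * 1024))  # requires a running llama.cpp server with embeddings enabled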