Skip to content
This repository was archived by the owner on Jul 16, 2025. It is now read-only.

Commit 034da76

Browse files
committed
feat: Add Gemini Embeddings
1 parent a42da3a commit 034da76

File tree

7 files changed

+308
-2
lines changed

7 files changed

+308
-2
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<?php
2+
3+
use Doctrine\DBAL\DriverManager;
4+
use Doctrine\DBAL\Tools\DsnParser;
5+
use PhpLlm\LlmChain\Chain\Chain;
6+
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor;
7+
use PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch;
8+
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox;
9+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
10+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings\TaskType;
11+
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
12+
use PhpLlm\LlmChain\Platform\Bridge\Google\PlatformFactory;
13+
use PhpLlm\LlmChain\Platform\Message\Message;
14+
use PhpLlm\LlmChain\Platform\Message\MessageBag;
15+
use PhpLlm\LlmChain\Store\Bridge\MariaDB\Store;
16+
use PhpLlm\LlmChain\Store\Document\Metadata;
17+
use PhpLlm\LlmChain\Store\Document\TextDocument;
18+
use PhpLlm\LlmChain\Store\Indexer;
19+
use Symfony\Component\Dotenv\Dotenv;
20+
use Symfony\Component\Uid\Uuid;
21+
22+
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');

// Both the Google API key and the MariaDB DSN are needed below; stop early when either is missing.
if (empty($_ENV['GOOGLE_API_KEY']) || empty($_ENV['MARIADB_URI'])) {
    echo 'Please set GOOGLE_API_KEY and MARIADB_URI environment variables.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = Store::fromDbal(
    connection: DriverManager::getConnection((new DsnParser())->parse($_ENV['MARIADB_URI'])),
    tableName: 'my_table',
    indexName: 'my_index',
    vectorFieldName: 'embedding',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// wrap every movie in a text document; the documents are handed to the indexer further down
$documents = [];
foreach ($movies as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// initialize the table
$store->initialize(['dimensions' => 768]);

// create embeddings for documents
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
$embeddings = new Embeddings(options: ['dimensions' => 768, 'task_type' => TaskType::SemanticSimilarity]);
$indexer = new Indexer($platform, $embeddings, $store);
$indexer->index($documents);

// chat model that answers the question with the help of the similarity search tool
$model = new Gemini(Gemini::GEMINI_2_FLASH_LITE);

$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new ChainProcessor($toolbox);
// the same processor instance is passed in both processor lists of the chain
$chain = new Chain($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response->getContent().\PHP_EOL;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpLlm\LlmChain\Platform\Bridge\Google;
6+
7+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings\TaskType;
8+
use PhpLlm\LlmChain\Platform\Capability;
9+
use PhpLlm\LlmChain\Platform\Model;
10+
11+
/**
 * Model definition for the embedding models reachable through the Google bridge.
 *
 * @author Valtteri R <[email protected]>
 */
class Embeddings extends Model
{
    /**
     * Supported dimensions: 3072, 1536, or 768.
     */
    public const GEMINI_EMBEDDING_EXP_03_07 = 'gemini-embedding-exp-03-07';

    /**
     * Fixed 768 dimensions.
     */
    public const TEXT_EMBEDDING_004 = 'text-embedding-004';

    /**
     * Fixed 768 dimensions.
     */
    public const EMBEDDING_001 = 'embedding-001';

    /**
     * Registers the model under the given name with the INPUT_MULTIPLE capability.
     *
     * @param string                                                $name    model identifier, defaults to GEMINI_EMBEDDING_EXP_03_07
     * @param array{dimensions?: int, task_type?: TaskType|string} $options model options handed to the parent model unchanged
     */
    public function __construct(string $name = self::GEMINI_EMBEDDING_EXP_03_07, array $options = [])
    {
        $capabilities = [Capability::INPUT_MULTIPLE];

        parent::__construct($name, $capabilities, $options);
    }
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
<?php
2+
3+
namespace PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
4+
5+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
6+
use PhpLlm\LlmChain\Platform\Exception\RuntimeException;
7+
use PhpLlm\LlmChain\Platform\Model;
8+
use PhpLlm\LlmChain\Platform\ModelClientInterface;
9+
use PhpLlm\LlmChain\Platform\Response\VectorResponse;
10+
use PhpLlm\LlmChain\Platform\ResponseConverterInterface;
11+
use PhpLlm\LlmChain\Platform\Vector\Vector;
12+
use Symfony\Contracts\HttpClient\HttpClientInterface;
13+
use Symfony\Contracts\HttpClient\ResponseInterface;
14+
15+
/**
 * Model client and response converter for Google embedding models.
 *
 * Requests are sent to the "batchEmbedContents" endpoint, one request entry per input text,
 * and the returned embeddings are converted into a VectorResponse.
 *
 * @author Valtteri R <[email protected]>
 */
final readonly class ModelClient implements ModelClientInterface, ResponseConverterInterface
{
    public function __construct(
        private HttpClientInterface $httpClient,
        #[\SensitiveParameter]
        private string $apiKey,
    ) {
    }

    /**
     * Only Embeddings models are handled by this client.
     */
    public function supports(Model $model): bool
    {
        return $model instanceof Embeddings;
    }

    /**
     * Posts one embedding request per input text in a single batch call.
     *
     * The model options "dimensions" and "task_type" and the call option "title" are optional;
     * entries that are not set are left out of the request body.
     *
     * @param array<string|int, mixed>|string $payload a single text or a collection of texts to embed
     * @param array<string, mixed>            $options call options; only "title" is read here
     */
    public function request(Model $model, array|string $payload, array $options = []): ResponseInterface
    {
        $modelName = $model->getName();
        $modelOptions = $model->getOptions();
        $url = \sprintf('https://generativelanguage.googleapis.com/v1beta/models/%s:%s', $modelName, 'batchEmbedContents');

        // array_map() keeps the keys of its input, so normalise to a list first: otherwise a keyed or
        // non-sequential payload would make "requests" serialise as a JSON object instead of an array.
        $texts = array_values(\is_array($payload) ? $payload : [$payload]);

        return $this->httpClient->request('POST', $url, [
            'headers' => [
                'x-goog-api-key' => $this->apiKey,
            ],
            'json' => [
                'requests' => array_map(
                    // array_filter() without a callback strips the optional fields that were not provided.
                    static fn (string $text): array => array_filter([
                        'model' => 'models/'.$modelName,
                        'content' => ['parts' => [['text' => $text]]],
                        'outputDimensionality' => $modelOptions['dimensions'] ?? null,
                        'taskType' => $modelOptions['task_type'] ?? null,
                        'title' => $options['title'] ?? null,
                    ]),
                    $texts,
                ),
            ],
        ]);
    }

    /**
     * Builds a VectorResponse holding one Vector per entry of the "embeddings" list.
     *
     * @param array<string, mixed> $options not used by this converter
     *
     * @throws RuntimeException when the decoded response has no "embeddings" key
     */
    public function convert(ResponseInterface $response, array $options = []): VectorResponse
    {
        $data = $response->toArray();

        if (!isset($data['embeddings'])) {
            throw new RuntimeException('Response does not contain data');
        }

        return new VectorResponse(
            ...array_map(
                static fn (array $item): Vector => new Vector($item['values']),
                $data['embeddings'],
            ),
        );
    }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
3+
namespace PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
4+
5+
/**
 * Task type identifiers that can be passed as the "task_type" option of an embeddings model.
 *
 * NOTE(review): the members are declared as string constants, not as enum cases, so each of them
 * evaluates to a plain string and the enum itself has no cases.
 */
enum TaskType: string
{
    /**
     * Unset value, which will default to one of the other enum values.
     */
    public const TaskTypeUnspecified = 'TASK_TYPE_UNSPECIFIED';

    /**
     * Specifies the given text is a query in a search/retrieval setting.
     */
    public const RetrievalQuery = 'RETRIEVAL_QUERY';

    /**
     * Specifies the given text is a document from the corpus being searched.
     */
    public const RetrievalDocument = 'RETRIEVAL_DOCUMENT';

    /**
     * Specifies the given text will be used for STS.
     */
    public const SemanticSimilarity = 'SEMANTIC_SIMILARITY';

    /**
     * Specifies that the given text will be classified.
     */
    public const Classification = 'CLASSIFICATION';

    /**
     * Specifies that the embeddings will be used for clustering.
     */
    public const Clustering = 'CLUSTERING';

    /**
     * Specifies that the given text will be used for question answering.
     */
    public const QuestionAnswering = 'QUESTION_ANSWERING';

    /**
     * Specifies that the given text will be used for fact verification.
     */
    public const FactVerification = 'FACT_VERIFICATION';

    /**
     * Specifies that the given text will be used for code retrieval.
     */
    public const CodeRetrievalQuery = 'CODE_RETRIEVAL_QUERY';
}

src/Platform/Bridge/Google/PlatformFactory.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
use PhpLlm\LlmChain\Platform\Bridge\Google\Contract\ToolCallMessageNormalizer;
1010
use PhpLlm\LlmChain\Platform\Bridge\Google\Contract\ToolNormalizer;
1111
use PhpLlm\LlmChain\Platform\Bridge\Google\Contract\UserMessageNormalizer;
12+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings\ModelClient;
1213
use PhpLlm\LlmChain\Platform\Contract;
1314
use PhpLlm\LlmChain\Platform\Platform;
1415
use Symfony\Component\HttpClient\EventSourceHttpClient;
@@ -26,8 +27,9 @@ public static function create(
2627
): Platform {
2728
$httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient);
2829
$responseHandler = new ModelHandler($httpClient, $apiKey);
30+
$embeddings = new ModelClient($httpClient, $apiKey);
2931

30-
return new Platform([$responseHandler], [$responseHandler], Contract::create(
32+
return new Platform([$responseHandler, $embeddings], [$responseHandler, $embeddings], Contract::create(
3133
new AssistantMessageNormalizer(),
3234
new MessageBagNormalizer(),
3335
new ToolNormalizer(),

src/Platform/ModelClientInterface.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ interface ModelClientInterface
1414
public function supports(Model $model): bool;
1515

1616
/**
17-
* @param array<string, mixed> $payload
17+
* @param array<string|int, mixed> $payload
1818
* @param array<string, mixed> $options
1919
*/
2020
public function request(Model $model, array|string $payload, array $options = []): ResponseInterface;
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpLlm\LlmChain\Tests\Platform\Bridge\Google\Embeddings;
6+
7+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
8+
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings\ModelClient;
9+
use PhpLlm\LlmChain\Platform\Response\VectorResponse;
10+
use PhpLlm\LlmChain\Platform\Vector\Vector;
11+
use PHPUnit\Framework\Attributes\CoversClass;
12+
use PHPUnit\Framework\Attributes\Small;
13+
use PHPUnit\Framework\Attributes\Test;
14+
use PHPUnit\Framework\Attributes\UsesClass;
15+
use PHPUnit\Framework\TestCase;
16+
use Symfony\Contracts\HttpClient\HttpClientInterface;
17+
use Symfony\Contracts\HttpClient\ResponseInterface;
18+
19+
#[CoversClass(ModelClient::class)]
#[Small]
#[UsesClass(Vector::class)]
#[UsesClass(VectorResponse::class)]
#[UsesClass(Embeddings::class)]
final class EmbeddingsModelClientTest extends TestCase
{
    /**
     * The client must POST one request entry per input text, carrying the model options.
     */
    #[Test]
    public function itMakesARequestWithCorrectPayload(): void
    {
        $expectedData = $this->decodeEmbeddingStub();

        $response = $this->createStub(ResponseInterface::class);
        $response
            ->method('toArray')
            ->willReturn($expectedData);

        $httpClient = self::createMock(HttpClientInterface::class);
        $httpClient->expects(self::once())
            ->method('request')
            ->with(
                'POST',
                'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-exp-03-07:batchEmbedContents',
                [
                    'headers' => ['x-goog-api-key' => 'test'],
                    'json' => [
                        'requests' => [
                            [
                                'model' => 'models/gemini-embedding-exp-03-07',
                                'content' => ['parts' => [['text' => 'payload1']]],
                                'outputDimensionality' => 1536,
                                'taskType' => 'CLASSIFICATION',
                            ],
                            [
                                'model' => 'models/gemini-embedding-exp-03-07',
                                'content' => ['parts' => [['text' => 'payload2']]],
                                'outputDimensionality' => 1536,
                                'taskType' => 'CLASSIFICATION',
                            ],
                        ],
                    ],
                ],
            )
            ->willReturn($response);

        $model = new Embeddings(Embeddings::GEMINI_EMBEDDING_EXP_03_07, ['dimensions' => 1536, 'task_type' => 'CLASSIFICATION']);

        $httpResponse = (new ModelClient($httpClient, 'test'))->request($model, ['payload1', 'payload2']);
        self::assertSame($expectedData, $httpResponse->toArray());
    }

    /**
     * Every entry of the "embeddings" list must end up as one vector, in the same order.
     */
    #[Test]
    public function itConvertsAResponseToAVectorResponse(): void
    {
        $response = $this->createStub(ResponseInterface::class);
        $response
            ->method('toArray')
            ->willReturn($this->decodeEmbeddingStub());

        // No HTTP call is expected while converting, so a stub without expectations is sufficient.
        $httpClient = $this->createStub(HttpClientInterface::class);

        $vectorResponse = (new ModelClient($httpClient, 'test'))->convert($response);
        $convertedContent = $vectorResponse->getContent();

        self::assertCount(2, $convertedContent);

        self::assertSame([0.3, 0.4, 0.4], $convertedContent[0]->getData());
        self::assertSame([0.0, 0.0, 0.2], $convertedContent[1]->getData());
    }

    /**
     * Decodes the JSON fixture into an array and fails loudly if the fixture is malformed.
     *
     * @return array<string, mixed>
     */
    private function decodeEmbeddingStub(): array
    {
        return json_decode($this->getEmbeddingStub(), true, flags: \JSON_THROW_ON_ERROR);
    }

    /**
     * Raw JSON fixture with two embeddings of three values each.
     */
    private function getEmbeddingStub(): string
    {
        return <<<'JSON'
            {
              "embeddings": [
                {
                  "values": [0.3, 0.4, 0.4]
                },
                {
                  "values": [0.0, 0.0, 0.2]
                }
              ]
            }
            JSON;
    }
}

0 commit comments

Comments
 (0)