From 56e2a9b805f44c7d3f722a4d4ecaf8d0417e5edb Mon Sep 17 00:00:00 2001 From: Christopher Hertel Date: Sun, 22 Jun 2025 12:23:58 +0200 Subject: [PATCH] refactor: rename `Embedder` to `Indexer` and open input to iterable --- README.md | 8 +++---- examples/store/mongodb-similarity-search.php | 6 ++--- examples/store/pinecone-similarity-search.php | 6 ++--- src/Store/{Embedder.php => Indexer.php} | 8 +++---- .../Vector}/NullVectorTest.php | 2 +- .../Vector}/VectorTest.php | 2 +- .../{EmbedderTest.php => IndexerTest.php} | 24 +++++++++---------- 7 files changed, 28 insertions(+), 28 deletions(-) rename src/Store/{Embedder.php => Indexer.php} (90%) rename tests/{Store/Document => Platform/Vector}/NullVectorTest.php (94%) rename tests/{Store/Document => Platform/Vector}/VectorTest.php (93%) rename tests/Store/{EmbedderTest.php => IndexerTest.php} (90%) diff --git a/README.md b/README.md index 018ad930..9c3d1272 100644 --- a/README.md +++ b/README.md @@ -391,22 +391,22 @@ $response = $chain->call($messages); LLM Chain supports document embedding and similarity search using vector stores like ChromaDB, Azure AI Search, MongoDB Atlas Search, or Pinecone. 
-For populating a vector store, LLM Chain provides the service `Embedder`, which requires an instance of an +For populating a vector store, LLM Chain provides the service `Indexer`, which requires an instance of an `EmbeddingsModel` and one of `StoreInterface`, and works with a collection of `Document` objects as input: ```php use PhpLlm\LlmChain\Platform\Bridge\OpenAI\Embeddings; use PhpLlm\LlmChain\Platform\Bridge\OpenAI\PlatformFactory; use PhpLlm\LlmChain\Store\Bridge\Pinecone\Store; -use PhpLlm\LlmChain\Store\Embedder; +use PhpLlm\LlmChain\Store\Indexer; use Probots\Pinecone\Pinecone; -$embedder = new Embedder( +$indexer = new Indexer( PlatformFactory::create($_ENV['OPENAI_API_KEY']), new Embeddings(), new Store(Pinecone::client($_ENV['PINECONE_API_KEY'], $_ENV['PINECONE_HOST']), ); -$embedder->embed($documents); +$indexer->index($documents); ``` The collection of `Document` instances is usually created by text input of your domain entities: diff --git a/examples/store/mongodb-similarity-search.php b/examples/store/mongodb-similarity-search.php index 99bc61f3..c04cc30b 100644 --- a/examples/store/mongodb-similarity-search.php +++ b/examples/store/mongodb-similarity-search.php @@ -13,7 +13,7 @@ use PhpLlm\LlmChain\Store\Bridge\MongoDB\Store; use PhpLlm\LlmChain\Store\Document\Metadata; use PhpLlm\LlmChain\Store\Document\TextDocument; -use PhpLlm\LlmChain\Store\Embedder; +use PhpLlm\LlmChain\Store\Indexer; use Symfony\Component\Dotenv\Dotenv; use Symfony\Component\Uid\Uuid; @@ -52,8 +52,8 @@ // create embeddings for documents $platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']); -$embedder = new Embedder($platform, $embeddings = new Embeddings(), $store); -$embedder->embed($documents); +$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store); +$indexer->index($documents); // initialize the index $store->initialize(); diff --git a/examples/store/pinecone-similarity-search.php b/examples/store/pinecone-similarity-search.php index 
a863dcdd..c9083da9 100644 --- a/examples/store/pinecone-similarity-search.php +++ b/examples/store/pinecone-similarity-search.php @@ -12,7 +12,7 @@ use PhpLlm\LlmChain\Store\Bridge\Pinecone\Store; use PhpLlm\LlmChain\Store\Document\Metadata; use PhpLlm\LlmChain\Store\Document\TextDocument; -use PhpLlm\LlmChain\Store\Embedder; +use PhpLlm\LlmChain\Store\Indexer; use Probots\Pinecone\Pinecone; use Symfony\Component\Dotenv\Dotenv; use Symfony\Component\Uid\Uuid; @@ -46,8 +46,8 @@ // create embeddings for documents $platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']); -$embedder = new Embedder($platform, $embeddings = new Embeddings(), $store); -$embedder->embed($documents); +$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store); +$indexer->index($documents); $model = new GPT(GPT::GPT_4O_MINI); diff --git a/src/Store/Embedder.php b/src/Store/Indexer.php similarity index 90% rename from src/Store/Embedder.php rename to src/Store/Indexer.php index be0ef41d..8057c200 100644 --- a/src/Store/Embedder.php +++ b/src/Store/Indexer.php @@ -17,7 +17,7 @@ /** * @author Christopher Hertel */ -final readonly class Embedder +final readonly class Indexer { private ClockInterface $clock; @@ -32,16 +32,16 @@ public function __construct( } /** - * @param TextDocument|TextDocument[] $documents + * @param TextDocument|iterable<TextDocument> $documents */ - public function embed(TextDocument|array $documents, int $chunkSize = 0, int $sleep = 0): void + public function index(TextDocument|iterable $documents, int $chunkSize = 0, int $sleep = 0): void { if ($documents instanceof TextDocument) { $documents = [$documents]; } if ([] === $documents) { - $this->logger->debug('No documents to embed'); + $this->logger->debug('No documents to index'); return; } diff --git a/tests/Store/Document/NullVectorTest.php b/tests/Platform/Vector/NullVectorTest.php similarity index 94% rename from tests/Store/Document/NullVectorTest.php rename to tests/Platform/Vector/NullVectorTest.php index 
c9642c93..ee53cc07 100644 --- a/tests/Store/Document/NullVectorTest.php +++ b/tests/Platform/Vector/NullVectorTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace PhpLlm\LlmChain\Tests\Store\Document; +namespace PhpLlm\LlmChain\Tests\Platform\Vector; use PhpLlm\LlmChain\Platform\Vector\NullVector; use PhpLlm\LlmChain\Platform\Vector\VectorInterface; diff --git a/tests/Store/Document/VectorTest.php b/tests/Platform/Vector/VectorTest.php similarity index 93% rename from tests/Store/Document/VectorTest.php rename to tests/Platform/Vector/VectorTest.php index 16644b81..dd7f731e 100644 --- a/tests/Store/Document/VectorTest.php +++ b/tests/Platform/Vector/VectorTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace PhpLlm\LlmChain\Tests\Store\Document; +namespace PhpLlm\LlmChain\Tests\Platform\Vector; use PhpLlm\LlmChain\Platform\Vector\Vector; use PhpLlm\LlmChain\Platform\Vector\VectorInterface; diff --git a/tests/Store/EmbedderTest.php b/tests/Store/IndexerTest.php similarity index 90% rename from tests/Store/EmbedderTest.php rename to tests/Store/IndexerTest.php index 1db27707..83e1a060 100644 --- a/tests/Store/EmbedderTest.php +++ b/tests/Store/IndexerTest.php @@ -14,7 +14,7 @@ use PhpLlm\LlmChain\Store\Document\Metadata; use PhpLlm\LlmChain\Store\Document\TextDocument; use PhpLlm\LlmChain\Store\Document\VectorDocument; -use PhpLlm\LlmChain\Store\Embedder; +use PhpLlm\LlmChain\Store\Indexer; use PhpLlm\LlmChain\Tests\Double\PlatformTestHandler; use PhpLlm\LlmChain\Tests\Double\TestStore; use PHPUnit\Framework\Attributes\CoversClass; @@ -26,7 +26,7 @@ use Symfony\Component\Clock\MockClock; use Symfony\Component\Uid\Uuid; -#[CoversClass(Embedder::class)] +#[CoversClass(Indexer::class)] #[Medium] #[UsesClass(TextDocument::class)] #[UsesClass(Vector::class)] @@ -37,7 +37,7 @@ #[UsesClass(Platform::class)] #[UsesClass(AsyncResponse::class)] #[UsesClass(VectorResponse::class)] -final class EmbedderTest extends TestCase +final class IndexerTest extends 
TestCase { #[Test] public function embedSingleDocument(): void @@ -45,14 +45,14 @@ public function embedSingleDocument(): void $document = new TextDocument($id = Uuid::v4(), 'Test content'); $vector = new Vector([0.1, 0.2, 0.3]); - $embedder = new Embedder( + $indexer = new Indexer( PlatformTestHandler::createPlatform(new VectorResponse($vector)), new Embeddings(), $store = new TestStore(), new MockClock(), ); - $embedder->embed($document); + $indexer->index($document); self::assertCount(1, $store->documents); self::assertInstanceOf(VectorDocument::class, $store->documents[0]); @@ -64,9 +64,9 @@ public function embedSingleDocument(): void public function embedEmptyDocumentList(): void { $logger = self::createMock(LoggerInterface::class); - $logger->expects(self::once())->method('debug')->with('No documents to embed'); + $logger->expects(self::once())->method('debug')->with('No documents to index'); - $embedder = new Embedder( + $indexer = new Indexer( PlatformTestHandler::createPlatform(), new Embeddings(), $store = new TestStore(), @@ -74,7 +74,7 @@ public function embedEmptyDocumentList(): void $logger, ); - $embedder->embed([]); + $indexer->index([]); self::assertSame([], $store->documents); } @@ -86,14 +86,14 @@ public function embedDocumentWithMetadata(): void $document = new TextDocument($id = Uuid::v4(), 'Test content', $metadata); $vector = new Vector([0.1, 0.2, 0.3]); - $embedder = new Embedder( + $indexer = new Indexer( PlatformTestHandler::createPlatform(new VectorResponse($vector)), new Embeddings(), $store = new TestStore(), new MockClock(), ); - $embedder->embed($document); + $indexer->index($document); self::assertSame(1, $store->addCalls); self::assertCount(1, $store->documents); @@ -112,14 +112,14 @@ public function embedWithSleep(): void $document1 = new TextDocument(Uuid::v4(), 'Test content 1'); $document2 = new TextDocument(Uuid::v4(), 'Test content 2'); - $embedder = new Embedder( + $indexer = new Indexer( 
PlatformTestHandler::createPlatform(new VectorResponse($vector1, $vector2)), new Embeddings(), $store = new TestStore(), $clock = new MockClock('2024-01-01 00:00:00'), ); - $embedder->embed( + $indexer->index( documents: [$document1, $document2], sleep: 3 );